├── test.js ├── comment.js ├── js ├── b.js └── a.js ├── README.md ├── helpers.py ├── browser.css ├── runtime.js ├── js_context.py ├── network.py ├── html_parser.py ├── css_parser.py ├── server.py └── main.py /test.js: -------------------------------------------------------------------------------- 1 | var x = 2 2 | x + x -------------------------------------------------------------------------------- /comment.js: -------------------------------------------------------------------------------- 1 | console.log('hello world') -------------------------------------------------------------------------------- /js/b.js: -------------------------------------------------------------------------------- 1 | console.log(x + x) 2 | //call_python("log", x + x) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # toy-browser 2 | An experimental browser written in python based on https://browser.engineering/ 3 | -------------------------------------------------------------------------------- /helpers.py: -------------------------------------------------------------------------------- 1 | def tree_to_list(tree, list): 2 | list.append(tree) 3 | for child in tree.children: 4 | tree_to_list(child, list) 5 | return list 6 | -------------------------------------------------------------------------------- /browser.css: -------------------------------------------------------------------------------- 1 | pre { background-color: gray; } 2 | a { color: blue; } 3 | i { font-style: italic; } 4 | b { font-weight: bold; } 5 | small { font-size: 90%; } 6 | big { font-size: 110%; } 7 | h1 { font-size: 300%; font-weight: bold} 8 | label { font-weight: bold; color: red; } 9 | input { 10 | font-size: 16px; font-weight: normal; font-style: normal; 11 | background-color: lightblue; 12 | } 13 | button { 14 | font-size: 16px; font-weight: normal; font-style: normal; 15 | background-color: orange; 16 | } -------------------------------------------------------------------------------- /js/a.js: -------------------------------------------------------------------------------- 1 | var x = 3; 2 | 3 | var nodes = document.querySelectorAll('p') 4 | console.log(nodes.map(function(node) { return node.getAttribute('class') })) 5 | 6 | var label = document.querySelectorAll("label")[0]; 7 | var inputs = document.querySelectorAll('input'); 8 | for (var i = 0; i < inputs.length; i++) { 9 | var input = inputs[i] 10 | input.addEventListener('keydown', function(e) { 11 | console.log(this, e) 12 | var name = this.getAttribute("name"); 13 | var value = this.getAttribute("value"); 14 | label.innerHTML = "Input " + name + " has a value of " + value; 15 | // console.log("Input " + name + " has a value of " + value) 16 | }) 17 | 18 | } 19 | 20 | 21 | var form = document.querySelectorAll("form")[0]; 22 | form.addEventListener("submit", function(e) { 23 | console.log('you shall not pass!!!') 24 | // e.preventDefault(); 25 | }); -------------------------------------------------------------------------------- /runtime.js: -------------------------------------------------------------------------------- 1 | console = { 2 | log: function(x) { 3 | call_python('log', x) 4 | } 5 | } 6 | 7 | document = { 8 | querySelectorAll: function(s) { 9 | var handles = call_python("querySelectorAll", s); 10 | return handles.map(function(handle) { return new Node(handle) }) 11 | } 12 | } 13 | 14 | var listeners = {} 15 | 16 | function Node(handle) { this.handle = handle; } 17 | 18 | Node.prototype.getAttribute = function(attr) { 19 | return call_python("getAttribute", this.handle, attr); 20 | } 21 | 22 | Node.prototype.addEventListener = function(type, listener) { 23 | if (!listeners[this.handle]) listeners[this.handle] = {} 24 | var dict = listeners[this.handle] 25 | if (!(type in dict)) dict[type] = [] 26 | dict[type].push(listener) 27 | } 28 | 29 | Node.prototype.dispatchEvent = function(event) { 30 | var type = event.type; 31 | var handle = this.handle 32 | var list = (listeners[handle] && listeners[handle][type]) || [] 33 | for (var i = 0; i < list.length; i++) { 34 | list[i].call(this, event); 35 | } 36 | return event.do_default 37 | } 38 | 39 | Object.defineProperty(Node.prototype, 'innerHTML', { 40 | set: function(s) { 41 | call_python('innerHTML_set', this.handle, s.toString()) 42 | } 43 | }) 44 | 45 | function Event(type) { 46 | this.type = type 47 | this.do_default = true 48 | } 49 | 50 | Event.prototype.preventDefault = function() { 51 | this.do_default = false 52 | } 53 | 54 | function XMLHttpRequest() {} 55 | 56 | XMLHttpRequest.prototype.open = function(method, url, is_async) { 57 | if (is_async) throw new Error('async not implemented yet') 58 | this.method = method 59 | this.url = url 60 | } 61 | 62 | XMLHttpRequest.prototype.send = function(body) { 63 | this.responseText = call_python('XMLHttpRequest_send', this.method, this.url, body) 64 | } -------------------------------------------------------------------------------- /js_context.py: -------------------------------------------------------------------------------- 1 | import dukpy 2 | 3 | from css_parser import CSSParser 4 | from helpers import tree_to_list 5 | from html_parser import HTMLParser 6 | from network import request, url_origin, resolve_url 7 | 8 | EVENT_DISPATCH_CODE = "new Node(dukpy.handle).dispatchEvent(new Event(dukpy.type))" 9 | 10 | class JSContext: 11 | def __init__(self, tab): 12 | self.tab = tab 13 | self.interp = dukpy.JSInterpreter() 14 | 15 | # js to python object mappers 16 | self.node_to_handle = {} 17 | self.handle_to_node = {} 18 | 19 | self.interp.export_function("log", print) 20 | self.interp.export_function("querySelectorAll", 21 | self.querySelectorAll) 22 | self.interp.export_function("getAttribute", 23 | self.getAttribute) 24 | self.interp.export_function("innerHTML_set", 25 | self.innerHTML_set) 26 | self.interp.export_function("XMLHttpRequest_send", 27 | self.XMLHttpRequest_send) 28 | 29 | with open("runtime.js") as f: 30 | self.interp.evaljs(f.read()) 31 | 32 | def run(self, code): 33 | return self.interp.evaljs(code) 34 | 35 | def dispatch_event(self, type, elt): 36 | handle = self.node_to_handle.get(elt, -1) 37 | do_default = self.interp.evaljs(EVENT_DISPATCH_CODE, type=type, handle=handle) 38 | return not do_default 39 | 40 | 41 | def querySelectorAll(self, selector_text): 42 | selector = CSSParser(selector_text).selector() 43 | nodes = [node for node in tree_to_list(self.tab.nodes, []) if selector.matches(node)] 44 | return [self.get_handle(node) for node in nodes] 45 | 46 | def getAttribute(self, handle, attr): 47 | etl = self.handle_to_node[handle] 48 | return etl.attributes.get(attr, None) 49 | 50 | def innerHTML_set(self, handle, s): 51 | doc = HTMLParser("" + s + "").parse() 52 | new_nodes = doc.children[0].children 53 | elt = self.handle_to_node[handle] 54 | elt.children = new_nodes 55 | for child in elt.children: 56 | child.parent = elt 57 | self.tab.render() 58 | 59 | def XMLHttpRequest_send(self, method, url, body): 60 | full_url = resolve_url(url, self.tab.url) 61 | if url_origin(full_url) != url_origin(self.tab.url): 62 | raise Exception("Cross-origin XHR request not allowed") 63 | headers, out = request(full_url, self.tab.url, body) 64 | return out 65 | 66 | def get_handle(self, elt): 67 | if elt not in self.node_to_handle: 68 | handle = len(self.node_to_handle) 69 | self.node_to_handle[elt] = handle 70 | self.handle_to_node[handle] = elt 71 | else: 72 | handle = self.node_to_handle[elt] 73 | return handle -------------------------------------------------------------------------------- /network.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import ssl 3 | 4 | COOKIE_JAR = {} 5 | 6 | 7 | def url_origin(url): 8 | (scheme, host, path) = parse_url(url) 9 | return scheme + "://" + host 10 | 11 | 12 | def parse_url(url): 13 | scheme, url = url.split("://", 1) 14 | if "/" not in url: 15 | url = url + "/" 16 | host, path = url.split("/", 1) 17 | return scheme, host, "/" + path 18 | 19 | 20 | def request(url, top_level_url, payload=None): 21 | (scheme, host, path) = parse_url(url) 22 | assert scheme in ["http", "https"], \ 23 | "Unknown scheme {}".format(scheme) 24 | 25 | s = socket.socket( 26 | family=socket.AF_INET, 27 | type=socket.SOCK_STREAM, 28 | proto=socket.IPPROTO_TCP, 29 | ) 30 | 31 | port = 80 if scheme == "http" else 443 32 | 33 | if ":" in host: 34 | host, port = host.split(":", 1) 35 | port = int(port) 36 | 37 | s.connect((host, port)) 38 | 39 | if scheme == "https": 40 | ctx = ssl.create_default_context() 41 | s = ctx.wrap_socket(s, server_hostname=host) 42 | 43 | method = "POST" if payload else "GET" 44 | 45 | body = "{} {} HTTP/1.0\r\n".format(method, path) + "HOST: {}\r\n".format(host) 46 | if payload: 47 | length = len(payload.encode('utf8')) 48 | body += 'Content-Length: {}\r\n'.format(length) 49 | 50 | if host in COOKIE_JAR: 51 | cookie, params = COOKIE_JAR[host] 52 | allow_cookie = True 53 | if top_level_url and params.get('samesite', 'none') == 'lax': 54 | _, _, top_level_host, _ = top_level_url.split("/", 3) 55 | if ':' in top_level_host: 56 | top_level_host, _ = top_level_host.split(":", 1) 57 | allow_cookie = (host == top_level_host or method == "GET") 58 | if allow_cookie: 59 | body += 'Cookie: {}\r\n'.format(cookie) 60 | 61 | body += "\r\n" + (payload if payload else "") 62 | 63 | s.send(body.encode('utf8')) 64 | 65 | response = s.makefile("r", encoding="utf8", newline="\r\n") 66 | 67 | statusline = response.readline() 68 | version, status, explanation = statusline.split(" ", 2) 69 | assert status == "200", "{}: {}".format(status, explanation) 70 | 71 | headers = {} 72 | while True: 73 | line = response.readline() 74 | if line == "\r\n": break 75 | header, value = line.split(":", 1) 76 | headers[header.lower()] = value.strip() 77 | 78 | assert "transfer-encoding" not in headers 79 | assert "content-encoding" not in headers 80 | 81 | if 'set-cookie' in headers: 82 | cookie = headers['set-cookie'] 83 | params = {} 84 | if ';' in cookie: 85 | cookie, rest = cookie.split(';', 1) 86 | for param_pair in rest.split(';'): 87 | key, value = param_pair.strip().split('=', 1) 88 | params[key.lower()] = value.lower() 89 | COOKIE_JAR[host] = (cookie, params) 90 | 91 | body = response.read() 92 | s.close() 93 | 94 | return headers, body 95 | 96 | 97 | def resolve_url(url, current): 98 | if '://' in url: 99 | return url 100 | elif url.startswith('/'): 101 | scheme, hostpath = current.split('://', 1) 102 | host, oldpath = hostpath.split('/', 1) 103 | return scheme + "://" + host + url 104 | else: 105 | dir, _ = current.rsplit('/', 1) 106 | while url.startswith('../'): 107 | url = url[3:] 108 | if dir.count('/') == 2: continue 109 | dir, _ = dir.rsplit('/', 1) 110 | return dir + '/' + url 111 | -------------------------------------------------------------------------------- /html_parser.py: -------------------------------------------------------------------------------- 1 | from html import unescape 2 | 3 | HTML_ENTITIES = { 4 | '"': '"', 5 | ''': "'", 6 | '&': '&', 7 | '>': '>', 8 | '<': '<', 9 | '⁄': '/' 10 | } 11 | 12 | class Text: 13 | def __init__(self, text, parent): 14 | self.text = text 15 | self.parent = parent 16 | self.children = [] 17 | 18 | def __repr__(self): 19 | return repr(self.text) 20 | 21 | class Element: 22 | def __init__(self, tag, attributes, parent): 23 | self.tag = tag 24 | self.parent = parent 25 | self.attributes = attributes 26 | self.children = [] 27 | 28 | def __repr__(self): 29 | return "<" + self.tag + ">" 30 | 31 | class HTMLParser: 32 | def __init__(self, body): 33 | self.body = body 34 | self.unfinished = [] 35 | 36 | SELF_CLOSING_TAGS = [ 37 | "area", "base", "br", "col", "embed", "hr", "img", "input", 38 | "link", "meta", "param", "source", "track", "wbr", 39 | ] 40 | HEAD_TAGS = [ 41 | "base", "basefont", "bgsound", "noscript", 42 | "link", "meta", "title", "style", "script", 43 | ] 44 | def parse(self): 45 | text = '' 46 | in_angle = False 47 | for c in self.body: 48 | if c == "<": 49 | in_angle = True 50 | if text: self.add_text(text) 51 | text = '' 52 | elif c == ">": 53 | in_angle = False 54 | self.add_tag(text) 55 | text = '' 56 | else: 57 | text += c 58 | if not in_angle and text: 59 | self.add_text(text) 60 | return self.finish() 61 | 62 | def get_attributes(self, text): 63 | parts = text.split() 64 | tag = parts[0].lower() 65 | attributes = {} 66 | for attrpair in parts[1:]: 67 | if '=' in attrpair: 68 | key, value = attrpair.split("=", 1) 69 | if len(value) > 2 and value[0] in ["'", "\""]: 70 | value = value[1:-1] 71 | attributes[key.lower()] = value 72 | else: 73 | attributes[attrpair.lower()] = '' 74 | return tag, attributes 75 | 76 | def html_entities(self, text): 77 | # I'm cheating here cuz I couldn't get the algorithm right 78 | return unescape(text) 79 | # amp_index = None 80 | # out = '' 81 | # for i, c in enumerate(text): 82 | # if c == '&': 83 | # print('found amp') 84 | # amp_index = i 85 | # if c == ';' and amp_index: 86 | # print('found semi') 87 | # print(amp_index) 88 | # entity = text[amp_index + 1:i] 89 | # print(entity) 90 | # char = HTML_ENTITIES[entity] 91 | # out = out[:amp_index] + char 92 | # amp_index = None 93 | # else: 94 | # out += c 95 | # return out 96 | 97 | def add_text(self, text): 98 | if text.isspace(): return 99 | self.implicit_tags(None) 100 | text = self.html_entities(text) 101 | parent = self.unfinished[-1] 102 | node = Text(text, parent) 103 | parent.children.append(node) 104 | 105 | def add_tag(self, tag): 106 | if tag.startswith("!"): return 107 | self.implicit_tags(tag) 108 | tag, attributes = self.get_attributes(tag) 109 | if tag.startswith('/'): 110 | if len(self.unfinished) == 1: return 111 | node = self.unfinished.pop() 112 | parent = self.unfinished[-1] 113 | parent.children.append(node) 114 | elif tag in self.SELF_CLOSING_TAGS: 115 | parent = self.unfinished[-1] 116 | node = Element(tag, attributes, parent) 117 | parent.children.append(node) 118 | else: 119 | parent = self.unfinished[-1] if self.unfinished else None 120 | node = Element(tag, attributes, parent) 121 | self.unfinished.append(node) 122 | 123 | def implicit_tags(self, tag): 124 | while True: 125 | open_tags = [node.tag for node in self.unfinished] 126 | if open_tags == [] and tag != "html": 127 | self.add_tag("html") 128 | elif open_tags == ['html'] and tag not in ['head', 'body', '/html']: 129 | if tag in self.HEAD_TAGS: 130 | self.add_tag("head") 131 | else: 132 | self.add_tag("body") 133 | elif open_tags == ['html', 'head'] and tag not in ['/head'] + self.HEAD_TAGS: 134 | self.add_tag('/head') 135 | else: 136 | break 137 | 138 | def finish(self): 139 | if len(self.unfinished) == 0: 140 | self.add_tag('html') 141 | while len(self.unfinished) > 1: 142 | node = self.unfinished.pop() 143 | parent = self.unfinished[-1] 144 | parent.children.append(node) 145 | return self.unfinished.pop() 146 | -------------------------------------------------------------------------------- /css_parser.py: -------------------------------------------------------------------------------- 1 | from html_parser import Element 2 | 3 | INHERITED_PROPERTIES = { 4 | "font-size": "16px", 5 | "font-style": "normal", 6 | "font-weight": "normal", 7 | "color": "black", 8 | } 9 | 10 | class CSSParser: 11 | def __init__(self, s): 12 | self.s = s 13 | self.i = 0 14 | 15 | def whitespace(self): 16 | while self.i < len(self.s) and self.s[self.i].isspace(): 17 | self.i += 1 18 | 19 | def word(self): 20 | start = self.i 21 | while self.i < len(self.s): 22 | if self.s[self.i].isalnum() or self.s[self.i] in '#-.%': 23 | self.i += 1 24 | else: 25 | break 26 | 27 | assert self.i > start 28 | return self.s[start:self.i] 29 | 30 | def literal(self, literal): 31 | assert self.i < len(self.s) and self.s[self.i] == literal 32 | self.i += 1 33 | 34 | def pair(self): 35 | prop = self.word() 36 | self.whitespace() 37 | self.literal(':') 38 | self.whitespace() 39 | value = self.word() 40 | return prop.lower(), value 41 | 42 | # parse the body of a rule into key value pairs 43 | def body(self): 44 | pairs = {} 45 | while self.i < len(self.s) and self.s[self.i] != '}': 46 | try: 47 | prop, val = self.pair() 48 | pairs[prop.lower()] = val 49 | self.whitespace() 50 | self.literal(';') 51 | self.whitespace() 52 | except AssertionError: 53 | why = self.ignore_until([";", "}"]) 54 | if why == ";": 55 | self.literal(";") 56 | self.whitespace() 57 | else: 58 | break 59 | return pairs 60 | 61 | def ignore_until(self, chars): 62 | while self.i < len(self.s): 63 | if self.s[self.i] in chars: 64 | return self.s[self.i] 65 | else: 66 | self.i += 1 67 | 68 | def selector(self): 69 | out = TagSelector(self.word().lower()) 70 | self.whitespace() 71 | while self.i < len(self.s) and self.s[self.i] != '{': 72 | tag = self.word() 73 | descendant = TagSelector(tag.lower()) 74 | out = DescendantSelector(out, descendant) 75 | self.whitespace() 76 | return out 77 | 78 | def parse(self): 79 | rules = [] 80 | while self.i < len(self.s): 81 | try: 82 | self.whitespace() 83 | selector = self.selector() 84 | self.literal('{') 85 | self.whitespace() 86 | body = self.body() 87 | self.literal('}') 88 | rules.append((selector, body)) 89 | except AssertionError: 90 | why = self.ignore_until(["}"]) 91 | if why == "}": 92 | self.literal("}") 93 | self.whitespace() 94 | else: 95 | break 96 | 97 | return rules 98 | 99 | class TagSelector: 100 | def __init__(self, tag): 101 | self.tag = tag 102 | self.priority = 1 103 | 104 | def matches(self, node): 105 | return isinstance(node, Element) and self.tag == node.tag 106 | 107 | 108 | class DescendantSelector: 109 | def __init__(self, ancestor, descendant): 110 | self.ancestor = ancestor 111 | self.descendant = descendant 112 | self.priority = ancestor.priority + descendant.priority 113 | 114 | def matches(self, node): 115 | if not self.descendant.matches(node): return False 116 | while node.parent: 117 | if self.ancestor.matches(node.parent): return True 118 | node = node.parent 119 | return False 120 | 121 | 122 | def style(node, rules): 123 | node.style = {} 124 | for prop, default_value in INHERITED_PROPERTIES.items(): 125 | if node.parent: 126 | node.style[prop] = node.parent.style[prop] 127 | else: 128 | node.style[prop] = default_value 129 | 130 | for selector, body in rules: 131 | if not selector.matches(node): continue 132 | for prop, value in body.items(): 133 | node.style[prop] = value 134 | 135 | if isinstance(node, Element) and 'style' in node.attributes: 136 | pairs = CSSParser(node.attributes['style']).body() 137 | for prop, val in pairs.items(): 138 | node.style[prop] = val 139 | 140 | # normalize percentage fonts to pixels 141 | if node.style["font-size"].endswith("%"): 142 | if node.parent: 143 | parent_font_size = node.parent.style["font-size"] 144 | else: 145 | parent_font_size = INHERITED_PROPERTIES["font-size"] 146 | node_pct = float(node.style['font-size'][:-1]) / 100 147 | parent_px = float(parent_font_size[:-2]) 148 | node.style['font-size'] = str(node_pct * parent_px) + 'px' 149 | 150 | for child in node.children: 151 | style(child, rules) 152 | 153 | 154 | def cascade_priority(rule): 155 | selector, body = rule 156 | return selector.priority 157 | 158 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | import html 2 | import socket 3 | import urllib.parse 4 | import random 5 | 6 | s = socket.socket( 7 | family=socket.AF_INET, 8 | type=socket.SOCK_STREAM, 9 | proto=socket.IPPROTO_TCP, 10 | ) 11 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 12 | s.bind(('', 8000)) 13 | s.listen() 14 | 15 | LOGINS = { 16 | "crashoverride": "0cool", 17 | "cerealkiller": "emmanuel", 18 | "me": "you" 19 | } 20 | 21 | 22 | ENTRIES = [ 23 | ("No names. We are nameless!", "cerealkiller"), 24 | ("HACK THE PLANET!!!", "crashoverride"), 25 | ] 26 | 27 | 28 | def show_comments(session): 29 | out = "" 30 | nonce = str(random.random())[2:] 31 | session["nonce"] = nonce 32 | if "user" in session: 33 | out += "

Hello, " + session["user"] + "

" 34 | out += "
" 35 | out += "" 36 | out += "

" 37 | out += "

" 38 | out += "
" 39 | else: 40 | out += "Sign in to write in the guest book" 41 | for entry, who in ENTRIES: 42 | out += "

" + html.escape(entry) + "\n" 43 | out += "by " + html.escape(who) + "

" 44 | out += "" 45 | return out 46 | 47 | 48 | def login_form(session): 49 | nonce = str(random.random())[2:] 50 | session["nonce"] = nonce 51 | body = "" 52 | body += "
" 53 | body += "" 54 | body += "

Username:

" 55 | body += "

Password:

" 56 | body += "

" 57 | body += "
" 58 | return body 59 | 60 | 61 | def do_login(session, params): 62 | username = params.get("username") 63 | password = params.get("password") 64 | if username in LOGINS and LOGINS[username] == password: 65 | session["user"] = username 66 | return "200 OK", show_comments(session) 67 | else: 68 | out = "" 69 | out += "

Invalid password for {}

".format(username) 70 | return "401 Unauthorized", out 71 | 72 | 73 | def do_request(session, method, url, headers, body): 74 | if method == "GET" and url == "/": 75 | return "200 OK", show_comments(session) 76 | elif method == "GET" and url == "/login": 77 | return "200 OK", login_form(session) 78 | elif method == "POST" and url == "/": 79 | params = form_decode(body) 80 | return do_login(session, params) 81 | elif method == "POST" and url == "/add": 82 | params = form_decode(body) 83 | add_entry(session, params) 84 | return "200 OK", show_comments(session) 85 | elif method == "GET" and url == "/comment.js": 86 | with open("comment.js") as f: 87 | return "200 OK", f.read() 88 | else: 89 | return "404 Not Found", not_found(url, method) 90 | 91 | def not_found(url, method): 92 | out = "" 93 | out += "

{} {} not found!

".format(method, url) 94 | return out 95 | 96 | def form_decode(body): 97 | params = {} 98 | for field in body.split("&"): 99 | name, value = field.split("=", 1) 100 | name = urllib.parse.unquote_plus(name) 101 | value = urllib.parse.unquote_plus(value) 102 | params[name] = value 103 | return params 104 | 105 | 106 | def add_entry(session, params): 107 | if "user" not in session: return 108 | if "nonce" not in session or "nonce" not in params: return 109 | if session["nonce"] != params["nonce"]: return 110 | if 'guest' in params and len(params['guest']) <= 100: 111 | ENTRIES.append((params['guest'], session["user"])) 112 | return show_comments(session) 113 | 114 | 115 | SESSIONS = {} 116 | 117 | 118 | def handle_connection(conx): 119 | req = conx.makefile("b") 120 | reqline = req.readline().decode('utf8') 121 | method, url, version = reqline.split(" ", 2) 122 | assert method in ["GET", "POST"] 123 | headers = {} 124 | while True: 125 | line = req.readline().decode('utf8') 126 | if line == '\r\n': break 127 | header, value = line.split(":", 1) 128 | headers[header.lower()] = value.strip() 129 | if 'content-length' in headers: 130 | length = int(headers['content-length']) 131 | body = req.read(length).decode('utf8') 132 | else: 133 | body = None 134 | if "cookie" in headers: 135 | token = headers["cookie"][len("token="):] 136 | else: 137 | token = str(random.random())[2:] 138 | session = SESSIONS.setdefault(token, {}) 139 | status, body = do_request(session, method, url, headers, body) 140 | response = "HTTP/1.0 {}\r\n".format(status) 141 | response += "Content-Length: {}\r\n".format( 142 | len(body.encode("utf8"))) 143 | if 'cookie' not in headers: 144 | template = "Set-Cookie: token={}; SameSite=Lax\r\n" 145 | response += template.format(token) 146 | csp = "default-src http://127.0.0.1:8000" 147 | response += "Content-Security-Policy: {}\r\n".format(csp) 148 | response += "\r\n" + body 149 | conx.send(response.encode('utf8')) 150 | conx.close() 151 | 152 | 153 | if __name__ == "__main__": 154 | while True: 155 | conx, addr = s.accept() 156 | handle_connection(conx) 157 | 158 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import tkinter 2 | import tkinter.font 3 | import urllib.parse 4 | import dukpy 5 | 6 | from helpers import tree_to_list 7 | from html_parser import HTMLParser, Text, Element 8 | from css_parser import CSSParser, style, cascade_priority 9 | from js_context import JSContext 10 | from network import request, resolve_url, url_origin 11 | 12 | WIDTH, HEIGHT = 800, 600 13 | SCROLL_STEP = 100 14 | HSTEP, VSTEP = 13, 18 15 | CHROME_PX = 100 16 | 17 | 18 | class Tab: 19 | def __init__(self): 20 | self.url = None 21 | self.focus = None 22 | self.history = [] 23 | self.scroll = 0 24 | with open('browser.css') as f: 25 | self.default_style_sheet = CSSParser(f.read()).parse() 26 | 27 | def scrolldown(self): 28 | max_y = self.document.height - (HEIGHT - CHROME_PX) 29 | self.scroll = min(self.scroll + SCROLL_STEP, max_y) 30 | 31 | def scrollup(self): 32 | self.scroll -= SCROLL_STEP 33 | if self.scroll < 0: 34 | self.scroll = 0 35 | 36 | def click(self, x, y): 37 | self.focus = None 38 | 39 | y += self.scroll 40 | 41 | objs = [obj for obj in tree_to_list(self.document, []) 42 | if obj.x <= x < obj.x + obj.width 43 | and obj.y <= y < obj.y + obj.height] 44 | 45 | if not objs: return 46 | elt = objs[-1].node 47 | 48 | while elt: 49 | if isinstance(elt, Text): 50 | pass 51 | elif elt.tag == "input": 52 | if self.js.dispatch_event('click', elt): return 53 | self.focus = elt 54 | elt.attributes["value"] = "" 55 | return self.render() 56 | elif elt.tag == 'button': 57 | if self.js.dispatch_event('click', elt): return 58 | while elt: 59 | if elt.tag == 'form' and 'action' in elt.attributes: 60 | return self.submit_form(elt) 61 | else: 62 | elt = elt.parent 63 | return 64 | elif elt.tag == "a" and "href" in elt.attributes: 65 | if self.js.dispatch_event('click', elt): return 66 | url = resolve_url(elt.attributes["href"], self.url) 67 | return self.load(url) 68 | elt = elt.parent 69 | 70 | def keypress(self, char): 71 | if self.focus: 72 | self.focus.attributes['value'] += char 73 | if self.js.dispatch_event("keydown", self.focus): return 74 | self.render() 75 | 76 | def draw(self, canvas): 77 | for cmd in self.display_list: 78 | if cmd.top > self.scroll + HEIGHT - CHROME_PX: continue 79 | if cmd.bottom + VSTEP < self.scroll: continue 80 | cmd.execute(self.scroll - CHROME_PX, canvas) 81 | if self.focus: 82 | obj = [obj for obj in tree_to_list(self.document, []) 83 | if obj.node == self.focus and isinstance(obj, InputLayout)][0] 84 | text = self.focus.attributes.get('value', '') 85 | x = obj.x + obj.font.measure(text) 86 | y = obj.y - self.scroll + CHROME_PX 87 | canvas.create_line(x, y, x, y + obj.height) 88 | 89 | def go_back(self): 90 | if len(self.history) > 1: 91 | self.history.pop() 92 | back = self.history.pop() 93 | self.load(back) 94 | 95 | def load(self, url, body=None): 96 | headers, body = request(url, self.url, body) 97 | self.url = url 98 | self.history.append(url) 99 | self.nodes = HTMLParser(body).parse() 100 | self.rules = self.default_style_sheet.copy() 101 | self.js = JSContext(self) 102 | 103 | self.allowed_origins = None 104 | if "content-security-policy" in headers: 105 | csp = headers['content-security-policy'].split() 106 | if len(csp) > 0 and csp[0] == 'default-src': 107 | self.allowed_origins = csp[1:] 108 | 109 | # links = [node.attributes["href"] 110 | # for node in tree_to_list(self.nodes, []) 111 | # if isinstance(node, Element) 112 | # and node.tag == "link" 113 | # and "href" in node.attributes 114 | # and node.attributes.get("rel") == "stylesheet"] 115 | # for link in links: 116 | # try: 117 | # link_url = resolve_url(link, url) 118 | # if not self.allowed_request(link_url): 119 | # print("Blocked link", link, "due to CSP") 120 | # continue 121 | # header, body = request(link_url, url) 122 | # except: 123 | # continue 124 | # rules.extend(CSSParser(body).parse()) 125 | 126 | scripts = [node.attributes["src"] for node 127 | in tree_to_list(self.nodes, []) 128 | if isinstance(node, Element) 129 | and node.tag == "script" 130 | and "src" in node.attributes] 131 | 132 | for script in scripts: 133 | script_url = resolve_url(script, url) 134 | if not self.allowed_request(script_url): 135 | print("Blocked script", script, "due to CSP") 136 | continue 137 | header, body = request(script_url, url) 138 | try: 139 | self.js.run(body) 140 | except dukpy.JSRuntimeError as e: 141 | print("Script", script, "crashed", e) 142 | 143 | self.render() 144 | 145 | def allowed_request(self, url): 146 | return self.allowed_origins == None or url_origin(url) in self.allowed_origins 147 | 148 | def render(self): 149 | style(self.nodes, sorted(self.rules, key=cascade_priority)) 150 | self.document = DocumentLayout(self.nodes) 151 | self.document.layout() 152 | self.display_list = [] 153 | self.document.paint(self.display_list) 154 | 155 | def submit_form(self, elt): 156 | if self.js.dispatch_event("submit", elt): return 157 | 158 | inputs = [node for node in tree_to_list(elt, []) if 159 | isinstance(node, Element) and node.tag == 'input' and 'name' in node.attributes] 160 | body = '' 161 | for input in inputs: 162 | name = input.attributes['name'] 163 | value = input.attributes.get('value', '') 164 | name = urllib.parse.quote(name) 165 | value = urllib.parse.quote(value) 166 | body += '&' + name + '=' + value 167 | body = body[1:] 168 | url = resolve_url(elt.attributes["action"], self.url) 169 | self.load(url, body) 170 | 171 | 172 | class Browser: 173 | def __init__(self): 174 | self.window = tkinter.Tk() 175 | self.canvas = tkinter.Canvas( 176 | self.window, 177 | bg="white", 178 | width=WIDTH, 179 | height=HEIGHT 180 | 181 | ) 182 | self.canvas.pack() 183 | 184 | self.window.bind("", self.handle_down) 185 | self.window.bind("", self.handle_up) 186 | self.window.bind("", self.handle_click) 187 | self.window.bind("", self.handle_key) 188 | self.window.bind("", self.handle_enter) 189 | 190 | self.tabs = [] 191 | self.active_tab = None 192 | 193 | self.focus = None 194 | self.address_bar = '' 195 | 196 | def handle_down(self, e): 197 | self.tabs[self.active_tab].scrolldown() 198 | self.draw() 199 | 200 | def handle_click(self, e): 201 | if e.y < CHROME_PX: 202 | self.focus = None 203 | if 40 <= e.x < 40 + 80 * len(self.tabs) and 0 <= e.y < 40: 204 | self.active_tab = int((e.x - 40) / 80) 205 | elif 10 <= e.x < 30 and 10 <= e.y < 30: 206 | # clicked on new tab button 207 | self.load("https://browser.engineering/") 208 | elif 10 <= e.x < 35 and 50 <= e.y < 90: 209 | # clicked back button 210 | self.tabs[self.active_tab].go_back() 211 | elif 50 <= e.x < WIDTH - 10 and 50 <= e.y < 90: 212 | # clicked on address bar 213 | self.focus = "address_bar" 214 | self.address_bar = "" 215 | else: 216 | self.focus = 'content' 217 | self.tabs[self.active_tab].click(e.x, e.y - CHROME_PX) 218 | self.draw() 219 | 220 | def handle_key(self, e): 221 | if len(e.char) == 0: return 222 | if not (0x20 <= ord(e.char) < 0x7f): return 223 | if self.focus == 'address_bar': 224 | self.address_bar += e.char 225 | self.draw() 226 | elif self.focus == 'content': 227 | self.tabs[self.active_tab].keypress(e.char) 228 | self.draw() 229 | 230 | def handle_enter(self, e): 231 | if self.focus == 'address_bar': 232 | self.tabs[self.active_tab].load(self.address_bar) 233 | self.focus = None 234 | self.draw() 235 | 236 | def handle_up(self, e): 237 | self.tabs[self.active_tab].scrollup() 238 | self.draw() 239 | 240 | def draw(self): 241 | self.canvas.delete("all") 242 | self.tabs[self.active_tab].draw(self.canvas) 243 | self.canvas.create_rectangle(0, 0, WIDTH, CHROME_PX, 244 | fill="white", outline="black") 245 | tabfont = get_font(20, 'normal', 'roman') 246 | for i, tab in enumerate(self.tabs): 247 | name = 'Tab {}'.format(i) 248 | x1 = 40 + 80 * i 249 | x2 = 120 + 80 * i 250 | self.canvas.create_line(x1, 0, x1, 40, fill='black') 251 | self.canvas.create_line(x2, 0, x2, 40, fill='black') 252 | self.canvas.create_text(x1 + 10, 10, anchor="nw", text=name, font=tabfont, fill="black") 253 | if i == self.active_tab: 254 | self.canvas.create_line(0, 40, x1, 40, fill="black") 255 | self.canvas.create_line(x2, 40, WIDTH, 40, fill="black") 256 | 257 | # new tab button 258 | buttonfont = get_font(30, "normal", "roman") 259 | self.canvas.create_rectangle(10, 10, 30, 30, outline="black", width=1) 260 | self.canvas.create_text(11, 0, anchor="nw", text="+", font=buttonfont, fill="black") 261 | 262 | # url bar 263 | self.canvas.create_rectangle(40, 50, WIDTH - 10, 90, 264 | outline="black", width=1) 265 | if self.focus == 'address_bar': 266 | self.canvas.create_text(55, 55, anchor='nw', text=self.address_bar, 267 | font=buttonfont, fill="black") 268 | w = buttonfont.measure(self.address_bar) 269 | self.canvas.create_line(55 + w, 55, 55 + w, 85, fill="black") 270 | else: 271 | url = self.tabs[self.active_tab].url 272 | self.canvas.create_text(55, 55, anchor='nw', text=url, 273 | font=buttonfont, fill="black") 274 | 275 | # back button 276 | self.canvas.create_rectangle(10, 50, 35, 90, 277 | outline="black", width=1) 278 | self.canvas.create_polygon( 279 | 15, 70, 30, 55, 30, 85, fill='black') 280 | 281 | def load(self, url): 282 | new_tab = Tab() 283 | new_tab.load(url) 284 | self.active_tab = len(self.tabs) 285 | self.tabs.append(new_tab) 286 | self.draw() 287 | 288 | 289 | class DrawText: 290 | def __init__(self, x1, y1, text, font, color): 291 | self.top = y1 292 | self.left = x1 293 | self.text = text 294 | self.font = font 295 | self.bottom = y1 + font.metrics("linespace") 296 | self.color = color 297 | 298 | def execute(self, scroll, canvas): 299 | canvas.create_text( 300 | self.left, 301 | self.top - scroll, 302 | text=self.text, 303 | font=self.font, 304 | fill=self.color, 305 | anchor="nw" 306 | ) 307 | 308 | 309 | class DrawRect: 310 | def __init__(self, x1, y1, x2, y2, color): 311 | self.top = y1 312 | self.bottom = y2 313 | self.left = x1 314 | self.right = x2 315 | self.color = color 316 | 317 | def execute(self, scroll, canvas): 318 | canvas.create_rectangle( 319 | self.left, 320 | self.top - scroll, 321 | self.right, 322 | self.bottom - scroll, 323 | width=0, 324 | fill=self.color 325 | ) 326 | 327 | 328 | FONTS = {} 329 | 330 | 331 | def get_font(size, weight, slant): 332 | key = (size, weight, slant) 333 | if key not in FONTS: 334 | font = tkinter.font.Font(size=size, weight=weight, slant=slant) 335 | FONTS[key] = font 336 | return FONTS[key] 337 | 338 | 339 | class DocumentLayout: 340 | def __init__(self, node): 341 | self.node = node 342 | self.parent = None 343 | self.children = [] 344 | 345 | def paint(self, display_list): 346 | self.children[0].paint(display_list) 347 | 348 | def layout(self): 349 | self.width = WIDTH - 2 * HSTEP 350 | self.x = HSTEP 351 | self.y = VSTEP 352 | child = BlockLayout(self.node, self, None) 353 | self.children.append(child) 354 | child.layout() 355 | self.height = child.height + 2 * VSTEP 356 | 357 | 358 | BLOCK_ELEMENTS = [ 359 | "html", "body", "article", "section", "nav", "aside", 360 | "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "header", 361 | "footer", "address", "p", "hr", "pre", "blockquote", 362 | "ol", "ul", "menu", "li", "dl", "dt", "dd", "figure", 363 | "figcaption", "main", "div", "table", "form", "fieldset", 364 | "legend", "details", "summary" 365 | ] 366 | 367 | 368 | def layout_mode(node): 369 | if isinstance(node, Text): 370 | return "inline" 371 | elif node.children: 372 | if any([isinstance(child, Element) and child.tag in BLOCK_ELEMENTS for child in node.children]): 373 | return "block" 374 | else: 375 | return "inline" 376 | elif node.tag == "input": 377 | return "inline" 378 | else: 379 | return "block" 380 | 381 | 382 | class BlockLayout: 383 | def __init__(self, node, parent, previous): 384 | self.node = node 385 | self.parent = parent 386 | self.previous = previous 387 | self.children = [] 388 | self.display_list = [] 389 | 390 | def paint(self, display_list): 391 | bgcolor = self.node.style.get('background-color', 'transparent') 392 | is_atomic = not isinstance(self.node, Text) and \ 393 | (self.node.tag == "input" or self.node.tag == "button") 394 | 395 | if not is_atomic: 396 | if bgcolor != 'transparent': 397 | x2 = self.x + self.width 398 | y2 = self.y + self.height 399 | rect = DrawRect(self.x, self.y, x2, y2, bgcolor) 400 | display_list.append(rect) 401 | 402 | for x, y, text, font, color in self.display_list: 403 | display_list.append(DrawText(x, y, text, font, color)) 404 | for child in self.children: 405 | child.paint(display_list) 406 | 407 | def layout(self): 408 | self.x = self.parent.x 409 | if self.previous: 410 | self.y = self.previous.y + self.previous.height 411 | else: 412 | self.y = self.parent.y 413 | self.width = self.parent.width 414 | 415 | mode = layout_mode(self.node) 416 | if mode == "block": 417 | previous = None 418 | for child in self.node.children: 419 | next = BlockLayout(child, self, previous) 420 | self.children.append(next) 421 | previous = next 422 | else: 423 | self.new_line() 424 | self.recurse(self.node) 425 | 426 | for child in self.children: 427 | child.layout() 428 | 429 | # height must be computed _after_ children layout 430 | self.height = sum([child.height for child in self.children]) 431 | 432 | def recurse(self, node): 433 | if isinstance(node, Text): 434 | self.text(node) 435 | else: 436 | if node.tag == 'br': 437 | self.new_line() 438 | elif node.tag == 'input' or node.tag == 'button': 439 | self.input(node) 440 | else: 441 | for child in node.children: 442 | self.recurse(child) 443 | 444 | def get_font(self, node): 445 | weight = node.style['font-weight'] 446 | style = node.style['font-style'] 447 | if style == 'normal': style = 'roman' 448 | size = int(float(node.style['font-size'][:-2]) * .75) 449 | return get_font(size, weight, style) 450 | 451 | def text(self, node): 452 | font = self.get_font(node) 453 | for word in node.text.split(): 454 | width = font.measure(word) 455 | if self.cursor_x + width > self.width: 456 | self.new_line() 457 | 458 | line = self.children[-1] 459 | text = TextLayout(node, word, line, self.previous_word) 460 | line.children.append(text) 461 | self.previous_word = text 462 | 463 | self.cursor_x += width + font.measure(" ") 464 | 465 | def input(self, node): 466 | width = INPUT_WIDTH_PX 467 | if self.cursor_x + width > self.width: 468 | self.new_line() 469 | line = self.children[-1] 470 | input = InputLayout(node, line, self.previous_word) 471 | line.children.append(input) 472 | self.previous_word = input 473 | font = self.get_font(node) 474 | self.cursor_x += width + font.measure(" ") 475 | 476 | def new_line(self): 477 | self.previous_word = None 478 | self.cursor_x = 0 479 | last_line = self.children[-1] if self.children else None 480 | new_line = LineLayout(self.node, self, last_line) 481 | self.children.append(new_line) 482 | 483 | 484 | class LineLayout: 485 | def __init__(self, node, parent, previous): 486 | self.node = node 487 | self.parent = parent 488 | self.previous = previous 489 | self.children = [] 490 | 491 | def layout(self): 492 | self.width = self.parent.width 493 | self.x = self.parent.x 494 | 495 | if self.previous: 496 | self.y = self.previous.y + self.previous.height 497 | else: 498 | self.y = self.parent.y 499 | 500 | for word in self.children: 501 | word.layout() 502 | 503 | max_ascent = max([word.font.metrics('ascent') for word in self.children]) 504 | baseline = self.y + 1.25 * max_ascent 505 | for word in self.children: 506 | word.y = baseline - word.font.metrics('ascent') 507 | max_descent = max([word.font.metrics("descent") for word in self.children]) 508 | self.height = 1.25 * (max_ascent + max_descent) 509 | 510 | def paint(self, display_list): 511 | for child in self.children: 512 | child.paint(display_list) 513 | 514 | 515 | class TextLayout: 516 | def __init__(self, node, word, parent, previous): 517 | self.node = node 518 | self.word = word 519 | self.children = [] 520 | self.parent = parent 521 | self.previous = previous 522 | 523 | def layout(self): 524 | weight = self.node.style['font-weight'] 525 | style = self.node.style['font-style'] 526 | if style == 'normal': style = 'roman' 527 | size = int(float(self.node.style["font-size"][:-2]) * .75) 528 | self.font = get_font(size, weight, style) 529 | 530 | self.width = self.font.measure(self.word) 531 | if self.previous: 532 | space = self.previous.font.measure(' ') 533 | self.x = self.previous.x + self.previous.width + space 534 | else: 535 | self.x = self.parent.x 536 | 537 | self.height = self.font.metrics('linespace') 538 | 539 | def paint(self, display_list): 540 | color = self.node.style["color"] 541 | display_list.append( 542 | DrawText(self.x, self.y, self.word, self.font, color)) 543 | 544 | 545 | INPUT_WIDTH_PX = 200 546 | 547 | 548 | class InputLayout: 549 | def __init__(self, node, parent, previous): 550 | self.node = node 551 | self.children = [] 552 | self.parent = parent 553 | self.previous = previous 554 | 555 | def layout(self): 556 | weight = self.node.style['font-weight'] 557 | style = self.node.style['font-style'] 558 | if style == 'normal': style = 'roman' 559 | size = int(float(self.node.style["font-size"][:-2]) * .75) 560 | self.font = get_font(size, weight, style) 561 | 562 | self.width = INPUT_WIDTH_PX 563 | if self.previous: 564 | space = self.previous.font.measure(' ') 565 | self.x = self.previous.x + self.previous.width + space 566 | else: 567 | self.x = self.parent.x 568 | 569 | self.height = self.font.metrics('linespace') 570 | 571 | def paint(self, display_list): 572 | bgcolor = self.node.style.get("background-color", 573 | "transparent") 574 | if bgcolor != "transparent": 575 | x2, y2 = self.x + self.width, self.y + self.height 576 | rect = DrawRect(self.x, self.y, x2, y2, bgcolor) 577 | display_list.append(rect) 578 | 579 | if self.node.tag == 'input': 580 | text = self.node.attributes.get('value') 581 | elif self.node.tag == 'button': 582 | if len(self.node.children) == 1 and isinstance(self.node.children[0], Text): 583 | text = self.node.children[0].text 584 | else: 585 | print('Ignoring HTML content inside button') 586 | text = '' 587 | 588 | color = self.node.style["color"] 589 | display_list.append( 590 | DrawText(self.x, self.y, text, self.font, color)) 591 | 592 | 593 | def print_tree(node, indent=0): 594 | print(" " * indent, node) 595 | for child in node.children: 596 | print_tree(child, indent + 2) 597 | 598 | 599 | if __name__ == "__main__": 600 | import sys 601 | 602 | Browser().load(sys.argv[1]) 603 | tkinter.mainloop() 604 | 605 | # headers, body = request(sys.argv[1]) 606 | # nodes = HTMLParser(body).parse() 607 | # print_tree(nodes) 608 | 609 | # load('http://example.org/index.html') 610 | --------------------------------------------------------------------------------