├── test.js
├── comment.js
├── js
├── b.js
└── a.js
├── README.md
├── helpers.py
├── browser.css
├── runtime.js
├── js_context.py
├── network.py
├── html_parser.py
├── css_parser.py
├── server.py
└── main.py
/test.js:
--------------------------------------------------------------------------------
1 | var x = 2
2 | x + x
--------------------------------------------------------------------------------
/comment.js:
--------------------------------------------------------------------------------
1 | console.log('hello world')
--------------------------------------------------------------------------------
/js/b.js:
--------------------------------------------------------------------------------
1 | console.log(x + x)
2 | //call_python("log", x + x)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # toy-browser
2 | An experimental browser written in Python, based on https://browser.engineering/
3 |
--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
def tree_to_list(tree, list):
    """Flatten a tree into ``list`` in pre-order (a node, then its children).

    Args:
        tree: Root node. Every node must expose a ``children`` sequence.
        list: Output list; it is mutated in place (nodes are appended).

    Returns:
        The same ``list`` object that was passed in.
    """
    # An explicit stack replaces recursion so that very deep trees cannot
    # exhaust Python's recursion limit.
    pending = [tree]
    while pending:
        node = pending.pop()
        list.append(node)
        # Push children reversed so the first child is popped (visited) first,
        # which keeps the original left-to-right pre-order.
        pending.extend(reversed(node.children))
    return list
6 |
--------------------------------------------------------------------------------
/browser.css:
--------------------------------------------------------------------------------
1 | pre { background-color: gray; }
2 | a { color: blue; }
3 | i { font-style: italic; }
4 | b { font-weight: bold; }
5 | small { font-size: 90%; }
6 | big { font-size: 110%; }
7 | h1 { font-size: 300%; font-weight: bold}
8 | label { font-weight: bold; color: red; }
9 | input {
10 | font-size: 16px; font-weight: normal; font-style: normal;
11 | background-color: lightblue;
12 | }
13 | button {
14 | font-size: 16px; font-weight: normal; font-style: normal;
15 | background-color: orange;
16 | }
--------------------------------------------------------------------------------
/js/a.js:
--------------------------------------------------------------------------------
1 | var x = 3;
2 |
3 | var nodes = document.querySelectorAll('p')
4 | console.log(nodes.map(function(node) { return node.getAttribute('class') }))
5 |
6 | var label = document.querySelectorAll("label")[0];
7 | var inputs = document.querySelectorAll('input');
8 | for (var i = 0; i < inputs.length; i++) {
9 | var input = inputs[i]
10 | input.addEventListener('keydown', function(e) {
11 | console.log(this, e)
12 | var name = this.getAttribute("name");
13 | var value = this.getAttribute("value");
14 | label.innerHTML = "Input " + name + " has a value of " + value;
15 | // console.log("Input " + name + " has a value of " + value)
16 | })
17 |
18 | }
19 |
20 |
21 | var form = document.querySelectorAll("form")[0];
22 | form.addEventListener("submit", function(e) {
23 | console.log('you shall not pass!!!')
24 | // e.preventDefault();
25 | });
--------------------------------------------------------------------------------
/runtime.js:
--------------------------------------------------------------------------------
// Replacement console: log() forwards its single argument to the function
// the host exposes to call_python under the name 'log'.
console = {
    log: function(message) {
        call_python('log', message);
    }
};
6 |
// Minimal document object: querySelectorAll asks the host for the handles
// matching a selector and wraps each one in a Node.
document = {
    querySelectorAll: function(selector) {
        var found = call_python("querySelectorAll", selector);
        return found.map(function(h) {
            return new Node(h);
        });
    }
};
13 |
// Listener registry: handle -> { event type -> [listener, ...] }.
var listeners = {}

// Wrapper around a host-side element, identified by the handle value that
// is passed back to call_python.
function Node(handle) { this.handle = handle; }

// Returns whatever the host's "getAttribute" function returns for this
// handle and attribute name.
Node.prototype.getAttribute = function(attr) {
    return call_python("getAttribute", this.handle, attr);
}

// Registers `listener` for events of `type` on this node's handle.
Node.prototype.addEventListener = function(type, listener) {
    // Own-property checks only: a plain `in` / truthiness test would also see
    // members inherited from Object.prototype, so event types such as
    // "constructor" or "toString" would be treated as existing lists.
    var hasOwn = Object.prototype.hasOwnProperty
    if (!hasOwn.call(listeners, this.handle)) listeners[this.handle] = {}
    var dict = listeners[this.handle]
    if (!hasOwn.call(dict, type)) dict[type] = []
    dict[type].push(listener)
}

// Calls every listener registered for event.type on this handle, with `this`
// bound to this Node, and returns event.do_default afterwards.
Node.prototype.dispatchEvent = function(event) {
    var hasOwn = Object.prototype.hasOwnProperty
    var type = event.type
    var byType = hasOwn.call(listeners, this.handle) ? listeners[this.handle] : {}
    var list = hasOwn.call(byType, type) ? byType[type] : []
    for (var i = 0; i < list.length; i++) {
        list[i].call(this, event);
    }
    return event.do_default
}
38 |
// Write-only innerHTML: forwards the new markup to the host's
// "innerHTML_set" function together with this node's handle.
Object.defineProperty(Node.prototype, 'innerHTML', {
    set: function(s) {
        // String(s) never throws for null/undefined, unlike s.toString().
        // null becomes the empty string, matching DOM innerHTML semantics.
        var markup = (s === null) ? '' : String(s)
        call_python('innerHTML_set', this.handle, markup)
    }
})
44 |
// Event object handed to listeners. `do_default` starts out true and is the
// flag that preventDefault() clears.
function Event(type) {
    this.type = type;
    this.do_default = true;
}

// Marks the event so that do_default reads false from now on.
Event.prototype.preventDefault = function() {
    this.do_default = false;
};
53 |
// Synchronous-only XMLHttpRequest shim.
function XMLHttpRequest() {}

// Remembers the method and URL for a later send(); a truthy is_async is
// rejected because only the synchronous path exists.
XMLHttpRequest.prototype.open = function(method, url, is_async) {
    if (is_async) {
        throw new Error('async not implemented yet');
    }
    this.method = method;
    this.url = url;
};

// Hands the request to the host's "XMLHttpRequest_send" function and stores
// its return value as responseText.
XMLHttpRequest.prototype.send = function(body) {
    var text = call_python('XMLHttpRequest_send', this.method, this.url, body);
    this.responseText = text;
};
--------------------------------------------------------------------------------
/js_context.py:
--------------------------------------------------------------------------------
1 | import dukpy
2 |
3 | from css_parser import CSSParser
4 | from helpers import tree_to_list
5 | from html_parser import HTMLParser
6 | from network import request, url_origin, resolve_url
7 |
8 | EVENT_DISPATCH_CODE = "new Node(dukpy.handle).dispatchEvent(new Event(dukpy.type))"
9 |
10 | class JSContext:
11 | def __init__(self, tab):
12 | self.tab = tab
13 | self.interp = dukpy.JSInterpreter()
14 |
15 | # js to python object mappers
16 | self.node_to_handle = {}
17 | self.handle_to_node = {}
18 |
19 | self.interp.export_function("log", print)
20 | self.interp.export_function("querySelectorAll",
21 | self.querySelectorAll)
22 | self.interp.export_function("getAttribute",
23 | self.getAttribute)
24 | self.interp.export_function("innerHTML_set",
25 | self.innerHTML_set)
26 | self.interp.export_function("XMLHttpRequest_send",
27 | self.XMLHttpRequest_send)
28 |
29 | with open("runtime.js") as f:
30 | self.interp.evaljs(f.read())
31 |
32 | def run(self, code):
33 | return self.interp.evaljs(code)
34 |
35 | def dispatch_event(self, type, elt):
36 | handle = self.node_to_handle.get(elt, -1)
37 | do_default = self.interp.evaljs(EVENT_DISPATCH_CODE, type=type, handle=handle)
38 | return not do_default
39 |
40 |
41 | def querySelectorAll(self, selector_text):
42 | selector = CSSParser(selector_text).selector()
43 | nodes = [node for node in tree_to_list(self.tab.nodes, []) if selector.matches(node)]
44 | return [self.get_handle(node) for node in nodes]
45 |
46 | def getAttribute(self, handle, attr):
47 | etl = self.handle_to_node[handle]
48 | return etl.attributes.get(attr, None)
49 |
50 | def innerHTML_set(self, handle, s):
51 | doc = HTMLParser("
" + s + "").parse()
52 | new_nodes = doc.children[0].children
53 | elt = self.handle_to_node[handle]
54 | elt.children = new_nodes
55 | for child in elt.children:
56 | child.parent = elt
57 | self.tab.render()
58 |
59 | def XMLHttpRequest_send(self, method, url, body):
60 | full_url = resolve_url(url, self.tab.url)
61 | if url_origin(full_url) != url_origin(self.tab.url):
62 | raise Exception("Cross-origin XHR request not allowed")
63 | headers, out = request(full_url, self.tab.url, body)
64 | return out
65 |
66 | def get_handle(self, elt):
67 | if elt not in self.node_to_handle:
68 | handle = len(self.node_to_handle)
69 | self.node_to_handle[elt] = handle
70 | self.handle_to_node[handle] = elt
71 | else:
72 | handle = self.node_to_handle[elt]
73 | return handle
--------------------------------------------------------------------------------
/network.py:
--------------------------------------------------------------------------------
1 | import socket
2 | import ssl
3 |
4 | COOKIE_JAR = {}
5 |
6 |
def url_origin(url):
    """Return the ``scheme://host`` prefix of ``url``.

    The host part is whatever ``parse_url`` returns, so an explicit port
    (``host:port``) is kept.
    """
    scheme, host, _path = parse_url(url)
    return "".join((scheme, "://", host))
10 |
11 |
def parse_url(url):
    """Split ``url`` into ``(scheme, host, path)``.

    ``host`` is everything between ``://`` and the first following ``/``;
    ``path`` always starts with ``/`` and is ``"/"`` when the URL has none.
    Raises ``ValueError`` when ``url`` contains no ``://``.
    """
    scheme, remainder = url.split("://", 1)
    # partition() yields an empty tail when there is no "/", which produces
    # the same "/" path as appending a slash before splitting.
    host, _, tail = remainder.partition("/")
    return scheme, host, "/" + tail
18 |
19 |
def request(url, top_level_url, payload=None):
    """Perform one blocking HTTP/1.0 request and return ``(headers, body)``.

    Args:
        url: Absolute ``http://`` or ``https://`` URL to fetch.
        top_level_url: URL of the page making the request (may be ``None``);
            used only for the ``SameSite=Lax`` cookie check.
        payload: Optional request body. A truthy payload makes this a POST
            with a ``Content-Length`` header; otherwise a GET is sent.

    Returns:
        ``(headers, body)`` where ``headers`` maps lower-cased header names
        to stripped values and ``body`` is the decoded response text.

    Raises:
        AssertionError: for an unknown scheme, a non-200 status, or a
            response using transfer/content encoding.
    """
    (scheme, host, path) = parse_url(url)
    assert scheme in ["http", "https"], \
        "Unknown scheme {}".format(scheme)

    port = 80 if scheme == "http" else 443
    if ":" in host:
        host, port = host.split(":", 1)
        port = int(port)

    s = socket.socket(
        family=socket.AF_INET,
        type=socket.SOCK_STREAM,
        proto=socket.IPPROTO_TCP,
    )
    # try/finally so the socket is closed even when connecting, the TLS
    # handshake, or response parsing fails.
    try:
        s.connect((host, port))

        if scheme == "https":
            ctx = ssl.create_default_context()
            s = ctx.wrap_socket(s, server_hostname=host)

        method = "POST" if payload else "GET"

        body = "{} {} HTTP/1.0\r\n".format(method, path) + "HOST: {}\r\n".format(host)
        if payload:
            length = len(payload.encode('utf8'))
            body += 'Content-Length: {}\r\n'.format(length)

        if host in COOKIE_JAR:
            cookie, params = COOKIE_JAR[host]
            allow_cookie = True
            if top_level_url and params.get('samesite', 'none') == 'lax':
                # parse_url copes with a top-level URL that has no path,
                # which a fixed-width split on "/" does not.
                top_level_host = parse_url(top_level_url)[1]
                if ':' in top_level_host:
                    top_level_host, _ = top_level_host.split(":", 1)
                # Lax cookies go to cross-site requests only for GET.
                allow_cookie = (host == top_level_host or method == "GET")
            if allow_cookie:
                body += 'Cookie: {}\r\n'.format(cookie)

        body += "\r\n" + (payload if payload else "")

        # sendall keeps writing until the whole request is out; send() may
        # transmit only part of the buffer.
        s.sendall(body.encode('utf8'))

        with s.makefile("r", encoding="utf8", newline="\r\n") as response:
            statusline = response.readline()
            version, status, explanation = statusline.split(" ", 2)
            assert status == "200", "{}: {}".format(status, explanation)

            headers = {}
            while True:
                line = response.readline()
                # "" means the peer closed the connection mid-headers.
                if line in ("\r\n", ""): break
                header, value = line.split(":", 1)
                headers[header.lower()] = value.strip()

            assert "transfer-encoding" not in headers
            assert "content-encoding" not in headers

            if 'set-cookie' in headers:
                cookie = headers['set-cookie']
                params = {}
                if ';' in cookie:
                    cookie, rest = cookie.split(';', 1)
                    for param_pair in rest.split(';'):
                        param_pair = param_pair.strip()
                        if not param_pair:
                            continue
                        if '=' in param_pair:
                            key, value = param_pair.split('=', 1)
                        else:
                            # Valueless attributes such as HttpOnly / Secure.
                            key, value = param_pair, "true"
                        params[key.lower()] = value.lower()
                COOKIE_JAR[host] = (cookie, params)

            body = response.read()
    finally:
        s.close()

    return headers, body
95 |
96 |
def resolve_url(url, current):
    """Resolve ``url`` against the page URL ``current``.

    * A ``url`` containing ``://`` is returned unchanged.
    * A ``url`` starting with ``/`` replaces the path of ``current``.
    * Anything else is relative to the directory of ``current``; each
      leading ``../`` climbs one directory, never above ``scheme://host``.

    Args:
        url: The (possibly relative) URL to resolve.
        current: Absolute URL of the current page.

    Returns:
        The resolved absolute URL as a string.
    """
    if '://' in url:
        return url

    # A host-only base such as "http://host" has no path to split on; treat
    # it as "http://host/" so both branches below work.
    base = current
    if '://' in base and '/' not in base.split('://', 1)[1]:
        base += '/'

    if url.startswith('/'):
        scheme, hostpath = base.split('://', 1)
        host = hostpath.split('/', 1)[0]
        return scheme + "://" + host + url

    directory = base.rsplit('/', 1)[0]
    while url.startswith('../'):
        url = url[3:]
        # Exactly two slashes means we are already at "scheme://host".
        if directory.count('/') == 2: continue
        directory = directory.rsplit('/', 1)[0]
    return directory + '/' + url
111 |
--------------------------------------------------------------------------------
/html_parser.py:
--------------------------------------------------------------------------------
1 | from html import unescape
2 |
3 | HTML_ENTITIES = {
4 | '"': '"',
5 | ''': "'",
6 | '&': '&',
7 | '>': '>',
8 | '<': '<',
9 | '⁄': '/'
10 | }
11 |
class Text:
    """Tree node for a run of text; it never has children of its own."""

    def __init__(self, text, parent):
        self.text = text
        self.parent = parent
        # Present (and always empty) so tree walkers can treat Text and
        # Element nodes uniformly.
        self.children = []

    def __repr__(self):
        return "{!r}".format(self.text)
20 |
class Element:
    """Tree node for a tag, with its attribute dict and child nodes."""

    def __init__(self, tag, attributes, parent):
        self.tag = tag
        self.parent = parent
        self.attributes = attributes
        self.children = []

    def __repr__(self):
        return "".join(("<", self.tag, ">"))
30 |
class HTMLParser:
    """Small, forgiving HTML parser that builds a tree of Element/Text nodes.

    ``unfinished`` is the stack of currently open elements; an element is
    attached to its parent's ``children`` when it is closed.
    """

    # Tags that never have a closing tag or children.
    SELF_CLOSING_TAGS = [
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr",
    ]
    # Tags that belong in <head> when no explicit <head>/<body> was opened.
    HEAD_TAGS = [
        "base", "basefont", "bgsound", "noscript",
        "link", "meta", "title", "style", "script",
    ]

    def __init__(self, body):
        self.body = body
        self.unfinished = []

    def parse(self):
        """Scan ``self.body`` and return the root element of the tree."""
        text = ''
        in_angle = False
        for c in self.body:
            if c == "<":
                in_angle = True
                if text: self.add_text(text)
                text = ''
            elif c == ">":
                in_angle = False
                self.add_tag(text)
                text = ''
            else:
                text += c
        # Trailing characters inside an unterminated tag are dropped.
        if not in_angle and text:
            self.add_text(text)
        return self.finish()

    def get_attributes(self, text):
        """Split raw tag text into ``(lower-cased tag name, attribute dict)``.

        Attributes are separated by whitespace; a value wrapped in a matching
        pair of single or double quotes has the quotes removed. Blank text
        yields ``('', {})``.
        """
        parts = text.split()
        if not parts:
            return '', {}
        tag = parts[0].lower()
        attributes = {}
        for attrpair in parts[1:]:
            if '=' in attrpair:
                key, value = attrpair.split("=", 1)
                # Strip quotes only when the value both starts and ends with
                # the same quote, so an unterminated value keeps all of its
                # characters and an empty quoted value becomes ''.
                if (len(value) >= 2 and value[0] in ("'", "\"")
                        and value[-1] == value[0]):
                    value = value[1:-1]
                attributes[key.lower()] = value
            else:
                attributes[attrpair.lower()] = ''
        return tag, attributes

    def html_entities(self, text):
        """Decode character references in ``text`` via ``html.unescape``."""
        return unescape(text)

    def add_text(self, text):
        """Append a Text node to the innermost open element."""
        if text.isspace(): return
        self.implicit_tags(None)
        text = self.html_entities(text)
        parent = self.unfinished[-1]
        node = Text(text, parent)
        parent.children.append(node)

    def add_tag(self, tag):
        """Handle one tag: open, close or self-closing.

        ``tag`` is the raw text between ``<`` and ``>``. Empty tags and tags
        starting with ``!`` (doctype, comments) are ignored.
        """
        if not tag.strip() or tag.startswith("!"): return
        # Parse first so implicit_tags sees the bare, lower-cased tag name;
        # passing the raw text made tags with attributes or upper-case names
        # (e.g. <html lang="en">, <script src=...>) fail its comparisons.
        tag, attributes = self.get_attributes(tag)
        self.implicit_tags(tag)
        if tag.startswith('/'):
            # Never pop the root; finish() returns it.
            if len(self.unfinished) == 1: return
            node = self.unfinished.pop()
            parent = self.unfinished[-1]
            parent.children.append(node)
        elif tag in self.SELF_CLOSING_TAGS:
            parent = self.unfinished[-1]
            node = Element(tag, attributes, parent)
            parent.children.append(node)
        else:
            parent = self.unfinished[-1] if self.unfinished else None
            node = Element(tag, attributes, parent)
            self.unfinished.append(node)

    def implicit_tags(self, tag):
        """Insert omitted <html>, <head>, <body> and </head> tags.

        ``tag`` is the tag name about to be added, or ``None`` for text.
        """
        while True:
            open_tags = [node.tag for node in self.unfinished]
            if open_tags == [] and tag != "html":
                self.add_tag("html")
            elif open_tags == ['html'] and tag not in ['head', 'body', '/html']:
                if tag in self.HEAD_TAGS:
                    self.add_tag("head")
                else:
                    self.add_tag("body")
            elif open_tags == ['html', 'head'] and tag not in ['/head'] + self.HEAD_TAGS:
                self.add_tag('/head')
            else:
                break

    def finish(self):
        """Close every element still open and return the root node."""
        if len(self.unfinished) == 0:
            self.add_tag('html')
        while len(self.unfinished) > 1:
            node = self.unfinished.pop()
            parent = self.unfinished[-1]
            parent.children.append(node)
        return self.unfinished.pop()
146 |
--------------------------------------------------------------------------------
/css_parser.py:
--------------------------------------------------------------------------------
1 | from html_parser import Element
2 |
3 | INHERITED_PROPERTIES = {
4 | "font-size": "16px",
5 | "font-style": "normal",
6 | "font-weight": "normal",
7 | "color": "black",
8 | }
9 |
class CSSParser:
    """Recursive-descent parser for a small CSS subset.

    ``s`` is the text being parsed and ``i`` the current position in it.
    Parse failures are reported by raising ``AssertionError``; ``body`` and
    ``parse`` catch it to skip the malformed declaration or rule.
    """

    def __init__(self, s):
        self.s = s
        self.i = 0

    def whitespace(self):
        """Advance past any whitespace at the current position."""
        while self.i < len(self.s) and self.s[self.i].isspace():
            self.i += 1

    def word(self):
        """Consume and return a run of alphanumerics and ``#-.%``.

        Raises:
            AssertionError: if no such character is at the current position.
        """
        start = self.i
        while self.i < len(self.s):
            if self.s[self.i].isalnum() or self.s[self.i] in '#-.%':
                self.i += 1
            else:
                break

        # Raised explicitly (not via an assert statement) so error recovery
        # still works when Python runs with -O, which strips asserts.
        if self.i <= start:
            raise AssertionError("expected a word at position {}".format(start))
        return self.s[start:self.i]

    def literal(self, literal):
        """Consume exactly the character ``literal``.

        Raises:
            AssertionError: if the next character is something else or the
                input is exhausted.
        """
        if not (self.i < len(self.s) and self.s[self.i] == literal):
            raise AssertionError(
                "expected {!r} at position {}".format(literal, self.i))
        self.i += 1

    def pair(self):
        """Parse ``property: value`` and return ``(lower-cased prop, value)``."""
        prop = self.word()
        self.whitespace()
        self.literal(':')
        self.whitespace()
        value = self.word()
        return prop.lower(), value

    def body(self):
        """Parse declarations up to (not including) ``}`` into a dict.

        A malformed declaration is skipped up to the next ``;``; parsing
        stops at ``}`` or the end of the input.
        """
        pairs = {}
        while self.i < len(self.s) and self.s[self.i] != '}':
            try:
                prop, val = self.pair()
                pairs[prop] = val
                self.whitespace()
                self.literal(';')
                self.whitespace()
            except AssertionError:
                why = self.ignore_until([";", "}"])
                if why == ";":
                    self.literal(";")
                    self.whitespace()
                else:
                    break
        return pairs

    def ignore_until(self, chars):
        """Skip forward to the first character in ``chars``.

        Returns that character (without consuming it), or ``None`` when the
        end of the input is reached first.
        """
        while self.i < len(self.s):
            if self.s[self.i] in chars:
                return self.s[self.i]
            else:
                self.i += 1
        return None

    def selector(self):
        """Parse a tag selector or a space-separated descendant chain."""
        out = TagSelector(self.word().lower())
        self.whitespace()
        while self.i < len(self.s) and self.s[self.i] != '{':
            tag = self.word()
            descendant = TagSelector(tag.lower())
            out = DescendantSelector(out, descendant)
            self.whitespace()
        return out

    def parse(self):
        """Parse a whole style sheet into a list of ``(selector, body)``.

        A rule that fails to parse is skipped through its closing ``}``.
        """
        rules = []
        while self.i < len(self.s):
            try:
                self.whitespace()
                selector = self.selector()
                self.literal('{')
                self.whitespace()
                body = self.body()
                self.literal('}')
                rules.append((selector, body))
            except AssertionError:
                why = self.ignore_until(["}"])
                if why == "}":
                    self.literal("}")
                    self.whitespace()
                else:
                    break

        return rules
98 |
class TagSelector:
    """Selector matching elements by tag name; its priority is always 1."""

    def __init__(self, tag):
        self.tag = tag
        self.priority = 1

    def matches(self, node):
        """Return True when ``node`` is an Element whose tag equals ``tag``."""
        if not isinstance(node, Element):
            return False
        return self.tag == node.tag
106 |
107 |
class DescendantSelector:
    """Selector for ``ancestor descendant`` pairs.

    Its priority is the sum of the two component priorities.
    """

    def __init__(self, ancestor, descendant):
        self.ancestor = ancestor
        self.descendant = descendant
        self.priority = ancestor.priority + descendant.priority

    def matches(self, node):
        """True when ``node`` matches ``descendant`` and some node on its
        parent chain matches ``ancestor``."""
        if not self.descendant.matches(node):
            return False
        candidate = node.parent
        while candidate:
            if self.ancestor.matches(candidate):
                return True
            candidate = candidate.parent
        return False
120 |
121 |
def style(node, rules):
    """Compute ``node.style`` for ``node`` and, recursively, its descendants.

    Values are applied in this order, later ones overriding earlier ones:
    inherited properties (from the parent, or the defaults at the root),
    every matching rule in ``rules`` in list order, then the element's
    inline ``style`` attribute. A percentage ``font-size`` is finally
    converted to pixels relative to the parent's font size.
    """
    parent = node.parent
    node.style = {}
    for prop, default_value in INHERITED_PROPERTIES.items():
        node.style[prop] = parent.style[prop] if parent else default_value

    for selector, body in rules:
        if selector.matches(node):
            node.style.update(body)

    if isinstance(node, Element) and 'style' in node.attributes:
        inline = CSSParser(node.attributes['style']).body()
        node.style.update(inline)

    # normalize percentage fonts to pixels
    own_size = node.style["font-size"]
    if own_size.endswith("%"):
        if parent:
            base_size = parent.style["font-size"]
        else:
            base_size = INHERITED_PROPERTIES["font-size"]
        fraction = float(own_size[:-1]) / 100
        # base_size has the form "<number>px"; drop the two-character unit.
        base_px = float(base_size[:-2])
        node.style['font-size'] = str(fraction * base_px) + 'px'

    for child in node.children:
        style(child, rules)
152 |
153 |
def cascade_priority(rule):
    """Sort key for a ``(selector, body)`` rule: the selector's priority."""
    selector, _body = rule
    return selector.priority
157 |
158 |
--------------------------------------------------------------------------------
/server.py:
--------------------------------------------------------------------------------
1 | import html
2 | import socket
3 | import urllib.parse
4 | import random
5 |
6 | s = socket.socket(
7 | family=socket.AF_INET,
8 | type=socket.SOCK_STREAM,
9 | proto=socket.IPPROTO_TCP,
10 | )
11 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
12 | s.bind(('', 8000))
13 | s.listen()
14 |
15 | LOGINS = {
16 | "crashoverride": "0cool",
17 | "cerealkiller": "emmanuel",
18 | "me": "you"
19 | }
20 |
21 |
22 | ENTRIES = [
23 | ("No names. We are nameless!", "cerealkiller"),
24 | ("HACK THE PLANET!!!", "crashoverride"),
25 | ]
26 |
27 |
28 | def show_comments(session):
29 | out = ""
30 | nonce = str(random.random())[2:]
31 | session["nonce"] = nonce
32 | if "user" in session:
33 | out += "Hello, " + session["user"] + "
"
34 | out += ""
39 | else:
40 | out += "Sign in to write in the guest book"
41 | for entry, who in ENTRIES:
42 | out += "" + html.escape(entry) + "\n"
43 | out += "by " + html.escape(who) + "
"
44 | out += ""
45 | return out
46 |
47 |
48 | def login_form(session):
49 | nonce = str(random.random())[2:]
50 | session["nonce"] = nonce
51 | body = ""
52 | body += ""
58 | return body
59 |
60 |
61 | def do_login(session, params):
62 | username = params.get("username")
63 | password = params.get("password")
64 | if username in LOGINS and LOGINS[username] == password:
65 | session["user"] = username
66 | return "200 OK", show_comments(session)
67 | else:
68 | out = ""
69 | out += "Invalid password for {}
".format(username)
70 | return "401 Unauthorized", out
71 |
72 |
def do_request(session, method, url, headers, body):
    """Route one request and return ``(status line text, response body)``.

    ``headers`` is accepted but not consulted by any route.
    """
    route = (method, url)
    if route == ("GET", "/"):
        return "200 OK", show_comments(session)
    if route == ("GET", "/login"):
        return "200 OK", login_form(session)
    if route == ("POST", "/"):
        return do_login(session, form_decode(body))
    if route == ("POST", "/add"):
        add_entry(session, form_decode(body))
        return "200 OK", show_comments(session)
    if route == ("GET", "/comment.js"):
        with open("comment.js") as f:
            return "200 OK", f.read()
    return "404 Not Found", not_found(url, method)
90 |
91 | def not_found(url, method):
92 | out = ""
93 | out += "{} {} not found!
".format(method, url)
94 | return out
95 |
def form_decode(body):
    """Decode an ``application/x-www-form-urlencoded`` body into a dict.

    Args:
        body: The raw request body, e.g. ``"a=1&b=two+words"``. ``None`` or
            an empty string yields an empty dict.

    Returns:
        Dict mapping decoded field names to decoded values. A field without
        ``=`` maps to ``""``; when a name repeats, the last value wins.
    """
    params = {}
    if not body:
        # A POST without Content-Length arrives here as None.
        return params
    for field in body.split("&"):
        if not field:
            continue  # tolerate "a=1&&b=2" and trailing "&"
        name, _, value = field.partition("=")
        name = urllib.parse.unquote_plus(name)
        value = urllib.parse.unquote_plus(value)
        params[name] = value
    return params
104 |
105 |
106 | def add_entry(session, params):
107 | if "user" not in session: return
108 | if "nonce" not in session or "nonce" not in params: return
109 | if session["nonce"] != params["nonce"]: return
110 | if 'guest' in params and len(params['guest']) <= 100:
111 | ENTRIES.append((params['guest'], session["user"]))
112 | return show_comments(session)
113 |
114 |
115 | SESSIONS = {}
116 |
117 |
def handle_connection(conx):
    """Read one HTTP request from ``conx``, answer it, and close ``conx``.

    The session is looked up (or created) in ``SESSIONS`` by the token taken
    from the ``Cookie`` header; when the request carries no cookie a fresh
    token is generated and sent back via ``Set-Cookie``.

    Raises:
        AssertionError: if the request method is neither GET nor POST.
    """
    # try/finally so the connection is closed even when the request is
    # malformed or handling it raises.
    try:
        req = conx.makefile("b")
        reqline = req.readline().decode('utf8')
        method, url, version = reqline.split(" ", 2)
        assert method in ["GET", "POST"]
        headers = {}
        while True:
            line = req.readline().decode('utf8')
            # '' means the client closed the connection mid-headers.
            if line in ('\r\n', ''): break
            header, value = line.split(":", 1)
            headers[header.lower()] = value.strip()
        if 'content-length' in headers:
            length = int(headers['content-length'])
            body = req.read(length).decode('utf8')
        else:
            body = None
        if "cookie" in headers:
            # The header value is taken to be exactly "token=<value>".
            token = headers["cookie"][len("token="):]
        else:
            token = str(random.random())[2:]
        session = SESSIONS.setdefault(token, {})
        status, body = do_request(session, method, url, headers, body)
        response = "HTTP/1.0 {}\r\n".format(status)
        response += "Content-Length: {}\r\n".format(
            len(body.encode("utf8")))
        if 'cookie' not in headers:
            template = "Set-Cookie: token={}; SameSite=Lax\r\n"
            response += template.format(token)
        csp = "default-src http://127.0.0.1:8000"
        response += "Content-Security-Policy: {}\r\n".format(csp)
        response += "\r\n" + body
        # sendall keeps writing until the whole response is out; send() may
        # transmit only part of it.
        conx.sendall(response.encode('utf8'))
    finally:
        conx.close()
151 |
152 |
153 | if __name__ == "__main__":
154 | while True:
155 | conx, addr = s.accept()
156 | handle_connection(conx)
157 |
158 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import tkinter
2 | import tkinter.font
3 | import urllib.parse
4 | import dukpy
5 |
6 | from helpers import tree_to_list
7 | from html_parser import HTMLParser, Text, Element
8 | from css_parser import CSSParser, style, cascade_priority
9 | from js_context import JSContext
10 | from network import request, resolve_url, url_origin
11 |
12 | WIDTH, HEIGHT = 800, 600
13 | SCROLL_STEP = 100
14 | HSTEP, VSTEP = 13, 18
15 | CHROME_PX = 100
16 |
17 |
class Tab:
    """State and behaviour of a single browser tab.

    Holds the current URL, navigation history, scroll offset, the focused
    input element, and (after ``load``) the parsed nodes, style rules, JS
    context, layout tree and display list for the page.
    """

    def __init__(self):
        self.url = None
        self.focus = None
        self.history = []
        self.scroll = 0
        with open('browser.css') as f:
            self.default_style_sheet = CSSParser(f.read()).parse()

    def scrolldown(self):
        """Scroll down one step without going past the end of the page."""
        # Clamp at 0: on a page shorter than the viewport the difference is
        # negative and would otherwise produce a negative scroll offset.
        max_y = max(self.document.height - (HEIGHT - CHROME_PX), 0)
        self.scroll = min(self.scroll + SCROLL_STEP, max_y)

    def scrollup(self):
        """Scroll up one step, stopping at the top of the page."""
        self.scroll = max(self.scroll - SCROLL_STEP, 0)

    def click(self, x, y):
        """Handle a click at ``(x, y)`` relative to the top of the viewport.

        Clears the focus, finds the innermost layout object under the point
        and walks up from its node until an input, button or link handles
        the click. Each default action is skipped when the page's ``click``
        listener prevented it.
        """
        self.focus = None

        y += self.scroll

        objs = [obj for obj in tree_to_list(self.document, [])
                if obj.x <= x < obj.x + obj.width
                and obj.y <= y < obj.y + obj.height]

        if not objs: return
        elt = objs[-1].node

        while elt:
            if isinstance(elt, Text):
                pass
            elif elt.tag == "input":
                if self.js.dispatch_event('click', elt): return
                self.focus = elt
                elt.attributes["value"] = ""
                return self.render()
            elif elt.tag == 'button':
                if self.js.dispatch_event('click', elt): return
                # Submit the nearest enclosing form that has an action.
                while elt:
                    if elt.tag == 'form' and 'action' in elt.attributes:
                        return self.submit_form(elt)
                    else:
                        elt = elt.parent
                return
            elif elt.tag == "a" and "href" in elt.attributes:
                if self.js.dispatch_event('click', elt): return
                url = resolve_url(elt.attributes["href"], self.url)
                return self.load(url)
            elt = elt.parent

    def keypress(self, char):
        """Append ``char`` to the focused input, then fire ``keydown``.

        The page is re-rendered unless the listener prevented the default.
        """
        if self.focus:
            self.focus.attributes['value'] += char
            if self.js.dispatch_event("keydown", self.focus): return
            self.render()

    def draw(self, canvas):
        """Paint the visible display-list commands and the text cursor."""
        for cmd in self.display_list:
            if cmd.top > self.scroll + HEIGHT - CHROME_PX: continue
            if cmd.bottom + VSTEP < self.scroll: continue
            cmd.execute(self.scroll - CHROME_PX, canvas)
        if self.focus:
            # The focused node may no longer be laid out (for example after
            # a script replaced its ancestor's children); then there is no
            # cursor to draw.
            obj = next((candidate
                        for candidate in tree_to_list(self.document, [])
                        if candidate.node == self.focus
                        and isinstance(candidate, InputLayout)), None)
            if obj is None: return
            text = self.focus.attributes.get('value', '')
            x = obj.x + obj.font.measure(text)
            y = obj.y - self.scroll + CHROME_PX
            canvas.create_line(x, y, x, y + obj.height)

    def go_back(self):
        """Reload the previous history entry, if there is one."""
        if len(self.history) > 1:
            # Drop the current page, then pop the previous one; load() will
            # push it back onto the history.
            self.history.pop()
            back = self.history.pop()
            self.load(back)

    def load(self, url, body=None):
        """Fetch ``url`` (POSTing ``body`` if given), parse it, run its
        scripts and render it."""
        headers, body = request(url, self.url, body)
        self.url = url
        self.history.append(url)
        self.nodes = HTMLParser(body).parse()
        self.rules = self.default_style_sheet.copy()
        self.js = JSContext(self)

        # None means "no policy": every origin is allowed.
        self.allowed_origins = None
        if "content-security-policy" in headers:
            csp = headers['content-security-policy'].split()
            if len(csp) > 0 and csp[0] == 'default-src':
                self.allowed_origins = csp[1:]

        # NOTE: only the default style sheet is applied; external
        # <link rel="stylesheet"> sheets are not fetched.

        scripts = [node.attributes["src"] for node
                   in tree_to_list(self.nodes, [])
                   if isinstance(node, Element)
                   and node.tag == "script"
                   and "src" in node.attributes]

        for script in scripts:
            script_url = resolve_url(script, url)
            if not self.allowed_request(script_url):
                print("Blocked script", script, "due to CSP")
                continue
            header, body = request(script_url, url)
            try:
                self.js.run(body)
            except dukpy.JSRuntimeError as e:
                print("Script", script, "crashed", e)

        self.render()

    def allowed_request(self, url):
        """Return True when the page's CSP (if any) allows ``url``'s origin."""
        return self.allowed_origins is None or url_origin(url) in self.allowed_origins

    def render(self):
        """Re-style the nodes, rebuild the layout tree and display list."""
        style(self.nodes, sorted(self.rules, key=cascade_priority))
        self.document = DocumentLayout(self.nodes)
        self.document.layout()
        self.display_list = []
        self.document.paint(self.display_list)

    def submit_form(self, elt):
        """Fire ``submit`` on form ``elt`` and, unless prevented, POST its
        named inputs to the form's ``action`` URL."""
        if self.js.dispatch_event("submit", elt): return

        inputs = [node for node in tree_to_list(elt, []) if
                  isinstance(node, Element) and node.tag == 'input' and 'name' in node.attributes]
        body = ''
        for input in inputs:
            name = input.attributes['name']
            value = input.attributes.get('value', '')
            name = urllib.parse.quote(name)
            value = urllib.parse.quote(value)
            body += '&' + name + '=' + value
        # Drop the leading '&'.
        body = body[1:]
        url = resolve_url(elt.attributes["action"], self.url)
        self.load(url, body)
170 |
171 |
172 | class Browser:
173 | def __init__(self):
174 | self.window = tkinter.Tk()
175 | self.canvas = tkinter.Canvas(
176 | self.window,
177 | bg="white",
178 | width=WIDTH,
179 | height=HEIGHT
180 |
181 | )
182 | self.canvas.pack()
183 |
184 | self.window.bind("", self.handle_down)
185 | self.window.bind("", self.handle_up)
186 | self.window.bind("", self.handle_click)
187 | self.window.bind("", self.handle_key)
188 | self.window.bind("", self.handle_enter)
189 |
190 | self.tabs = []
191 | self.active_tab = None
192 |
193 | self.focus = None
194 | self.address_bar = ''
195 |
196 | def handle_down(self, e):
197 | self.tabs[self.active_tab].scrolldown()
198 | self.draw()
199 |
200 | def handle_click(self, e):
201 | if e.y < CHROME_PX:
202 | self.focus = None
203 | if 40 <= e.x < 40 + 80 * len(self.tabs) and 0 <= e.y < 40:
204 | self.active_tab = int((e.x - 40) / 80)
205 | elif 10 <= e.x < 30 and 10 <= e.y < 30:
206 | # clicked on new tab button
207 | self.load("https://browser.engineering/")
208 | elif 10 <= e.x < 35 and 50 <= e.y < 90:
209 | # clicked back button
210 | self.tabs[self.active_tab].go_back()
211 | elif 50 <= e.x < WIDTH - 10 and 50 <= e.y < 90:
212 | # clicked on address bar
213 | self.focus = "address_bar"
214 | self.address_bar = ""
215 | else:
216 | self.focus = 'content'
217 | self.tabs[self.active_tab].click(e.x, e.y - CHROME_PX)
218 | self.draw()
219 |
220 | def handle_key(self, e):
221 | if len(e.char) == 0: return
222 | if not (0x20 <= ord(e.char) < 0x7f): return
223 | if self.focus == 'address_bar':
224 | self.address_bar += e.char
225 | self.draw()
226 | elif self.focus == 'content':
227 | self.tabs[self.active_tab].keypress(e.char)
228 | self.draw()
229 |
230 | def handle_enter(self, e):
231 | if self.focus == 'address_bar':
232 | self.tabs[self.active_tab].load(self.address_bar)
233 | self.focus = None
234 | self.draw()
235 |
236 | def handle_up(self, e):
237 | self.tabs[self.active_tab].scrollup()
238 | self.draw()
239 |
def draw(self):
    """Repaint the whole window: the active page, then the browser chrome.

    The canvas is cleared, the active tab draws itself, and the chrome
    (background, tab strip, new-tab button, address bar, back button) is
    drawn afterwards using fixed pixel coordinates.
    """
    canvas = self.canvas
    canvas.delete("all")

    # Page content goes first; the white chrome background is created
    # after it.
    self.tabs[self.active_tab].draw(canvas)
    canvas.create_rectangle(0, 0, WIDTH, CHROME_PX,
                            fill="white", outline="black")

    # tab strip: one 80 px wide slot per tab, starting at x = 40
    tab_font = get_font(20, 'normal', 'roman')
    for index in range(len(self.tabs)):
        label = 'Tab {}'.format(index)
        left = 40 + 80 * index
        right = 120 + 80 * index
        canvas.create_line(left, 0, left, 40, fill='black')
        canvas.create_line(right, 0, right, 40, fill='black')
        canvas.create_text(left + 10, 10, anchor="nw", text=label,
                           font=tab_font, fill="black")
        if index == self.active_tab:
            # the bottom border is drawn on both sides of the active tab
            # but not underneath it
            canvas.create_line(0, 40, left, 40, fill="black")
            canvas.create_line(right, 40, WIDTH, 40, fill="black")

    # new tab button
    button_font = get_font(30, "normal", "roman")
    canvas.create_rectangle(10, 10, 30, 30, outline="black", width=1)
    canvas.create_text(11, 0, anchor="nw", text="+",
                       font=button_font, fill="black")

    # url bar: shows the text being typed while focused, otherwise the
    # active tab's URL
    canvas.create_rectangle(40, 50, WIDTH - 10, 90,
                            outline="black", width=1)
    editing = self.focus == 'address_bar'
    if editing:
        shown = self.address_bar
    else:
        shown = self.tabs[self.active_tab].url
    canvas.create_text(55, 55, anchor='nw', text=shown,
                       font=button_font, fill="black")
    if editing:
        # text cursor placed right after the typed text
        caret_x = 55 + button_font.measure(self.address_bar)
        canvas.create_line(caret_x, 55, caret_x, 85, fill="black")

    # back button: a box with a left-pointing triangle
    canvas.create_rectangle(10, 50, 35, 90,
                            outline="black", width=1)
    canvas.create_polygon(
        15, 70, 30, 55, 30, 85, fill='black')
280 |
def load(self, url):
    """Open ``url`` in a newly created tab, make that tab active and redraw."""
    tab = Tab()
    tab.load(url)
    self.tabs.append(tab)
    # the new tab was appended, so it sits at the last index
    self.active_tab = len(self.tabs) - 1
    self.draw()
287 |
288 |
class DrawText:
    """Display-list command that draws one piece of text on a canvas."""

    def __init__(self, x1, y1, text, font, color):
        """Store the text, its top-left corner (x1, y1), font and color.

        ``bottom`` is derived from the font's "linespace" metric.
        """
        self.left, self.top = x1, y1
        self.text = text
        self.font = font
        self.color = color
        self.bottom = y1 + font.metrics("linespace")

    def execute(self, scroll, canvas):
        """Draw the text on ``canvas``, shifted up by ``scroll`` pixels."""
        y_on_screen = self.top - scroll
        canvas.create_text(
            self.left, y_on_screen,
            anchor="nw",
            text=self.text,
            font=self.font,
            fill=self.color,
        )
307 |
308 |
class DrawRect:
    """Display-list command that fills an axis-aligned rectangle."""

    def __init__(self, x1, y1, x2, y2, color):
        """Store the corners (x1, y1)-(x2, y2) and the fill color."""
        self.left, self.top = x1, y1
        self.right, self.bottom = x2, y2
        self.color = color

    def execute(self, scroll, canvas):
        """Draw the rectangle on ``canvas``, shifted up by ``scroll`` pixels."""
        top_on_screen = self.top - scroll
        bottom_on_screen = self.bottom - scroll
        # width=0 is passed so no outline is requested, only the fill
        canvas.create_rectangle(
            self.left, top_on_screen,
            self.right, bottom_on_screen,
            fill=self.color,
            width=0,
        )
326 |
327 |
# Cache of font objects, keyed by (size, weight, slant).
FONTS = {}


def get_font(size, weight, slant):
    """Return the font for (size, weight, slant), creating it on first use.

    Created fonts are stored in ``FONTS`` so that repeated requests for
    the same combination return the same object.
    """
    key = (size, weight, slant)
    try:
        return FONTS[key]
    except KeyError:
        created = tkinter.font.Font(size=size, weight=weight, slant=slant)
        FONTS[key] = created
        return FONTS[key]
337 |
338 |
class DocumentLayout:
    """Top of the layout tree: wraps ``node`` in a single BlockLayout child."""

    def __init__(self, node):
        self.node = node
        self.parent = None
        self.children = []

    def paint(self, display_list):
        """Let the first child add its drawing commands to ``display_list``."""
        root_block = self.children[0]
        root_block.paint(display_list)

    def layout(self):
        """Position the document inside the window and lay out its block.

        The document is inset by ``HSTEP`` horizontally and ``VSTEP``
        vertically; its height is the child's height plus ``VSTEP`` on
        both ends.
        """
        self.width = WIDTH - 2 * HSTEP
        self.x, self.y = HSTEP, VSTEP

        root_block = BlockLayout(self.node, self, None)
        self.children.append(root_block)
        root_block.layout()

        self.height = root_block.height + 2 * VSTEP
356 |
357 |
# Tags that layout_mode treats as block-level when they appear as children.
BLOCK_ELEMENTS = [
    "html", "body", "article", "section", "nav", "aside",
    "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "header",
    "footer", "address", "p", "hr", "pre", "blockquote",
    "ol", "ul", "menu", "li", "dl", "dt", "dd", "figure",
    "figcaption", "main", "div", "table", "form", "fieldset",
    "legend", "details", "summary"
]


def layout_mode(node):
    """Decide whether ``node`` is laid out as "block" or "inline".

    Text nodes are inline. A node with children is "block" as soon as one
    child is an Element whose tag is in ``BLOCK_ELEMENTS``, otherwise
    "inline". A childless node is "inline" only for the ``input`` tag.
    """
    if isinstance(node, Text):
        return "inline"

    if node.children:
        for child in node.children:
            if isinstance(child, Element) and child.tag in BLOCK_ELEMENTS:
                return "block"
        return "inline"

    return "inline" if node.tag == "input" else "block"
380 |
381 |
class BlockLayout:
    """Layout object for one box of the document.

    Depending on ``layout_mode(node)`` its children are either further
    BlockLayout objects (one per child node) or LineLayout objects that
    hold the words and inputs found inside the node.
    """

    def __init__(self, node, parent, previous):
        self.node = node
        self.parent = parent
        self.previous = previous
        self.children = []
        self.display_list = []

    def paint(self, display_list):
        """Append this box's drawing commands, then those of its children."""
        node = self.node
        bgcolor = node.style.get('background-color', 'transparent')
        # inputs and buttons are skipped here when drawing the background
        is_atomic = (not isinstance(node, Text)) and \
            node.tag in ("input", "button")

        if bgcolor != 'transparent' and not is_atomic:
            right = self.x + self.width
            bottom = self.y + self.height
            display_list.append(
                DrawRect(self.x, self.y, right, bottom, bgcolor))

        # entries stored directly on this box as (x, y, text, font, color)
        display_list.extend(
            DrawText(x, y, text, font, color)
            for x, y, text, font, color in self.display_list)

        for child in self.children:
            child.paint(display_list)

    def layout(self):
        """Compute x, y, width and height and build the child layout objects."""
        container, sibling = self.parent, self.previous
        self.x = container.x
        # stack directly below the previous sibling, or start at the
        # parent's top when this is the first child
        self.y = sibling.y + sibling.height if sibling else container.y
        self.width = container.width

        if layout_mode(self.node) == "block":
            last_child = None
            for child_node in self.node.children:
                last_child = BlockLayout(child_node, self, last_child)
                self.children.append(last_child)
        else:
            self.new_line()
            self.recurse(self.node)

        for child in self.children:
            child.layout()

        # height must be computed _after_ children layout
        self.height = sum(child.height for child in self.children)

    def recurse(self, node):
        """Walk ``node`` and add its text, line breaks and inputs to the lines."""
        if isinstance(node, Text):
            self.text(node)
            return

        tag = node.tag
        if tag == 'br':
            self.new_line()
        elif tag in ('input', 'button'):
            self.input(node)
        else:
            for child in node.children:
                self.recurse(child)

    def get_font(self, node):
        """Return the font described by ``node.style``.

        The 'font-size' value has its last two characters stripped and is
        scaled by 0.75 before being truncated to an int.
        """
        css = node.style
        weight = css['font-weight']
        slant = css['font-style']
        if slant == 'normal':
            slant = 'roman'
        size = int(float(css['font-size'][:-2]) * .75)
        return get_font(size, weight, slant)

    def text(self, node):
        """Add each whitespace-separated word of ``node.text`` to the lines."""
        font = self.get_font(node)
        for word in node.text.split():
            word_width = font.measure(word)
            if self.cursor_x + word_width > self.width:
                # word does not fit on the current line
                self.new_line()

            current_line = self.children[-1]
            word_box = TextLayout(node, word, current_line, self.previous_word)
            current_line.children.append(word_box)
            self.previous_word = word_box

            self.cursor_x += word_width + font.measure(" ")

    def input(self, node):
        """Add a fixed-width input or button box to the current line."""
        box_width = INPUT_WIDTH_PX
        if self.cursor_x + box_width > self.width:
            self.new_line()
        current_line = self.children[-1]
        input_box = InputLayout(node, current_line, self.previous_word)
        current_line.children.append(input_box)
        self.previous_word = input_box
        font = self.get_font(node)
        self.cursor_x += box_width + font.measure(" ")

    def new_line(self):
        """Start a new, empty LineLayout and reset the horizontal cursor."""
        self.previous_word = None
        self.cursor_x = 0
        last_line = self.children[-1] if self.children else None
        self.children.append(LineLayout(self.node, self, last_line))
482 |
483 |
class LineLayout:
    """Layout object for one line of words/inputs inside a block.

    The line spans the full width of its parent, is stacked below the
    previous line (or at the parent's top), and aligns every child on a
    shared baseline.
    """

    def __init__(self, node, parent, previous):
        self.node = node
        self.parent = parent
        self.previous = previous
        self.children = []

    def layout(self):
        """Compute the line's box and the y position of each child.

        Children must expose ``layout()`` and a ``font`` with
        ``metrics('ascent')`` / ``metrics('descent')``. A line without
        children gets height 0.
        """
        self.width = self.parent.width
        self.x = self.parent.x

        if self.previous:
            self.y = self.previous.y + self.previous.height
        else:
            self.y = self.parent.y

        for word in self.children:
            word.layout()

        # A line can be empty (for example the line opened by a <br>, or
        # the initial line of an inline box that contains no words).
        # max() over an empty sequence raises ValueError, so such a line
        # is given zero height instead.
        if not self.children:
            self.height = 0
            return

        max_ascent = max(word.font.metrics('ascent') for word in self.children)
        # the factor 1.25 adds leading above and below the tallest glyphs
        baseline = self.y + 1.25 * max_ascent
        for word in self.children:
            word.y = baseline - word.font.metrics('ascent')
        max_descent = max(word.font.metrics("descent") for word in self.children)
        self.height = 1.25 * (max_ascent + max_descent)

    def paint(self, display_list):
        """Let every child append its drawing commands to ``display_list``."""
        for child in self.children:
            child.paint(display_list)
513 |
514 |
class TextLayout:
    """Layout object for a single word on a line."""

    def __init__(self, node, word, parent, previous):
        self.node = node
        self.word = word
        self.children = []
        self.parent = parent
        self.previous = previous

    def layout(self):
        """Pick the font from the node's style and compute x, width, height.

        ``y`` is not set here; it is read later by ``paint``.
        """
        css = self.node.style
        weight = css['font-weight']
        slant = css['font-style']
        if slant == 'normal':
            slant = 'roman'
        # strip the two-character unit suffix and scale by 0.75
        size = int(float(css["font-size"][:-2]) * .75)
        self.font = get_font(size, weight, slant)

        self.width = self.font.measure(self.word)

        neighbour = self.previous
        if neighbour:
            # follow the previous item, separated by one space measured
            # in that item's font
            gap = neighbour.font.measure(' ')
            self.x = neighbour.x + neighbour.width + gap
        else:
            self.x = self.parent.x

        self.height = self.font.metrics('linespace')

    def paint(self, display_list):
        """Append a DrawText command for this word in the node's color."""
        command = DrawText(self.x, self.y, self.word, self.font,
                           self.node.style["color"])
        display_list.append(command)
543 |
544 |
# Fixed width, in pixels, given to every input and button box.
INPUT_WIDTH_PX = 200


class InputLayout:
    """Layout object for an ``input`` or ``button`` element on a line."""

    def __init__(self, node, parent, previous):
        self.node = node
        self.children = []
        self.parent = parent
        self.previous = previous

    def layout(self):
        """Pick the font from the node's style and compute x, width, height.

        The width is always ``INPUT_WIDTH_PX``. ``y`` is not set here; it
        is read later by ``paint``.
        """
        weight = self.node.style['font-weight']
        style = self.node.style['font-style']
        if style == 'normal':
            style = 'roman'
        # strip the two-character unit suffix and scale by 0.75
        size = int(float(self.node.style["font-size"][:-2]) * .75)
        self.font = get_font(size, weight, style)

        self.width = INPUT_WIDTH_PX
        if self.previous:
            # follow the previous item, separated by one space measured
            # in that item's font
            space = self.previous.font.measure(' ')
            self.x = self.previous.x + self.previous.width + space
        else:
            self.x = self.parent.x

        self.height = self.font.metrics('linespace')

    def paint(self, display_list):
        """Append the background rectangle (if any) and the box's text.

        An ``input`` shows its ``value`` attribute; a ``button`` shows the
        text of its single Text child. Any other button content is
        ignored with a printed notice.
        """
        bgcolor = self.node.style.get("background-color",
                                      "transparent")
        if bgcolor != "transparent":
            x2, y2 = self.x + self.width, self.y + self.height
            rect = DrawRect(self.x, self.y, x2, y2, bgcolor)
            display_list.append(rect)

        if self.node.tag == 'input':
            # Default to the empty string: an <input> without a value
            # attribute would otherwise hand None to DrawText as its text.
            text = self.node.attributes.get('value', '')
        elif self.node.tag == 'button':
            if len(self.node.children) == 1 and isinstance(self.node.children[0], Text):
                text = self.node.children[0].text
            else:
                print('Ignoring HTML content inside button')
                text = ''

        color = self.node.style["color"]
        display_list.append(
            DrawText(self.x, self.y, text, self.font, color))
591 |
592 |
def print_tree(node, indent=0):
    """Print ``node`` and all of its descendants, one per line.

    Each level is indented two more spaces than its parent; nodes are
    printed in depth-first order, parents before children.
    """
    # explicit stack instead of recursion; children are pushed in reverse
    # so that they are popped (and printed) in their original order
    pending = [(node, indent)]
    while pending:
        current, depth = pending.pop()
        print(" " * depth, current)
        for child in reversed(current.children):
            pending.append((child, depth + 2))
597 |
598 |
# Script entry point: open the URL given on the command line in a new
# browser window and run the Tk event loop.
if __name__ == "__main__":
    import sys

    # The start URL is required. Without this check a missing argument
    # surfaces as a bare IndexError traceback from sys.argv[1].
    if len(sys.argv) < 2:
        sys.exit("usage: {} URL".format(sys.argv[0]))

    Browser().load(sys.argv[1])
    tkinter.mainloop()

# headers, body = request(sys.argv[1])
# nodes = HTMLParser(body).parse()
# print_tree(nodes)

# load('http://example.org/index.html')
610 |
--------------------------------------------------------------------------------