├── .gitignore
├── README.md
├── autoit.sh
├── ghidra
│   └── Reflex.java
├── py
│   ├── reflex.egg-info
│   │   ├── PKG-INFO
│   │   ├── SOURCES.txt
│   │   ├── dependency_links.txt
│   │   └── top_level.txt
│   ├── reflex.py
│   ├── reflex
│   │   ├── __init__.py
│   │   ├── fuzzer.py
│   │   ├── re.py
│   │   ├── reflex.py
│   │   └── simplify.py
│   ├── setup.py
│   ├── simplify.py
│   ├── sleigh_fuzz.py
│   └── tests
│       └── test_fuzz.py
├── rust-mutator
│   ├── Cargo.lock
│   ├── Cargo.toml
│   ├── make.sh
│   ├── src
│   │   ├── common.rs
│   │   ├── lib.rs
│   │   └── main.rs
│   └── test.py
├── scala
│   ├── .gitignore
│   ├── README.md
│   ├── build.sbt
│   ├── find-tables.sh
│   ├── jreflex.sh
│   ├── project
│   │   ├── build.properties
│   │   └── plugins.sbt
│   └── src
│       ├── main
│       │   └── scala
│       │       └── io
│       │           └── github
│       │               └── thebabush
│       │                   └── reflex
│       │                       ├── Counter.scala
│       │                       ├── DfaToRegex.scala
│       │                       ├── Generator.scala
│       │                       ├── MakeProto.scala
│       │                       ├── MakeRust.scala
│       │                       ├── Pretty.scala
│       │                       ├── Simple.scala
│       │                       ├── graph
│       │                       │   └── main.scala
│       │                       └── main.scala
│       └── test
│           └── scala
│               └── SimplificationTest.scala
├── sleigh.sh
├── subs2pdf.sh
└── tryone.py

/.gitignore:
--------------------------------------------------------------------------------
subs/
data/
stroz/
test.sh
.ipynb_checkpoints/
*.pyc
__pycache__/
ram/
gen.rs
*.so
afl-proto/
afl-proto-orig/
mutator/
.idea/
rust-mutator/target/
workdir/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Reflex

Flex 'em lexers.

Also known as *extract parsers' tokens from a binary*.

Also known as *a tool that helped us take 5th place at DEF CON*.

## Tutorial

> *Tutorials are for n00bs.* - someone, somewhere, probably.

First of all, find `yylex()` in your binary.
Then look around a bit and find the various `yy_XXX` tables that flex uses to
encode the DFA, along with the size of their elements.
You can easily do this by comparing your target program with a flex-based
program that you compile from source.
We had plans to automate this process with some heuristics but never got
around to it.
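For reference, the lookup those tables implement is flex's standard
compressed-DFA walk, and it is the same walk that `py/reflex/reflex.py`
replays offline. A minimal Python sketch, assuming a hypothetical `tables`
object whose `yy_*` attributes are the arrays you just located:

```python
def next_state(tables, state, byte, max_state):
    """Successor of `state` on input `byte`, using flex's compressed tables."""
    clazz = tables.yy_ec[byte]  # equivalence class of the input byte
    # Follow the default-state chain until the row actually owns this entry.
    while tables.yy_chk[tables.yy_base[state] + clazz] != state:
        state = tables.yy_def[state]
        if state >= max_state:
            # Jumped to a "template" state: switch to the meta equivalence class.
            clazz = tables.yy_meta[clazz]
    return tables.yy_nxt[tables.yy_base[state] + clazz]
```

Once you can evaluate this for every `(state, byte)` pair you have the
uncompressed DFA; the commands below do exactly that and dump it as a graph.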
```sh
# download the example binary
mkdir -p ./example
wget 'https://github.com/thebabush/reflex/files/4605997/liq.zip' -O ./example/liq.zip
unzip ./example/liq.zip -d ./example/
# extract & uncompress the DFAs
./py/reflex.py --accept 0x12BC7E0 2 \
               --base 0x12BCA40 2 \
               --chk 0x12BCF20 2 \
               --def 0x12BCB80 2 \
               --ec 0x12BC900 1 \
               --meta 0x12BCA00 1 \
               --nxt 0x12BCCC0 2 \
               --max-state 142 \
               ./example/liq \
               ./workdir/
# make a PDF of the default starting state
dot -Tpdf -o ./workdir/out.pdf ./workdir/1.dot
./py/simplify.py ./workdir/G.gpickle ./workdir/simple/
# make PDFs/PNGs
./subs2pdf.sh ./workdir/simple
# build the jar
pushd ./scala && sbt assembly && popd
# print the regexes
for f in ./workdir/simple/*.dfa
do
  echo "== $f =="
  ./scala/jreflex.sh print /dev/stdout "$f"
  echo
done
```

Example graph:

![whole graph](https://user-images.githubusercontent.com/1985669/81504105-b7faf780-92e7-11ea-916a-f7f6189df119.png)

Example regexps:

```
== ./workdir/simple/27_6.dfa ==
if
== ./workdir/simple/28_1.dfa ==
else
== ./workdir/simple/28_2.dfa ==
else
== ./workdir/simple/28_5.dfa ==
else
== ./workdir/simple/28_6.dfa ==
else
== ./workdir/simple/29_1.dfa ==
return(( ?|\t))*\n
== ./workdir/simple/29_2.dfa ==
return(( ?|\t))*\n
== ./workdir/simple/29_5.dfa ==
```

If you are crazy enough, you can create a crappy AFL/Rust mutator using `jreflex`.
Just know that in order to build it you'll have to wait **A LOT**.

```sh
./scala/jreflex.sh rust ./rust-mutator/src/gen.rs ./workdir/simple/*.dfa
```

## Ghidra Script

I quickly hacked together a Ghidra script that exports the `yylex` data flow
as Graphviz and GraphML files.
The latter can be used together with `find-tables.sh` to (hopefully) recover
the tables from the binary automatically.
It doesn't always work (Ghidra's IR is funky), but even when it fails it can
recover at least some of the tables.

Instructions:

1. `cp ./ghidra/Reflex.java ~/ghidra_scripts/`
2. Open Ghidra
3. Navigate to `yylex()` in your binary (or set the `REFLEX_YYLEX` environment
   variable to its address, in hex or decimal)
4. Run the script
   (it will create `/tmp/out.xml`, `/tmp/out.dot` and `/tmp/simple.dot`)
5. Run `scala/find-tables.sh` and it should output the arguments to forward to
   `reflex.py`

## A bit of history

This was all part of a PhD at Politecnico di Milano that I dropped :D

Reflex started the day I had a Twitter argument with the _unofficial_
account of a very well-known open-source reverse engineering tool (wink wink).
Actually, they just replied to me in a **very** hostile way for no reason.
I was angry and the source code of the tool was not available back then, so I
decided to do the only reasonable thing: take the only non-Java binary and
find bugs to prove my point.
Turns out it (sleigh) was using a flex-based parser to read architecture
definitions or something like that.
So I built a Python script to extract and uncompress the DFA embedded in the
program as a graph (see the sketch below for what that graph looks like).
Then a bunch of stuff happened, but in the current form you have a Scala tool
that can output the regexps in readable form (among other half-assed things).
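Concretely, what the Python stage dumps (the `G.gpickle` from the tutorial
above) is a `networkx` digraph: nodes carry the flex accept number in an
`accepts` attribute, and edges carry the bytes of the transition in an
`alphabet` attribute. A minimal sketch of poking at it:

```python
import networkx as nx

# Load the DFA graph dumped by py/reflex.py (path from the tutorial above).
G = nx.readwrite.gpickle.read_gpickle('./workdir/G.gpickle')

# Print every transition that lands in an accepting state.
for u, v, data in G.edges(data=True):
    rule = G.nodes[v]['accepts']
    if rule:
        chars = ''.join(sorted(data['alphabet']))
        print('{} -> {}: rule {} on {!r}'.format(u, v, rule, chars))
```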
What's kinda interesting is that I did most of this in May 2019.
In August I was at **DEF CON CTF** with mhackeroni and there was a challenge
based on flex and bison.
We used my tool and it worked perfectly.
How likely is it that you write a tool because of a mean comment and then use
it at DEF CON?
I found it pretty funny.

Anyway, the idea was to sell Reflex as a research paper, but as I said I
dropped out. Arguably there's no real science in all of this. We wanted to try
fuzzing programs with the tokens extracted using it, but the project died thanks
to my life choices.

By the way, there's an integer underflow in `sleigh` because it doesn't account
for null characters inside strings (aka strings with length 0).
This leads to a `new std::string()` of `-1` characters.
Hurray for real science.

## Fun facts

Programs that I know of using flex/bison:

- AutoIt's Au3Check and malware using AutoIt
- Autodesk Maya: libOGSDeviceOGL4.dylib
- Unity: libunity.so (probably for shaders)
- VxWorks: has a C interpreter written in flex/bison (WTF... if you know why
  there's such a thing, tell me in an issue)
- Zyxel: the shell of some routers (e.g.: `WAC6103D-I_5.50(AAXH.3)C0`)
- A lot of open source projects (not interesting)

--------------------------------------------------------------------------------
/autoit.sh:
--------------------------------------------------------------------------------
#!/bin/bash

rm -rf ../flex/autoit/stage0/ ../flex/autoit/out/

echo "=== STAGE 0 ==="

./py/reflex.py \
    --accept 0x0001FEE0 2 \
    --base 0x00020DF0 2 \
    --chk 0x00021150 2 \
    --def 0x00021C80 2 \
    --ec 0x00021FE0 4 \
    --meta 0x000201F0 4 \
    --nxt 0x000202C0 2 \
    --max-state 0x185 \
    ../flex/autoit/Au3Check.exe \
    ../flex/autoit/stage0/

./py/simplify.py \
    ../flex/autoit/stage0/G.gpickle \
    ../flex/autoit/out/

echo "=== STAGE 1 ==="

echo "SCALA"
parallel -j 12 echo print {} ';' ./scala/jreflex.sh print "{}.regexp" "{}" ::: ../flex/autoit/out/*.dfa
#for f in ../flex/autoit/out/*.dfa;
#do
#  echo "SCALA $f"
#  ./scala/jreflex.sh print "$f.regexp" "$f"
#  cat "$f.regexp"
#done

echo "PROTO"
./scala/jreflex.sh proto ../flex/autoit/out/out.proto ../flex/autoit/out/*.dfa

--------------------------------------------------------------------------------
/ghidra/Reflex.java:
--------------------------------------------------------------------------------
//@category Analysis

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import ghidra.app.decompiler.DecompInterface;
import ghidra.app.script.GhidraScript;
import ghidra.app.util.bin.format.elf.ElfHeader;
import ghidra.app.util.bin.format.elf.ElfProgramHeader;
import ghidra.graph.GEdge;
import ghidra.graph.jung.JungDirectedGraph;
import
ghidra.program.flatapi.FlatProgramAPI; 26 | import ghidra.program.model.address.Address; 27 | import ghidra.program.model.address.AddressSpace; 28 | import ghidra.program.model.address.GenericAddress; 29 | import ghidra.program.model.listing.Function; 30 | import ghidra.program.model.listing.Program; 31 | import ghidra.program.model.pcode.HighConstant; 32 | import ghidra.program.model.pcode.HighFunction; 33 | import ghidra.program.model.pcode.PcodeOp; 34 | import ghidra.program.model.pcode.PcodeOpAST; 35 | import ghidra.program.model.pcode.Varnode; 36 | import ghidra.program.model.pcode.VarnodeAST; 37 | import ghidra.util.xml.XmlAttributes; 38 | import ghidra.util.xml.XmlWriter; 39 | 40 | public class Reflex extends GhidraScript { 41 | 42 | final static int DECOMPILATION_TIMEOUT = 60 * 5; 43 | 44 | public static class XmlAttributesBuilder { 45 | private XmlAttributes xml; 46 | 47 | public XmlAttributesBuilder() { 48 | xml = new XmlAttributes(); 49 | } 50 | XmlAttributesBuilder addAttribute(String name, boolean value) { 51 | xml.addAttribute(name, value); 52 | return this; 53 | } 54 | XmlAttributesBuilder addAttribute(String name, String value) { 55 | xml.addAttribute(name, value); 56 | return this; 57 | } 58 | XmlAttributesBuilder addAttribute(String name, long value) { 59 | xml.addAttribute(name, value); 60 | return this; 61 | } 62 | public XmlAttributes build() { 63 | return xml; 64 | } 65 | } 66 | 67 | public static class Util { 68 | 69 | public static Optional tryOffsetFromAddress(Address addr) { 70 | if (addr == null) { 71 | return Optional.empty(); 72 | } 73 | return Optional.of(addr.getUnsignedOffset()); 74 | } 75 | 76 | } 77 | 78 | public static class ReflexResults { 79 | private long yyAccept; 80 | private long yyBase; 81 | private long yyChk; 82 | private long yyDef; 83 | private long yyEc; 84 | private long yyMeta; 85 | private long yyNxt; 86 | private int maxState; 87 | 88 | public ReflexResults(long accept, long base, long chk, long def, long ec, long meta, long nxt, int maxState) { 89 | yyAccept = accept; 90 | yyBase = base; 91 | yyChk = chk; 92 | yyDef = def; 93 | yyEc = ec; 94 | yyMeta = meta; 95 | yyNxt = nxt; 96 | this.maxState = maxState; 97 | } 98 | 99 | public long[] getTables() { 100 | return new long[] { 101 | yyAccept, 102 | yyBase, 103 | yyChk, 104 | yyDef, 105 | yyEc, 106 | yyMeta, 107 | yyNxt 108 | }; 109 | } 110 | } 111 | 112 | protected interface AnalysisListener { 113 | public void onTablesGuessed(Set
guesses); 114 | } 115 | 116 | enum NodeAttributes { 117 | ADDRESS, 118 | PC_ADDRESS, 119 | FILE_OFFSET, 120 | COULD_BE_TABLE, 121 | IS_OP, 122 | IS_VARNODE, 123 | IS_INPUT, 124 | IS_SINK, 125 | IS_SOURCE, 126 | CONST_VALUE, 127 | SIZE, 128 | OP, 129 | NAME, 130 | OP_MNEMONIC, 131 | OP_SIZE, 132 | AS_STRING, 133 | // Edges 134 | ORDER, 135 | // neo4j 136 | } 137 | 138 | public static Optional nodeAttrToXmlType(NodeAttributes attr) { 139 | switch (attr) { 140 | case PC_ADDRESS: 141 | case FILE_OFFSET: 142 | return Optional.of("long"); 143 | case COULD_BE_TABLE: 144 | return Optional.of("boolean"); 145 | case IS_OP: 146 | case IS_VARNODE: 147 | case IS_INPUT: 148 | case IS_SINK: 149 | case IS_SOURCE: 150 | return Optional.of("boolean"); 151 | case CONST_VALUE: 152 | case SIZE: 153 | case OP: 154 | return Optional.of("long"); 155 | case ORDER: 156 | return Optional.of("int"); 157 | case NAME: 158 | case OP_MNEMONIC: 159 | return Optional.of("string"); 160 | case OP_SIZE: 161 | return Optional.of("long"); 162 | default: 163 | return Optional.empty(); 164 | } 165 | } 166 | 167 | public static Optional nodeAttrToXmlStr(NodeAttributes attr, Object value) { 168 | if (nodeAttrToXmlType(attr).isPresent()) { 169 | return Optional.of("" + value); 170 | } 171 | return Optional.empty(); 172 | } 173 | 174 | static class Node { 175 | Map attributes; 176 | int id; 177 | 178 | public Node(int id) { 179 | this.id = id; 180 | attributes = new HashMap<>(); 181 | } 182 | 183 | public int getId() { 184 | return id; 185 | } 186 | 187 | public boolean isVarnode() { 188 | // HACK 189 | return id >= 0; 190 | } 191 | 192 | public Map getAttributes() { 193 | return attributes; 194 | } 195 | 196 | public Object getAttribute(NodeAttributes k) { 197 | return attributes.get(k); 198 | } 199 | 200 | public void setAttribute(NodeAttributes k, Object v) { 201 | attributes.put(k, v); 202 | } 203 | 204 | public void setAttributeFromMaybe(NodeAttributes k, Optional v) { 205 | if (v.isPresent()) { 206 | attributes.put(k, v.get()); 207 | } 208 | } 209 | 210 | @Override 211 | public String toString() { 212 | var sb = new StringBuilder(); 213 | sb.append("\n"); 214 | for (var kv : attributes.entrySet()) { 215 | if (kv.getKey().name().equals("AS_STRING")) { 216 | continue; 217 | } 218 | sb.append(String.format("%s=%s\n", kv.getKey().name(), kv.getValue().toString())); 219 | } 220 | sb.append("\n"); 221 | return sb.toString(); 222 | } 223 | } 224 | 225 | static class Edge implements GEdge { 226 | Map attributes; 227 | Node u; 228 | Node v; 229 | 230 | public Edge(Node u, Node v, Node shareAttrs) { 231 | this.u = u; 232 | this.v = v; 233 | attributes = new HashMap<>(); 234 | attributes.putAll(shareAttrs.attributes); 235 | } 236 | 237 | public Edge(Node u, Node v, Edge shareAttrs) { 238 | this.u = u; 239 | this.v = v; 240 | attributes = new HashMap<>(); 241 | attributes.putAll(shareAttrs.attributes); 242 | } 243 | 244 | public Edge(Node u, Node v, int order) { 245 | this.u = u; 246 | this.v = v; 247 | attributes = new HashMap<>(); 248 | attributes.put(NodeAttributes.ORDER, order); 249 | } 250 | 251 | public void copyAttrsFrom(Map attrs) { 252 | attributes.putAll(attrs); 253 | } 254 | 255 | public int getOrder() { 256 | return (int)attributes.get(NodeAttributes.ORDER); 257 | } 258 | 259 | @Override 260 | public Node getStart() { 261 | return u; 262 | } 263 | 264 | @Override 265 | public Node getEnd() { 266 | return v; 267 | } 268 | 269 | public void setAttribute(NodeAttributes name, Object value) { 270 | attributes.put(name, value); 271 | 
} 272 | 273 | public Object getAttribute(NodeAttributes name) { 274 | return attributes.get(name); 275 | } 276 | } 277 | 278 | static class Graph extends JungDirectedGraph { 279 | 280 | private String escapeString(String s) { 281 | return s.replace("\"", "\\\"").replace("\n", "\\n"); 282 | } 283 | 284 | public void exportGraphviz(String path) throws FileNotFoundException { 285 | var writer = new PrintWriter(path); 286 | writer.write("digraph Out {\n ordering=in;\n"); 287 | for (var vertex : getVertices()) { 288 | writer.write( 289 | String.format( 290 | " \"%s\" [ label=\"%s\" ];\n", 291 | "" + vertex.getId(), 292 | escapeString("" + vertex) 293 | ) 294 | ); 295 | } 296 | List edges = new ArrayList(getEdges()); 297 | edges.sort((left, right) -> Integer.compare(left.getOrder(), right.getOrder())); 298 | for (var edge : edges) { 299 | writer.write( 300 | String.format( 301 | " \"%d\" -> \"%d\" [ label=\"%d\" ];\n", 302 | edge.getStart().getId(), 303 | edge.getEnd().getId(), 304 | edge.getOrder() 305 | ) 306 | ); 307 | } 308 | writer.write("}\n"); 309 | writer.close(); 310 | } 311 | 312 | public Optional simplify(boolean removeCopies, boolean removeIndirects) { 313 | var other = new Graph(); 314 | 315 | // Check correctness 316 | for (var node : getVertices()) { 317 | if (node.isVarnode() && inDegree(node) > 1) { 318 | return Optional.empty(); 319 | } 320 | } 321 | 322 | var kept = new HashSet(); 323 | // Add opnodes and source/sink varnodes 324 | for (var node : getVertices()) { 325 | // Remove a varnode if it's alone or if it is intermediary 326 | if (node.isVarnode() && !(inDegree(node) > 0 ^ outDegree(node) > 0)) { 327 | continue; 328 | } 329 | other.addVertex(node); 330 | kept.add(node); 331 | } 332 | 333 | // Rewrite edges 334 | for (var edge : getEdges()) { 335 | var u = edge.getStart(); 336 | var v = edge.getEnd(); 337 | if (u.isVarnode()) { 338 | // Take care of source varnodes 339 | if (kept.contains(u)) { 340 | other.addEdge(edge); 341 | } 342 | } else { 343 | // v is a varnode by construction 344 | var nextEdges = getOutEdges(v); 345 | if (nextEdges.size() == 0) { 346 | // v is a terminator and is a varnode 347 | other.addEdge(edge); 348 | } else { 349 | // Take care of opnodes (we just need the out edges) 350 | for (var nextEdge : nextEdges) { 351 | var nextV = nextEdge.getEnd(); 352 | nextEdge.copyAttrsFrom(v.getAttributes()); 353 | other.addEdge(new Edge(u, nextV, nextEdge)); 354 | } 355 | } 356 | } 357 | } 358 | 359 | // Remove copies 360 | if (removeCopies) { 361 | var copies = new HashSet(); 362 | var newEdges = new ArrayList(); 363 | for (var node : other.getVertices()) { 364 | if (!node.isVarnode() && PcodeOp.COPY == (Integer)node.getAttribute(NodeAttributes.OP)) { 365 | copies.add(node); 366 | } 367 | } 368 | for (var edge : other.getEdges()) { 369 | var u = edge.getStart(); 370 | var v = edge.getEnd(); 371 | if (copies.contains(v)) { 372 | for (var nextEdge : other.getOutEdges(v)) { 373 | newEdges.add(new Edge(u, nextEdge.getEnd(), nextEdge)); 374 | } 375 | } 376 | } 377 | for (var e : newEdges) { 378 | other.addEdge(e); 379 | } 380 | other.removeVertices(copies); 381 | } 382 | 383 | // Remove indirects 384 | if (removeIndirects) { 385 | var indirects = 386 | other 387 | .getVertices() 388 | .stream() 389 | .filter(n -> !n.isVarnode() && PcodeOp.INDIRECT == (Integer)n.getAttribute(NodeAttributes.OP)) 390 | .collect(Collectors.toList()); 391 | other.removeVertices(indirects); 392 | } 393 | 394 | return Optional.of(other); 395 | } 396 | 397 | } 398 | 399 | static class 
DefaultMap<T, R> {
        private java.util.function.Function<T, R> creator;
        private HashMap<T, R> map;

        public DefaultMap(java.util.function.Function<T, R> creator) {
            this.creator = creator;
            map = new HashMap<>();
        }

        public R getOrCreate(T id) {
            var value = map.get(id);
            if (value == null) {
                var newVal = creator.apply(id);
                map.put(id, newVal);
                return newVal;
            }
            return value;
        }
    }

    protected static class ReflexAnalysis {

        private AnalysisListener listener;

        private Consumer<String> log;
        private FlatProgramAPI flatApi;
        private DecompInterface decomp;
        private AddressSpace addressSpace;

        public ReflexAnalysis(Program currentProgram, Consumer<String> log) {
            this(currentProgram, log, new AnalysisListener() {
                @Override
                public void onTablesGuessed(Set<Address> guesses) {
                }
            });
        }

        public ReflexAnalysis(Program currentProgram, Consumer<String> log, AnalysisListener listener) {
            this.listener = listener;
            this.log = log;

            flatApi = new FlatProgramAPI(currentProgram);

            decomp = new DecompInterface();
            decomp.openProgram(currentProgram);
            // decomp.setSimplificationStyle("normalize");

            addressSpace = flatApi.getAddressFactory().getDefaultAddressSpace();
        }

        private boolean isAddressValid(Address addr) {
            var block = flatApi.getMemoryBlock(addr);
            if (block != null) {
                if (block.isInitialized() && block.isLoaded() && block.isRead() && !block.isWrite()) {
                    return true;
                }
            }
            return false;
        }

        private Optional<HighConstant> tryVarnodeToHighConstant(Varnode node) {
            // Skip raw varnodes
            if (!(node instanceof VarnodeAST)) {
                return Optional.empty();
            }
            // Some constant VarnodeAST's are not HighConstant, so use instanceof
            var hiVar = ((VarnodeAST)node).getHigh();
            if (!(hiVar instanceof HighConstant)) {
                return Optional.empty();
            }
            return Optional.of((HighConstant)hiVar);
        }

        private Optional<Address> tryVarnodeToConstAddress(Varnode node) {
            // Try to get a HighConstant
            var maybeK = tryVarnodeToHighConstant(node);
            if (maybeK.isEmpty()) {
                return Optional.empty();
            }
            var k = maybeK.get();
            // Map it to an address
            var addrRaw = k.getScalar().getUnsignedValue();
            if (!addressSpace.isValidRange(addrRaw, k.getSize())) {
                return Optional.empty();
            }
            var addr = addressSpace.getAddress(addrRaw);
            if (isAddressValid(addr)) {
                return Optional.of(addr);
            }
            return Optional.empty();
        }

        private Optional<HighFunction> decompile(Function f) {
            var decompileResults = decomp.decompileFunction(f, DECOMPILATION_TIMEOUT, null);
            if (decompileResults == null || !decompileResults.decompileCompleted()) {
                return Optional.empty();
            }
            return Optional.ofNullable(decompileResults.getHighFunction());
        }

        public Set<Address> findTableGuesses(HighFunction f) {
            var out = new HashSet<Address>
(); 501 | var it = f.getPcodeOps(); 502 | while (it.hasNext()) { 503 | var op = it.next(); 504 | for (var input : op.getInputs()) { 505 | var address = tryVarnodeToConstAddress(input); 506 | if (address.isPresent()) { 507 | out.add(address.get()); 508 | } 509 | } 510 | } 511 | return out; 512 | } 513 | 514 | public void doit(Function yylex) { 515 | // Get the decompiled function 516 | var maybeHi = decompile(yylex); 517 | if (maybeHi.isEmpty()) { 518 | log.accept("Couldn't decompile the given function :/"); 519 | return; 520 | } 521 | var hf = maybeHi.get(); 522 | 523 | // Guess the tables 524 | var tables = findTableGuesses(hf); 525 | if (tables.size() < 7) { 526 | log.accept("Less than 7 table guesses :/"); 527 | return; 528 | } 529 | listener.onTablesGuessed(tables); 530 | 531 | var graph = buildGraph(hf); 532 | try { 533 | graph.exportGraphviz("/tmp/out.dot"); 534 | } catch (FileNotFoundException e) { 535 | log.accept("Couldn't open graphviz output file..."); 536 | } 537 | 538 | var maybeSimple = graph.simplify(true, true); 539 | if (maybeSimple.isPresent()) { 540 | var simple = maybeSimple.get(); 541 | try { 542 | simple.exportGraphviz("/tmp/simple.dot"); 543 | } catch (FileNotFoundException e) { 544 | log.accept("Couldn't open graphviz output file..."); 545 | } 546 | try { 547 | var xml = new XmlWriter(new File("/tmp/out.xml"), null); 548 | xml.startElement("graphml"); 549 | BiConsumer mkAttr = (attr, type) -> { 550 | String name = attr.name(); 551 | xml.startElement( 552 | "key", 553 | new XmlAttributesBuilder() 554 | .addAttribute("id", name) 555 | .addAttribute("attr.name", name.toLowerCase()) 556 | .addAttribute("attr.type", type) 557 | .build() 558 | ); 559 | // xml.writeElement("default", null, ""); 560 | xml.endElement("key"); 561 | }; 562 | for (var attr : NodeAttributes.values()) { 563 | var maybeTy = nodeAttrToXmlType(attr); 564 | if (maybeTy.isPresent()) { 565 | mkAttr.accept(attr, maybeTy.get()); 566 | } 567 | } 568 | xml.startElement( 569 | "graph", 570 | new XmlAttributesBuilder() 571 | .addAttribute("id", "G") 572 | .addAttribute("edgedefault", "directed") 573 | .build() 574 | ); 575 | 576 | // boolean, int, long, float, double, or string 577 | for (var node : simple.getVertices()) { 578 | xml.startElement( 579 | "node", 580 | new XmlAttributesBuilder() 581 | .addAttribute("id", "" + node.getId()) 582 | .addAttribute("labels", ":Ghidra") 583 | .build() 584 | ); 585 | for (var attr : node.attributes.entrySet()) { 586 | var maybeValue = nodeAttrToXmlStr(attr.getKey(), attr.getValue()); 587 | if (maybeValue.isEmpty()) { 588 | continue; 589 | } 590 | xml.writeElement( 591 | "data", 592 | new XmlAttributesBuilder().addAttribute("key", attr.getKey().name()).build(), 593 | maybeValue.get() 594 | ); 595 | } 596 | xml.endElement("node"); 597 | } 598 | for (var edge : simple.getEdges()) { 599 | xml.startElement( 600 | "edge", 601 | new XmlAttributesBuilder() 602 | .addAttribute("source", "" + edge.getStart().getId()) 603 | .addAttribute("target", "" + edge.getEnd().getId()) 604 | .build() 605 | ); 606 | for (var attr : edge.attributes.entrySet()) { 607 | var maybeValue = nodeAttrToXmlStr(attr.getKey(), attr.getValue()); 608 | if (maybeValue.isEmpty()) { 609 | continue; 610 | } 611 | xml.writeElement( 612 | "data", 613 | new XmlAttributesBuilder().addAttribute("key", attr.getKey().name()).build(), 614 | maybeValue.get() 615 | ); 616 | } 617 | xml.endElement("edge"); 618 | } 619 | xml.endElement("graph"); 620 | xml.endElement("graphml"); 621 | xml.close(); 622 | } catch 
(IOException e) { 623 | e.printStackTrace(); 624 | } 625 | } 626 | } 627 | 628 | private void extractInfo(Node node, VarnodeAST ast, boolean isInput) { 629 | var addr = tryVarnodeToConstAddress(ast); 630 | 631 | node.setAttribute(NodeAttributes.IS_VARNODE, true); 632 | node.setAttribute(NodeAttributes.IS_INPUT, isInput); 633 | node.setAttribute(NodeAttributes.AS_STRING, ast.toString()); 634 | node.setAttribute(NodeAttributes.NAME, ast.getAddress().toString(true)); 635 | node.setAttribute(NodeAttributes.SIZE, ast.getSize()); 636 | node.setAttributeFromMaybe(NodeAttributes.ADDRESS, addr); 637 | node.setAttributeFromMaybe(NodeAttributes.PC_ADDRESS, Util.tryOffsetFromAddress(ast.getPCAddress())); 638 | 639 | if (ast.isConstant()) { 640 | node.setAttribute(NodeAttributes.CONST_VALUE, ast.getOffset()); 641 | } 642 | 643 | var fileOffset = Optional.empty(); 644 | if (addr.isPresent()) { 645 | var block = flatApi.getMemoryBlock(addr.get()); 646 | var infos = block.getSourceInfos(); 647 | if (infos != null) { 648 | for (var info : infos) { 649 | if (!info.contains(addr.get())) { 650 | continue; 651 | } 652 | fileOffset = Optional.of(info.getFileBytesOffset(addr.get())); 653 | } 654 | } 655 | } 656 | node.setAttributeFromMaybe(NodeAttributes.FILE_OFFSET, fileOffset); 657 | 658 | // Guess tables 659 | if (isInput) { 660 | node.setAttribute(NodeAttributes.COULD_BE_TABLE, addr.isPresent()); 661 | } 662 | } 663 | 664 | private void extractInfo(Node node, PcodeOpAST ast) { 665 | node.setAttribute(NodeAttributes.IS_OP, true); 666 | node.setAttribute(NodeAttributes.AS_STRING, ast.toString()); 667 | node.setAttribute(NodeAttributes.OP, ast.getOpcode()); 668 | node.setAttribute(NodeAttributes.OP_MNEMONIC, ast.getMnemonic()); 669 | if (ast.getOutput() != null) { 670 | node.setAttribute(NodeAttributes.OP_SIZE, ast.getOutput().getSize()); 671 | } 672 | } 673 | 674 | private Graph buildGraph(final HighFunction hf) { 675 | var graph = new Graph(); 676 | 677 | var varnodeCache = new DefaultMap(id -> { 678 | var node = new Node(id); 679 | graph.addVertex(node); 680 | return node; 681 | }); 682 | // HACK: we are going to use negative id numbers for operations... 683 | int opnodeId = 0; 684 | 685 | var it = hf.getPcodeOps(); 686 | while (it.hasNext()) { 687 | var op = it.next(); 688 | var opcode = op.getOpcode(); 689 | 690 | var opnode = varnodeCache.getOrCreate(--opnodeId); 691 | extractInfo(opnode, op); 692 | 693 | // Iteration is faster than functional API :( 694 | for (int i = 0; i < op.getNumInputs(); ++i) { 695 | // Filter uninteresting nodes 696 | if (i == 0 && (opcode == PcodeOp.STORE || opcode == PcodeOp.LOAD)) { 697 | // STOREs/LOADs have an address space parameter we don't care about 698 | continue; 699 | } else if (i == 1 && opcode == PcodeOp.INDIRECT) { 700 | // INDIRECTs have an opcode int as second input to describe the type of the operation 701 | // they depends upon. Not interesting. 
702 | continue; 703 | } 704 | 705 | // Convert the input to a properVarnode 706 | var rawVarnode = op.getInput(i); 707 | if (rawVarnode == null || !(rawVarnode instanceof VarnodeAST)) { 708 | continue; 709 | } 710 | var varnode = (VarnodeAST)rawVarnode; 711 | var graphVn = varnodeCache.getOrCreate(varnode.getUniqueId()); 712 | extractInfo(graphVn, varnode, true); 713 | 714 | // Add input edge 715 | graph.addEdge(new Edge(graphVn, opnode, i)); 716 | } 717 | 718 | // Take care of the output 719 | var rawVarnode = op.getOutput(); 720 | if (rawVarnode != null && rawVarnode instanceof VarnodeAST) { 721 | var varnode = (VarnodeAST)rawVarnode; 722 | var graphVn = varnodeCache.getOrCreate(varnode.getUniqueId()); 723 | extractInfo(graphVn, varnode, false); 724 | 725 | // Add output edge 726 | graph.addEdge(new Edge(opnode, graphVn, 0)); 727 | } 728 | } 729 | 730 | return graph; 731 | } 732 | } 733 | 734 | public Optional getFunk(String name) { 735 | var funkList = getGlobalFunctions(name); 736 | if (funkList == null || funkList.size() == 0) { 737 | return Optional.empty(); 738 | } 739 | for (var funk : funkList) { 740 | if (!funk.isThunk()) { 741 | return Optional.of(funk); 742 | } 743 | } 744 | return Optional.empty(); 745 | } 746 | 747 | public Optional getYylex() { 748 | return getFunk("yylex"); 749 | } 750 | 751 | @Override 752 | public void run() throws Exception { 753 | var reflex = new ReflexAnalysis(currentProgram, msg -> println(msg)); 754 | 755 | var yylexAddrStr = System.getenv("REFLEX_YYLEX"); 756 | if (yylexAddrStr != null) { 757 | Long newAddr; 758 | if (yylexAddrStr.startsWith("0x")) { 759 | yylexAddrStr = yylexAddrStr.substring(2); 760 | newAddr = Long.parseLong(yylexAddrStr, 16); 761 | } else { 762 | newAddr = Long.parseLong(yylexAddrStr, 10); 763 | } 764 | var flatApi = new FlatProgramAPI(currentProgram); 765 | currentAddress = flatApi.toAddr(newAddr); 766 | } 767 | 768 | var funk = getFunctionContaining(currentAddress); 769 | if (funk == null) { 770 | println("This script must be run inside a function!"); 771 | return; 772 | } 773 | 774 | reflex.doit(funk); 775 | } 776 | 777 | } 778 | -------------------------------------------------------------------------------- /py/reflex.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: reflex 3 | Version: 0.0.1 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /py/reflex.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | reflex/lexfuzz.py 2 | reflex/reflex.py 3 | reflex/simplify.py 4 | reflex.egg-info/PKG-INFO 5 | reflex.egg-info/SOURCES.txt 6 | reflex.egg-info/dependency_links.txt 7 | reflex.egg-info/top_level.txt -------------------------------------------------------------------------------- /py/reflex.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /py/reflex.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | reflex 2 | -------------------------------------------------------------------------------- /py/reflex.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3

import argparse
import os

import reflex.reflex
from reflex.reflex import SizedOffset


def s2i(s):
    if s.startswith('0x'):
        return int(s, 16)
    else:
        return int(s)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('target', type=str, nargs=1)
    parser.add_argument('out_path', metavar="out-path/", type=str, nargs=1)
    parser.add_argument('--accept', metavar=('offset', 'size'), type=s2i, nargs=2, required=True)
    parser.add_argument('--base', metavar=('offset', 'size'), type=s2i, nargs=2, required=True)
    parser.add_argument('--chk', metavar=('offset', 'size'), type=s2i, nargs=2, required=True)
    parser.add_argument('--def', metavar=('offset', 'size'), type=s2i, nargs=2, required=True)
    parser.add_argument('--ec', metavar=('offset', 'size'), type=s2i, nargs=2, required=True)
    parser.add_argument('--meta', metavar=('offset', 'size'), type=s2i, nargs=2, required=True)
    parser.add_argument('--nxt', metavar=('offset', 'size'), type=s2i, nargs=2, required=True)
    parser.add_argument('--max-state', type=s2i, nargs=1, required=True)
    parser.add_argument('--endianness', type=str, choices=('little', 'big'), default='little')
    parser.add_argument('--strip-nulls', dest='strip_nulls', action='store_true', default=True)
    parser.add_argument('--no-strip-nulls', dest='strip_nulls', action='store_false')
    args = parser.parse_args()

    assert args.endianness in {'little', 'big'}

    target = args.target[0]
    target = os.path.abspath(target)

    out_path = args.out_path[0]
    out_path = os.path.abspath(out_path)
    os.makedirs(out_path, exist_ok=True)

    config = reflex.reflex.Config(
        target,
        out_path,
        args.max_state[0],
        args.strip_nulls,
        set(),
        args.endianness,
        SizedOffset(args.accept[0], args.accept[1]),
        SizedOffset(args.base[0],   args.base[1]),
        SizedOffset(args.chk[0],    args.chk[1]),
        SizedOffset(getattr(args, 'def')[0], getattr(args, 'def')[1]),
        SizedOffset(args.ec[0],     args.ec[1]),
        SizedOffset(args.meta[0],   args.meta[1]),
        SizedOffset(args.nxt[0],    args.nxt[1]),
    )

    reflex.reflex.reflex(config)
    # reflex.reflex.reflex(SYMBOLS, SLEIGH, '../data/', MAX_STATE)


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/py/reflex/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thebabush/reflex/998f643033c14830ac1e2284904f962cf336b5bc/py/reflex/__init__.py

--------------------------------------------------------------------------------
/py/reflex/fuzzer.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import collections
import glob
import itertools
import json
import os
import pickle
import random
import shlex
import subprocess

import numpy as np

from .
import re 16 | 17 | 18 | MIN_TOKENS = 5 19 | MAX_TOKENS = 10 20 | MAX_TEST_LEN = 10 21 | 22 | 23 | RS = collections.namedtuple('RS', ('rule', 'state')) 24 | 25 | 26 | def parse_file_name(fn): 27 | _, fn = os.path.split(fn) 28 | fn = fn.split('.')[0] 29 | rule, state = fn.split('_') 30 | return RS(int(rule), int(state)) 31 | 32 | 33 | def solve_regex(re): 34 | if re.is_re_set(): 35 | return random.choice(re.contents), 36 | elif re.is_literal(): 37 | return re.contents 38 | elif re.is_then(): 39 | return itertools.chain(solve_regex(re.contents[0]), solve_regex(re.contents[1])) 40 | elif re.is_or(): 41 | return solve_regex(random.choice(re.contents)) 42 | elif re.is_optional(): 43 | if random.random() >= 0.5: 44 | return solve_regex(random.choice(re.contents)) 45 | else: 46 | return [] 47 | elif re.is_star(): 48 | ret = [] 49 | while random.random() >= 0.3: 50 | ret = itertools.chain(ret, solve_regex(re.contents[0])) 51 | return ret 52 | elif re.is_plus(): 53 | ret = solve_regex(re.contents[0]) 54 | while random.random() >= 0.3: 55 | ret = itertools.chain(ret, solve_regex(re.contents[0])) 56 | return ret 57 | else: 58 | print(re.__class__.__name__) 59 | return [] 60 | 61 | 62 | class Testcase(object): 63 | def __init__(self, tokens, coverage=None): 64 | self.tokens = list(tokens) 65 | self.coverage = coverage 66 | 67 | 68 | def mk_generate(rm): 69 | states = list(rm.keys()) 70 | def gen(): 71 | tokens = [] 72 | for i in range(random.randint(MIN_TOKENS, MAX_TOKENS)): 73 | state = random.choice(states) 74 | re = random.choice(rm[state]) 75 | token = solve_regex(re) 76 | tokens.append(list(token)) 77 | return Testcase(tokens) 78 | return gen 79 | 80 | 81 | def mk_generate_one(rm): 82 | states = list(rm.keys()) 83 | def gen(): 84 | state = random.choice(states) 85 | re = random.choice(rm[state]) 86 | token = solve_regex(re) 87 | return token 88 | return gen 89 | 90 | 91 | def trans1_intersperse_maybe_rand(rm, t): 92 | it = iter(t) 93 | yield next(it) 94 | for x in it: 95 | if random.random() > 0.5: 96 | yield [random.randint(0, 255)] 97 | yield x 98 | 99 | 100 | def trans1_intersperse_maybe_whitespace(rm, t): 101 | k = random.choice(b' \t\n') 102 | it = iter(t) 103 | yield next(it) 104 | for x in it: 105 | if random.random() < 0.2: 106 | yield [k] 107 | yield x 108 | 109 | 110 | # def trans1_intersperse_maybe_const(rm, t): 111 | # k = random.randint(0, 255) 112 | # it = iter(t) 113 | # yield next(it) 114 | # for x in it: 115 | # if random.random() > 0.95: 116 | # yield [k] 117 | # yield x 118 | 119 | 120 | def trans1_replicate_maybe(rm, tt): 121 | for t in tt: 122 | if random.random() > 0.95: 123 | for i in range(random.randint(1, 10)): 124 | yield t 125 | yield t 126 | 127 | 128 | # def trans1_filter_rand(rm, tt): 129 | # for t in tt: 130 | # if random.random() < 0.9: 131 | # yield t 132 | 133 | 134 | # def trans1_strip_pre(rm, tt): 135 | # if len(tt) < 3: 136 | # return tt 137 | # start = random.randint(0, min(len(tt), 3) - 1) 138 | # return tt[start:] 139 | 140 | 141 | # def trans1_strip_post(rm, tt): 142 | # if len(tt) < 3: 143 | # return tt 144 | # end = random.randint(0, min(len(tt), 3) - 1) 145 | # return tt[:-end] 146 | 147 | 148 | def trans1_concat_token(gen, tt): 149 | for t in tt: 150 | yield t 151 | yield gen() 152 | 153 | 154 | def trans1_swap_token_maybe(gen, tt): 155 | for t in tt: 156 | if random.random() < 0.1: 157 | t = gen() 158 | yield t 159 | 160 | 161 | def transform(tfunk, testcase): 162 | return Testcase(tfunk(testcase.tokens)) 163 | 164 | 165 | def trans2_sex(aa, bb, 
prob=0.5): 166 | for a, b in zip(aa, bb): 167 | if random.random() < prob: 168 | yield a 169 | else: 170 | yield b 171 | 172 | 173 | def trans2_simple_sex(aa, bb): 174 | for t in trans2_sex(aa, bb, prob=0.95): 175 | yield t 176 | 177 | 178 | def trans2_concat(aa, bb): 179 | if random.random() > 0.5: 180 | return aa + bb 181 | else: 182 | return bb + aa 183 | 184 | 185 | def trans2_splice(aa, bb): 186 | aa = list(aa) 187 | bb = list(bb) 188 | aa = aa[random.randint(0, len(aa)-1):] 189 | bb = bb[random.randint(0, len(bb)-1):] 190 | return aa + bb 191 | 192 | 193 | def transform2(tfunk, t1, t2): 194 | return Testcase(tfunk(t1.tokens, t2.tokens)) 195 | 196 | 197 | def get_transforms(): 198 | transforms1 = [] 199 | transforms2 = [] 200 | g = globals() 201 | for t in list(g): 202 | if t.startswith('trans1'): 203 | transforms1.append(g[t]) 204 | elif t.startswith('trans2'): 205 | transforms2.append(g[t]) 206 | return transforms1, transforms2 207 | 208 | 209 | def afl_showmap(argv, file_path, coverage_path, test): 210 | CMD = 'afl-showmap -Q -b -o {coverage} -- {argv}' 211 | cmd = CMD.format(argv=argv, coverage=coverage_path) 212 | # print(cmd) 213 | with open(file_path, 'wb') as f: 214 | f.write(test) 215 | 216 | # print(test) 217 | 218 | ret = subprocess.run( 219 | shlex.split(cmd), 220 | stdin=subprocess.DEVNULL, 221 | stdout=subprocess.DEVNULL, 222 | stderr=subprocess.DEVNULL, 223 | ) 224 | 225 | if ret.returncode < 0: 226 | return ret.returncode, None 227 | 228 | cov = None 229 | with open(coverage_path, 'rb') as f: 230 | cov = np.fromfile(coverage_path, dtype=np.uint8) 231 | return ret.returncode, cov 232 | 233 | 234 | class Fuzzer(object): 235 | def __init__(self, out_dir, regmap, argv, file_path, name='bbz'): 236 | self.sync_dir = out_dir 237 | self.my_dir = os.path.join(out_dir, name) 238 | self.my_queue = os.path.join(self.my_dir, 'queue') 239 | self.my_coverage = os.path.join(self.my_dir, 'coverage.bin') 240 | 241 | self.regmap = regmap 242 | self.argv = argv 243 | self.fp = file_path 244 | self.tests = [] 245 | self.global_coverage = np.zeros(65536, dtype=np.uint8) 246 | 247 | self.init_path() 248 | self.transforms = get_transforms() 249 | 250 | def init_path(self): 251 | os.makedirs(self.sync_dir, exist_ok=True) 252 | os.makedirs(self.my_dir, exist_ok=True) 253 | os.makedirs(self.my_queue, exist_ok=True) 254 | 255 | def fuzz(self): 256 | generate = mk_generate(self.regmap) 257 | self.generate_one = mk_generate_one(self.regmap) 258 | while True: 259 | print('Creating a new test and fuzzing it...') 260 | test = generate() 261 | self.fuzz_one(test, times=max(1, 100 - len(self.tests) // 2)) 262 | 263 | print('Iterating on old tests...') 264 | for i in range(100): 265 | test = random.choice(self.tests) 266 | print('Old {}...'.format(i)) 267 | self.fuzz_one(test, times=10) 268 | 269 | def fuzz_one(self, test, times): 270 | for i in range(times): 271 | if i % 10 == 0: 272 | print(i) 273 | 274 | # TODO: do we really need a copy? 
275 | new_test = test.tokens[:] 276 | for j in range(1 << random.randint(1, 4)): 277 | transform = random.choice(self.transforms[0]) 278 | new_test = list(transform(self.generate_one, new_test)) 279 | 280 | if len(new_test) > MAX_TEST_LEN: 281 | new_test = new_test[:MAX_TEST_LEN] 282 | 283 | # Skip empty testcases 284 | if not new_test: 285 | continue 286 | 287 | new_test = Testcase(new_test) 288 | buff = b''.join(bytes(t) for t in new_test.tokens) 289 | 290 | # Get the coverage 291 | exitcode, cov = afl_showmap(self.argv, self.fp, self.my_coverage, buff) 292 | if exitcode < 0: 293 | print('CRASH!!!') 294 | exit() 295 | cov = (cov > 0).astype(np.uint8) 296 | new_test.coverage = cov 297 | 298 | new_global_coverage = cov | self.global_coverage 299 | if np.any(new_global_coverage != self.global_coverage): 300 | print('Increased coverage :D') 301 | print(buff) 302 | self.global_coverage = new_global_coverage 303 | self.tests.append(new_test) 304 | 305 | SPLICE_ROUNDS = len(self.tests) 306 | for i in range(SPLICE_ROUNDS): 307 | other = random.choice(self.tests) 308 | new_test = random.choice(self.transforms[1])(test.tokens[:], other.tokens[:]) 309 | 310 | new_test = Testcase(new_test) 311 | buff = b''.join(bytes(t) for t in new_test.tokens) 312 | 313 | # Get the coverage 314 | exitcode, cov = afl_showmap(self.argv, self.fp, self.my_coverage, buff) 315 | if exitcode < 0: 316 | print('CRASH!!!') 317 | exit() 318 | cov = (cov > 0).astype(np.uint8) 319 | new_test.coverage = cov 320 | 321 | new_global_coverage = cov | self.global_coverage 322 | if np.any(new_global_coverage != self.global_coverage): 323 | print('Splice increased coverage :D') 324 | print(buff) 325 | self.global_coverage = new_global_coverage 326 | self.tests.append(new_test) 327 | 328 | 329 | def main(out_dir, file_path, argv, regexps_path): 330 | s = random.randint(0, 100000) 331 | random.seed(s) 332 | # random.seed(54136) 333 | print('SEED:', s) 334 | 335 | regmap = collections.defaultdict(list) 336 | for fn in sorted(glob.glob(os.path.join(regexps_path, '*.regexp')), key=lambda x: parse_file_name(x)): 337 | with open(fn, 'r') as f: 338 | rs = parse_file_name(fn) 339 | regexp = re.load_regexp(f.read()) 340 | regmap[rs.state].append(regexp) 341 | 342 | fuzzer = Fuzzer(out_dir, regmap, argv, file_path) 343 | fuzzer.fuzz() 344 | 345 | -------------------------------------------------------------------------------- /py/reflex/re.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | class Regexp(object): 5 | def __init__(self, contents=None): 6 | self.contents = contents 7 | 8 | @classmethod 9 | def from_json(clazz, j): 10 | if not isinstance(j, dict): 11 | return j 12 | tag = j['tag'] 13 | if tag == 'Then': 14 | r = Then() 15 | elif tag == 'Literal': 16 | r = Literal() 17 | elif tag == 'RESet': 18 | r = RESet() 19 | elif tag == 'Star': 20 | r = Star() 21 | elif tag == 'OneOrMore': 22 | r = Plus() 23 | elif tag == 'Or': 24 | r = Or() 25 | elif tag == 'Optional': 26 | r = Optional() 27 | else: 28 | raise Exception(tag) 29 | r.from_contents(j['contents']) 30 | return r 31 | 32 | def from_contents(self, c): 33 | if not isinstance(c, list): 34 | c = [c] 35 | self.contents = [Regexp.from_json(ci) for ci in c] 36 | 37 | def pp(self, indent=0): 38 | ret = [] 39 | if self.is_re_set(): 40 | ret.append(' ' * (indent) + repr(''.join(chr(x) for x in self.contents))) 41 | else: 42 | ret.append((' ' * indent) + self.__class__.__name__) 43 | for c in self.contents: 44 | if isinstance(c, int): 
45 | ret.append((' ' * (indent + 1)) + repr(chr(c))) 46 | elif c.is_re_set(): 47 | ret.append(' ' * (indent + 1) + repr(''.join(chr(x) for x in c.contents))) 48 | else: 49 | ret.append((' ' * (indent + 1)) + c.pp(indent + 1)) 50 | return '\n'.join(ret) 51 | 52 | def __str__(self): 53 | return self.pp() 54 | 55 | def is_empty(self): 56 | return False 57 | 58 | def is_literal(self): 59 | return False 60 | 61 | def is_optional(self): 62 | return False 63 | 64 | def is_or(self): 65 | return False 66 | 67 | def is_then(self): 68 | return False 69 | 70 | def is_re_set(self): 71 | return False 72 | 73 | def is_star(self): 74 | return False 75 | 76 | def is_plus(self): 77 | return False 78 | 79 | def is_leaf(self): 80 | return self.is_re_set() or self.is_literal() 81 | 82 | 83 | class Empty(Regexp): 84 | def __init__(self): 85 | pass 86 | 87 | def is_empty(self): 88 | return True 89 | 90 | 91 | class Optional(Regexp): 92 | def is_optional(self): 93 | return True 94 | 95 | 96 | class Or(Regexp): 97 | def is_or(self): 98 | return True 99 | 100 | 101 | class Plus(Regexp): 102 | def is_plus(self): 103 | return True 104 | 105 | 106 | class Then(Regexp): 107 | def is_then(self): 108 | return True 109 | 110 | 111 | class Literal(Regexp): 112 | def is_literal(self): 113 | return True 114 | 115 | 116 | class RESet(Regexp): 117 | def is_re_set(self): 118 | return True 119 | 120 | 121 | class Star(Regexp): 122 | def is_star(self): 123 | return True 124 | 125 | 126 | def load_regexp(s): 127 | j = json.loads(s) 128 | o = Regexp.from_json(j) 129 | return o 130 | 131 | -------------------------------------------------------------------------------- /py/reflex/reflex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import itertools 4 | import json 5 | import os 6 | import pickle 7 | import struct 8 | import types 9 | 10 | from collections import defaultdict 11 | from collections import namedtuple 12 | 13 | import cxxfilt 14 | import lief 15 | import networkx as nx 16 | 17 | 18 | def analyze_exits(G): 19 | # map an accepting state into its exit values 20 | exits = {} 21 | for u, u_data in G.nodes(data=True): 22 | if not u_data['accepts']: 23 | continue 24 | 25 | for _, v, data in G.edges(u, data=True): 26 | # ee = set(chr(i) for i in range(0, 256)).difference(data['alphabet']) 27 | ee = set(chr(i) for i in range(1, 256)).difference(data['alphabet']) 28 | # assert ee 29 | exits[u] = ''.join(sorted(ee)) 30 | return exits 31 | 32 | 33 | SIZE_TO_FORMAT = { 34 | 1: 'B', 35 | 2: 'H', 36 | 4: 'I', 37 | } 38 | 39 | 40 | SizedOffset = namedtuple( 41 | 'SizedOffset', 42 | [ 43 | 'offset', 44 | 'size', 45 | ] 46 | ) 47 | 48 | 49 | Config = namedtuple( 50 | 'Config', 51 | [ 52 | 'target', 53 | 'out_path', 54 | 'max_state', 55 | 'strip_nulls', 56 | 'states_to_strip', 57 | 'endianness', 58 | 'yy_accept', 59 | 'yy_base', 60 | 'yy_chk', 61 | 'yy_def', 62 | 'yy_ec', 63 | 'yy_meta', 64 | 'yy_nxt', 65 | ] 66 | ) 67 | 68 | 69 | class Target(object): 70 | 71 | def __init__(self, config): 72 | self._bin = open(config.target, 'rb') 73 | self._cfg = config 74 | self._fmt = '<' if config.endianness == 'little' else '>' 75 | 76 | def _read(self, w, i): 77 | size = w.size 78 | offset = w.offset + i * size 79 | fmt = self._fmt + SIZE_TO_FORMAT[w.size] 80 | self._bin.seek(offset) 81 | data = self._bin.read(size) 82 | return struct.unpack(fmt, data)[0] 83 | 84 | def yy_accept(self, i): 85 | return self._read(self._cfg.yy_accept, i) 86 | 87 | def yy_base(self, i): 88 | 
return self._read(self._cfg.yy_base, i) 89 | 90 | def yy_chk(self, i): 91 | return self._read(self._cfg.yy_chk, i) 92 | 93 | def yy_def(self, i): 94 | return self._read(self._cfg.yy_def, i) 95 | 96 | def yy_ec(self, i): 97 | return self._read(self._cfg.yy_ec, i) 98 | 99 | def yy_meta(self, i): 100 | return self._read(self._cfg.yy_meta, i) 101 | 102 | def yy_nxt(self, i): 103 | return self._read(self._cfg.yy_nxt, i) 104 | 105 | 106 | def reflex(config): 107 | flex = Target(config) 108 | max_state = config.max_state 109 | strip_states = config.states_to_strip 110 | strip_nulls = config.strip_nulls 111 | out_path = config.out_path 112 | 113 | 114 | class2chars = defaultdict(set) 115 | # for i in range(0, 256): 116 | for i in range(1, 256): 117 | class2chars[flex.yy_ec(i)].add(chr(i)) 118 | 119 | 120 | def class2string(c): 121 | return ''.join(map(chr,sorted(class2chars[c]))) 122 | 123 | 124 | G = nx.DiGraph() 125 | for state in range(max_state): 126 | G.add_node(state, label='|' + str(state) + '|', accepts=flex.yy_accept(state)) 127 | 128 | 129 | def follow(H, state, clazz): 130 | s = state 131 | c = clazz 132 | while flex.yy_chk(flex.yy_base(state) + clazz) != state: 133 | # print(state) 134 | state = flex.yy_def(state) 135 | if state >= max_state: 136 | clazz = flex.yy_meta(clazz) 137 | next_state = flex.yy_nxt(flex.yy_base(state) + clazz) 138 | 139 | if strip_states and next_state in strip_states: 140 | return 141 | 142 | # if (state, next_state) in H.edges: 143 | if (s, next_state) in H.edges: 144 | # H.edges[(state, next_state)]['alphabet'] = H.edges[(state, next_state)]['alphabet'].union(class2chars[c]) 145 | H.edges[(s, next_state)]['alphabet'] = H.edges[(s, next_state)]['alphabet'].union(class2chars[c]) 146 | else: 147 | # H.add_edge(state, next_state, alphabet=class2chars[c], accepts=flex.yy_accept[next_state]) 148 | 149 | if strip_nulls and c == 0: 150 | return 151 | H.add_edge(s, next_state, alphabet=class2chars[c]) 152 | 153 | 154 | for curr_state in range(1, max_state): 155 | print(curr_state) 156 | if flex.yy_accept(curr_state): 157 | G.nodes[curr_state]['shape'] = 'doublecircle' 158 | 159 | for clazz in class2chars: 160 | follow(G, curr_state, clazz) 161 | 162 | # if (curr_state, flex.yy_def[curr_state]) not in G.edges: 163 | # G.add_edge(curr_state, flex.yy_def[curr_state], alphabet=set(), accepts=flex.yy_accept[curr_state]) 164 | 165 | 166 | for u, v, data in G.edges(data=True): 167 | label = repr(''.join(sorted(data['alphabet']))) if data['alphabet'] else '•' 168 | label = label.replace('\\', '\\\\') 169 | 170 | if len(data['alphabet']) >= 255: 171 | label = 'all' 172 | elif len(data['alphabet']) > 50: 173 | label = 'long' 174 | 175 | data['label'] = label 176 | 177 | # Remove nodes without label (AKA added by transitions and not by us) 178 | # TODO(babush): properly tag them instead of relying on the label 179 | to_remove = set() 180 | for n, data in G.nodes(data=True): 181 | if 'label' not in data: 182 | to_remove.add(n) 183 | continue 184 | 185 | label = data['label'] 186 | 187 | if data['accepts']: 188 | label += '/{}'.format(data['accepts']) 189 | 190 | data['label'] = label 191 | 192 | for n in to_remove: 193 | G.remove_node(n) 194 | 195 | 196 | import networkx.drawing.nx_agraph 197 | networkx.drawing.nx_agraph.write_dot(G, os.path.join(out_path, 'out.dot')) 198 | networkx.readwrite.gpickle.write_gpickle(G, os.path.join(out_path, 'G.gpickle')) 199 | 200 | sub1 = nx.descendants(G, 1) 201 | sub1.add(1) 202 | sub1 = G.subgraph(sub1) 203 | 
networkx.drawing.nx_agraph.write_dot(sub1, os.path.join(out_path, '1.dot')) 204 | 205 | # Export DFA map 206 | dfa_transitions = {} 207 | for u, v, e_data in G.edges(data=True): 208 | v_data = G.nodes[v] 209 | for ch in e_data['alphabet']: 210 | dfa_transitions[(u, ch)] = (v, v_data['accepts']) 211 | pickle.dump(dfa_transitions, open(os.path.join(out_path, 'dfa_transitions.pickle'), 'wb')) 212 | 213 | # Dump exits 214 | pickle.dump(analyze_exits(G), open(os.path.join(out_path, 'exits.pickle'), 'wb')) 215 | 216 | -------------------------------------------------------------------------------- /py/reflex/simplify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import functools 4 | import json 5 | import os 6 | 7 | import networkx as nx 8 | import networkx.drawing.nx_agraph 9 | 10 | 11 | def write_dfa(fp, g, start): 12 | if len(g.edges) == 0: 13 | print("NO EDGES") 14 | return 15 | 16 | with open(fp, 'w') as f: 17 | alphabet = set() 18 | for _, _, aa in g.edges(data=True): 19 | alphabet.update(aa['alphabet']) 20 | 21 | f.write('{}\n'.format(start)) 22 | f.write('{}\n'.format(len(g.nodes))) 23 | for n, data in g.nodes(data=True): 24 | f.write('{} {}\n'.format(n, 1 if data['accepts'] >= 1 else 0)) 25 | 26 | edges = [] 27 | for u, v, data in g.edges(data=True): 28 | edges.append('{} {} {}'.format( 29 | u, 30 | v, 31 | ' '.join( 32 | str(ord(c)) for c in data['alphabet'] 33 | ) 34 | )) 35 | 36 | # print(alphabet) 37 | f.write('{}\n'.format(len(edges))) 38 | f.write('\n'.join(edges) + '\n') 39 | 40 | with open(fp + '.nfa', 'w') as f: 41 | f.write('{}\n'.format(start)) 42 | f.write('{}\n'.format(len(g.nodes))) 43 | for n, data in g.nodes(data=True): 44 | f.write('{} {}\n'.format(n, 1 if data['accepts'] >= 1 else 0)) 45 | f.write('{}\n'.format(len(g.edges))) 46 | for u, v, data in g.edges(data=True): 47 | f.write('{} {} {}\n'.format( 48 | u, 49 | v, 50 | ' '.join( 51 | str(ord(c)) for c in data['alphabet'] 52 | ) 53 | )) 54 | 55 | 56 | def simplify(g_pickle_path, out_path): 57 | G = nx.readwrite.gpickle.read_gpickle(g_pickle_path) 58 | R = G.reverse(copy=True) 59 | 60 | max_accepts = functools.reduce(lambda x, y: max(x, y[1]['accepts']), G.nodes(data=True), 0) + 1 61 | print('max_accepts:', max_accepts) 62 | 63 | for out in range(1, max_accepts): 64 | out_nodes = set() 65 | r = R.copy() 66 | for n, data in r.nodes(data=True): 67 | if data['accepts'] == out: 68 | out_nodes.add(n) 69 | 70 | subnodes = set(out_nodes) 71 | # print(subnodes) 72 | for n in out_nodes: 73 | r2 = nx.descendants(r, n) 74 | subnodes.update(r2) 75 | 76 | r2 = G.subgraph(subnodes).copy() 77 | 78 | # print(r2.copy()) 79 | for n, data in r2.nodes(data=True): 80 | accepts = data['accepts'] 81 | if accepts > 0 and accepts != out: 82 | data['accepts'] = 0 83 | data['label'] += '/None' 84 | del data['shape'] 85 | 86 | # Write the simplified graph as dot-file 87 | networkx.drawing.nx_agraph.write_dot(r2, os.path.join(out_path, '{}.dot'.format(out))) 88 | 89 | # Dump the DFA for the Haskell part 90 | sources = list(n for n, d in r2.in_degree() if d == 0) 91 | # assert sources 92 | for start in sources: 93 | s = nx.descendants(r2, start) 94 | s.add(start) 95 | s = r2.subgraph(s).copy() 96 | write_dfa(os.path.join(out_path, '{}_{}.dfa'.format(out, start)), s, start) 97 | 98 | -------------------------------------------------------------------------------- /py/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools 
import setup 2 | 3 | setup(name='reflex', 4 | version='0.0.1', 5 | packages=['reflex'], 6 | install_requires=[], #TODO 7 | ) 8 | -------------------------------------------------------------------------------- /py/simplify.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | 6 | import reflex.simplify 7 | 8 | 9 | def main(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('graph', metavar='graph.gpickle', type=str, nargs=1) 12 | parser.add_argument('out_path', metavar="out-path", type=str, nargs=1) 13 | args = parser.parse_args() 14 | 15 | graph = args.graph[0] 16 | graph = os.path.abspath(graph) 17 | 18 | out_path = args.out_path[0] 19 | out_path = os.path.abspath(out_path) 20 | os.makedirs(out_path, exist_ok=True) 21 | 22 | reflex.simplify.simplify(graph, out_path) 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | 28 | -------------------------------------------------------------------------------- /py/sleigh_fuzz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import reflex.fuzzer 4 | 5 | 6 | reflex.fuzzer.main('../ram/fuzz/', '../ram/test.slaspec', '../stroz/sleigh ../ram/test', '../subs/') 7 | 8 | -------------------------------------------------------------------------------- /py/tests/test_fuzz.py: -------------------------------------------------------------------------------- 1 | from reflex import fuzzer as F 2 | 3 | 4 | def test_partial_regexp_eval(): 5 | re_set = F.RESet([ord(c) for c in 'abc']) 6 | lit_a = F.Literal([ord('a')]) 7 | lit_b = F.Literal([ord('b')]) 8 | lit_c = F.Literal([ord('c')]) 9 | 10 | r = F.partial_regexp_eval(re_set, lit_a) 11 | # print(r[1].pp()) 12 | assert r == (True, None) 13 | 14 | 15 | -------------------------------------------------------------------------------- /rust-mutator/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "autocfg" 5 | version = "0.1.7" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | 8 | [[package]] 9 | name = "bincode" 10 | version = "1.2.0" 11 | source = "registry+https://github.com/rust-lang/crates.io-index" 12 | dependencies = [ 13 | "autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", 14 | "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", 15 | "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", 16 | ] 17 | 18 | [[package]] 19 | name = "byteorder" 20 | version = "1.3.2" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | 23 | [[package]] 24 | name = "c2-chacha" 25 | version = "0.2.3" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | dependencies = [ 28 | "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", 29 | ] 30 | 31 | [[package]] 32 | name = "cfg-if" 33 | version = "0.1.10" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | 36 | [[package]] 37 | name = "getrandom" 38 | version = "0.1.13" 39 | source = "registry+https://github.com/rust-lang/crates.io-index" 40 | dependencies = [ 41 | "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", 42 | "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", 43 | "wasi 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", 44 | ] 45 | 46 | [[package]] 47 | name = "itoa" 48 | version = "0.4.4" 49 | source = "registry+https://github.com/rust-lang/crates.io-index" 50 | 51 | [[package]] 52 | name = "libc" 53 | version = "0.2.65" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | 56 | [[package]] 57 | name = "ppv-lite86" 58 | version = "0.2.6" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | 61 | [[package]] 62 | name = "proc-macro2" 63 | version = "1.0.6" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | dependencies = [ 66 | "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 67 | ] 68 | 69 | [[package]] 70 | name = "quote" 71 | version = "1.0.2" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | dependencies = [ 74 | "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 75 | ] 76 | 77 | [[package]] 78 | name = "rand" 79 | version = "0.7.2" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | dependencies = [ 82 | "getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", 83 | "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", 84 | "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 85 | "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 86 | "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 87 | ] 88 | 89 | [[package]] 90 | name = "rand_chacha" 91 | version = "0.2.1" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | dependencies = [ 94 | "c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", 95 | "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 96 | ] 97 | 98 | [[package]] 99 | name = "rand_core" 100 | version = "0.5.1" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | dependencies = [ 103 | "getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", 104 | ] 105 | 106 | [[package]] 107 | name = "rand_hc" 108 | 
version = "0.2.0" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | dependencies = [ 111 | "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 112 | ] 113 | 114 | [[package]] 115 | name = "rust-mutator" 116 | version = "0.1.0" 117 | dependencies = [ 118 | "bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 119 | "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", 120 | "rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)", 121 | "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", 122 | "serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", 123 | "serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", 124 | ] 125 | 126 | [[package]] 127 | name = "ryu" 128 | version = "1.0.2" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | 131 | [[package]] 132 | name = "serde" 133 | version = "1.0.103" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | 136 | [[package]] 137 | name = "serde_derive" 138 | version = "1.0.103" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | dependencies = [ 141 | "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 142 | "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 143 | "syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)", 144 | ] 145 | 146 | [[package]] 147 | name = "serde_json" 148 | version = "1.0.42" 149 | source = "registry+https://github.com/rust-lang/crates.io-index" 150 | dependencies = [ 151 | "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", 152 | "ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 153 | "serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)", 154 | ] 155 | 156 | [[package]] 157 | name = "syn" 158 | version = "1.0.8" 159 | source = "registry+https://github.com/rust-lang/crates.io-index" 160 | dependencies = [ 161 | "proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 162 | "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 163 | "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 164 | ] 165 | 166 | [[package]] 167 | name = "unicode-xid" 168 | version = "0.2.0" 169 | source = "registry+https://github.com/rust-lang/crates.io-index" 170 | 171 | [[package]] 172 | name = "wasi" 173 | version = "0.7.0" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | 176 | [metadata] 177 | "checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" 178 | "checksum bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8ab639324e3ee8774d296864fbc0dbbb256cf1a41c490b94cba90c082915f92" 179 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" 180 | "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" 181 | "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 182 | "checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407" 
183 | "checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" 184 | "checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8" 185 | "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" 186 | "checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27" 187 | "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" 188 | "checksum rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412" 189 | "checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" 190 | "checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 191 | "checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 192 | "checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" 193 | "checksum serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "1217f97ab8e8904b57dd22eb61cde455fa7446a9c1cf43966066da047c1f3702" 194 | "checksum serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0" 195 | "checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043" 196 | "checksum syn 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "661641ea2aa15845cddeb97dad000d22070bb5c1fb456b96c1cba883ec691e92" 197 | "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 198 | "checksum wasi 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" 199 | -------------------------------------------------------------------------------- /rust-mutator/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-mutator" 3 | version = "0.1.0" 4 | authors = ["thebabush "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | bincode = "1.2.0" 11 | serde = "*" 12 | serde_derive = "*" 13 | rand = "*" 14 | serde_json = "1.0" 15 | libc = "*" 16 | 17 | [lib] 18 | name = "mutator" 19 | crate-type = ["rlib", "dylib"] 20 | 21 | [[bin]] 22 | name = "dump" 23 | path = "src/main.rs" 24 | -------------------------------------------------------------------------------- /rust-mutator/make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export RUST_BACKTRACE=1 4 | 5 | cargo build && \ 6 | RUST_BACKTRACE=1 ./test.py 7 | 8 | 
-------------------------------------------------------------------------------- /rust-mutator/src/common.rs: -------------------------------------------------------------------------------- 1 | 2 | pub trait Node { 3 | fn pp(&self, s: &mut String) -> (); 4 | } 5 | 6 | -------------------------------------------------------------------------------- /rust-mutator/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod gen; 2 | mod common; 3 | 4 | use bincode; 5 | use libc::{c_uint, size_t}; 6 | use rand::distributions::{Standard}; 7 | use rand::Rng; 8 | use std::cell::UnsafeCell; 9 | 10 | use gen::{ActualRoot}; 11 | use common::{Node}; 12 | 13 | pub type Root = ActualRoot; 14 | 15 | pub fn pretty(lol: &dyn Node) -> String { 16 | let mut s = String::new(); 17 | lol.pp(&mut s); 18 | s 19 | } 20 | 21 | enum Mutation { 22 | Insert(usize), 23 | Remove(usize), 24 | Replace(usize), 25 | Append(), 26 | } 27 | 28 | // libprotobuf-mutator 29 | struct WeightedReservoirSampler { 30 | total_weight: u64, 31 | selected: Mutation, 32 | } 33 | 34 | const DEFAULT_MUTATE_WEIGHT: u64 = 1000000u64; 35 | 36 | impl WeightedReservoirSampler { 37 | pub const fn new() -> WeightedReservoirSampler { 38 | WeightedReservoirSampler { 39 | total_weight: 0, 40 | selected: Mutation::Append(), 41 | } 42 | } 43 | 44 | // TODO(babush): factor this out of the sampler 45 | fn mutate(&mut self, tokens: &mut Root) -> () { 46 | 47 | for token_ith in 0..tokens.children.len() { 48 | for mutation in vec!( 49 | Mutation::Insert(token_ith), 50 | Mutation::Remove(token_ith), 51 | Mutation::Replace(token_ith), 52 | ) { 53 | self.try_select(mutation, DEFAULT_MUTATE_WEIGHT); 54 | } 55 | } 56 | 57 | self.try_select( 58 | Mutation::Append(), 59 | DEFAULT_MUTATE_WEIGHT, 60 | ); 61 | 62 | match self.selected { 63 | Mutation::Insert(ith) => { 64 | tokens.children.insert( 65 | ith, 66 | rand::thread_rng().sample(Standard), 67 | ); 68 | }, 69 | Mutation::Remove(ith) => { 70 | tokens.children.remove(ith); 71 | }, 72 | Mutation::Replace(ith) => { 73 | tokens.children[ith] = rand::thread_rng().sample(Standard); 74 | }, 75 | Mutation::Append() => { 76 | tokens.children.push(rand::thread_rng().sample(Standard)); 77 | }, 78 | } 79 | 80 | } 81 | 82 | fn pick(&mut self, weight: u64) -> bool { 83 | if weight == 0 { 84 | return false; 85 | } 86 | self.total_weight += weight; 87 | return weight == self.total_weight || ( 88 | rand::thread_rng().gen_range(1, self.total_weight) <= weight 89 | ) 90 | } 91 | 92 | fn try_select(&mut self, mutation: Mutation, weight: u64) -> () { 93 | if self.pick(weight) { 94 | self.selected = mutation; 95 | } 96 | } 97 | } 98 | 99 | #[no_mangle] 100 | pub extern fn afl_custom_mutator(data: *const u8, 101 | size: size_t, mutated_out: *mut u8, max_size: size_t, _seed: c_uint) -> size_t { 102 | unsafe { 103 | let safe_data = std::slice::from_raw_parts(data, size); 104 | let result: Result<Root, bincode::Error> = bincode::deserialize_from(safe_data); 105 | 106 | let mut c = bincode::config(); 107 | c.limit(max_size as u64); 108 | 109 | match result { 110 | Ok(mut r) => { 111 | //println!("Original: {:?}", &r); 112 | 113 | let mut sampler = WeightedReservoirSampler::new(); 114 | sampler.mutate(&mut r); 115 | 116 | //println!("Mutated: {:?}", &r); 117 | 118 | let out_slice: &mut [u8] = std::slice::from_raw_parts_mut(mutated_out, max_size); 119 | let res = c.serialize_into(out_slice, &r); 120 | 121 | //println!("PP: {}", pretty(&r)); 122 | 123 | match res { 124 | Ok(_) => c.serialized_size(&r).unwrap() as usize,
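// bincode's serialized_size() recomputes the encoded length of `r`, i.e.
// exactly how many bytes serialize_into() just wrote into mutated_out;
// that byte count is what gets reported back to AFL.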
125 | Err(_) => 0, 126 | } 127 | }, 128 | Err(_) => { 129 | //dbg!("Error deserializing: {}", s); 130 | let r: Root = rand::thread_rng().sample(Standard); 131 | let out_slice: &mut [u8] = std::slice::from_raw_parts_mut(mutated_out, max_size); 132 | let res = c.serialize_into(out_slice, &r); 133 | 134 | //println!("{:?}", c.serialize(&r).unwrap()); 135 | 136 | match res { 137 | Ok(_) => c.serialized_size(&r).unwrap() as usize, 138 | Err(_) => 0, 139 | } 140 | }, 141 | } 142 | } 143 | } 144 | 145 | // 1MB limit 146 | const MAX_SIZE: usize = 1024 * 1024; 147 | 148 | /* 149 | * AFL-pp wants a buffer in *new_data and that buffer must be allocated by us. 150 | * In addition to that, the buffer is never free'd by AFL. 151 | * So, we need a global string of some sort because we don't want to waste 152 | * time allocating memory at every execution. 153 | */ 154 | std::thread_local! { 155 | static AFL_OUTPUT: UnsafeCell<String> = UnsafeCell::new(String::with_capacity(MAX_SIZE)); 156 | } 157 | 158 | #[no_mangle] 159 | pub extern fn afl_pre_save_handler(data: *const u8, size: size_t, new_data: *mut *mut u8) -> size_t { 160 | AFL_OUTPUT.with(|out_cell| { 161 | unsafe { 162 | let mut out = &mut *out_cell.get(); 163 | out.clear(); 164 | 165 | let safe_data = std::slice::from_raw_parts(data, size); 166 | let result: Result<Root, bincode::Error> = bincode::deserialize_from(safe_data); 167 | 168 | match result { 169 | Ok(r) => { 170 | r.pp(&mut out); 171 | *new_data = (&mut out[..]).as_mut_ptr(); 172 | out.len() 173 | }, 174 | Err(s) => { 175 | dbg!("Error deserializing: {}", s); 176 | 0 177 | } 178 | } 179 | } 180 | }) 181 | } 182 | 183 | fn get_splicing_points(start: usize, end: usize) -> (usize, usize) { 184 | let mut a: usize = rand::thread_rng().gen_range(start, end); 185 | 186 | let mut b: usize; 187 | b = rand::thread_rng().gen_range(start, end); 188 | // Order the pair so that a <= b: callers use it as the bounds of a slice. 189 | if a > b { 190 | std::mem::swap(&mut a, &mut b); 191 | } 192 | 193 | 194 | (a, b) 195 | } 196 | 197 | std::thread_local! { 198 | static AFL_SPLICE_OUTPUT: UnsafeCell<Vec<u8>> = UnsafeCell::new(Vec::with_capacity(MAX_SIZE)); 199 | } 200 | 201 | /* 202 | * TODO: port to AFLpp 203 | */ 204 | #[no_mangle] 205 | pub extern fn afl_custom_splicer(data1: *const u8, size1: size_t, 206 | data2: *const u8, size2: size_t, 207 | new_data: *mut *mut u8) -> size_t { 208 | /* 209 | * See afl_pre_save_handler for how memory management works.
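 * Sketch of the splice performed below, with cut points (a, b) taken in
 * input 1 and (c, d) in input 2 (both pairs ordered by get_splicing_points):
 *   new_root = r1[0..a] ++ r2[c..d] ++ r1[b..]
 * Every element is deep-cloned, hence the "**VERY** slow" comment below.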
210 | */ 211 | AFL_SPLICE_OUTPUT.with(|splice_cell| { 212 | unsafe { 213 | let mut splice = &mut *splice_cell.get(); 214 | splice.clear(); 215 | let safe_data1 = std::slice::from_raw_parts(data1, size1); 216 | let result1: Result<Root, bincode::Error> = bincode::deserialize_from(safe_data1); 217 | let safe_data2 = std::slice::from_raw_parts(data2, size2); 218 | let result2: Result<Root, bincode::Error> = bincode::deserialize_from(safe_data2); 219 | 220 | match result1 { 221 | Ok(r1) => { 222 | match result2 { 223 | Ok(r2) => { 224 | let mut new_root = Root { children: Vec::new() }; 225 | 226 | // Splice the two deserialized roots: keep r1[0..a], 227 | // paste in r2[c..d], then resume with r1[b..]. 228 | 229 | let a: usize; 230 | let b: usize; 231 | let c: usize; 232 | let d: usize; 233 | 234 | if r1.children.len() == 0 { 235 | a = 0; 236 | b = 0; 237 | } else { 238 | let (q, r) = get_splicing_points(0, r1.children.len()); 239 | a = q; 240 | b = r; 241 | } 242 | 243 | if r2.children.len() == 0 { 244 | c = 0; 245 | d = 0; 246 | } else { 247 | let (q, r) = get_splicing_points(0, r2.children.len()); 248 | c = q; 249 | d = r; 250 | } 251 | 252 | // This is **VERY** slow 253 | for i in 0..a { 254 | new_root.children.push(r1.children[i].clone()); 255 | } 256 | for i in c..d { 257 | new_root.children.push(r2.children[i].clone()); 258 | } 259 | for i in b..r1.children.len() { 260 | new_root.children.push(r1.children[i].clone()); 261 | } 262 | 263 | //let out_slice: &mut [u8] = std::slice::from_raw_parts_mut(splice, MAX_SIZE); 264 | let mut c = bincode::config(); 265 | c.limit(MAX_SIZE as u64); 266 | 267 | let res = c.serialize_into(&mut splice, &new_root); 268 | 269 | //println!("PP: {}", pretty(&new_root)); 270 | 271 | match res { 272 | Ok(_) => { 273 | *new_data = (&mut splice[..]).as_mut_ptr(); 274 | c.serialized_size(&new_root).unwrap() as usize 275 | }, 276 | Err(_) => 0, 277 | } 278 | }, 279 | Err(_) => { 280 | dbg!("ERROR SPLICING"); 281 | 0 282 | } 283 | } 284 | }, 285 | Err(_) => { 286 | dbg!("ERROR SPLICING"); 287 | 0 288 | } 289 | } 290 | } 291 | }) 292 | } 293 | 294 | -------------------------------------------------------------------------------- /rust-mutator/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | 3 | use mutator::{Root, pretty}; 4 | 5 | fn main() { 6 | let args: Vec<String> = std::env::args().collect(); 7 | if args.len() != 2 { 8 | println!("Usage: {} /path/to/input.bin", &args[0]); 9 | return; 10 | } 11 | 12 | let filename = &args[1]; 13 | //println!("Dumping \"{}\"...", filename); 14 | 15 | let f = File::open(filename).unwrap(); 16 | let root: Result<Root, bincode::Error> = bincode::deserialize_from(f); 17 | 18 | match root { 19 | Ok(root) => { 20 | //println!("{:?}", root); 21 | println!("{}", pretty(&root)); 22 | }, 23 | Err(e) => println!("{}", e.to_string()), 24 | }; 25 | } 26 | -------------------------------------------------------------------------------- /rust-mutator/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import ctypes 4 | 5 | 6 | so = ctypes.CDLL('./target/debug/libmutator.so') 7 | 8 | data = bytes([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) 9 | data = bytes([0, 0, 0, 0, 0, 0, 0, 0]) 10 | mutated_out = ctypes.create_string_buffer(0x1000) 11 | spliced_out = ctypes.create_string_buffer(0x1000) 12 | 13 | so.afl_custom_mutator.argtypes = [ 14 | ctypes.c_char_p, 15 | ctypes.c_size_t, 16 | ctypes.c_char_p, 17 | ctypes.c_size_t, 18 | ctypes.c_int, 19 | ] 20 | 21 | r =
so.afl_custom_mutator( 22 | data, 23 | len(data), 24 | mutated_out, 25 | len(mutated_out), 26 | 666, 27 | ) 28 | 29 | print(r) 30 | 31 | mem = ctypes.POINTER(ctypes.c_ubyte)() 32 | p_mem = ctypes.byref(mem) 33 | 34 | so.afl_custom_splicer( 35 | data, 36 | len(data), 37 | data, 38 | len(data), 39 | p_mem, 40 | ) 41 | 42 | -------------------------------------------------------------------------------- /scala/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | *.jar 3 | *.class 4 | *.iml 5 | *.ipr 6 | *.iws 7 | .idea 8 | out 9 | -------------------------------------------------------------------------------- /scala/README.md: -------------------------------------------------------------------------------- 1 | # jReflex 2 | 3 | Scala port of the original Haskell part of Reflex. 4 | 5 | ## Building 6 | 7 | ```sh 8 | $ sbt assembly 9 | ``` 10 | 11 | If the JVM stars are aligned you will find `jreflex.jar` in the current 12 | directory. 13 | 14 | ## Run 15 | 16 | After the assembly phase, you can use the helper script `jreflex.sh`. 17 | Otherwise, you can use `sbt run`. 18 | 19 | -------------------------------------------------------------------------------- /scala/build.sbt: -------------------------------------------------------------------------------- 1 | name := "reflex" 2 | 3 | version := "0.1" 4 | 5 | scalaVersion := "2.13.1" 6 | 7 | resolvers += "Maven Central" at "https://repo1.maven.org/maven2/" 8 | 9 | libraryDependencies += "com.github.scopt" %% "scopt" % "4.0.0-RC2" 10 | libraryDependencies += "dk.brics" % "automaton" % "1.12-1" 11 | libraryDependencies += "com.michaelpollmeier" %% "gremlin-scala" % "3.4.7.2" 12 | libraryDependencies += "org.apache.tinkerpop" % "tinkergraph-gremlin" % "3.4.8" 13 | // Kill warnings 14 | libraryDependencies += "org.slf4j" % "slf4j-nop" % "1.7.30" 15 | 16 | val circeVersion = "0.12.0" 17 | 18 | libraryDependencies ++= Seq( 19 | "io.circe" %% "circe-core", 20 | "io.circe" %% "circe-generic", 21 | "io.circe" %% "circe-parser" 22 | ).map(_ % circeVersion) 23 | 24 | libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.8" % "test" 25 | 26 | mainClass in assembly := Some("io.github.thebabush.reflex.Main") 27 | assemblyOutputPath in assembly := file("./jreflex.jar") 28 | -------------------------------------------------------------------------------- /scala/find-tables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$(dirname $0)" 4 | JAR="$DIR/jreflex.jar" 5 | 6 | java -cp $JAR io.github.thebabush.reflex.graph.main "$@" 7 | 8 | -------------------------------------------------------------------------------- /scala/jreflex.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$(dirname $0)" 4 | JAR="$DIR/jreflex.jar" 5 | 6 | # I lost many hours of sleep because of this. Fuck you. Fucking fuck you bad. 7 | # WHY NOT USE NON-DETERMINISTIC SHIT IN YOUR DFA LIBRARY?
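# (For the record: dk.brics.automaton iterates internal hash sets, so results
# are non-deterministic unless the dk.brics.automaton.debug property below
# forces a stable ordering.)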
8 | # FUCK YOU 9 | java -Ddk.brics.automaton.debug=1 -jar $JAR "$@" 10 | 11 | -------------------------------------------------------------------------------- /scala/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.3.3 -------------------------------------------------------------------------------- /scala/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10") 2 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/Counter.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | class Counter { 4 | 5 | private var counter = 0 6 | 7 | private def mk(s: String) = { 8 | counter += 1 9 | s"${s}${counter}" 10 | } 11 | 12 | def mkMessage(obj: Object): String = { 13 | mkMessage(obj.getClass.getSimpleName) 14 | } 15 | 16 | def mkMessage(prefix: String): String = { 17 | mk(prefix) 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/DfaToRegex.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | import dk.brics.automaton.{Automaton, State} 4 | 5 | import scala.jdk.CollectionConverters._ 6 | 7 | sealed trait Regex 8 | case class Empty() extends Regex 9 | case class Epsilon() extends Regex 10 | case class Literal(value: Char) extends Regex 11 | case class OneOrMore(child: Regex) extends Regex 12 | case class Optional(child: Regex) extends Regex 13 | case class Or(left: Regex, right: Regex) extends Regex 14 | case class RESet(children: Set[Char]) extends Regex 15 | case class Star(child: Regex) extends Regex 16 | case class Then(left: Regex, right: Regex) extends Regex 17 | 18 | class DfaToRegex 19 | object DfaToRegex { 20 | 21 | val MAX_CHARACTER = 256; 22 | 23 | def rangeToString(m: Int, M: Int): String = { 24 | (m to M).mkString 25 | } 26 | 27 | def mkThen(list: List[Regex]): Regex = list match { 28 | case x :: y :: xs => Then(x, mkThen(y :: xs)) 29 | case x :: xs => x 30 | case Nil => Empty() 31 | } 32 | 33 | def mkOr(list: List[Regex]): Regex = list match { 34 | case x :: y :: xs => Or(x, mkOr(y :: xs)) 35 | case x :: _ => x 36 | case Nil => Empty() 37 | } 38 | 39 | def hopcroft(dfa: Automaton): Regex = { 40 | val n = dfa.getNumberOfStates + 1 41 | var out = Array.ofDim[Regex](n, n, n); 42 | 43 | val s2i: Map[State, Int] = dfa.getStates.asScala.zipWithIndex 44 | .map({ case (v, i) => (v, i + 1) }).toMap 45 | 46 | val start = s2i(dfa.getInitialState) 47 | val finals = Set.empty ++ dfa.getAcceptStates.asScala.filter(v => v.isAccept) 48 | 49 | def hopcroftBase(): Unit = { 50 | for (i <- 1 until n) { 51 | for (j <- 1 until n) { 52 | if (i == j) { 53 | out(0)(i)(j) = Epsilon() 54 | } else { 55 | out(0)(i)(j) = Empty() 56 | } 57 | } 58 | } 59 | 60 | dfa.getStates.asScala.foreach(beg => { 61 | beg.getTransitions.asScala.foreach(t => { 62 | val end: State = t.getDest 63 | val i = s2i(beg) 64 | val j = s2i(end) 65 | 66 | val rx = if (t.getMin == t.getMax) 67 | Literal(t.getMin) 68 | else 69 | RESet((t.getMin to t.getMax).toSet) 70 | 71 | out(0)(i)(j) = out(0)(i)(j) match { 72 | case Empty() => rx 73 | case x => Or(x, rx) 74 | } 75 | out(0)(i)(j) = Simple.simplify(out(0)(i)(j)) 76 | }) 77 | 
}) 78 | } 79 | hopcroftBase() 80 | 81 | def hopcroftInduction(): Unit = { 82 | for (k <- 1 until n) { 83 | for (i <- 1 until n) { 84 | for (j <- 1 until n) { 85 | val outVal = Or( 86 | out(k-1)(i)(j), 87 | mkThen( 88 | out(k-1)(i)(k) 89 | :: Star(out(k-1)(k)(k)) 90 | :: out(k-1)(k)(j) 91 | :: List.empty 92 | ) 93 | ) 94 | out(k)(i)(j) = Simple.simplify(outVal) 95 | } 96 | } 97 | } 98 | } 99 | hopcroftInduction() 100 | 101 | var regexParts: Set[Regex] = Set.empty 102 | finals.foreach(f => regexParts += out(n-1)(start)(s2i(f))) 103 | val ret = mkOr(regexParts.toList) 104 | 105 | Simple.simplify(ret) 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/Generator.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | import scala.util.Random 4 | 5 | object Generator { 6 | val rand: Random = Random 7 | 8 | def generate(r: Regex): String = { 9 | r match { 10 | case Empty() => "" 11 | case Epsilon() => "" 12 | case Literal(c) => "" + c 13 | case OneOrMore(cr) => List.fill(1 + rand.nextInt(3))(cr).map(generate).mkString("") 14 | case Optional(cr) => if (rand.nextBoolean) "" else generate(cr) 15 | case Or(a, b) => if (rand.nextBoolean) generate(a) else generate(b) 16 | case RESet(cc) => "" + cc.toSeq(rand.nextInt(cc.size)) 17 | case Star(c) => if (rand.nextBoolean) "" else generate(c) 18 | case Then(a, b) => generate(a) + generate(b) 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/MakeProto.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | class MakeProto { 4 | 5 | private var messages = Seq[String]() 6 | private val counter = new Counter; 7 | 8 | def serialize(regexes: Seq[Regex]): String = { 9 | val sb = new StringBuilder 10 | 11 | sb ++= "message RootHelper {\n" 12 | sb ++= "oneof T {\n" 13 | regexes.zipWithIndex.foreach((ri) => { 14 | val (child, i) = ri 15 | val childName = counter.mkMessage(child) 16 | serialize(childName, child) 17 | sb ++= s"${childName} ${childName.toLowerCase} = ${i + 1};" 18 | }) 19 | sb ++= "};\n" 20 | sb ++= "}\n" 21 | sb ++= 22 | s""" 23 | |message Root { 24 | | repeated RootHelper rh = 1; 25 | |} 26 | |""".stripMargin 27 | 28 | messages :+= sb.toString() 29 | 30 | "syntax = \"proto2\";\n\n" + messages.mkString("\n") 31 | } 32 | 33 | private def serialize(name: String, r: Regex): Unit = { 34 | var sb = new StringBuilder 35 | sb ++= s"message ${name} {\n" 36 | r match { 37 | case Empty() => throw new IllegalArgumentException("serialize => Empty()") 38 | case Epsilon() => throw new IllegalArgumentException("serialize => Epsilon()") 39 | case Literal(v) => 40 | sb ++= 41 | s""" 42 | |enum Literal { 43 | | ${mkLiteral(v)} = ${v.asInstanceOf[Int]}; 44 | |}; 45 | |required Literal literal = 1; 46 | |""".stripMargin 47 | case RESet(vv) => 48 | sb ++= s"enum Set {\n" 49 | vv.toSeq.foreach(c => { 50 | sb ++= s"${mkLiteral(c)} = ${c.asInstanceOf[Int]};\n" 51 | }) 52 | sb ++= "};\n" 53 | sb ++= "required Set set = 1;" 54 | case OneOrMore(child) => 55 | val childName = counter.mkMessage(child) 56 | sb ++= 57 | s""" 58 | |required ${childName} one = 1; 59 | |required ${childName} more = 2; 60 | |""".stripMargin 61 | serialize(childName, child) 62 | case Optional(child) => 63 | var childName = 
counter.mkMessage(child) 64 | sb ++= s"optional ${childName} child = 1;\n" 65 | serialize(childName, child) 66 | case Or(left, right) => 67 | sb ++= "oneof T {\n" 68 | Seq(left, right).zipWithIndex.foreach((args) => { 69 | val (child, i) = args 70 | val childName = counter.mkMessage(child) 71 | serialize(childName, child) 72 | sb ++= s"${childName} ${childName.toLowerCase()} = ${i + 1};\n"; 73 | }) 74 | sb ++= "};\n" 75 | case Star(child) => 76 | val childName = counter.mkMessage(child) 77 | sb ++= s"repeated ${childName} child = 1;\n" 78 | serialize(childName, child) 79 | case Then(left, right) => 80 | Seq(left, right).zipWithIndex.foreach((args) => { 81 | val (child, i) = args 82 | val childName = counter.mkMessage(child) 83 | serialize(childName, child) 84 | sb ++= s"required ${childName} ${childName.toLowerCase()} = ${i + 1};\n"; 85 | }) 86 | } 87 | sb ++= "}\n" 88 | 89 | messages :+= sb.toString() 90 | } 91 | 92 | private def mkLiteral(ch: Char): String = { 93 | f"L${ch.asInstanceOf[Int]}%02X" 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/MakeRust.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | class MakeRust { 4 | 5 | private val counter = new Counter 6 | private var code = Seq[String]() 7 | 8 | private def mkLiteral(ch: Char): String = { 9 | f"L${ch.asInstanceOf[Int]}%02X" 10 | } 11 | 12 | private def mkEscape(ch: Char): String = { 13 | f"\\u{${ch.asInstanceOf[Int]}%02X}" 14 | } 15 | 16 | def serialize(origRegexes: Seq[Regex]): String = { 17 | // Add a default "catch all" regex 18 | 19 | val regexes = origRegexes :+ RESet((0 until 255).map(_.asInstanceOf[Char]).toSet) 20 | code :+= PRELUDE 21 | 22 | val names = regexes.map(r => counter.mkMessage(r)) 23 | 24 | regexes.zip(names).foreach(rn => { 25 | val (regex, name) = rn; 26 | serialize(name, regex); 27 | }) 28 | 29 | val name = "BaseUnion" 30 | val length = regexes.length 31 | 32 | // BaseUnion declaration 33 | val sb = new StringBuilder 34 | sb ++= "#[derive(Clone, Debug, Serialize, Deserialize)]\n" 35 | sb ++= "pub enum BaseUnion {\n" 36 | names.foreach(child => { 37 | sb ++= s" ${child}(${child}),\n" 38 | }) 39 | sb ++= "}\n\n" 40 | 41 | // BaseUnion sampling 42 | sb ++= s"impl Distribution<${name}> for Standard {\n" 43 | sb ++= s" fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ${name} {\n" 44 | sb ++= s" match rng.gen_range(0, ${length}) {\n" 45 | names.zipWithIndex.foreach(ci => { 46 | val (child, i) = ci 47 | sb ++= s" ${i} => ${name}::${child}(rng.sample(Standard)),\n" 48 | }) 49 | sb ++= s" _ => unreachable!(),\n" 50 | sb ++= s" }\n" 51 | sb ++= s" }\n" 52 | sb ++= s"}\n\n" 53 | 54 | // BaseUnion impl Node 55 | sb ++= s"impl Node for ${name} {\n" 56 | sb ++= s" fn pp(&self, s: &mut String) -> () {\n" 57 | sb ++= s" match self {\n" 58 | names.foreach(child => { 59 | sb ++= s" ${name}::${child}(vv) => vv.pp(s),\n" 60 | }) 61 | sb ++= s" }\n" 62 | sb ++= s" }\n" 63 | sb ++= s"}\n" 64 | 65 | 66 | code :+= sb.toString 67 | 68 | serialize_star("ActualRoot", "BaseUnion", 0) 69 | 70 | code.mkString("\n") 71 | } 72 | 73 | private def serialize(name: String, regex: Regex): Unit = { 74 | regex match { 75 | case Empty() => throw new IllegalArgumentException("serialize => Empty()") 76 | case Epsilon() => throw new IllegalArgumentException("serialize => Epsilon()") 77 | case Literal(v) => serialize_literal(name, Seq() :+ v) 78 | case RESet(vv) =>
serialize_literal(name, vv.toSeq) 79 | case OneOrMore(child) => { 80 | val childName = counter.mkMessage(child) 81 | serialize(childName, child) 82 | serialize_star(name, childName, 1) 83 | } 84 | case Optional(child) => { 85 | val childName = counter.mkMessage(child) 86 | serialize(childName, child) 87 | serialize_optional(name, childName) 88 | } 89 | case Or(left, right) => { 90 | val children = Seq() :+ left :+ right 91 | val childrenNames = children.map(counter.mkMessage) 92 | children.zip(childrenNames).foreach(cn => { 93 | val (child, name) = cn 94 | serialize(name, child) 95 | }) 96 | serialize_or(name, childrenNames) 97 | } 98 | case Star(child) => { 99 | val childName = counter.mkMessage(child) 100 | serialize(childName, child) 101 | serialize_star(name, childName, 0) 102 | } 103 | case Then(left, right) => { 104 | val children = Seq() :+ left :+ right 105 | val childrenNames = children.map(counter.mkMessage) 106 | children.zip(childrenNames).foreach(cn => { 107 | val (child, name) = cn 108 | serialize(name, child) 109 | }) 110 | serialize_then(name, childrenNames) 111 | } 112 | } 113 | () 114 | } 115 | 116 | private def serialize_then(name: String, childrenNames: Seq[String]): Unit = { 117 | val length = childrenNames.length 118 | 119 | // Then declaration 120 | val sb = new StringBuilder 121 | sb ++= s"#[derive(Clone, Debug, Serialize, Deserialize)]\n" 122 | sb ++= s"pub struct ${name} {\n" 123 | childrenNames.foreach(child => { 124 | sb ++= s" ${child.toLowerCase}: ${child},\n" 125 | }) 126 | sb ++= s"}\n\n" 127 | 128 | // Then sampling 129 | sb ++= s"impl Distribution<${name}> for Standard {\n" 130 | sb ++= s" fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ${name} {\n" 131 | sb ++= s" ${name} {\n" 132 | childrenNames.foreach(child => { 133 | sb ++= s" ${child.toLowerCase}: rng.sample(Standard),\n" 134 | }) 135 | sb ++= s" }\n" 136 | sb ++= s" }\n" 137 | sb ++= s"}\n\n" 138 | 139 | // Then impl Node 140 | sb ++= s"impl Node for ${name} {\n" 141 | sb ++= s" fn pp(&self, s: &mut String) -> () {\n" 142 | childrenNames.foreach(child => { 143 | sb ++= s" self.${child.toLowerCase}.pp(s);\n" 144 | }) 145 | sb ++= s" }\n" 146 | sb ++= s"}\n" 147 | 148 | code :+= sb.toString 149 | } 150 | 151 | private def serialize_or(name: String, childrenNames: Seq[String]): Unit = { 152 | val length = childrenNames.length 153 | 154 | // Or declaration 155 | val sb = new StringBuilder 156 | sb ++= s"#[derive(Clone, Debug, Serialize, Deserialize)]\n" 157 | sb ++= s"pub enum ${name} {\n" 158 | childrenNames.foreach(child => { 159 | sb ++= s" ${child}(${child}),\n" 160 | }) 161 | sb ++= s"}\n\n" 162 | 163 | // Or sampling 164 | sb ++= s"impl Distribution<${name}> for Standard {\n" 165 | sb ++= s" fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ${name} {\n" 166 | sb ++= s" match rng.gen_range(0, ${length}) {\n" 167 | childrenNames.zipWithIndex.foreach(ci => { 168 | val (child, i) = ci 169 | sb ++= s" ${i} => ${name}::${child}(rng.sample(Standard)),\n" 170 | }) 171 | sb ++= s" _ => unreachable!(),\n" 172 | sb ++= s" }\n" 173 | sb ++= s" }\n" 174 | sb ++= s"}\n\n" 175 | 176 | // Or impl Node 177 | sb ++= s"impl Node for ${name} {\n" 178 | sb ++= s" fn pp(&self, s: &mut String) -> () {\n" 179 | sb ++= s" match self {\n" 180 | childrenNames.foreach(child => { 181 | sb ++= s" ${name}::${child}(vv) => vv.pp(s),\n" 182 | }) 183 | sb ++= s" }\n" 184 | sb ++= s" }\n" 185 | sb ++= s"}\n" 186 | 187 | 188 | code :+= sb.toString 189 | } 190 | 191 | private def serialize_literal(name: String, chars: Seq[Char]): Unit = { 192 | val
childrenNames = chars.map(char => mkLiteral(char)) 193 | val childrenNo = chars.length 194 | 195 | val sb = new StringBuilder 196 | sb ++= s"#[derive(Clone, Debug, Serialize, Deserialize)]\n" 197 | sb ++= s"pub enum ${name} {\n" 198 | childrenNames.foreach(childName => { 199 | sb ++= s" ${childName},\n" 200 | }) 201 | sb ++= s"}\n\n" 202 | 203 | // impl Sample 204 | sb ++= s"impl Distribution<${name}> for Standard {\n" 205 | sb ++= s" fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ${name} {\n" 206 | sb ++= s" match rng.gen_range(0, ${childrenNo}) {\n" 207 | childrenNames.zipWithIndex.foreach(ci => { 208 | val (childName, i) = ci; 209 | sb ++= s" ${i} => ${name}::${childName},\n" 210 | }) 211 | sb ++= s" _ => unreachable!(),\n" 212 | sb ++= s" }\n" 213 | sb ++= s" }\n" 214 | sb ++= s"}\n\n" 215 | 216 | // impl Node 217 | sb ++= s"impl Node for ${name} {\n" 218 | sb ++= s" fn pp(&self, s: &mut String) -> () {\n" 219 | sb ++= s" match self {\n" 220 | childrenNames.zip(chars).foreach(cc => { 221 | val (child, ch) = cc; 222 | sb ++= s""" ${name}::${child} => s.push_str("${mkEscape(ch)}"),\n""" 223 | }) 224 | sb ++= s" }\n" 225 | sb ++= s" }\n" 226 | sb ++= s"}\n" 227 | 228 | 229 | code :+= sb.toString 230 | } 231 | 232 | private def serialize_optional(name: String, childName: String): Unit = { 233 | val sb = new StringBuilder 234 | 235 | // type definition 236 | sb ++= s"type ${name} = Option<${childName}>;\n\n" 237 | 238 | // impl Sample 239 | // sb ++= s"impl Distribution<${name}> for Standard {" 240 | 241 | // impl Node 242 | sb ++= 243 | s""" 244 | impl Node for ${name} { 245 | fn pp(&self, s: &mut String) -> () { 246 | match self { 247 | None => (), 248 | Some(r) => r.pp(s), 249 | }; 250 | } 251 | } 252 | """ 253 | 254 | code :+= sb.toString 255 | } 256 | 257 | private def serialize_star(name: String, child: String, min: Int): Unit = { 258 | val sb = new StringBuilder 259 | 260 | // struct definition 261 | sb ++= "#[derive(Clone, Debug, Serialize, Deserialize)]\n" 262 | sb ++= s"pub struct ${name} {\n" 263 | sb ++= s" pub children: Vec<${child}>,\n" 264 | sb ++= s"}\n\n" 265 | 266 | // impl Sample 267 | sb ++= s"impl Distribution<${name}> for Standard {\n" 268 | sb ++= s" fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ${name} {\n" 269 | sb ++= s" let children_no = rng.gen_range(${min}, 3);\n" 270 | sb ++= s" let mut children = Vec::with_capacity(children_no as usize);\n" 271 | sb ++= s" for _ in 0..children_no {\n" 272 | sb ++= s" children.push(rng.sample(Standard));\n" 273 | sb ++= s" };\n" 274 | sb ++= s" ${name} { children: children }\n" 275 | sb ++= s" }\n" 276 | sb ++= s"}\n\n" 277 | 278 | // impl Node 279 | sb ++= s"impl Node for ${name} {\n" 280 | sb ++= s" fn pp(&self, s: &mut String) -> () {\n" 281 | sb ++= s" for i in 0..self.children.len() {\n" 282 | sb ++= s" self.children[i].pp(s);\n\n" 283 | sb ++= s" }\n" 284 | sb ++= s" }\n" 285 | sb ++= s"}\n\n" 286 | 287 | code :+= sb.toString 288 | } 289 | 290 | private val PRELUDE: String = 291 | s""" 292 | use serde_derive::{Deserialize, Serialize}; 293 | use rand::distributions::{Distribution, Standard}; 294 | use rand::Rng; 295 | 296 | use crate::common::{Node}; 297 | """ 298 | } 299 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/Pretty.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | class Pretty 4 | object Pretty { 5 | def print(regex: Regex): StringBuilder = { 6 | def np(r: Regex): Boolean =
r match { 7 | case RESet(_) => false 8 | case Literal(_) => false 9 | case _ => true 10 | } 11 | 12 | def ppp(regex: Regex): StringBuilder = { 13 | var sb = new StringBuilder() 14 | if (np(regex)) { 15 | sb += '(' 16 | sb ++= pp(regex) 17 | sb += ')' 18 | } else { 19 | sb = pp(regex) 20 | } 21 | sb 22 | } 23 | 24 | def ppr(s: String): StringBuilder = { 25 | var sb = new StringBuilder() 26 | 27 | if (s.equals((1.asInstanceOf[Char] to 255).mkString)) { 28 | sb += '.' 29 | } else if (s.length > 0x80) { 30 | sb ++= "[^" 31 | sb ++= Set.from[Char](1.asInstanceOf[Char] to 255).diff(Set.from(s)).toSeq.sorted.flatMap(i2c) 32 | sb += ']' 33 | } else { 34 | sb += '[' 35 | sb ++= s.toSeq.sorted.flatMap(i2c) 36 | sb += ']' 37 | } 38 | 39 | sb 40 | } 41 | 42 | def i2c(c: Char): String = { 43 | if (c >= 'a' && c <= 'z') 44 | "" + c 45 | else if (c >= 'A' && c <= 'Z') 46 | "" + c 47 | else if (c >= '0' && c <= '9') 48 | "" + c 49 | else if (c == ' ') 50 | " " 51 | else if (c == '<') 52 | "<" 53 | else c match { 54 | case '#' => "#" 55 | case '\t' => "\\t" 56 | case '\n' => "\\n" 57 | case '-' => "\\-" 58 | case ch => f"\\x${ch.asInstanceOf[Int]}%02X" 59 | } 60 | } 61 | 62 | def pp(regex: Regex): StringBuilder = { 63 | var sb = new StringBuilder() 64 | regex match { 65 | case Empty() => sb ++= "∅" 66 | case Epsilon() => sb ++= "ε" 67 | case Literal(c) => sb ++= i2c(c) 68 | case Optional(r) => { sb ++= ppp(r); sb += '?' } 69 | case Then(r1, r2) => { sb ++= pp(r1); sb ++= pp(r2) } 70 | case Or(r1, r2) => { sb += '('; sb ++= pp(r1); sb += '|'; sb ++= pp(r2); sb += ')' } 71 | case Star(r) => { sb ++= ppp(r); sb += '*' } 72 | case OneOrMore(r) => { sb ++= ppp(r); sb += '+' } 73 | case RESet(rs) => { sb ++= ppr(rs.toSeq.sorted.mkString) } 74 | } 75 | } 76 | 77 | regex match { 78 | case Optional(r) => pp(r) 79 | case r => pp(r) 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/Simple.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | import scala.util.control.Breaks.{break, breakable} 4 | 5 | class Simple 6 | object Simple { 7 | 8 | def simplify(root: Regex): Regex = { 9 | var ret: Regex = root 10 | var modified = true 11 | 12 | while (modified) { 13 | val t = traverse(ret) 14 | ret = t._1 15 | modified = t._2 16 | } 17 | 18 | ret 19 | } 20 | 21 | def simplify(root: Regex, max: Int): Regex = { 22 | var ret: Regex = root 23 | var modified = true 24 | 25 | breakable { 26 | for (i <- 0 until max) { 27 | val t = traverse(ret) 28 | ret = t._1 29 | modified = t._2 30 | if (!modified) break 31 | } 32 | } 33 | 34 | ret 35 | } 36 | 37 | private def traverse(root: Regex): (Regex, Boolean) = { 38 | var modified = false; 39 | var ret = root; 40 | 41 | val applied = applyRules(ret) 42 | ret = applied._1 43 | modified = applied._2 44 | 45 | if (!modified) { 46 | root match { 47 | case Empty() => () 48 | case Epsilon() => () 49 | case Literal(_) => () 50 | case OneOrMore(r) => { 51 | val (c, m) = traverse(r) 52 | modified |= m 53 | ret = OneOrMore(c) 54 | } 55 | case Optional(r) => { 56 | val (c, m) = traverse(r) 57 | modified |= m 58 | ret = Optional(c) 59 | } 60 | case Or(l, r) => { 61 | val ll = traverse(l) 62 | modified |= ll._2 63 | val rr = traverse(r) 64 | modified |= rr._2 65 | ret = Or(ll._1, rr._1) 66 | } 67 | case RESet(_) => () 68 | case Star(r) => { 69 | val (c, m) = traverse(r) 70 | modified |= m 71 | ret = Star(c) 72 | } 73 | 
case Then(l, r) => { 74 | val (lr, lm) = traverse(l) 75 | modified |= lm 76 | val (rr, rm) = traverse(r) 77 | modified |= rm 78 | ret = Then(lr, rr) 79 | } 80 | } 81 | } 82 | 83 | (ret, modified) 84 | } 85 | 86 | private def applyRules(regex: Regex): (Regex, Boolean) = { 87 | var ret = regex; 88 | var modified = true; 89 | 90 | /* 91 | Simplification rules. 92 | Some of them are taken from https://github.com/izuzak/noam/blob/277242b16ac3f8ced94c4c61a4082610cc876792/src/noam.re.js 93 | */ 94 | ret = regex match { 95 | // TODO: Test all this lol 96 | case Or(Empty(), r) => r 97 | case Or(r, Empty()) => r 98 | case Or(r, Epsilon()) => Optional(r) 99 | case Or(Epsilon(), r) => Optional(r) 100 | case Then(Empty(), _) => Empty() 101 | case Then(_, Empty()) => Empty() 102 | case Then(Epsilon(), a) => a 103 | case Then(a, Epsilon()) => a 104 | // TESTED (r*)* => r* 105 | case Star(Star(r)) => Star(r) 106 | // TESTED (a|b*)* => (a|b)* 107 | case Star(Or(r, Star(s))) => Star(Or(r, s)) 108 | // TESTED (b*|a)* => (a|b)* 109 | case Star(Or(Star(r), s)) => Star(Or(r, s)) 110 | case RESet(xs) if xs.size == 1 => Literal(xs.head) 111 | case Optional(Empty()) => Epsilon() 112 | case Optional(Star(r)) => Star(r) 113 | case Or(Literal(a), Literal(b)) => RESet(Set(a, b)) 114 | case Or(RESet(aa), RESet(bb)) => RESet(aa ++ bb) 115 | case Star(Epsilon()) => Epsilon() 116 | case Star(Empty()) => Epsilon() 117 | // TESTED (a?|b)+ => (a|b)* 118 | case OneOrMore(Or(Optional(a), b)) => Star(Or(a, b)) 119 | // TESTED (a|b?)+ => (a|b)* 120 | case OneOrMore(Or(a, Optional(b))) => Star(Or(a, b)) 121 | // TESTED (a?|b?)+ => (a|b)* 122 | //case OneOrMore(Or(Optional(a), Optional(b))) => Star(Or(a, b)) // UNREACHABLE 123 | /* Kinda expensive */ 124 | // TESTED aa* => a+ 125 | case Then(r, Star(s)) if r == s => OneOrMore(r) 126 | // TESTED a*a => a+ 127 | case Then(Star(r), s) if r == s => OneOrMore(r) 128 | // TESTED aa*b => a+b 129 | case Then(a, Then(Star(b), c)) if a == b => Then(OneOrMore(a), c) 130 | // TESTED a*a*b => a*b 131 | case Then(Star(a), Then(Star(b), c)) if a == b => Then(Star(a), c) 132 | // TESTED (a|ba) => b?a 133 | case Or(a, Then(b, c)) if a == c => Then(Optional(b), a) 134 | // (a|(a|b)) => (a|b) 135 | case Or(a, Or(b, c)) if a == b => Or(a, c) 136 | // TESTED (ab|ac) => a(b|c) 137 | case Or(Then(a, b), Then(c, d)) if a == c => Then(a, Or(b, d)) 138 | // TESTED (a|a) => a 139 | case Or(a, b) if a == b => a 140 | /* Ad-hoc */ 141 | // TESTED ((a?|aa))* => a* 142 | case Star(Or(Optional(a), Then(b, c))) if a == b && b == c => Star(a) 143 | // a|ac => ac? 144 | case Or(a, Then(b, c)) if a == b => Then(a, Optional(c)) 145 | // (a+)? => a* 146 | case Optional(OneOrMore(a)) => Star(a) 147 | // (a?)* => a* 148 | case Star(Optional(a)) => Star(a) 149 | // abb* => ab+ 150 | case Then(Then(a, b), Star(c)) if b == c => Then(a, OneOrMore(b)) 151 | // ab+|acb+ => ac?b+ 152 | case Or(Then(a, OneOrMore(b)), Then(Then(c, d), OneOrMore(e))) 153 | //if a == c && b == e => Then(Then(c, Optional(d)), OneOrMore(e)) 154 | if a == c && b == e => Then(c, Then(Optional(d), OneOrMore(e))) 155 | // qr 156 | case Then(Then(q, Then(r, a)), Star(b)) if a == b 157 | => Then(q, Then(r, OneOrMore(a))) 158 | // xaa*y => xa+y 159 | case Then(Then(x, a), Then(Star(b), y)) if a == b 160 | => Then(x, Then(OneOrMore(a), y)) 161 | // ab | ac => a(b|c) 162 | case Or(Then(a, b), Then(Then(c, d), e)) if c == a 163 | => Then(a, Or(b, Then(d, e))) 164 | // ab|a => ab? 
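// NB: the `a == b` / `a == c` guards in these rules compare case classes
// structurally, so equal sub-regexes match even when they are different
// object instances.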
165 | case Or(Then(a, b), c) if a == c => Then(a, Optional(b)) 166 | // [...]|x => [...x] 167 | case Or(RESet(xs), Literal(x)) => RESet(xs + x) 168 | case Or(Literal(x), RESet(xs)) => RESet(xs + x) 169 | /* No simplification */ 170 | case r => modified = false; r 171 | } 172 | 173 | (ret, modified) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/graph/main.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex.graph 2 | 3 | import gremlin.scala._ 4 | import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph 5 | 6 | import scala.collection.mutable 7 | 8 | /** 9 | * WARNING: shitty code ahead. 10 | * I don't know scala very well, nor gremlin... and scala-gremlin was probably a mistake. 11 | * Also, I don't have much time left and I have to make tons of beautiful slides (: 12 | */ 13 | 14 | object main { 15 | 16 | final val PCODE_INT_EQS: Seq[String] = Seq("INT_EQUAL", "INT_NOTEQUAL") 17 | final val PCODE_INT_GTS_LTS: Seq[String] 18 | = Seq("INT_SLESS", "INT_LESS") 19 | final val PTR_OPS: Seq[String] = Seq("PTRADD", "PTRSUB") 20 | 21 | final val CouldBeTable = Key[Boolean]("could_be_table") 22 | final val IsInput = Key[Boolean]("is_input") 23 | final val OpMnemonic = Key[String]("op_mnemonic") 24 | final val ConstValue = Key[Long]("const_value") 25 | final val FileOffset = Key[Long]("file_offset") 26 | final val OpSize = Key[Long]("op_size") 27 | final val Id = Key[Long]("id") 28 | 29 | private class Results { 30 | var tables: mutable.Map[String, List[(Long, Long)]] = mutable.Map[String, List[(Long, Long)]]().withDefaultValue(List.empty) 31 | var maxState: List[Long] = List.empty 32 | 33 | override def toString: String = { 34 | Seq( 35 | maxState.map(ms => "--max-state 0x" + String.format("%08X", ms)).mkString("\n"), 36 | tables.map({ 37 | case (k, xs) => { 38 | xs.map({ 39 | case (addr, size) => { 40 | k.replaceFirst("yy_", "--") + String.format(" 0x%08X", addr) + " " + size 41 | } 42 | }).mkString("\n") 43 | } 44 | }).toSeq.sorted.mkString("\n") 45 | ).mkString("\n") 46 | } 47 | } 48 | 49 | private def findTables(g: TraversalSource): Results = { 50 | val ret = new Results 51 | 52 | // Φ(state) => int operations => yy_base[_] => sum with `class` variable 53 | // => yy_chk[_] => => comparison with state => same 54 | 55 | val YyBase = StepLabel[Vertex]("yy_base") 56 | val YyBaseLoad = StepLabel[Vertex]("yy_base_load") 57 | val ClassPhi = StepLabel[Vertex]("class_phi") 58 | val OtherTable = StepLabel[Vertex]("other_table") 59 | val OtherTableLoad = StepLabel[Vertex]("other_table_load") 60 | 61 | val Tmp0 = StepLabel[Vertex]("tmp_find_yy_base") 62 | val Tmp1 = StepLabel[Vertex]("tmp_find_class_var") 63 | val Tmp2 = StepLabel[Vertex]("tmp_find_yy_table") 64 | 65 | val StatePhi = StepLabel[Vertex]("state_phi") 66 | 67 | // match `class + yy_base[state]` 68 | val classPlusYyBase = g.V 69 | .repeat(_.both.has(OpMnemonic, "MULTIEQUAL").dedup).emit() // Group together all multiequals 70 | .as(StatePhi) 71 | .optional(_.out.has(OpMnemonic, "CAST")) 72 | .repeat( 73 | _.out.has(OpMnemonic, P.within(Seq("INT_SEXT", "INT_MULT", "SUBPIECE"))) 74 | ) 75 | .emit() 76 | .out.has(OpMnemonic, P.within(PTR_OPS)) 77 | .as(Tmp0) 78 | // The other operand should be yy_base... 
Find it 79 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS))).emit() 80 | // Tag yy_base 81 | .in.has(CouldBeTable, true).as(YyBase) 82 | // Go back to where we were before 83 | .select(Tmp0) 84 | .optional(_.out.has(OpMnemonic, "CAST")) 85 | // Tag the load so we will be able to get its size 86 | .out.has(OpMnemonic, "LOAD").as(YyBaseLoad) 87 | // Skip integer operations 88 | .repeat(_.out.has(OpMnemonic, P.within(Seq("INT_SEXT", "INT_ADD")))).emit() 89 | .as(Tmp1) 90 | // Go look for a phi(class) 91 | .optional(_.in.has(OpMnemonic, P.within(Seq("INT_ZEXT")))) 92 | .in.has(OpMnemonic, "MULTIEQUAL") 93 | .as(ClassPhi) 94 | // Look for the first ptr (referring to either yy_chk or yy_nxt) 95 | .select(Tmp1) 96 | .optional(_.out.has(OpMnemonic, "CAST")) 97 | // (AND used as a way to trunc values... blame ghidra) 98 | .repeat(_.out.has(OpMnemonic, P.within(Seq("INT_AND", "INT_SEXT", "INT_MULT", "INT_ZEXT")))).emit() 99 | .out.has(OpMnemonic, P.within(PTR_OPS)) 100 | .as(Tmp2) 101 | // Look for the other table 102 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS))).emit() 103 | .in.has(CouldBeTable, true).as(OtherTable) 104 | // Rewind 105 | .select(Tmp2) 106 | .optional(_.out.has(OpMnemonic, "CAST")) 107 | .out.has(OpMnemonic, "LOAD").as(OtherTableLoad) 108 | 109 | // println(classPlusYyBase.clone.path.toIterator.mkString("\n")) 110 | 111 | // Find yy_chk: state != classPlusYyBase 112 | val TmpCmp = StepLabel[Vertex]("tmp_cmp") 113 | val stateNeqYyChk = classPlusYyBase.clone() 114 | .out.has(OpMnemonic, "INT_SEXT") 115 | .out.has(OpMnemonic, P.within(PCODE_INT_EQS)).as(TmpCmp) 116 | .optional(_.in.has(OpMnemonic, "SUBPIECE")) 117 | .in.has(OpMnemonic, "MULTIEQUAL") 118 | // Check the last node is the same from which we started from 119 | .where(P.eq(StatePhi.name): P[String]) 120 | 121 | val queryDefMaxState = stateNeqYyChk.clone() 122 | val queryYyAccept = stateNeqYyChk.clone() 123 | 124 | // println(stateNeqYyChk.clone.path.toIterator.mkString("\n")) 125 | 126 | // Extract the values 127 | stateNeqYyChk.select((YyBase, YyBaseLoad, OtherTable, OtherTableLoad)) 128 | .toIterator.foreach({ 129 | case (base, baseLoad, otherTable, otherTableLoad) => { 130 | val yyBase = base.asScala.property(FileOffset).value() 131 | val yyBaseElementSize = baseLoad.asScala.property(OpSize).value() 132 | val yyChk = otherTable.asScala.property(FileOffset).value() 133 | val yyChkElementSize = otherTableLoad.asScala.property(OpSize).value() 134 | ret.tables("yy_base") :+= (yyBase, yyBaseElementSize) 135 | ret.tables("yy_chk") :+= (yyChk, yyChkElementSize) 136 | } 137 | }) 138 | 139 | val TmpPtr = StepLabel[Vertex]("tmp_ptr") 140 | 141 | val queryMaxState2 = classPlusYyBase.clone() 142 | .repeat(_.out.has(OpMnemonic, P.within(Seq("INT_SEXT", "CAST", "INT_MULT")))).emit() 143 | .out.has(OpMnemonic, P.within(PTR_OPS)).as(TmpPtr) 144 | // Find yy_base again 145 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS))).emit() 146 | .in.where(P.eq(YyBase.name): P[String]).by(ConstValue.name) 147 | // Reach the comparison with max_state 148 | .select(TmpPtr) 149 | .repeat(_.out.has(OpMnemonic, P.within(PTR_OPS ++ Seq("CAST", "LOAD")))).emit() 150 | .out.has(OpMnemonic, P.within(PCODE_INT_EQS)).as(TmpCmp) 151 | .out.has(OpMnemonic, "CBRANCH") 152 | .select(TmpCmp) 153 | .in.has(ConstValue) 154 | // println(queryMaxState2.clone.path.toIterator.mkString("\n")) 155 | queryMaxState2.select((OtherTable, OtherTableLoad)).toIterator().foreach({ 156 | case (other, otherLoad) => { 157 | val yyNxt = other.asScala.property(FileOffset).value 
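// Record the table's file offset plus the width of the LOAD that reads it:
// Results.toString turns each ("yy_nxt", (addr, size)) entry into the
// corresponding `--nxt ADDR SIZE` argument for reflex.py.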
158 | val yyNxtElementSize = otherLoad.asScala.property(OpSize).value 159 | ret.tables("yy_nxt") :+= (yyNxt, yyNxtElementSize) 160 | } 161 | }) 162 | 163 | val TmpPtrMeta = StepLabel[Vertex]("tmp_yy_meta") 164 | val YyMeta = StepLabel[Vertex]("yy_meta") 165 | val YyMetaLoad = StepLabel[Vertex]("yy_meta_load") 166 | val YyEc = StepLabel[Vertex]("yy_ec") 167 | val YyEcLoad = StepLabel[Vertex]("yy_ec_load") 168 | val queryEcMetaMaxState = classPlusYyBase.clone 169 | .select(ClassPhi) 170 | // Goddamn ghidra... Sometimes PTR_*, sometimes integer operations T.T 171 | .repeat(_.out.has(OpMnemonic, P.within(Seq("INT_ZEXT", "CAST", "INT_SEXT", "INT_MULT")))).emit() 172 | // Go back and find table (hack for integers used as pointers) 173 | .out.has(OpMnemonic, P.within(PTR_OPS ++ Seq("INT_ADD", "CAST"))).as(TmpPtrMeta) 174 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS ++ Seq("CAST", "INT_ADD")))).emit() 175 | .in.has(CouldBeTable, true).as(YyMeta) 176 | // Go back to ptr 177 | .select(TmpPtrMeta) 178 | .optional(_.out.has(OpMnemonic, "CAST")) 179 | .out.has(OpMnemonic, "LOAD").as(YyMetaLoad) 180 | // Go back to phi node 181 | .out.where(P.eq(ClassPhi.name): P[String]) 182 | // Find yy_ec 183 | .in.has(OpMnemonic, "LOAD").as(YyEcLoad) 184 | .optional(_.in.has(OpMnemonic, "CAST")) 185 | // Integer arithmetic hack 186 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS ++ Seq("INT_ADD", "CAST")))).emit() 187 | .in.has(CouldBeTable, true).as(YyEc) 188 | // Disambiguate yy_ec from yy_meta 189 | .select(YyEcLoad) 190 | .optional(_.out.has(OpMnemonic, "MULTILABEL")) // HACK 191 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS))).emit() 192 | .repeat(_.in.has(OpMnemonic, P.within(Seq("INT_SEXT", "INT_ZEXT", "INT_MULT", "CAST")))).emit() 193 | .in.has(OpMnemonic, "LOAD") 194 | //println(queryEcMetaMaxState.clone.path.toIterator.mkString("\n")) 195 | val queryEc = queryEcMetaMaxState.clone 196 | queryEc.select((YyEc, YyEcLoad)).dedup.toIterator().foreach({ 197 | case (yyEc, yyEcLoad) => { 198 | ret.tables("yy_ec") :+= ( 199 | yyEc.asScala.property(FileOffset).value, 200 | yyEcLoad.asScala.property(OpSize).value 201 | ) 202 | } 203 | }) 204 | val queryMeta = queryEcMetaMaxState.clone 205 | queryMeta.select((YyMeta, YyMetaLoad)).dedup.toIterator().foreach({ 206 | case (yyMeta, yyMetaLoadSize) => { 207 | ret.tables("yy_meta") :+= ( 208 | yyMeta.asScala.property(FileOffset).value, 209 | yyMetaLoadSize.asScala.property(OpSize).value 210 | ) 211 | } 212 | }) 213 | 214 | // state = yy_def[state] 215 | // if (const < state) 216 | val YyDefPtrTmp = StepLabel[Vertex]("yy_def_ptr_tmp") 217 | val YyDef = StepLabel[Vertex]("yy_def") 218 | val YyDefLoad = StepLabel[Vertex]("yy_def_load") 219 | val MaxState = StepLabel[Vertex]("max_state") 220 | val MaxStateCond = StepLabel[Vertex]("max_state_cond") 221 | val YyDefBackEdge = StepLabel[Vertex]("yy_def_back_edge") 222 | val doQueryDefMaxState = queryDefMaxState 223 | .select(StatePhi) 224 | .repeat(_.out.has(OpMnemonic, P.within(Seq("CAST", "INT_SEXT", "INT_MULT", "INT_ZEXT")))).emit() 225 | .out.has(OpMnemonic, P.within(PTR_OPS)).as(YyDefPtrTmp) 226 | // Find yy_def 227 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS))).emit() 228 | .optional(_.in.has(OpMnemonic, "CAST")) 229 | .in.has(CouldBeTable, true).as(YyDef) 230 | // Go back to ptr 231 | .select(YyDefPtrTmp) 232 | .optional(_.out.has(OpMnemonic, "CAST")) 233 | .out.has(OpMnemonic, "LOAD").as(YyDefLoad) 234 | // Skip extensions 235 | .repeat(_.out.has(OpMnemonic, P.within(Seq("INT_SEXT", 
"INT_ZEXT")))).emit().as(YyDefBackEdge) 236 | // Check back edge 237 | .out.where(P.eq(StatePhi.name): P[String]) 238 | .select(YyDefBackEdge) 239 | // Look for comparison + branch 240 | .optional(_.out.has(OpMnemonic, "CAST")) 241 | .out.has(OpMnemonic, P.within(PCODE_INT_GTS_LTS)).as(MaxStateCond) 242 | // Find max_state 243 | .in.has(ConstValue).as(MaxState) 244 | // Find cbranch 245 | .select(MaxStateCond) 246 | .out.has(OpMnemonic, "CBRANCH") 247 | 248 | doQueryDefMaxState.select((YyDef, YyDefLoad, MaxState)).toIterator().foreach({ 249 | case (yyDef, yyDefLoad, maxState) => { 250 | ret.tables("yy_def") :+= ( 251 | yyDef.asScala.property(FileOffset).value, 252 | yyDefLoad.asScala.property(OpSize).value, 253 | ) 254 | ret.maxState :+= maxState.asScala.property(ConstValue).value 255 | } 256 | }) 257 | 258 | val YyAcceptPtr = StepLabel[Vertex]("yy_accept_ptr") 259 | val YyAccept = StepLabel[Vertex]("yy_accept") 260 | val YyAcceptLoad = StepLabel[Vertex]("yy_accept_load") 261 | val YyAcceptCmp = StepLabel[Vertex]("yy_accept_cmp") 262 | val doQueryYyAccept = queryYyAccept 263 | .select(StatePhi) 264 | .repeat(_.both.has(OpMnemonic, "MULTIEQUAL").dedup).emit() // Group together all multiequals 265 | .optional(_.out.has(OpMnemonic, "CAST")) 266 | .repeat(_.out.has(OpMnemonic, P.within(Seq("INT_SEXT", "INT_MULT")))).emit() 267 | .out.has(OpMnemonic, P.within(PTR_OPS)).as(YyAcceptPtr) 268 | // Look for yy_accept 269 | .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS))).emit() 270 | .in.has(CouldBeTable, true).as(YyAccept) 271 | // Look for cbranch 0 272 | .select(YyAcceptPtr) 273 | .optional(_.out.has(OpMnemonic, "CAST")) 274 | .out.has(OpMnemonic, "LOAD").as(YyAcceptLoad) 275 | .out.has(OpMnemonic, P.within(PCODE_INT_EQS)).as(YyAcceptCmp) 276 | .in.has(ConstValue, 0L) 277 | .select(YyAcceptCmp) 278 | .out.has(OpMnemonic, "CBRANCH") 279 | doQueryYyAccept.select((YyAccept, YyAcceptLoad)).dedup.toIterator().foreach({ 280 | case (yyAccept, yyAcceptLoad) => { 281 | ret.tables("yy_accept") :+= ( 282 | yyAccept.asScala.property(FileOffset).value, yyAcceptLoad.asScala.property(OpSize).value 283 | ) 284 | } 285 | }) 286 | 287 | ret 288 | } 289 | 290 | def main(args: Array[String]): Unit = { 291 | if (args.length != 1) { 292 | println("Usage: find-tables /path/to/graphml.xml") 293 | System.exit(1); 294 | } 295 | 296 | val graphmlPath = args(0); 297 | 298 | val jgraph = TinkerGraph.open() 299 | jgraph.traversal().io(graphmlPath).read().iterate() 300 | 301 | val graph = jgraph.asScala 302 | val g = graph.traversal 303 | 304 | val results = findTables(g) 305 | println(results) 306 | } 307 | } 308 | -------------------------------------------------------------------------------- /scala/src/main/scala/io/github/thebabush/reflex/main.scala: -------------------------------------------------------------------------------- 1 | package io.github.thebabush.reflex 2 | 3 | import java.io.File 4 | import java.nio.file.Files 5 | import java.util 6 | 7 | import dk.brics.automaton.{Automaton, State, Transition} 8 | import io.circe 9 | import io.circe._ 10 | import io.circe.syntax._ 11 | import scopt.OParser 12 | 13 | import scala.jdk.CollectionConverters._ 14 | import scala.util.Random 15 | 16 | 17 | sealed trait Command 18 | case class Json() extends Command 19 | case class Print() extends Command 20 | case class Proto() extends Command 21 | case class Rust() extends Command 22 | case class Gen() extends Command 23 | 24 | 25 | case class Config( 26 | debug: Boolean = false, 27 | command: Command = Print(), 28 | files: 
258 |     val YyAcceptPtr = StepLabel[Vertex]("yy_accept_ptr")
259 |     val YyAccept = StepLabel[Vertex]("yy_accept")
260 |     val YyAcceptLoad = StepLabel[Vertex]("yy_accept_load")
261 |     val YyAcceptCmp = StepLabel[Vertex]("yy_accept_cmp")
262 |     val doQueryYyAccept = queryYyAccept
263 |       .select(StatePhi)
264 |       .repeat(_.both.has(OpMnemonic, "MULTIEQUAL").dedup).emit() // Group together all multiequals
265 |       .optional(_.out.has(OpMnemonic, "CAST"))
266 |       .repeat(_.out.has(OpMnemonic, P.within(Seq("INT_SEXT", "INT_MULT")))).emit()
267 |       .out.has(OpMnemonic, P.within(PTR_OPS)).as(YyAcceptPtr)
268 |       // Look for yy_accept
269 |       .repeat(_.in.has(OpMnemonic, P.within(PTR_OPS))).emit()
270 |       .in.has(CouldBeTable, true).as(YyAccept)
271 |       // Look for cbranch 0
272 |       .select(YyAcceptPtr)
273 |       .optional(_.out.has(OpMnemonic, "CAST"))
274 |       .out.has(OpMnemonic, "LOAD").as(YyAcceptLoad)
275 |       .out.has(OpMnemonic, P.within(PCODE_INT_EQS)).as(YyAcceptCmp)
276 |       .in.has(ConstValue, 0L)
277 |       .select(YyAcceptCmp)
278 |       .out.has(OpMnemonic, "CBRANCH")
279 |     doQueryYyAccept.select((YyAccept, YyAcceptLoad)).dedup.toIterator().foreach({
280 |       case (yyAccept, yyAcceptLoad) => {
281 |         ret.tables("yy_accept") :+= (
282 |           yyAccept.asScala.property(FileOffset).value, yyAcceptLoad.asScala.property(OpSize).value
283 |         )
284 |       }
285 |     })
286 | 
287 |     ret
288 |   }
289 | 
290 |   def main(args: Array[String]): Unit = {
291 |     if (args.length != 1) {
292 |       println("Usage: find-tables /path/to/graphml.xml")
293 |       System.exit(1)
294 |     }
295 | 
296 |     val graphmlPath = args(0)
297 | 
298 |     val jgraph = TinkerGraph.open()
299 |     jgraph.traversal().io(graphmlPath).read().iterate()
300 | 
301 |     val graph = jgraph.asScala
302 |     val g = graph.traversal
303 | 
304 |     val results = findTables(g)
305 |     println(results)
306 |   }
307 | }
308 | 
--------------------------------------------------------------------------------
/scala/src/main/scala/io/github/thebabush/reflex/main.scala:
--------------------------------------------------------------------------------
1 | package io.github.thebabush.reflex
2 | 
3 | import java.io.File
4 | import java.nio.file.Files
5 | import java.util
6 | 
7 | import dk.brics.automaton.{Automaton, State, Transition}
8 | import io.circe
9 | import io.circe._
10 | import io.circe.syntax._
11 | import scopt.OParser
12 | 
13 | import scala.jdk.CollectionConverters._
14 | import scala.util.Random
15 | 
16 | 
17 | sealed trait Command
18 | case class Json() extends Command
19 | case class Print() extends Command
20 | case class Proto() extends Command
21 | case class Rust() extends Command
22 | case class Gen() extends Command
23 | 
24 | 
25 | case class Config(
26 |   debug: Boolean = false,
27 |   command: Command = Print(),
28 |   files: Seq[File] = Seq(),
29 |   out: File = null,
30 | )
31 | 
32 | class Main
33 | object Main {
34 |   var IS_DEBUGGING = false
35 | 
36 |   def parseArgs(args: Array[String]): Option[Config] = {
37 |     val builder = OParser.builder[Config]
38 |     val parser = {
39 |       import builder._
40 |       OParser.sequence(
41 |         programName("jreflex"),
42 |         head("jreflex", "0.0a"),
43 |         opt[Unit]('d', "debug")
44 |           .action((_, c) => c.copy(debug = true))
45 |           .text("enables verbose output on stderr"),
46 |         help("help")
47 |           .text("print this usage text"),
48 |         arg[File]("output")
49 |           .action((x, c) => c.copy(out = x))
50 |           .text("the output file"),
51 |         arg[File]("...")
52 |           .unbounded()
53 |           .action((f, c) => c.copy(files = c.files :+ f))
54 |           .text("the input files"),
55 |         note(""),
56 |         cmd("print")
57 |           .action((_, c) => c.copy(command = Print()))
58 |           .text("prints a (hopefully) human-readable version of the regexp"),
59 |         note(""),
60 |         cmd("json")
61 |           .action((_, c) => c.copy(command = Json()))
62 |           .text("prints a json-encoded version of the regexp"),
63 |         note(""),
64 |         cmd("proto")
65 |           .action((_, c) => c.copy(command = Proto()))
66 |           .text("prints a proto2 definition of the regexp"),
67 |         note(""),
68 |         cmd("rust")
69 |           .action((_, c) => c.copy(command = Rust()))
70 |           .text("prints a rust definition of the regexp"),
71 |         cmd("gen")
72 |           .action((_, c) => c.copy(command = Gen()))
73 |           .text("generates valid tokens in an endless loop"),
74 |       )
75 |     }
76 |     OParser.parse(parser, args, Config()) match {
77 |       case Some(config) => if (config.files.nonEmpty) Some(config) else None
78 |       case x => x
79 |     }
80 |   }
81 | 
82 |   def main(args: Array[String]): Unit = {
83 |     parseArgs(args) match {
84 |       case Some(config) => process(config)
85 |       case _ => ()
86 |     }
87 |   }
88 | 
89 |   def process(config: Config): Unit = {
90 | 
91 |     // TODO: learn Scala :P
92 |     implicit val objEncoder: Encoder[Regex] = new Encoder[Regex] {
93 |       final def apply(regex: Regex): circe.Json = circe.Json.obj(
94 |         ("type", regex.getClass.getSimpleName.asJson),
95 |         ("children", regex match {
96 |           case Empty() => throw new IllegalArgumentException("Empty() in json encoding")
97 |           case Epsilon() => circe.Json.arr()
98 |           case Literal(v) => circe.Json.arr(circe.Json.fromString("" + v))
99 |           case OneOrMore(v) => circe.Json.arr(apply(v))
100 |           case Optional(v) => circe.Json.arr(apply(v))
101 |           case Or(a, b) => circe.Json.arr(apply(a), apply(b))
102 |           case RESet(xs) => circe.Json.arr(circe.Json.fromString(xs.toArray.sorted.mkString))
103 |           case Star(v) => circe.Json.arr(apply(v))
104 |           case Then(a, b) => circe.Json.arr(apply(a), apply(b))
105 |         })
106 |       )
107 |     }
108 | 
109 |     IS_DEBUGGING = config.debug
110 | 
111 |     config.command match {
112 |       case Print() =>
113 |         val dfa = parse(config.files.head)
114 |         val regexp = DfaToRegex.hopcroft(dfa)
115 |         val out = Pretty.print(regexp)
116 |         if (IS_DEBUGGING) {
117 |           Console.err.println(out)
118 |         }
119 |         Files.writeString(config.out.toPath, out.toString() + "\n")
120 |       case Json() =>
121 |         val dfa = parse(config.files.head)
122 |         val regexp = DfaToRegex.hopcroft(dfa)
123 |         val out = regexp.asJson(objEncoder).toString()
124 |         if (IS_DEBUGGING) {
125 |           Console.err.println(out)
126 |         }
127 |         Files.writeString(config.out.toPath, out)
128 |       case Proto() =>
129 |         val regexps = config.files.map(f => DfaToRegex.hopcroft(parse(f)))
130 |         val out = (new MakeProto).serialize(regexps)
131 |         if (IS_DEBUGGING) {
132 |           Console.err.println(out)
133 |         }
134 |         Files.writeString(config.out.toPath, out)
135 |       case Rust() =>
136 |         val regexps = config.files.map(f => DfaToRegex.hopcroft(parse(f)))
137 |         // val regexps = Seq() :+ Then(Literal('A'), RESet("0123456789".toSet))
138 |         val out = (new MakeRust).serialize(regexps)
139 |         if (IS_DEBUGGING) {
140 |           Console.err.println(out)
141 |         }
142 |         Files.writeString(config.out.toPath, out)
143 |       case Gen() =>
144 |         val regexps = config.files.map(f => DfaToRegex.hopcroft(parse(f)))
145 |         while (true) {
146 |           println(Generator.generate(regexps(Random.nextInt(regexps.length))))
147 |         }
148 |     }
149 |   }
150 | 
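  // The `.dfa` inputs are the files emitted by py/simplify.py. The format, as
  // assumed by the parser below, is plain text:
  //
  //     <start-state-id>
  //     <node-count>
  //     <state-id> <accepting? 1 : 0>        (repeated node-count times)
  //     <edge-count>
  //     <src-id> <dst-id> <codepoint>...     (repeated edge-count times)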
151 |   def parse(file: File): Automaton = {
152 |     // TODO: Ugly, converted from the old java version
153 |     val lines: Iterator[String] = Files.lines(file.toPath).iterator().asScala
154 | 
155 |     val stateMap: util.Map[String, State] = new util.HashMap[String, State]
156 | 
157 |     val start: String = lines.next
158 |     val noNodes: Int = lines.next.toInt
159 | 
160 |     var ctr: Int = 0
161 |     while (ctr < noNodes) {
162 |       val tokens: Array[String] = lines.next.split("\\s+")
163 | 
164 |       assert(tokens.length == 2)
165 | 
166 |       val state: String = tokens(0)
167 |       val accepting = tokens(1).toInt > 0
168 | 
169 |       val s: State = new State
170 |       s.setAccept(accepting)
171 |       stateMap.put(state, s)
172 | 
173 |       ctr += 1
174 |     }
175 | 
176 |     val noEdges: Int = lines.next.toInt
177 |     var edge: Int = 0
178 |     while (edge < noEdges) {
179 |       val tokens = lines.next.split("\\s+")
180 |       val beg = stateMap.get(tokens(0))
181 |       val end = stateMap.get(tokens(1))
182 |       // Iterate on chars (skip first two as they are the node indices)
183 |       util.Arrays.stream(tokens).skip(2).forEach((ch: String) => {
184 |         val chars: Array[Char] = Character.toChars(ch.toInt)
185 |         assert(chars.length == 1)
186 |         beg.addTransition(new Transition(chars(0), end))
187 |       })
188 | 
189 |       edge += 1
190 |     }
191 | 
192 |     val dfa: Automaton = new Automaton
193 |     dfa.setInitialState(stateMap.get(start))
194 |     dfa.minimize()
195 | 
196 |     if (IS_DEBUGGING) {
197 |       System.err.println("NODES: " + dfa.getNumberOfStates)
198 |     }
199 | 
200 |     dfa
201 |   }
202 | 
203 |   def mkTestDfa(): Automaton = {
204 |     val test = new Automaton
205 |     val s1 = new State
206 |     val s2 = new State
207 |     test.setInitialState(s1)
208 |     s2.setAccept(true)
209 |     s1.addTransition(new Transition('1', s1))
210 |     s1.addTransition(new Transition('0', s2))
211 |     s2.addTransition(new Transition('0', s2))
212 |     s2.addTransition(new Transition('1', s2))
213 |     test
214 |   }
215 | }
216 | 
--------------------------------------------------------------------------------
/scala/src/test/scala/SimplificationTest.scala:
--------------------------------------------------------------------------------
1 | import dk.brics.automaton.RegExp
2 | import io.github.thebabush.reflex.{DfaToRegex, Main, Pretty, Simple}
3 | import org.scalatest.FunSuite
4 | 
5 | class SimplificationTest extends FunSuite {
6 |   test("SimplificationTest") {
7 |     val reconstructed = Pretty.print(DfaToRegex.hopcroft(Main.mkTestDfa())).toString()
8 | 
9 |     val dfa1 = new RegExp("1*0(0|1)*").toAutomaton(false)
10 |     val dfa2 = new RegExp(reconstructed).toAutomaton(false)
11 | 
12 |     assert(dfa1.equals(dfa2))
13 |   }
14 | }
15 | 
--------------------------------------------------------------------------------
/sleigh.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | rm -rf ../flex/sleigh/stage0/ ../flex/sleigh/out/
4 | 
5 | echo "=== STAGE 0 ==="
6 | 
7 | ./py/reflex.py \
8 |     --accept 0x74760 2 \
9 |     --base 0x75100 2 \
10 |     --chk 0x75560 2 \
11 |     --def 0x74b80 2 \
12 |     --ec 0x74360 4 \
13 |     --meta 0x74fe0 4 \
14 |     --nxt 0x75d20 2 \
15 |     --max-state 522 \
16 |     ../flex/sleigh/sleigh \
17 |     ../flex/sleigh/stage0/
18 | 
19 | ./py/simplify.py \
20 |     ../flex/sleigh/stage0/G.gpickle \
21 |     ../flex/sleigh/out/
22 | 
23 | echo "=== STAGE 1 ==="
24 | 
25 | echo "SCALA"
26 | parallel -j 12 echo print {} ';' ./scala/jreflex.sh print "{}.regexp" "{}" ::: ../flex/sleigh/out/*.dfa
27 | 
28 | #echo "PROTO"
29 | #./scala/jreflex.sh proto ../flex/sleigh/out/out.proto ../flex/sleigh/out/*.dfa
30 | 
31 | 
--------------------------------------------------------------------------------
/subs2pdf.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | rm -f "$1"/*.pdf
4 | for f in "$1"/*.dot
5 | do
6 |   echo "==== $f ===="
7 |   dot -Tpdf -o "$f.pdf" "$f"
8 |   dot -Tpng -o "$f.png" "$f"
9 | done
10 | 
11 | 
--------------------------------------------------------------------------------
/tryone.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | import os
4 | 
5 | 
6 | # paste a python bytes literal for the slaspec contents, e.g. b'define endian=big;'
7 | test = input('> ')
8 | test = eval(test)
9 | print(test)
10 | open('/tmp/test.slaspec', 'wb').write(test)
11 | os.system('./stroz/sleigh /tmp/test')
12 | 
13 | 
--------------------------------------------------------------------------------