├── .classpath
├── .gitignore
├── .project
├── .travis.yml
├── LICENSE
├── README.md
├── build.xml
└── src
└── jx86
├── io
└── AsmFileWriter.java
└── lang
├── Constant.java
├── Instruction.java
├── Register.java
├── Target.java
└── X86File.java
/.classpath:
--------------------------------------------------------------------------------
1 |
/**
 * Writes {@code X86File}s to a character stream in a
 * format compatible with the GNU Assembler (GAS). Such files can then be
 * compiled into binary machine code using the GNU Assembler.
 *
 * @author David J. Pearce
 *
 */
22 | public class AsmFileWriter {
23 | private PrintStream out;
24 |
25 | public AsmFileWriter(File file) throws IOException {
26 | this.out = new PrintStream(new FileOutputStream(file));
27 | }
28 |
29 | public AsmFileWriter(OutputStream output) throws IOException {
30 | this.out = new PrintStream(output);
31 | }
32 |
33 | public void close() {
34 | out.close();
35 | }
36 |
37 | public void write(X86File file) {
38 | for (X86File.Section s : file.sections()) {
39 | write(s);
40 | }
41 | }
42 |
43 | public void write(X86File.Section section) {
44 | if (section instanceof X86File.Code) {
45 | X86File.Code code = (X86File.Code) section;
46 | out.println();
47 | out.println("\t.text");
48 | for(Instruction insn : code.instructions) {
49 | write(insn);
50 | }
51 | } else if (section instanceof X86File.Data) {
52 | X86File.Data code = (X86File.Data) section;
53 | out.println();
54 | out.println("\t.data");
55 | for(Constant constant : code.constants) {
56 | write(constant);
57 | }
58 | } else {
59 | throw new IllegalArgumentException("unknown section encountered");
60 | }
61 | }
62 |
63 | public void write(Instruction insn) {
64 | if(insn instanceof Instruction.Label) {
65 | write((Instruction.Label) insn);
66 | } else if(insn instanceof Instruction.Unit) {
67 | write((Instruction.Unit) insn);
68 | } else if(insn instanceof Instruction.Reg) {
69 | write((Instruction.Reg) insn);
70 | } else if(insn instanceof Instruction.RegReg) {
71 | write((Instruction.RegReg) insn);
72 | } else if(insn instanceof Instruction.ImmReg) {
73 | write((Instruction.ImmReg) insn);
74 | } else if(insn instanceof Instruction.Addr) {
75 | write((Instruction.Addr) insn);
76 | } else if(insn instanceof Instruction.AddrReg) {
77 | write((Instruction.AddrReg) insn);
78 | } else if(insn instanceof Instruction.AddrRegReg) {
79 | write((Instruction.AddrRegReg) insn);
80 | } else if(insn instanceof Instruction.ImmIndReg) {
81 | write((Instruction.ImmIndReg) insn);
82 | } else if(insn instanceof Instruction.RegImmInd) {
83 | write((Instruction.RegImmInd) insn);
84 | } else if(insn instanceof Instruction.RegIndRegImm) {
85 | write((Instruction.RegIndRegImm) insn);
86 | } else if(insn instanceof Instruction.IndRegImmReg) {
87 | write((Instruction.IndRegImmReg) insn);
88 | } else {
89 | throw new IllegalArgumentException("unknown instruction encountered: " + insn);
90 | }
91 | }
92 |
93 | public void write(Instruction.Label insn) {
94 | if(insn.global) {
95 | out.println("\t.globl " + insn.label);
96 | }
97 | if(insn.alignment != 1) {
98 | out.println("\t.align " + insn.alignment);
99 | }
100 | out.println(insn.label + ":");
101 | }
102 |
103 | public void write(Instruction.Unit insn) {
104 | out.println("\t" + insn.operation);
105 | }
106 |
107 | public void write(Instruction.Reg insn) {
108 | out.println("\t" + insn.operation
109 | + Register.suffix(insn.operand.width()) + " %" + insn.operand);
110 | }
111 |
112 | public void write(Instruction.RegReg insn) {
113 | out.println("\t" + insn.operation
114 | + Register.suffix(insn.leftOperand.width(),insn.rightOperand.width()) + " %"
115 | + insn.leftOperand + ", %" + insn.rightOperand);
116 | }
117 |
118 | public void write(Instruction.ImmReg insn) {
119 | out.println("\t" + insn.operation
120 | + Register.suffix(insn.rightOperand.width()) + " $"
121 | + insn.leftOperand + ", %" + insn.rightOperand);
122 | }
123 |
124 | public void write(Instruction.ImmIndReg insn) {
125 | out.println("\t" + insn.operation
126 | + Register.suffix(insn.targetOperand.width()) + " "
127 | + insn.immediateOffset + "(%" + insn.baseOperand + "), %" + insn.targetOperand);
128 | }
129 |
130 | public void write(Instruction.RegImmInd insn) {
131 | out.println("\t" + insn.operation
132 | + Register.suffix(insn.sourceOperand.width()) + " %"
133 | + insn.sourceOperand + ", " + insn.immediateOffset + "(%"
134 | + insn.baseOperand + ")");
135 | }
136 |
137 | public void write(Instruction.IndRegImmReg insn) {
138 | out.println("\t" + insn.toString());
139 | }
140 |
141 | public void write(Instruction.RegIndRegImm insn) {
142 | out.println("\t" + insn.toString());
143 | }
144 |
145 | public void write(Instruction.Addr insn) {
146 | out.println("\t" + insn.operation + " " + insn.operand);
147 | }
148 |
149 | public void write(Instruction.AddrReg insn) {
150 | out.println("\t" + insn.operation
151 | + Register.suffix(insn.rightOperand.width()) + " "
152 | + insn.leftOperand + ", %" + insn.rightOperand);
153 | }
154 |
155 | public void write(Instruction.AddrRegReg insn) {
156 | out.println("\t" + insn.operation
157 | + Register.suffix(insn.rightOperand.width()) + " "
158 | + insn.leftOperand_1 + "(%" + insn.leftOperand_2 + "), %"
159 | + insn.rightOperand);
160 | }
161 |
162 | public void write(Constant constant) {
163 | if(constant.global) {
164 | out.println("\t.globl " + constant.label);
165 | }
166 | if(constant.alignment != 1) {
167 | out.println("\t.align " + constant.alignment);
168 | }
169 | if(constant.label != null) {
170 | out.println(constant.label + ":");
171 | }
172 | if(constant instanceof Constant.String) {
173 | Constant.String cs = (Constant.String) constant;
174 | // FIXME: probably should be doing some kind of escaping here.
175 | out.println("\t.asciz \"" + cs.value + "\"");
176 | } else if(constant instanceof Constant.Word) {
177 | Constant.Word cw = (Constant.Word) constant;
178 | out.println("\t.word " + cw.value);
179 | } else if(constant instanceof Constant.Long) {
180 | Constant.Long cw = (Constant.Long) constant;
181 | out.println("\t.long " + cw.value);
182 | } else if(constant instanceof Constant.Quad) {
183 | Constant.Quad cw = (Constant.Quad) constant;
184 | out.println("\t.quad " + cw.value);
185 | }
186 | }
187 | }
188 |
--------------------------------------------------------------------------------
/src/jx86/lang/Constant.java:
--------------------------------------------------------------------------------
1 | package jx86.lang;
2 |
/**
 * A data item, optionally labeled, which lives in the data segment of an x86
 * file. Every constant records its label, its alignment and whether or not
 * its label is global; the concrete subclasses add the value itself.
 *
 * @author David J. Pearce
 *
 */
public abstract class Constant {
    public final int alignment;
    public final boolean global;
    public final java.lang.String label;

    /**
     * Record the properties common to all constants.
     *
     * @param label
     *            Label for this constant.
     * @param alignment
     *            Alignment for this constant.
     * @param global
     *            Whether or not the label is global.
     */
    public Constant(java.lang.String label, int alignment, boolean global) {
        this.global = global;
        this.alignment = alignment;
        this.label = label;
    }

    /**
     * A string constant.
     *
     * @author David J. Pearce
     *
     */
    public static final class String extends Constant {
        public final java.lang.String value;

        /** Create a non-global string constant with alignment 1. */
        public String(java.lang.String label, java.lang.String value) {
            this(label, 1, false, value);
        }

        /** Create a string constant with explicit alignment and visibility. */
        public String(java.lang.String label, int alignment, boolean global, java.lang.String value) {
            super(label, alignment, global);
            this.value = value;
        }
    }

    /**
     * A single word constant.
     *
     * @author David J. Pearce
     *
     */
    public static final class Word extends Constant {
        public final int value;

        /** Create a non-global word constant with alignment 1. */
        public Word(java.lang.String label, int value) {
            this(label, 1, false, value);
        }

        /** Create a word constant with explicit alignment and visibility. */
        public Word(java.lang.String label, int alignment, boolean global, int value) {
            super(label, alignment, global);
            this.value = value;
        }
    }

    /**
     * A long (i.e. double) word constant.
     *
     * @author David J. Pearce
     *
     */
    public static final class Long extends Constant {
        public final long value;

        /** Create a non-global long constant with alignment 1. */
        public Long(java.lang.String label, long value) {
            this(label, 1, false, value);
        }

        /** Create a long constant with explicit alignment and visibility. */
        public Long(java.lang.String label, int alignment, boolean global, long value) {
            super(label, alignment, global);
            this.value = value;
        }
    }

    /**
     * A quad word constant.
     *
     * @author David J. Pearce
     *
     */
    public static final class Quad extends Constant {
        public final long value;

        /** Create a non-global quad constant with alignment 1. */
        public Quad(java.lang.String label, long value) {
            this(label, 1, false, value);
        }

        /** Create a quad constant with explicit alignment and visibility. */
        public Quad(java.lang.String label, int alignment, boolean global, long value) {
            super(label, alignment, global);
            this.value = value;
        }
    }
}
100 |
--------------------------------------------------------------------------------
/src/jx86/lang/Instruction.java:
--------------------------------------------------------------------------------
1 | package jx86.lang;
2 |
3 | /**
4 | * Represents an x86 machine instruction.
5 | *
6 | * @author David J. Pearce
7 | *
8 | */
9 | public interface Instruction {
10 |
11 | /**
12 | * Represents a label in an instruction sequence which could be a branch
13 | * target, etc.
14 | *
15 | * @author David J. Pearce
16 | *
17 | */
18 | public final class Label implements Instruction {
19 | public final String label;
20 | public final int alignment;
21 | public final boolean global;
22 |
23 | public Label(String label) {
24 | this.label = label;
25 | this.alignment = 1;
26 | this.global = false;
27 | }
28 |
29 | public Label(String label, int alignment, boolean global) {
30 | this.label = label;
31 | this.alignment = alignment;
32 | this.global = global;
33 | }
34 |
35 | public String toString() {
36 | return label + ":";
37 | }
38 | }
39 |
// ============================================================
// Unit Operations
// ============================================================
43 |
/**
 * Mnemonics accepted by {@code Unit} instructions. The constant name is
 * emitted verbatim as the mnemonic.
 */
public enum UnitOp {
    clc, // Clear carry flag
    cdc, // Clear direction flag. NOTE(review): the usual mnemonic for this is "cld" -- confirm "cdc" is intended
    cli, // Clear interrupt flag
    cltd, // Convert signed long to signed double long
    cqto, // Convert signed quad to oct
    cmc, // Complement carry flag
    cbw, // Convert byte to word
    cwde, // Convert word to double word
    cwd, // Convert word to double word
    cwq, // Convert double word to quad word. NOTE(review): not a mnemonic I recognise -- confirm against the assembler
    cpuid, // CPU identification
    enter, // Make stack frame
    hlt, // Halt
    invd, // Invalidate internal caches
    iret, // Interrupt return
    iretd, // Interrupt return (double word operand)
    lahf, // Load status flags into AH register
    lar, // Load access rights byte
    lds, // Load far pointer
    les, // Load far pointer
    lfs, // Load far pointer
    lgs, // Load far pointer
    lss, // Load far pointer

    leave, // Destroy stack frame
    nop, // No operation
    popa, // Pop all general-purpose registers
    popf, // Pop into flags
    pusha, // Push all general-purpose registers
    pushf, // Push EFLAGS register onto the stack
    ret // Return from function
}
77 |
78 | /**
79 | * Represents a unit instruction (e.g. ret
, nop
,
80 | * popf
, clc
, etc) which has no operands.
81 | *
82 | * @author David J. Pearce
83 | *
84 | */
85 | public final class Unit implements Instruction {
86 | public final UnitOp operation;
87 |
88 | /**
89 | * Create a unary instruction with a register operand.
90 | *
91 | * @param operation
92 | * Operation to perform
93 | */
94 | public Unit(UnitOp operation) {
95 | this.operation = operation;
96 | }
97 |
98 | public String toString() {
99 | return operation.toString();
100 | }
101 | }
102 |
103 | // ============================================================
104 | // Unary Operations
105 | // ============================================================
106 |
/**
 * Mnemonics accepted by {@code Reg} instructions. The width suffix is
 * appended to the constant name when the instruction is written.
 */
public enum RegOp {
    dec, // Decrement by 1
    inc, // Increment by 1
    in, // Input from port
    Int, // Call to interrupt (capitalised because "int" is a Java keyword)
    invlpg, // Invalidate TLB entry
    div, // Unsigned divide
    idiv, // Signed division
    neg, // Two's complement negation
    not, // One's complement negation
    out, // Output to port
    push, // Push onto the stack
    pop, // Pop from the stack
    rcl, // Rotate carry left
    rcr, // Rotate carry right
    rol, // Rotate left
    ror, // Rotate right
    sahf, // Store AH into flags
    sal, // Shift arithmetic left
    sar, // Shift arithmetic right
    shl, // Shift left
    shr, // Shift right
}
130 |
131 | /**
132 | * Represents a unary instruction (e.g. push
, pop
,
133 | * etc) with a register operand. For example:
134 | *
135 | *
136 | * pushq % eax 137 | *138 | * 139 | * This pushes the contents of the
%eax
register on to the
140 | * stack.
141 | *
142 | * @author David J. Pearce
143 | *
144 | */
145 | public final class Reg implements Instruction {
146 | public final RegOp operation;
147 | public final Register operand;
148 |
149 | /**
150 | * Create a unary instruction with a register operand.
151 | *
152 | * @param operation
153 | * Operation to perform
154 | * @param operand
155 | * Register operand
156 | */
157 | public Reg(RegOp operation, Register operand) {
158 | this.operation = operation;
159 | this.operand = operand;
160 | }
161 |
162 | public String toString() {
163 | return operation.toString() + " "
164 | + Register.suffix(operand.width()) + " %" + operand;
165 | }
166 | }
167 |
168 | // ============================================================
169 | // Binary Operations
170 | // ============================================================
171 |
/**
 * Mnemonics accepted by {@code RegReg} instructions.
 */
public enum RegRegOp {
    mov, // Move
    adc, // Add with carry
    add, // Add
    sub, // Subtract
    mul, // Unsigned multiplication
    imul, // Signed multiplication
    div, // Divide
    cmp, // Compare
    cmpxchg, // Compare and exchange
    cmpxchg8b, // Compare and exchange 8 bytes
    comi, // Compare scalar ordered double-precision floating point
    or, // Logical inclusive OR
    and, // Logical AND
    xor, // Logical exclusive OR
    xchg // Exchange
}
189 |
190 | /**
191 | * Represents a binary instruction (e.g. mov
, add
,
192 | * etc) with register operands. For example:
193 | *
194 | * 195 | * movl %eax, %ebx 196 | *197 | * 198 | * This assigns the contents of the
%eax
register to the
199 | * %ebx
register.
200 | *
201 | * @author David J. Pearce
202 | *
203 | */
204 | public final class RegReg implements Instruction {
205 | public final RegRegOp operation;
206 | public final Register leftOperand;
207 | public final Register rightOperand;
208 |
209 | /**
210 | * Create a binary instruction with two register operands. The width of
211 | * registers must equal, or an exception is raised.
212 | *
213 | * @param operation
214 | * Operation to perform
215 | * @param leftOperand
216 | * Register operand on left-hand side
217 | * @param rightOperand
218 | * Register operand on right-hand side
219 | */
220 | public RegReg(RegRegOp operation, Register leftOperand, Register rightOperand) {
221 | if(!Register.areCompatiable(leftOperand.width(),rightOperand.width())) {
222 | throw new IllegalArgumentException("Register operands must have identical width");
223 | }
224 | this.operation = operation;
225 | this.leftOperand = leftOperand;
226 | this.rightOperand = rightOperand;
227 | }
228 |
229 | public String toString() {
230 | return operation.toString() + " " + Register.suffix(leftOperand.width(), rightOperand.width())
231 | + " %" + leftOperand + ", %" + rightOperand;
232 | }
233 | }
234 |
/**
 * Mnemonics accepted by {@code ImmReg} instructions.
 */
public enum ImmRegOp {
    mov, // Move
    adc, // Add with carry
    add, // Add
    sub, // Subtract
    mul, // Unsigned multiplication
    imul, // Signed multiplication
    cmp, // Compare
    cmpxchg, // Compare and exchange
    cmpxchg8b, // Compare and exchange 8 bytes
    or, // Logical inclusive OR
    and, // Logical AND
    xor, // Logical exclusive OR
}
249 |
250 | /**
251 | * Represents a binary instruction (e.g. mov
, add
,
252 | * etc) with an immediate source operand and a register target operand. For
253 | * example:
254 | *
255 | * 256 | * movl $3, %eax 257 | *258 | * 259 | * This assigns the constant 3 to the
%eax
register.
260 | *
261 | * @author David J. Pearce
262 | *
263 | */
264 | public final class ImmReg implements Instruction {
265 | public final ImmRegOp operation;
266 | public final long leftOperand;
267 | public final Register rightOperand;
268 |
269 | /**
270 | * Create a binary instruction from one register to another. The
271 | * immediate operand must fit within the width of the target register,
272 | * or an exception is raised.
273 | *
274 | * @param leftOperand
275 | * Immediate operand on left-hand side. This is always
276 | * interpreted as a signed integer, regardless of width. For
277 | * example, if the rhs
has byte width then the
278 | * accepted range for the immediate operand is -128 .. 127.
279 | * @param rightOperand
280 | * Register operand on right-hand side.
281 | */
282 | public ImmReg(ImmRegOp operation, long leftOperand, Register rightOperand) {
283 | switch(rightOperand.width()) {
284 | case Byte:
285 | if(leftOperand < Byte.MIN_VALUE || leftOperand > Byte.MAX_VALUE) {
286 | throw new IllegalArgumentException("immediate operand does not fit into byte");
287 | }
288 | break;
289 | case Word:
290 | if(leftOperand < Short.MIN_VALUE || leftOperand > Short.MAX_VALUE) {
291 | throw new IllegalArgumentException("immediate operand does not fit into word");
292 | }
293 | break;
294 | case Long:
295 | if(leftOperand < Integer.MIN_VALUE || leftOperand > Integer.MAX_VALUE) {
296 | throw new IllegalArgumentException("immediate operand does not fit into double word");
297 | }
298 | break;
299 | default:
300 | // this case is always true by construction
301 | }
302 | this.operation = operation;
303 | this.leftOperand = leftOperand;
304 | this.rightOperand = rightOperand;
305 | }
306 |
307 | public String toString() {
308 | return operation.toString() + Register.suffix(rightOperand.width())
309 | + "$" + leftOperand + ", %" + rightOperand;
310 | }
311 | }
312 |
313 | // ============================================================
314 | // Ternary Operations
315 | // ============================================================
316 |
/**
 * Mnemonics accepted by {@code ImmIndReg} instructions; only {@code mov}
 * is defined.
 */
public enum ImmIndRegOp {
    mov
}
320 |
321 | /**
322 | * Create a ternary instruction with a register target operand and an
323 | * indirect source operand (whose address is determined from a register and
324 | * an immediate offset). For example:
325 | *
326 | * 327 | * movl -8(%ebp), %eax 328 | *329 | * 330 | * This loads the value from the location 8 bytes below where the 331 | *
ebp
register currently points into the %eax
332 | * register.
333 | *
334 | * @author David J. Pearce
335 | *
336 | */
337 | public final class ImmIndReg implements Instruction {
338 | public final ImmIndRegOp operation;
339 | public final long immediateOffset;
340 | public final Register baseOperand;
341 | public final Register targetOperand;
342 |
343 | /**
344 | * Create a binary instruction which operates on a register and an
345 | * indirect location (whose address is determined from a register and an
346 | * immediate offset). The immediate operand must fit within the width of
347 | * the target register, or an exception is raised.
348 | *
349 | * @param leftOperandImm
350 | * Immediate operand on left-hand side. This is always
351 | * interpreted as a signed integer, regardless of width. For
352 | * example, if the rhs
has byte width then the
353 | * accepted range for the immediate operand is -128 .. 127.
354 | * @param leftOperandReg
355 | * Register operand used on left-hand side.
356 | * @param rightOperand
357 | * Register operand on right-hand side.
358 | */
359 | public ImmIndReg(ImmIndRegOp operation, long leftOperandImm,
360 | Register leftOperandReg, Register rightOperand) {
361 | switch(rightOperand.width()) {
362 | case Byte:
363 | if(leftOperandImm < Byte.MIN_VALUE || leftOperandImm > Byte.MAX_VALUE) {
364 | throw new IllegalArgumentException("immediate operand does not fit into byte");
365 | }
366 | break;
367 | case Word:
368 | if(leftOperandImm < Short.MIN_VALUE || leftOperandImm > Short.MAX_VALUE) {
369 | throw new IllegalArgumentException("immediate operand does not fit into word");
370 | }
371 | break;
372 | case Long:
373 | if(leftOperandImm < Integer.MIN_VALUE || leftOperandImm > Integer.MAX_VALUE) {
374 | throw new IllegalArgumentException("immediate operand does not fit into double word");
375 | }
376 | break;
377 | default:
378 | // this case is always true by construction
379 | }
380 | this.operation = operation;
381 | this.baseOperand = leftOperandReg;
382 | this.immediateOffset = leftOperandImm;
383 | this.targetOperand = rightOperand;
384 | }
385 |
386 | public String toString() {
387 | return operation.toString() + Register.suffix(targetOperand.width())
388 | + immediateOffset + "(%" + baseOperand + "), %" + targetOperand;
389 | }
390 | }
391 |
/**
 * Mnemonics accepted by {@code RegImmInd} instructions; only {@code mov}
 * is defined.
 */
public enum RegImmIndOp {
    mov
}
395 |
396 | /**
397 | * Create a ternary instruction with a register source operand and an
398 | * indirect target operand (whose address is determined from a register and
399 | * an immediate offset). For example:
400 | *
401 | * 402 | * movl %eax, -8(%ebp) 403 | *404 | * 405 | * This loads the value from the
%eax
register into the
406 | * location 8 bytes below where the ebp
register currently
407 | * points.
408 | *
409 | * @author David J. Pearce
410 | *
411 | */
412 | public final class RegImmInd implements Instruction {
413 | public final RegImmIndOp operation;
414 | public final Register sourceOperand;
415 | public final long immediateOffset;
416 | public final Register baseOperand;
417 |
418 | /**
419 | * Create a binary instruction which operates on a register and an
420 | * indirect location (whose address is determined from a register and an
421 | * immediate offset). The immediate operand must fit within the width of
422 | * the target register, or an exception is raised.
423 | *
424 | */
425 | public RegImmInd(RegImmIndOp operation, Register sourceOperand, long immediateOffset, Register baseOperand) {
426 | switch(sourceOperand.width()) {
427 | case Byte:
428 | if(immediateOffset < Byte.MIN_VALUE || immediateOffset > Byte.MAX_VALUE) {
429 | throw new IllegalArgumentException("immediate operand does not fit into byte");
430 | }
431 | break;
432 | case Word:
433 | if(immediateOffset < Short.MIN_VALUE || immediateOffset > Short.MAX_VALUE) {
434 | throw new IllegalArgumentException("immediate operand does not fit into word");
435 | }
436 | break;
437 | case Long:
438 | if(immediateOffset < Integer.MIN_VALUE || immediateOffset > Integer.MAX_VALUE) {
439 | throw new IllegalArgumentException("immediate operand does not fit into double word");
440 | }
441 | break;
442 | default:
443 | // this case is always true by construction
444 | }
445 | this.operation = operation;
446 | this.sourceOperand = sourceOperand;
447 | this.baseOperand = baseOperand;
448 | this.immediateOffset = immediateOffset;
449 | }
450 |
451 | public String toString() {
452 | return operation.toString() + Register.suffix(sourceOperand.width()) + " %" + sourceOperand + ", "
453 | + immediateOffset + "(%" + baseOperand + ")";
454 | }
455 | }
456 |
457 | // ============================================================
458 | // Quaternary Operations
459 | // ============================================================
460 |
/**
 * Mnemonics accepted by {@code IndRegImmReg} instructions; only
 * {@code mov} is defined.
 */
public enum IndRegImmRegOp {
    mov
}
464 |
465 | /**
466 | * Create a quaternary instruction with a register source operand and an
467 | * indirect target operand (whose address is determined from a two registers
468 | * and a scaling). For example:
469 | *
470 | * 471 | * movl (%ebx,%esi,4),%eax 472 | *473 | * 474 | * This loads the value from the location determined by %ebx + (%esi*4) into 475 | * the
%eax
register. Here, %ebx
is the base
476 | * operand and %esi
is the index operand.
477 | *
478 | * @author David J. Pearce
479 | *
480 | */
481 | public final class IndRegImmReg implements Instruction {
482 | public final IndRegImmRegOp operation;
483 | public final Register baseOperand;
484 | public final Register indexOperand;
485 | public final long scaling;
486 | public final Register targetOperand;
487 |
488 | public IndRegImmReg(IndRegImmRegOp op, Register baseOperand, Register indexOperand, long scaling, Register targetOperand) {
489 | this.operation = op;
490 | this.baseOperand = baseOperand;
491 | this.indexOperand = indexOperand;
492 | this.scaling = scaling;
493 | this.targetOperand = targetOperand;
494 | }
495 |
496 | public String toString() {
497 | return operation.toString() + Register.suffix(targetOperand.width()) + " (%" + baseOperand + ",%"
498 | + indexOperand + "," + scaling + "), %" + targetOperand;
499 | }
500 | }
501 |
/**
 * Mnemonics accepted by {@code RegIndRegImm} instructions; only
 * {@code mov} is defined.
 */
public enum RegIndRegImmOp {
    mov
}
505 |
506 | /**
507 | * Create a quaternary instruction with a register source operand and an
508 | * indirect target operand (whose address is determined from a two registers
509 | * and a scaling). For example:
510 | *
511 | * 512 | * movl %eax, (%ebx,%esi,4) 513 | *514 | * 515 | * This loads the value from the
%eax
register into the
516 | * location determined by %ebx + (%esi*4). Here, %ebx
is the
517 | * base operand and %esi
is the index operand.
518 | *
519 | * @author David J. Pearce
520 | *
521 | */
522 | public final class RegIndRegImm implements Instruction {
523 | public final RegIndRegImmOp operation;
524 | public final Register sourceOperand;
525 | public final Register baseOperand;
526 | public final Register indexOperand;
527 | public final long scaling;
528 |
529 | public RegIndRegImm(RegIndRegImmOp op, Register sourceOperand, Register baseOperand, Register indexOperand, long scaling) {
530 | this.operation = op;
531 | this.sourceOperand = sourceOperand;
532 | this.baseOperand = baseOperand;
533 | this.indexOperand = indexOperand;
534 | this.scaling = scaling;
535 | }
536 |
537 | public String toString() {
538 | return operation.toString() + Register.suffix(sourceOperand.width()) + " %" + sourceOperand + ", (%"
539 | + baseOperand + ",%" + indexOperand + "," + scaling + ")";
540 | }
541 | }
542 |
543 | // ============================================================
544 | // Branch Operations
545 | // ============================================================
546 |
/**
 * Mnemonics accepted by {@code Addr} instructions, i.e. those taking a
 * single label operand. The condition tested by each conditional jump is
 * given in terms of the carry (CF), zero (ZF), sign (SF), overflow (OF) and
 * parity (PF) flags.
 */
public enum AddrOp {
    call, // Call procedure
    ja, // Jump if above (CF == 0 and ZF == 0)
    jae, // Jump if above or equal (CF == 0)
    jb, // Jump if below (CF == 1)
    jbe, // Jump if below or equal (CF == 1 or ZF == 1)
    jc, // Jump if carry (CF == 1)
    jcxz, // Jump if cx == 0
    jecxz, // Jump if ecx == 0
    je, // Jump if equal (ZF == 1)
    jg, // Jump if greater (ZF == 0 and SF == OF)
    jge, // Jump if greater or equal (SF == OF)
    jl, // Jump if less (SF != OF)
    jle, // Jump if less or equal (ZF == 1 or SF != OF)
    jna, // Jump if not above (CF == 1 or ZF == 1)
    jnae, // Jump if not above or equal (CF == 1)
    jmp, // Unconditional jump
    jnb, // Jump if not below (CF == 0)
    jnbe, // Jump if not below or equal (CF == 0 and ZF == 0)
    jnc, // Jump if not carry (CF == 0)
    jne, // Jump if not equal (ZF == 0)
    jng, // Jump if not greater (ZF == 1 or SF != OF)
    jnge, // Jump if not greater or equal (SF != OF)
    jnl, // Jump if not less (SF == OF)
    jnle, // Jump if not less or equal (ZF == 0 and SF == OF)
    jno, // Jump if not overflow (OF == 0)
    jnp, // Jump if not parity (PF == 0)
    jns, // Jump if not sign (SF == 0)
    jnz, // Jump if not zero (ZF == 0)
    jo, // Jump if overflow (OF == 1)
    jp, // Jump if parity (PF == 1)
    jpe, // Jump if parity even (PF == 1)
    jpo, // Jump if parity odd (PF == 0)
    js, // Jump if sign (SF == 1)
    jz, // Jump if zero (ZF == 1)
    loop, // Loop according to r/e/cx
    loope, // Loop according to r/e/cx
    loopz, // Loop according to r/e/cx
    loopne, // Loop according to r/e/cx
    loopnz, // Loop according to r/e/cx
}
588 |
589 | /**
590 | * Represents a unary instruction which uses a constant address operand
591 | * (represented with a label). For example, branching instructions (e.g.
592 | * jmp
, ja
, etc) with a label operand are
593 | * implemented in this way:
594 | *
595 | * 596 | * cmp %eax,%ebx 597 | * ja target 598 | *599 | * 600 | * This compares the
eax
and ebx
registesr and
601 | * branches to target
if eax
is above
602 | * ebx
.
603 | *
604 | * @author David J. Pearce
605 | *
606 | */
607 | public final class Addr implements Instruction {
608 | public final AddrOp operation;
609 | public final String operand;
610 |
611 | /**
612 | * Create a unary instruction with a register operand.
613 | *
614 | * @param operation
615 | * Operation to perform
616 | * @param operand
617 | * Register operand
618 | */
619 | public Addr(AddrOp operation, String operand) {
620 | this.operation = operation;
621 | this.operand = operand;
622 | }
623 |
624 | public String toString() {
625 | return operation.toString() + " " + operand;
626 | }
627 | }
628 |
/**
 * Mnemonics accepted by {@code AddrReg} instructions.
 */
public enum AddrRegOp {
    lea, // Load effective address
    mov, // Move
}
633 |
634 | /**
635 | * Represents a binary instruction which uses a constant address operand
636 | * (represented with a label) and a register operand. For example, the
637 | * lea
instruction is implemented in this way:
638 | *
639 | * 640 | * lea $label,%eax 641 | *642 | * 643 | * This loads the address of the given label into the
eax
644 | * register.
645 | *
646 | * @author David J. Pearce
647 | *
648 | */
649 | public final class AddrReg implements Instruction {
650 | public final AddrRegOp operation;
651 | public final String leftOperand;
652 | public final Register rightOperand;
653 |
654 | /**
655 | * Create a unary instruction with a register operand.
656 | *
657 | * @param operation
658 | * Operation to perform
659 | * @param operand
660 | * Register operand
661 | */
662 | public AddrReg(AddrRegOp operation, String leftOperand, Register rightOperand) {
663 | this.operation = operation;
664 | this.leftOperand = leftOperand;
665 | this.rightOperand = rightOperand;
666 | }
667 |
668 | public String toString() {
669 | return operation.toString() + " " + leftOperand + ", %" + rightOperand;
670 | }
671 | }
672 |
/**
 * Mnemonics accepted by {@code AddrRegReg} instructions.
 */
public enum AddrRegRegOp {
    lea, // Load effective address
    mov // Move
}
677 |
678 | /**
679 | * Represents a ternary instruction which uses an operand constructed from a
680 | * constant address and a register and a register operand. For example, the
681 | * lea
instruction is implemented in this way:
682 | *
683 | * 684 | * lea $label,%eax 685 | *686 | * 687 | * This loads the address of the given label into the
eax
688 | * register.
689 | *
690 | * @author David J. Pearce
691 | *
692 | */
693 | public final class AddrRegReg implements Instruction {
694 | public final AddrRegRegOp operation;
695 | public final String leftOperand_1;
696 | public final Register leftOperand_2;
697 | public final Register rightOperand;
698 |
699 | /**
700 | * Create a ternary instruction with an composite address/register
701 | * source operand, and a register target operand.
702 | *
703 | * @param operation
704 | * Operation to perform
705 | * @param operand
706 | * Register operand
707 | */
708 | public AddrRegReg(AddrRegRegOp operation, String leftOperand_1,
709 | Register leftOperand_2, Register rightOperand) {
710 | this.operation = operation;
711 | this.leftOperand_1 = leftOperand_1;
712 | this.leftOperand_2 = leftOperand_2;
713 | this.rightOperand = rightOperand;
714 | }
715 |
716 | public String toString() {
717 | return operation.toString() + " " + leftOperand_1 + "(%"
718 | + leftOperand_2 + "), %" + rightOperand;
719 | }
720 | }
721 | }
722 |
--------------------------------------------------------------------------------
/src/jx86/lang/Register.java:
--------------------------------------------------------------------------------
1 | package jx86.lang;
2 |
3 | import java.util.Map;
4 |
5 |
/**
 * Represents, and provides information about, an x86 register. The
 * general-purpose registers are grouped into families (see
 * {@link #ALL_FAMILIES}); the SSE registers <code>xmm0</code> to
 * <code>xmm7</code> are not members of any family.
 *
 * @author David J. Pearce
 *
 */
public class Register {

    // ============================================
    // Enums & Constants
    // ============================================

    /**
     * The widths a register can have.
     */
    public enum Width {
        ScalarDouble, // 64 bits
        Quad, // 64 bits
        ScalarSingle, // 32 bits
        Long, // 32 bits
        Word, // 16 bits
        Byte // 8 bits
    }

    /**
     * Determine whether two register widths are "compatible" with each
     * other. That is, whether or not registers of these widths can be used
     * together in a given instruction. Two widths are compatible when they
     * are identical, or when one is a scalar width and the other is the
     * integer width of the same size (<code>ScalarDouble</code> with
     * <code>Quad</code>, <code>ScalarSingle</code> with <code>Long</code>).
     * <p>
     * The spelling of the method name is retained because it is part of the
     * public interface of this class.
     *
     * @param lhs
     *            first width
     * @param rhs
     *            second width
     * @return <code>true</code> if the two widths are compatible
     */
    public static boolean areCompatiable(Width lhs, Width rhs) {
        return lhs == rhs || (lhs == Width.ScalarDouble && rhs == Width.Quad)
                || (lhs == Width.Quad && rhs == Width.ScalarDouble)
                || (lhs == Width.ScalarSingle && rhs == Width.Long)
                || (lhs == Width.Long && rhs == Width.ScalarSingle);
    }

    /**
     * Join two register widths together to produce their "compatible" width.
     * Identical widths join to themselves. Otherwise a scalar width is
     * replaced by the integer width of the same size
     * (<code>ScalarDouble</code> becomes <code>Quad</code>,
     * <code>ScalarSingle</code> becomes <code>Long</code>) and the join is
     * retried.
     *
     * @param lhs
     *            first width
     * @param rhs
     *            second width
     * @return the joined width
     * @throws IllegalArgumentException
     *             if the widths still differ once no scalar width remains
     */
    private static Register.Width join(Register.Width lhs, Register.Width rhs) {
        if (lhs == rhs) {
            return lhs;
        } else if (lhs == Width.ScalarDouble) {
            return join(Width.Quad, rhs);
        } else if (lhs == Width.ScalarSingle) {
            return join(Width.Long, rhs);
        } else if (rhs == Width.ScalarDouble) {
            return join(lhs, Width.Quad);
        } else if (rhs == Width.ScalarSingle) {
            return join(lhs, Width.Long);
        }

        throw new IllegalArgumentException("incomparable register widths: " + lhs + ", " + rhs);
    }

    /**
     * Return the appropriate suffix to associate with an instruction that
     * operates on two registers (potentially of different width). The suffix
     * is that of the joined width, so for example <code>ScalarDouble</code>
     * combined with <code>Quad</code> gives <code>"q"</code>.
     *
     * @param lhs
     *            width of the first register
     * @param rhs
     *            width of the second register
     * @return the instruction suffix for the joined width
     * @throws IllegalArgumentException
     *             if the two widths cannot be joined
     */
    public static String suffix(Register.Width lhs, Register.Width rhs) {
        return suffix(join(lhs, rhs));
    }

    /**
     * Return the suffix associated with a given register width.
     *
     * @param width
     *            the register width
     * @return <code>"b"</code>, <code>"w"</code>, <code>"l"</code>,
     *         <code>"ss"</code>, <code>"q"</code> or <code>"sd"</code>
     */
    public static String suffix(Register.Width width) {
        switch (width) {
        case Byte:
            return "b";
        case Word:
            return "w";
        case Long:
            return "l";
        case ScalarSingle:
            return "ss";
        case Quad:
            return "q";
        case ScalarDouble:
            return "sd";
        default:
            throw new IllegalArgumentException("Invalid register width: " + width.name());
        }
    }

    // x86_8
    public static final Register AL = new Register("al", Width.Byte);
    public static final Register AH = new Register("ah", Width.Byte);
    public static final Register BL = new Register("bl", Width.Byte);
    public static final Register BH = new Register("bh", Width.Byte);
    public static final Register CL = new Register("cl", Width.Byte);
    public static final Register CH = new Register("ch", Width.Byte);
    public static final Register DL = new Register("dl", Width.Byte);
    public static final Register DH = new Register("dh", Width.Byte);

    // x86_16
    public static final Register AX = new Register("ax", Width.Word);
    public static final Register BX = new Register("bx", Width.Word);
    public static final Register CX = new Register("cx", Width.Word);
    public static final Register DX = new Register("dx", Width.Word);
    public static final Register DI = new Register("di", Width.Word);
    public static final Register SI = new Register("si", Width.Word);
    public static final Register BP = new Register("bp", Width.Word);
    public static final Register SP = new Register("sp", Width.Word);
    public static final Register IP = new Register("ip", Width.Word);

    // x86_32
    public static final Register EAX = new Register("eax", Width.Long);
    public static final Register EBX = new Register("ebx", Width.Long);
    public static final Register ECX = new Register("ecx", Width.Long);
    public static final Register EDX = new Register("edx", Width.Long);
    public static final Register EDI = new Register("edi", Width.Long);
    public static final Register ESI = new Register("esi", Width.Long);
    public static final Register EBP = new Register("ebp", Width.Long);
    public static final Register ESP = new Register("esp", Width.Long);
    public static final Register EIP = new Register("eip", Width.Long);

    // x86_64
    public static final Register RAX = new Register("rax", Width.Quad);
    public static final Register RBX = new Register("rbx", Width.Quad);
    public static final Register RCX = new Register("rcx", Width.Quad);
    public static final Register RDX = new Register("rdx", Width.Quad);
    public static final Register RDI = new Register("rdi", Width.Quad);
    public static final Register RSI = new Register("rsi", Width.Quad);
    public static final Register RBP = new Register("rbp", Width.Quad);
    public static final Register RSP = new Register("rsp", Width.Quad);
    public static final Register RIP = new Register("rip", Width.Quad);

    // Streaming SIMD Extensions (SSE)
    public static final Register XMM0 = new Register("xmm0", Width.ScalarDouble);
    public static final Register XMM1 = new Register("xmm1", Width.ScalarDouble);
    public static final Register XMM2 = new Register("xmm2", Width.ScalarDouble);
    public static final Register XMM3 = new Register("xmm3", Width.ScalarDouble);
    public static final Register XMM4 = new Register("xmm4", Width.ScalarDouble);
    public static final Register XMM5 = new Register("xmm5", Width.ScalarDouble);
    public static final Register XMM6 = new Register("xmm6", Width.ScalarDouble);
    public static final Register XMM7 = new Register("xmm7", Width.ScalarDouble);

    // Families
    public static final Register[] AX_FAMILY = {
        Register.AL, Register.AH, Register.AX, Register.EAX, Register.RAX
    };
    public static final Register[] BX_FAMILY = {
        Register.BL, Register.BH, Register.BX, Register.EBX, Register.RBX
    };
    public static final Register[] CX_FAMILY = {
        Register.CL, Register.CH, Register.CX, Register.ECX, Register.RCX
    };
    public static final Register[] DX_FAMILY = {
        Register.DL, Register.DH, Register.DX, Register.EDX, Register.RDX
    };
    public static final Register[] DI_FAMILY = {
        Register.DI, Register.EDI, Register.RDI
    };
    public static final Register[] SI_FAMILY = {
        Register.SI, Register.ESI, Register.RSI
    };
    public static final Register[] BP_FAMILY = {
        Register.BP, Register.EBP, Register.RBP
    };
    public static final Register[] SP_FAMILY = {
        Register.SP, Register.ESP, Register.RSP
    };
    public static final Register[] IP_FAMILY = {
        Register.IP, Register.EIP, Register.RIP
    };

    public static final Register[][] ALL_FAMILIES = {
        AX_FAMILY,
        BX_FAMILY,
        CX_FAMILY,
        DX_FAMILY,
        DI_FAMILY,
        SI_FAMILY,
        BP_FAMILY,
        SP_FAMILY,
        IP_FAMILY
    };

    // ============================================
    // Fields
    // ============================================

    private final Width width;
    private final String name;

    // ============================================
    // Constructors
    // ============================================

    /**
     * Create a register with a given name and width.
     *
     * @param name
     *            register name without the <code>%</code> prefix
     * @param width
     *            register width
     */
    Register(String name, Width width) {
        this.name = name;
        this.width = width;
    }

    // ============================================
    // Accessors
    // ============================================

    /**
     * Return the width of this register.
     *
     * @return the register width
     */
    public Width width() {
        return width;
    }

    /**
     * Return the name of this register.
     *
     * @return the register name
     */
    public String name() {
        return name;
    }

    /**
     * Return the name of this register.
     *
     * @return the register name
     */
    @Override
    public String toString() {
        return name;
    }

    /**
     * Return the family this register is associated with. The returned array
     * is the shared family constant itself, not a copy.
     *
     * @return the family containing this register
     * @throws IllegalArgumentException
     *             if this register is in none of {@link #ALL_FAMILIES}, which
     *             is the case for the SSE registers
     */
    public Register[] family() {
        Register[] family = findFamily();
        if (family == null) {
            throw new IllegalArgumentException("Register does not have family?");
        }
        return family;
    }

    /**
     * Determine the first sibling of a given width in this register's family.
     * For example, the <code>Quad</code> width sibling of the
     * <code>bx</code> register is <code>rbx</code>. In contrast, the
     * <code>Long</code> width sibling is <code>ebx</code>.
     * <p>
     * This function is useful for determining the head of a register family
     * for a given architecture. For example, on <code>x86_64</code> the head
     * of the <code>bx</code> family is <code>rbx</code>.
     * <p>
     * A register that belongs to no family (an SSE register) is treated as
     * the only member of its own family, so this method returns
     * <code>null</code> for it rather than propagating the exception thrown
     * by {@link #family()}.
     *
     * @param width
     *            the width of the sibling being sought
     * @return the first sibling of the given width, or <code>null</code> if
     *         no such sibling exists
     */
    public Register sibling(Width width) {
        Register[] family = findFamily();
        if (family == null) {
            // No family: the only candidate is this register itself.
            return width() == width ? this : null;
        }
        for (Register candidate : family) {
            if (candidate.width() == width) {
                // first match
                return candidate;
            }
        }
        return null;
    }

    // ============================================
    // Helpers
    // ============================================

    /**
     * Look up the family containing this register without throwing.
     *
     * @return the family containing this register, or <code>null</code> if it
     *         is in none of {@link #ALL_FAMILIES}
     */
    private Register[] findFamily() {
        for (Register[] candidate : ALL_FAMILIES) {
            for (Register member : candidate) {
                if (member == this) {
                    return candidate;
                }
            }
        }
        return null;
    }
}
294 |
--------------------------------------------------------------------------------
/src/jx86/lang/Target.java:
--------------------------------------------------------------------------------
1 | package jx86.lang;
2 |
/**
 * Describes a compilation target as a pairing of an operating system with an
 * x86 architecture, so that target-specific details can be abstracted away.
 * Instances are only available through the constants declared here.
 *
 * @author David J. Pearce
 *
 */
public final class Target {

    // ============================================
    // Enums & Constants
    // ============================================

    /**
     * The set of supported operating systems.
     *
     * @author David J. Pearce
     *
     */
    public enum OS {
        LINUX,
        MACOS
    }

    /**
     * The set of supported x86 architectures.
     *
     * @author David J. Pearce
     *
     */
    public enum Arch {
        X86_32,
        X86_64
    }

    public static final Target LINUX_X86_64 = new Target(OS.LINUX, Arch.X86_64);

    public static final Target MACOS_X86_64 = new Target(OS.MACOS, Arch.X86_64);

    // ============================================
    // Fields
    // ============================================

    public final OS os;
    public final Arch arch;

    // ============================================
    // Constructors
    // ============================================

    private Target(OS os, Arch arch) {
        this.arch = arch;
        this.os = os;
    }

    /**
     * Return the "natural" size of this architecture in bytes: 4 for
     * <code>X86_32</code> and 8 for <code>X86_64</code>.
     *
     * @return the natural width of the architecture, in bytes
     */
    public int widthInBytes() {
        switch (arch) {
        case X86_32:
            return 4; // 32 bits
        case X86_64:
            return 8; // 64 bits
        default:
            throw new IllegalArgumentException(
                    "Unknown architecture encountered: " + arch);
        }
    }
}
74 |
--------------------------------------------------------------------------------
/src/jx86/lang/X86File.java:
--------------------------------------------------------------------------------
1 | package jx86.lang;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | public class X86File {
7 |
8 | // ============================================
9 | // Classes
10 | // ============================================
11 |
/**
 * Marker type for a section of an {@link X86File}. It declares no members;
 * the concrete section classes in this file implement it.
 */
public interface Section {
}
15 |
16 | public static class Code implements Section {
17 | public final List