breakpoints)
118 | {
119 | if (Breakpoints.Count == 0)
120 | {
121 | LineCount = GetAsmFileLineNumber();
122 | }
123 |
124 | var errorMsg = "";
125 |
126 | foreach (var bp in breakpoints)
127 | {
128 | if (bp >= LineCount)
129 | {
130 | errorMsg += bp + " ";
131 | }
132 | else
133 | {
134 | // Duplicate breakpoints are ignored.
135 | Breakpoints.Add(bp);
136 | }
137 | }
138 |
139 | return errorMsg.Length > 1 ? errorMsg : null;
140 | }
141 |
/// <summary>
/// Opens the file at <c>AsmFilePath</c> for reading and returns the line count
/// computed by <c>CountAsmFileLines</c>. The stream is disposed before returning.
/// </summary>
/// <returns>Number of lines counted in the asm file.</returns>
public int GetAsmFileLineNumber()
{
    using (var asmStream = File.OpenRead(AsmFilePath))
    {
        return CountAsmFileLines(asmStream);
    }
}
148 |
/// <summary>
/// Formats <c>DebuggerErrorMsg</c> with the given symbol as its single format argument.
/// </summary>
/// <param name="symbol">Value substituted into the message template.</param>
/// <returns>The formatted error message.</returns>
public string GetErrorMsg(string symbol) => string.Format(DebuggerErrorMsg, symbol);
153 |
/// <summary>
/// Counts asm file lines. Used to determine if breakpoints can be set when debugging.
/// Based on a method written by Nima Ara.
/// </summary>
/// <remarks>
/// The first CR or LF byte encountered becomes the end-of-line marker for the rest of
/// the stream; only bytes equal to that marker are counted afterwards. A trailing line
/// without a terminator is counted as well.
/// </remarks>
/// <param name="stream">Stream to read; it is consumed until no more bytes are returned.</param>
/// <returns>Number of lines found in the stream (0 for an empty stream).</returns>
private int CountAsmFileLines(Stream stream)
{
    var buffer = new byte[1024 * 1024];
    var total = 0;
    var detectedEOL = NULL;
    var lastChar = NULL;

    for (var read = stream.Read(buffer, 0, buffer.Length);
         read > 0;
         read = stream.Read(buffer, 0, buffer.Length))
    {
        for (var offset = 0; offset < read; ++offset)
        {
            lastChar = (char)buffer[offset];

            if (detectedEOL == NULL)
            {
                // No marker chosen yet: the first CR or LF seen decides it.
                if (lastChar == LF || lastChar == CR)
                {
                    detectedEOL = lastChar;
                    total++;
                }
            }
            else if (lastChar == detectedEOL)
            {
                total++;
            }
        }
    }

    // The final line has no terminator when the last byte read is neither an EOL byte nor NULL.
    if (lastChar != LF && lastChar != CR && lastChar != NULL)
    {
        total++;
    }

    return total;
}
232 | }
233 | }
234 | }
235 |
--------------------------------------------------------------------------------
/AMx64/Interpreter/Expression.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace AMx64
4 | {
5 | public partial class AMX64
6 | {
7 | public class Expression
8 | {
9 | public Operations Operation = Operations.None;
10 |
11 | public string LeftOp = null, RightOp = null;
12 |
13 | public UInt64 LeftOpValue, RightOpValue;
14 |
15 | public byte CodeSize;
16 |
17 | public bool ExplicitSize = false;
18 |
19 | ///
20 | /// Result of the operation.
21 | ///
22 | public UInt64 Result = 0;
23 |
/// <summary>
/// Parses one assembly line and fills <see cref="Operation"/>, <see cref="LeftOp"/>,
/// <see cref="RightOp"/>, <see cref="CodeSize"/> and <see cref="ExplicitSize"/>.
/// </summary>
/// <remarks>
/// The line is matched (upper-cased) against the two-operand, conditional-jump and
/// single-operand/stack instruction patterns, in that order. Operands are taken from the
/// original, non-upper-cased text.
/// </remarks>
/// <param name="asmLine">Assembly line to parse.</param>
/// <param name="errorMsg">Set to a description when operand sizes are found to differ; otherwise empty.</param>
/// <returns>true if the line matched a known pattern and its operands passed the bracket checks, otherwise false.</returns>
public bool ParseAsmLine(string asmLine, out string errorMsg)
{
    errorMsg = "";

    if (asmLine == null)
    {
        return false;
    }

    // Invariant casing keeps mnemonic/register matching independent of the current culture.
    var asmLineUpper = asmLine.ToUpperInvariant();

    if (asmLineRegex.Match(asmLineUpper).Success)
    {
        var match = asmLineInstrRegex.Match(asmLineUpper);

        if (match.Success)
        {
            ParseOperation(match.Value.TrimEnd());
            asmLine = asmLine.Substring(match.Value.Length - 1).TrimStart();
        }

        if (asmLineInstrExplSizeRegex.Match(asmLineUpper).Success)
        {
            // The size keyword ends at the first blank; accept a tab as well as a space.
            var sizeEnd = asmLine.IndexOfAny(new[] { ' ', '\t' });
            if (sizeEnd < 0)
            {
                return false;
            }

            ParseExplicitSize(asmLine.Substring(0, sizeEnd));
            asmLine = asmLine.Substring(sizeEnd).TrimStart();
        }

        var operands = asmLine.Split(',');
        if (operands.Length < 2)
        {
            return false;
        }

        LeftOp = operands[0].Trim();
        RightOp = operands[1].Trim();

        if (!ExplicitSize)
        {
            // The size heuristic below inspects the first character of each operand.
            if (LeftOp.Length == 0 || RightOp.Length == 0)
            {
                return false;
            }

            // NOTE(review): this comparison is case-sensitive (only upper-case 'E'/'R' prefixes
            // are recognised) although the rest of the parser is case-insensitive — confirm intent.
            var left = LeftOp[0];
            var right = RightOp[0];
            var differentSizes = (left == 'E' && right == 'R')
                || (left != 'R' && left != 'E' && (right == 'E' || right == 'R'));

            if (differentSizes)
            {
                errorMsg = "Instruction operands must be the same size.";
                return false;
            }
        }

        return CheckLeftOperand() && CheckRightOperand();
    }

    if (asmLineJccRegex.Match(asmLineUpper).Success)
    {
        // RemoveEmptyEntries: repeated or leading whitespace must not yield empty tokens.
        var tokens = asmLine.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        if (tokens.Length < 2)
        {
            return false;
        }

        ParseOperation(tokens[0]);
        LeftOp = tokens[1];

        return CheckLeftOperand();
    }

    if (asmLineNotInstrRegex.Match(asmLineUpper).Success || asmLineStackIntrRegex.Match(asmLineUpper).Success)
    {
        var tokens = asmLine.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        if (tokens.Length < 2)
        {
            return false;
        }

        ParseOperation(tokens[0]);

        if (tokens.Length == 3)
        {
            // "<op> <size> <operand>"
            ParseExplicitSize(tokens[1]);
            LeftOp = tokens[2];
        }
        else
        {
            LeftOp = tokens[1];
        }

        // A memory operand is rejected unless an explicit size was given.
        if (LeftOp.StartsWith('[') && !ExplicitSize)
        {
            return false;
        }

        return CheckLeftOperand();
    }

    return false;
}
88 |
/// <summary>
/// Validates the brackets of <see cref="RightOp"/>: a missing/empty operand is accepted,
/// otherwise it must either both start with '[' and end with ']' or do neither.
/// </summary>
private bool CheckRightOperand()
{
    if (string.IsNullOrEmpty(RightOp))
    {
        return true;
    }

    var opensBracket = RightOp.StartsWith('[');
    var closesBracket = RightOp.EndsWith(']');

    return opensBracket == closesBracket;
}
93 |
/// <summary>
/// Validates the brackets of <see cref="LeftOp"/>: it must either both start with '['
/// and end with ']' or do neither.
/// </summary>
private bool CheckLeftOperand()
{
    var opensBracket = LeftOp.StartsWith('[');
    var closesBracket = LeftOp.EndsWith(']');

    return opensBracket == closesBracket;
}
98 |
/// <summary>
/// Maps an instruction mnemonic (compared in upper case) to <see cref="Operation"/>.
/// A mnemonic that is not listed leaves <see cref="Operation"/> unchanged.
/// </summary>
/// <param name="operation">Instruction mnemonic, e.g. "mov".</param>
private void ParseOperation(string operation)
{
    Operation = operation.ToUpper() switch
    {
        "ADD" => Operations.Add,
        "SUB" => Operations.Sub,
        "MOV" => Operations.Mov,
        "AND" => Operations.BitAnd,
        "OR" => Operations.BitOr,
        "NOT" => Operations.BitNot,
        "CMP" => Operations.Cmp,
        "PUSH" => Operations.Push,
        "POP" => Operations.Pop,
        "JMP" => Operations.Jmp,
        "JE" => Operations.Je,
        "JNE" => Operations.Jne,
        "JGE" => Operations.Jge,
        "JL" => Operations.Jl,
        // Unknown mnemonic: keep whatever was set before.
        _ => Operation
    };
}
147 |
/// <summary>
/// Sets <see cref="CodeSize"/> and <see cref="ExplicitSize"/> from a size keyword.
/// </summary>
/// <remarks>
/// BYTE, WORD, DWORD and QWORD map to code sizes 0, 1, 2 and 3. A null or empty keyword
/// selects code size 3 and clears <see cref="ExplicitSize"/>. Any other keyword sets
/// <see cref="ExplicitSize"/> but leaves <see cref="CodeSize"/> untouched.
/// </remarks>
/// <param name="explicitSize">Size keyword (case-insensitive), or empty for the default.</param>
/// <returns>false only when a non-empty keyword is not recognised, otherwise true.</returns>
private bool ParseExplicitSize(string explicitSize = "")
{
    if (string.IsNullOrEmpty(explicitSize))
    {
        CodeSize = 3;
        ExplicitSize = false;
        return true;
    }

    ExplicitSize = true;

    var keyword = explicitSize.ToUpper();

    if (keyword == "BYTE")
    {
        CodeSize = 0;
    }
    else if (keyword == "WORD")
    {
        CodeSize = 1;
    }
    else if (keyword == "DWORD")
    {
        CodeSize = 2;
    }
    else if (keyword == "QWORD")
    {
        CodeSize = 3;
    }
    else
    {
        // Not expected to happen for callers that pre-validate the keyword.
        return false;
    }

    return true;
}
181 | }
182 | }
183 | }
184 |
--------------------------------------------------------------------------------
/AMx64/Memory/CPURegister.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.InteropServices;
3 |
4 | namespace AMx64
5 | {
/// <summary>
/// 64-bit register representation.
/// </summary>
/// <remarks>
/// <see cref="x64"/>, <see cref="x16"/> and <see cref="x8"/> are overlaid at byte offset 0 and
/// <see cref="x8h"/> at byte offset 1 of the same storage. Which bits of <see cref="x64"/> those
/// narrower fields alias depends on the platform byte order (low-order bits on little-endian
/// hosts — NOTE(review): confirm no big-endian target is intended).
/// <see cref="x32"/> is a computed property: reading truncates <see cref="x64"/>, writing replaces
/// the whole of <see cref="x64"/> with the zero-extended 32-bit value.
/// </remarks>
[StructLayout(LayoutKind.Explicit)]
public class CPURegister
{
    private const string CodeSizeOutOfRangeMessage = "Registers code size out of range.";

    [FieldOffset(0)]
    public UInt64 x64;
    public UInt32 x32 { get => (UInt32)x64; set => x64 = value; }
    [FieldOffset(0)]
    public UInt16 x16;
    [FieldOffset(0)]
    public byte x8;
    [FieldOffset(1)]
    public byte x8h;

    /// <summary>Creates a register holding <paramref name="initValue"/>.</summary>
    /// <param name="initValue">Initial 64-bit content.</param>
    public CPURegister(UInt64 initValue)
    {
        x64 = initValue;
    }

    /// <summary>
    /// Reads or writes the register view selected by a code size:
    /// 3 = <see cref="x64"/>, 2 = <see cref="x32"/>, 1 = <see cref="x16"/>,
    /// 0 = <see cref="x8"/>, 4 = <see cref="x8h"/>. Written values are truncated to the view width.
    /// </summary>
    /// <param name="codeSize">View selector (0-4).</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="codeSize"/> is not in 0-4.</exception>
    internal UInt64 this[UInt64 codeSize]
    {
        get
        {
            return codeSize switch
            {
                3 => x64,
                2 => x32,
                1 => x16,
                0 => x8,
                4 => x8h,
                // The (paramName, actualValue, message) overload keeps the text in Message;
                // the single-string overload would have stored it as the parameter name.
                _ => throw new ArgumentOutOfRangeException(nameof(codeSize), codeSize, CodeSizeOutOfRangeMessage)
            };
        }

        set
        {
            switch (codeSize)
            {
                case 3:
                    x64 = value;
                    break;
                case 2:
                    x32 = (UInt32)value;
                    break;
                case 1:
                    x16 = (UInt16)value;
                    break;
                case 0:
                    x8 = (byte)value;
                    break;
                case 4:
                    x8h = (byte)value;
                    break;
                default:
                    throw new ArgumentOutOfRangeException(nameof(codeSize), codeSize, CodeSizeOutOfRangeMessage);
            }
        }
    }
}
97 | }
98 |
--------------------------------------------------------------------------------
/AMx64/Memory/Registers.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace AMx64
4 | {
5 | public partial class AMX64
6 | {
// Backing storage for the general-purpose registers. The array has 16 slots; the
// properties below reference indices 0-7 only. Each property forwards to one view
// (x64 / x32 / x16 / x8h / x8) of the CPURegister stored at a fixed index.
7 | public CPURegister[] CPURegisters = new CPURegister[16];
8 |
// A register family: CPURegisters[0].
9 | #region AX register
10 | public UInt64 RAX { get => CPURegisters[0].x64; set => CPURegisters[0].x64 = value; }
11 | public UInt32 EAX { get => CPURegisters[0].x32; set => CPURegisters[0].x32 = value; }
12 | public UInt16 AX { get => CPURegisters[0].x16; set => CPURegisters[0].x16 = value; }
13 | public byte AH { get => CPURegisters[0].x8h; set => CPURegisters[0].x8h = value; }
14 | public byte AL { get => CPURegisters[0].x8; set => CPURegisters[0].x8 = value; }
15 | #endregion
16 |
// B register family: CPURegisters[1].
17 | #region BX register
18 | public UInt64 RBX { get => CPURegisters[1].x64; set => CPURegisters[1].x64 = value; }
19 | public UInt32 EBX { get => CPURegisters[1].x32; set => CPURegisters[1].x32 = value; }
20 | public UInt16 BX { get => CPURegisters[1].x16; set => CPURegisters[1].x16 = value; }
21 | public byte BH { get => CPURegisters[1].x8h; set => CPURegisters[1].x8h = value; }
22 | public byte BL { get => CPURegisters[1].x8; set => CPURegisters[1].x8 = value; }
23 | #endregion
24 |
// C register family: CPURegisters[2].
25 | #region CX register
26 | public UInt64 RCX { get => CPURegisters[2].x64; set => CPURegisters[2].x64 = value; }
27 | public UInt32 ECX { get => CPURegisters[2].x32; set => CPURegisters[2].x32 = value; }
28 | public UInt16 CX { get => CPURegisters[2].x16; set => CPURegisters[2].x16 = value; }
29 | public byte CH { get => CPURegisters[2].x8h; set => CPURegisters[2].x8h = value; }
30 | public byte CL { get => CPURegisters[2].x8; set => CPURegisters[2].x8 = value; }
31 | #endregion
32 |
// D register family: CPURegisters[3].
33 | #region DX register
34 | public UInt64 RDX { get => CPURegisters[3].x64; set => CPURegisters[3].x64 = value; }
35 | public UInt32 EDX { get => CPURegisters[3].x32; set => CPURegisters[3].x32 = value; }
36 | public UInt16 DX { get => CPURegisters[3].x16; set => CPURegisters[3].x16 = value; }
37 | public byte DH { get => CPURegisters[3].x8h; set => CPURegisters[3].x8h = value; }
38 | public byte DL { get => CPURegisters[3].x8; set => CPURegisters[3].x8 = value; }
39 | #endregion
40 |
// SI register family: CPURegisters[4]. No 8-bit views are exposed here.
41 | #region SI register
42 | public UInt64 RSI { get => CPURegisters[4].x64; set => CPURegisters[4].x64 = value; }
43 | public UInt32 ESI { get => CPURegisters[4].x32; set => CPURegisters[4].x32 = value; }
44 | public UInt16 SI { get => CPURegisters[4].x16; set => CPURegisters[4].x16 = value; }
45 | #endregion
46 |
// DI register family: CPURegisters[5]. No 8-bit views are exposed here.
47 | #region DI register
48 | public UInt64 RDI { get => CPURegisters[5].x64; set => CPURegisters[5].x64 = value; }
49 | public UInt32 EDI { get => CPURegisters[5].x32; set => CPURegisters[5].x32 = value; }
50 | public UInt16 DI { get => CPURegisters[5].x16; set => CPURegisters[5].x16 = value; }
51 | #endregion
52 |
// BP register family: CPURegisters[6]. No 8-bit views are exposed here.
53 | #region BP register
54 | public UInt64 RBP { get => CPURegisters[6].x64; set => CPURegisters[6].x64 = value; }
55 | public UInt32 EBP { get => CPURegisters[6].x32; set => CPURegisters[6].x32 = value; }
56 | public UInt16 BP { get => CPURegisters[6].x16; set => CPURegisters[6].x16 = value; }
57 | #endregion
58 |
// SP register family: CPURegisters[7]. No 8-bit views are exposed here.
59 | #region SP register
60 | public UInt64 RSP { get => CPURegisters[7].x64; set => CPURegisters[7].x64 = value; }
61 | public UInt32 ESP { get => CPURegisters[7].x32; set => CPURegisters[7].x32 = value; }
62 | public UInt16 SP { get => CPURegisters[7].x16; set => CPURegisters[7].x16 = value; }
63 | #endregion
64 |
65 | #region FLAGS register
/// <summary>
/// Full 64-bit flags register. <see cref="EFLAGS"/> and <see cref="FLAGS"/> are views over its
/// low 32 and low 16 bits.
/// </summary>
public UInt64 RFLAGS;

/// <summary>
/// Low 32 bits of <see cref="RFLAGS"/>. Writing replaces only those bits and preserves the
/// upper 32 bits, matching how <see cref="FLAGS"/> preserves the bits above its own width.
/// </summary>
public UInt32 EFLAGS { get => (UInt32)RFLAGS; set => RFLAGS = RFLAGS & ~0xfffffffful | value; }

/// <summary>
/// Low 16 bits of <see cref="RFLAGS"/>. Writing replaces only those bits.
/// </summary>
public UInt16 FLAGS { get => (UInt16)RFLAGS; set => RFLAGS = RFLAGS & ~0xfffful | value; }
69 |
70 |
71 | //+--------+--------------+--------------+-------------------------------------------+-----------------------+------------------------+
72 | //| Bit | Mask | Abbreviation | Full Name | =1 | =0 |
73 | //+--------+--------------+--------------+-------------------------------------------+-----------------------+------------------------+
74 | //| 0 | 0x0001 | CF | Carry flag | CY(Carry) | NC(No Carry) |
75 | //| 1 | 0x0002 | | Reserved, always 1 in EFLAGS[2][3] | | |
76 | //| 2 | 0x0004 | PF | Parity flag | PE(Parity Even) | PO(Parity Odd) |
77 | //| 3 | 0x0008 | | Reserved[3] | | |
78 | //| 4 | 0x0010 | AF | Adjust flag | AC(Auxiliary Carry) | NA(No Auxiliary Carry) |
79 | //| 5 | 0x0020 | | Reserved[3] | | |
80 | //| 6 | 0x0040 | ZF | Zero flag | ZR(Zero) | NZ(Not Zero) |
81 | //| 7 | 0x0080 | SF | Sign flag | NG(Negative) | PL(Positive) |
82 | //| 8 | 0x0100 | TF | Trap flag(single step) | | |
83 | //| 9 | 0x0200 | IF | Interrupt enable flag | EI(Enable Interrupt) | DI(Disable Interrupt) |
84 | //| 10 | 0x0400 | DF | Direction flag | DN(Down) | UP(Up) |
85 | //| 11 | 0x0800 | OF | Overflow flag | OV(Overflow) | NV(Not Overflow) |
86 | //| 12-13 | 0x3000 | IOPL | I/O privilege level(286+ only) | | |
87 | //| 14 | 0x4000 | NT | Nested task flag(286+ only) | | |
88 | //| 15 | 0x8000 | | Reserved | | |
89 | //| 16 | 0x0001 0000 | RF | Resume flag(386+ only) | | |
90 | //| 17 | 0x0002 0000 | VM | Virtual 8086 mode flag(386+ only) | | |
91 | //| 18 | 0x0004 0000 | AC | Alignment check(486SX+ only) | | |
92 | //| 19 | 0x0008 0000 | VIF | Virtual interrupt flag(Pentium+) | | |
93 | //| 20 | 0x0010 0000 | VIP | Virtual interrupt pending(Pentium+) | | |
94 | //| 21 | 0x0020 0000 | ID | Able to use CPUID instruction(Pentium+) | | |
95 | //| 22‑31 | 0xFFC0 0000 | | Reserved | | |
96 | //+--------+--------------+--------------+-------------------------------------------+-----------------------+------------------------+
97 |
// Single-bit flag accessors. Each getter tests one mask bit of RFLAGS; each setter
// clears that bit and then sets it again when the assigned value is true, leaving
// all other RFLAGS bits unchanged.
98 | public bool CF { get => (RFLAGS & 0x0001ul) != 0; set => RFLAGS = (RFLAGS & ~0x0001ul) | (value ? 0x0001ul : 0); }
99 | public bool PF { get => (RFLAGS & 0x0004ul) != 0; set => RFLAGS = (RFLAGS & ~0x0004ul) | (value ? 0x0004ul : 0); }
100 | public bool AF { get => (RFLAGS & 0x0010ul) != 0; set => RFLAGS = (RFLAGS & ~0x0010ul) | (value ? 0x0010ul : 0); }
101 | public bool ZF { get => (RFLAGS & 0x0040ul) != 0; set => RFLAGS = (RFLAGS & ~0x0040ul) | (value ? 0x0040ul : 0); }
102 | public bool SF { get => (RFLAGS & 0x0080ul) != 0; set => RFLAGS = (RFLAGS & ~0x0080ul) | (value ? 0x0080ul : 0); }
103 | public bool TF { get => (RFLAGS & 0x0100ul) != 0; set => RFLAGS = (RFLAGS & ~0x0100ul) | (value ? 0x0100ul : 0); }
104 | public bool IF { get => (RFLAGS & 0x0200ul) != 0; set => RFLAGS = (RFLAGS & ~0x0200ul) | (value ? 0x0200ul : 0); }
105 | public bool DF { get => (RFLAGS & 0x0400ul) != 0; set => RFLAGS = (RFLAGS & ~0x0400ul) | (value ? 0x0400ul : 0); }
106 | public bool OF { get => (RFLAGS & 0x0800ul) != 0; set => RFLAGS = (RFLAGS & ~0x0800ul) | (value ? 0x0800ul : 0); }
// IOPL is a two-bit field (bits 12-13); the assigned value is masked to its low two bits.
107 | public byte IOPL { get => (byte)((RFLAGS >> 12) & 3); set => RFLAGS = (RFLAGS & ~0x3000ul) | ((UInt64)(value & 3) << 12); }
108 | public bool NT { get => (RFLAGS & 0x4000ul) != 0; set => RFLAGS = (RFLAGS & ~0x4000ul) | (value ? 0x4000ul : 0); }
109 |
// Flags located in bits 16-21 of RFLAGS.
110 | #region EFLAGS
111 | public bool RF { get => (RFLAGS & 0x0001_0000ul) != 0; set => RFLAGS = (RFLAGS & ~0x0001_0000ul) | (value ? 0x0001_0000ul : 0); }
112 | public bool VM { get => (RFLAGS & 0x0002_0000ul) != 0; set => RFLAGS = (RFLAGS & ~0x0002_0000ul) | (value ? 0x0002_0000ul : 0); }
113 | public bool AC { get => (RFLAGS & 0x0004_0000ul) != 0; set => RFLAGS = (RFLAGS & ~0x0004_0000ul) | (value ? 0x0004_0000ul : 0); }
114 | public bool VIF { get => (RFLAGS & 0x0008_0000ul) != 0; set => RFLAGS = (RFLAGS & ~0x0008_0000ul) | (value ? 0x0008_0000ul : 0); }
115 | public bool VIP { get => (RFLAGS & 0x0010_0000ul) != 0; set => RFLAGS = (RFLAGS & ~0x0010_0000ul) | (value ? 0x0010_0000ul : 0); }
116 | public bool ID { get => (RFLAGS & 0x0020_0000ul) != 0; set => RFLAGS = (RFLAGS & ~0x0020_0000ul) | (value ? 0x0020_0000ul : 0); }
117 | #endregion
118 | #endregion
119 |
/// <summary>
/// Gets a register value.
/// </summary>
/// <param name="register">Name of the register whose value is returned (case-insensitive).</param>
/// <exception cref="ArgumentNullException"><paramref name="register"/> is null.</exception>
/// <exception cref="ArgumentException">The register name isn't valid.</exception>
/// <returns>Register value, widened to 64 bits.</returns>
public UInt64 GetRegisterValue(string register)
{
    if (register == null)
    {
        throw new ArgumentNullException(nameof(register));
    }

    // Invariant casing: culture-sensitive upper-casing maps 'i' to a dotted capital under
    // Turkish cultures, which would make names such as "rsi" or "edi" unrecognisable.
    return register.ToUpperInvariant() switch
    {
        // x64 registers
        "RAX" => RAX,
        "RBX" => RBX,
        "RCX" => RCX,
        "RDX" => RDX,
        "RSP" => RSP,
        "RBP" => RBP,
        "RSI" => RSI,
        "RDI" => RDI,
        "RFLAGS" => RFLAGS,
        // x32 registers
        "EAX" => EAX,
        "EBX" => EBX,
        "ECX" => ECX,
        "EDX" => EDX,
        "ESP" => ESP,
        "EBP" => EBP,
        "ESI" => ESI,
        "EDI" => EDI,
        "EFLAGS" => EFLAGS,
        // x16 registers
        "AX" => AX,
        "BX" => BX,
        "CX" => CX,
        "DX" => DX,
        "SP" => SP,
        "BP" => BP,
        "SI" => SI,
        "DI" => DI,
        "FLAGS" => FLAGS,
        // x8 registers
        "AH" => AH,
        "BH" => BH,
        "CH" => CH,
        "DH" => DH,
        "AL" => AL,
        "BL" => BL,
        "CL" => CL,
        "DL" => DL,
        _ => throw new ArgumentException($"Register '{register}' doesn't exist.", nameof(register))
    };
}
172 | }
173 | }
174 |
--------------------------------------------------------------------------------
/AMx64/Memory/Stack.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace AMx64
4 | {
5 | public partial class AMX64
6 | {
/// <summary>
/// Pops a value from the top of the stack.
/// </summary>
/// <param name="size">Amount by which the stack pointer is incremented (2, 4 or 8).</param>
/// <exception cref="Exception">Thrown by the pointer check when a stack pointer is out of range.</exception>
/// <exception cref="InvalidOperationException">Thrown when reading <paramref name="size"/> bytes would move RSP past RBP.</exception>
/// <returns>Value from the top of stack.</returns>
public UInt64 Pop(int size)
{
    CheckStackPointers();

    var byteCount = (UInt64)size;

    if (RSP + byteCount > RBP)
    {
        throw new InvalidOperationException("Stack Underflow occurred.");
    }

    memory.ReadFromStack(RSP, out var popped, byteCount);
    RSP += byteCount;

    return popped;
}
27 |
/// <summary>
/// Pushes a value to the top of the stack.
/// </summary>
/// <param name="value">Value that is being pushed to stack.</param>
/// <param name="size">Amount by which the stack pointer is decremented (2, 4 or 8).</param>
/// <exception cref="Exception">Thrown by the pointer check when a stack pointer is out of range.</exception>
/// <exception cref="StackOverflowException">Thrown when fewer than <paramref name="size"/> bytes remain between RSP and <c>nextMemoryLocation</c>.</exception>
public void Push(UInt64 value, int size)
{
    CheckStackPointers();

    var byteCount = (UInt64)size;

    // CheckStackPointers has established RSP >= nextMemoryLocation, so this difference cannot
    // wrap. The former "RSP - size < nextMemoryLocation" wrapped around for size > RSP (and for
    // negative sizes) and then let the push through.
    if (RSP - nextMemoryLocation < byteCount)
    {
        throw new StackOverflowException("Stack Overflow occurred.");
    }

    RSP -= byteCount;
    memory.WriteToStack(RSP, value, byteCount);
}
45 |
/// <summary>
/// Checks stack pointers: RBP and RSP must each lie between <c>nextMemoryLocation</c> and
/// <c>maxMemSize</c> (inclusive), and RSP must not be above RBP.
/// </summary>
/// <exception cref="Exception">RBP or RSP is out of range (RBP is checked first).</exception>
/// <exception cref="InvalidOperationException">RSP is greater than RBP.</exception>
private void CheckStackPointers()
{
    if (RBP > maxMemSize || RBP < nextMemoryLocation)
    {
        throw new Exception($"Stack pointer RBP out of range: {RBP}");
    }

    if (RSP > maxMemSize || RSP < nextMemoryLocation)
    {
        throw new Exception($"Stack pointer RSP out of range: {RSP}");
    }

    if (RSP > RBP)
    {
        throw new InvalidOperationException("Stack Underflow occurred.");
    }
}
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/AMx64/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace AMx64
4 | {
/// <summary>
/// Console entry point: creates an <see cref="AMX64"/> instance and initializes it with the
/// command-line arguments.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs the simulator. Any exception raised by initialization is caught and its message is
    /// written to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments, passed through unchanged.</param>
    public static void Main(string[] args)
    {
        var amx64 = new AMX64();

        try
        {
            amx64.Initialize(args);
        }
        catch (Exception failure)
        {
            Console.WriteLine(failure.Message);
        }
    }
}
20 | }
21 |
--------------------------------------------------------------------------------
/AMx64/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("AMx64")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("")]
12 | [assembly: AssemblyProduct("AMx64")]
13 | [assembly: AssemblyCopyright("Copyright © 2020")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("7ab8e7dc-19b8-49d4-8a1b-f17a3a398c51")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/AMx64/Simulation.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Linq;
4 |
5 | namespace AMx64
6 | {
7 | public partial class AMX64
8 | {
/// <summary>
/// Error codes. Members are implicitly numbered from 0 in declaration order.
/// </summary>
public enum ErrorCode
{
    None,
    OutOfBounds,
    UnhandledSyscall,
    UndefinedBehavior,
    ArithmeticError,
    Abort,
    SectionProblems,
    GlobalLine,
    NotImplemented,
    StackOverflow,
    StackUnderflow,
    StackError,
    AccessViolation,
    UnknownOp,
    Comment,
    InvalidLabel,
    InvalidLabelPosition,
    UnknownLabel,
    EmptyLine,
    InvalidEffectiveAddressesName,
    DataSectionProblem,
    BssSectionProblem,
    InvalidAsmLine,
    JmpOccurred,
    SuccessfullyRun,
    UnsuccessfullyRun,
    SyscallError,
    MemoryAllocError,
    Label
    // Currently not part of the enum:
    // ArgCount, MissingSize, ArgError, FormatError, UsageError, UnknownOp, EmptyFile, SymbolRedefinition, UnknownSymbol, NotImplemented, Assertion, Failure, Comment,
    // OpenFail, NullPath, InvalidPath, DirectoryNotFound, AccessViolation, FileNotFound, PathFormatUnsupported, IOError, MemoryAllocError, ComputerInitError, UnknownError
}
21 |
22 | protected Random randomValue = new Random();
23 |
/// <summary>
/// Initialize the simulation for execution.
/// </summary>
/// <remarks>
/// Fills the CPU registers and user memory with random values, points RBP and RSP at the last
/// memory address, resolves the asm file given as the first argument and then either interprets
/// it directly (single argument) or hands the remaining arguments to the command-line parser.
/// </remarks>
/// <param name="args">Command-line arguments; the first one is the path of the .asm file.</param>
/// <returns>
/// true if the asm file was interpreted; false if no file was given, the file is missing or is
/// not an .asm file, the extra arguments could not be parsed, or debugging was not started.
/// </returns>
public bool Initialize(string[] args)
{
    // Without at least a file argument there is nothing to run (previously this path threw
    // before the null check further down could ever be reached).
    if (args == null || args.Length == 0)
    {
        Console.WriteLine("No .asm file specified.");
        return false;
    }

    // Initialize cpu registers.
    for (var i = 0; i < CPURegisters.Length; ++i)
    {
        CPURegisters[i] = new CPURegister(randomValue.NextUInt64());
    }

    // Set stack pointer.
    RBP = RSP = (UInt64)maxMemSize - 1;

    // Initialize x64 user memory.
    for (var i = 0; i < maxMemSize; ++i)
    {
        memory[i] = randomValue.NextUInt8();
    }

    var asmFile = args[0];

    if (!File.Exists(asmFile))
    {
        Console.WriteLine($"File {asmFile} doesn't exist.");
        return false;
    }

    if (!asmFile.EndsWith(".asm", StringComparison.Ordinal))
    {
        Console.WriteLine("File isn't an .asm file.");
        return false;
    }

    // Set asm file full path. A path that already names a directory is used as given; a bare
    // file name is appended to the current AsmFilePath using the platform's separator.
    if (Path.IsPathRooted(asmFile) || asmFile.IndexOfAny(new[] { '\\', '/' }) >= 0)
    {
        AsmFilePath = asmFile;
    }
    else
    {
        AsmFilePath = Path.Combine(AsmFilePath ?? string.Empty, asmFile);
    }

    // Only the file was given: run it straight away.
    if (args.Length == 1)
    {
        InterpretAsmFile();
        return true;
    }

    // Everything after the file name is a command-line option.
    var cml = new CmlnParser();
    if (!cml.Parse(args.Skip(1).ToArray()))
    {
        return false;
    }

    if (cml.cmlnAction == CmlnAction.Debug && Debug())
    {
        InterpretAsmFile();
        return true;
    }

    return false;
}
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/AMx64/Utility.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Text;
3 |
4 | namespace AMx64
5 | {
6 | public static class Utility
7 | {
/// <summary>
/// Attempts to parse the string value into an unsigned integer.
/// </summary>
/// <remarks>
/// Digits are 0-9 and, for radix 16, a-f / A-F. No sign, prefix or whitespace is accepted.
/// Overflow is detected arithmetically, so every value up to <see cref="UInt64.MaxValue"/>
/// parses in every supported radix, with or without leading zeros.
/// </remarks>
/// <param name="str">String to parse.</param>
/// <param name="value">The resulting value; 0 when parsing fails.</param>
/// <param name="radix">Radix used (2, 8, 10 or 16).</param>
/// <returns>true if parsing is successful, otherwise false.</returns>
public static bool TryParseUInt64(this string str, out UInt64 value, uint radix = 10)
{
    value = 0;

    if (radix != 2 && radix != 8 && radix != 10 && radix != 16)
    {
        return false;
    }

    if (string.IsNullOrEmpty(str))
    {
        return false;
    }

    UInt64 result = 0;

    foreach (var ch in str)
    {
        uint digit;

        if (ch >= '0' && ch <= '9')
        {
            digit = (uint)(ch - '0');
        }
        else if (ch >= 'a' && ch <= 'z')
        {
            digit = (uint)(ch - 'a' + 10);
        }
        else if (ch >= 'A' && ch <= 'Z')
        {
            digit = (uint)(ch - 'A' + 10);
        }
        else
        {
            return false;
        }

        // Digit is not valid in this radix.
        if (digit >= radix)
        {
            return false;
        }

        // result * radix + digit would exceed UInt64.MaxValue.
        if (result > (UInt64.MaxValue - digit) / radix)
        {
            return false;
        }

        result = result * radix + digit;
    }

    value = result;
    return true;
}
67 |
/// <summary>
/// Attempts to parse the string value into its characters string accounting for C-style escapes in the case of backquotes.
/// </summary>
/// <remarks>
/// The input must start and end with the same quote character (", ' or `). Escape sequences are
/// only interpreted inside backquoted strings; each escape produces one character whose code is
/// the low 8 bits of the escape value. Unicode escapes (\u, \U) are rejected.
/// </remarks>
/// <param name="str">String to parse with quotes around it.</param>
/// <param name="characters">The result of parsing; null when parsing fails.</param>
/// <param name="errorMsg">The error message that occurred during the parsing; left untouched on success.</param>
/// <returns>true if parsing is successful, otherwise false.</returns>
public static bool TryParseCharacterString(this string str, out string characters, ref string errorMsg)
{
    characters = null;

    // An opening and a closing quote are both required; a lone quote character would otherwise
    // pass the "first equals last" test below, and null/empty input would fail on indexing.
    if (string.IsNullOrEmpty(str) || str.Length < 2)
    {
        errorMsg = $"Ill-formed string: {str}";
        return false;
    }

    var quote = str[0];

    // Check if character string starts and ends with quote
    if (quote != '"' && quote != '\'' && quote != '`' || quote != str[str.Length - 1])
    {
        errorMsg = $"Ill-formed string: {str}";
        return false;
    }

    var b = new StringBuilder();

    // Read all characters between the quotes.
    for (var i = 1; i < str.Length - 1; ++i)
    {
        // Anything that is not a backslash inside a backquoted string is copied verbatim.
        if (quote != '`' || str[i] != '\\')
        {
            b.Append(str[i]);
            continue;
        }

        // Step onto the character after the backslash; it must not be the closing quote.
        if (++i >= str.Length - 1)
        {
            errorMsg = $"Ill-formed string (ends with beginning of an escape sequence): {str}";
            return false;
        }

        int temp;

        switch (str[i])
        {
            case '\'':
                temp = '\'';
                break;
            case '"':
                temp = '"';
                break;
            case '`':
                temp = '`';
                break;
            case '\\':
                temp = '\\';
                break;
            case '?':
                temp = '?';
                break;
            case 'a':
                temp = '\a';
                break;
            case 'b':
                temp = '\b';
                break;
            case 't':
                temp = '\t';
                break;
            case 'n':
                temp = '\n';
                break;
            case 'v':
                temp = '\v';
                break;
            case 'f':
                temp = '\f';
                break;
            case 'r':
                temp = '\r';
                break;
            case 'e':
                // ESC
                temp = 27;
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                temp = 0;
                // Read the octal value into temp (up to 3 octal digits). The closing quote is
                // never an octal digit, so the scan stops inside the string.
                for (var octCount = 0; octCount < 3 && str[i] >= '0' && str[i] <= '7'; ++i, ++octCount)
                {
                    temp = (temp << 3) | (str[i] - '0');
                }
                // The loop stopped one past the last digit; the outer loop advances again.
                --i;
                break;

            case 'x':
                // Reads up to 2 hexadecimal digits; the first one is mandatory.
                if (!GetHexValue(str[++i], out temp))
                {
                    errorMsg = $"Ill-formed string (invalid hexadecimal escape): {str}";
                    return false;
                }
                // If the next char is also a hex digit.
                if (GetHexValue(str[i + 1], out var hexValue))
                {
                    ++i;
                    temp = (temp << 4) | hexValue;
                }
                break;

            case 'u':
            case 'U':
                errorMsg = $"Unicode character escapes are not yet supported: {str}";
                return false;

            default:
                errorMsg = $"Ill-formed string (escape sequence not recognized): {str}";
                return false;
        }

        // Append the character (low byte of the escape value).
        b.Append((char)(temp & 0xff));
    }

    characters = b.ToString();
    return true;
}
202 |
/// <summary>
/// Converts a hexadecimal digit character to its numeric value.
/// </summary>
/// <param name="ch">Character to convert (0-9, a-f or A-F).</param>
/// <param name="value">Numeric value of the digit (0-15); 0 when <paramref name="ch"/> is not a hex digit.</param>
/// <returns>true if <paramref name="ch"/> is a hexadecimal digit, otherwise false.</returns>
public static bool GetHexValue(char ch, out int value)
{
    if (ch >= '0' && ch <= '9')
    {
        value = ch - '0';
        return true;
    }

    if (ch >= 'a' && ch <= 'f')
    {
        value = ch - 'a' + 10;
        return true;
    }

    if (ch >= 'A' && ch <= 'F')
    {
        value = ch - 'A' + 10;
        return true;
    }

    value = 0;
    return false;
}
225 |
/// <summary>
/// Get a random value covering the full 64-bit range.
/// </summary>
/// <remarks>
/// Eight random bytes are drawn with <see cref="Random.NextBytes(byte[])"/>. Combining two
/// <see cref="Random.Next()"/> results is not sufficient because that method never returns a
/// negative number, which would leave bits 31 and 63 permanently clear.
/// </remarks>
/// <param name="rndValue">Random object that is used.</param>
/// <returns>Random unsigned integer.</returns>
public static UInt64 NextUInt64(this Random rndValue)
{
    var bytes = new byte[sizeof(UInt64)];
    rndValue.NextBytes(bytes);

    return BitConverter.ToUInt64(bytes, 0);
}
235 |
/// <summary>
/// Gets a random byte, each value 0-255 being equally likely.
/// </summary>
/// <remarks>
/// A single bounded draw is used; OR-ing two independent draws together would bias the
/// overlapping bits towards 1.
/// </remarks>
/// <param name="rndValue">Random object that is used.</param>
/// <returns>Random byte.</returns>
public static byte NextUInt8(this Random rndValue)
{
    return (byte)rndValue.Next(256);
}
240 |
/// <summary>
/// Gets a random boolean.
/// </summary>
/// <param name="rndValue">Random object that is used.</param>
/// <returns>true when the drawn value (0 or 1) is 1, otherwise false.</returns>
public static bool NextBool(this Random rndValue) => rndValue.Next(2) != 0;
250 |
/// <summary>
/// Checks if value is zero.
/// </summary>
/// <param name="value">Value being checked.</param>
/// <returns>true if value is zero, otherwise false.</returns>
public static bool IsZero(UInt64 value) => value == 0UL;
260 |
/// <summary>
/// Checks if the string starts with a specified character.
/// </summary>
/// <param name="stringValue">String value being checked; may be null.</param>
/// <param name="character">Character string must start with.</param>
/// <returns>true if string starts with specified character, otherwise false (including for null or empty input).</returns>
public static bool StartsWith(this string stringValue, char character)
{
    // Null is treated like an empty string, the same way EndsWith treats it.
    return !string.IsNullOrEmpty(stringValue) && stringValue[0] == character;
}
271 |
/// <summary>
/// Check if the string ends with a specified character.
/// </summary>
/// <param name="stringValue">String value being checked; may be null.</param>
/// <param name="character">Character string must end with.</param>
/// <returns>true if string ends with specified character, otherwise false (including for null or empty input).</returns>
public static bool EndsWith(this string stringValue, char character)
{
    if (string.IsNullOrEmpty(stringValue))
    {
        return false;
    }

    var lastIndex = stringValue.Length - 1;
    return stringValue[lastIndex] == character;
}
282 |
/// <summary>
/// Checks if string starts with a specified value and is followed by a white space.
/// </summary>
/// <param name="stringValue">String value being checked.</param>
/// <param name="value">Prefix string value string must start with.</param>
/// <returns>true if string is equal to the specified value or begins with it and is followed by a white space.</returns>
public static bool StartsWithValue(this string stringValue, string value)
{
    // Compare ordinally. The culture-sensitive overload can report a match
    // whose length differs from value.Length (ignorable characters), and the
    // index used below relies on the prefix being exactly value.Length long.
    if (!stringValue.StartsWith(value, StringComparison.Ordinal))
    {
        return false;
    }

    return stringValue.Length == value.Length || char.IsWhiteSpace(stringValue[value.Length]);
}
293 |
/// <summary>
/// Gets the highest set bit.
/// </summary>
/// <param name="value">Value used to get highest bit.</param>
/// <returns>The value with only its highest set bit kept (zero for a zero input).</returns>
public static UInt64 GetHighBit(UInt64 value)
{
    // x & (x - 1) strips the lowest set bit. Keep stripping for as long as
    // something would remain, so the last survivor is the highest bit.
    for (var stripped = value & (value - 1); stripped != 0; stripped = value & (value - 1))
    {
        value = stripped;
    }

    return value;
}
308 |
/// <summary>
/// Gets the lowest set bit.
/// </summary>
/// <param name="value">Value used to get lowest bit.</param>
/// <returns>The value with only its lowest set bit kept (zero for a zero input).</returns>
public static UInt64 GetLowBit(UInt64 value)
{
    // ~value + 1 is the two's-complement negation; AND-ing it with the
    // original leaves only the lowest set bit.
    var negated = ~value + 1;

    return value & negated;
}
318 |
/// <summary>
/// Writes a specified value to memory, least significant byte first.
/// </summary>
/// <param name="array">Specified array in which to store the value.</param>
/// <param name="position">Starting index of specified array.</param>
/// <param name="size">Size of the value in bytes.</param>
/// <param name="inputValue">Specified value to write to the array.</param>
/// <returns>true if value is written, false if the bounds check rejects the requested range.</returns>
public static bool Write(this byte[] array, UInt64 position, UInt64 size, UInt64 inputValue)
{
    // Refuse the write when the bounds check rejects the range.
    var limit = (UInt64)array.Length;
    if (MemoryBoundsExceeded(position, size, limit))
    {
        return false;
    }

    // Emit the low byte, then shift the next byte into place.
    var cursor = (int)position;
    for (var remaining = (int)size; remaining > 0; --remaining)
    {
        array[cursor++] = (byte)inputValue;
        inputValue >>= 8;
    }

    return true;
}
343 |
344 | #region Stack handle
/// <summary>
/// Writes a specified value to stack, least significant byte first. No bounds check is performed.
/// </summary>
/// <param name="array">Specified array in which to store the value.</param>
/// <param name="position">Starting index of specified array.</param>
/// <param name="inputValue">Specified value to write to the array.</param>
/// <param name="size">Size of the value in bytes.</param>
public static void WriteToStack(this byte[] array, UInt64 position, UInt64 inputValue, UInt64 size)
{
    var count = (int)size;
    var offset = 0;

    while (offset < count)
    {
        // Store the current low byte, then bring the next byte down.
        array[(int)position + offset] = (byte)inputValue;
        inputValue >>= 8;
        ++offset;
    }
}
360 |
/// <summary>
/// Reads a little-endian value from stack. No bounds check is performed.
/// </summary>
/// <param name="array">Specified array from which to read value.</param>
/// <param name="position">Beginning index of specified array.</param>
/// <param name="outputValue">Value read from the array.</param>
/// <param name="size">Size of the value in bytes.</param>
public static void ReadFromStack(this byte[] array, UInt64 position, out UInt64 outputValue, UInt64 size)
{
    UInt64 assembled = 0;

    // Walk from the most significant byte (highest address) down to the
    // least significant one, shifting the accumulated value up each step.
    var index = (int)size;
    while (--index >= 0)
    {
        assembled = (assembled << 8) | array[(int)position + index];
    }

    outputValue = assembled;
}
377 | #endregion
378 |
/// <summary>
/// Writes a C-style (null-terminated) string to memory, one byte per character.
/// </summary>
/// <param name="array">Specified array in which to store the value.</param>
/// <param name="position">Starting index of specified array.</param>
/// <param name="inputValue">Specified string value to write to the array. Each character is narrowed to a byte.</param>
/// <returns>true if value is written, false if the bounds check rejects the string plus its terminator.</returns>
public static bool WriteString(this byte[] array, UInt64 position, string inputValue)
{
    // Room is needed for every character plus the trailing null byte.
    var required = (UInt64)inputValue.Length + 1;
    if (MemoryBoundsExceeded(position, required, (UInt64)array.Length))
    {
        return false;
    }

    // Copy the characters one byte each.
    var cursor = (int)position;
    foreach (var symbol in inputValue)
    {
        array[cursor++] = (byte)symbol;
    }

    // Terminate the string.
    array[cursor] = 0;

    return true;
}
405 |
/// <summary>
/// Reads a little-endian value from memory.
/// </summary>
/// <param name="array">Specified array from which to read value.</param>
/// <param name="position">Beginning index of specified array.</param>
/// <param name="size">Size of the value in bytes.</param>
/// <param name="outputValue">Value read from the array; zero when the read is rejected.</param>
/// <returns>true if value is read, false if the bounds check rejects the requested range.</returns>
public static bool Read(this byte[] array, UInt64 position, UInt64 size, out UInt64 outputValue)
{
    outputValue = 0;

    // Refuse the read when the bounds check rejects the range.
    if (MemoryBoundsExceeded(position, size, (UInt64)array.Length))
    {
        return false;
    }

    // Start at the most significant byte (highest address) and work down.
    for (var index = (int)size; --index >= 0;)
    {
        outputValue = (outputValue << 8) | array[(int)position + index];
    }

    return true;
}
431 |
/// <summary>
/// Reads an ASCII C-style string value from memory.
/// </summary>
/// <param name="array">Specified array from which to read value.</param>
/// <param name="position">Beginning index of specified array.</param>
/// <param name="maxSize">Maximum number of characters to read.</param>
/// <param name="outputValue">Value read from the array, or null when the read fails.</param>
/// <returns>
/// true if a string was read, i.e. a null terminator was found or <paramref name="maxSize"/> characters were collected;
/// false if <paramref name="position"/> is outside the array or the end of the array is reached first.
/// </returns>
public static bool ReadString(this byte[] array, UInt64 position, UInt64 maxSize, out string outputValue)
{
    var cString = new StringBuilder();
    var limit = (UInt64)array.Length;

    // A start address outside the array is always an error, even for maxSize == 0.
    if (position >= limit)
    {
        outputValue = null;
        return false;
    }

    // The size limit is tested before the bounds check, so a string that
    // fills the array right up to its final byte is still returned.
    while (cString.Length != (int)maxSize)
    {
        if (position >= limit)
        {
            outputValue = null;
            return false;
        }

        var current = array[position];
        if (current == 0)
        {
            // Null terminator reached.
            break;
        }

        cString.Append((char)current);
        ++position;
    }

    outputValue = cString.ToString();
    return true;
}
469 |
/// <summary>
/// Checks if the value with specified code size is negative.
/// </summary>
/// <param name="value">Value to check.</param>
/// <param name="codeSize">Code size of the value.</param>
/// <returns>true if the sign bit selected by <paramref name="codeSize"/> is set (negative), otherwise false.</returns>
public static bool Negative(UInt64 value, UInt64 codeSize)
{
    // Isolate the sign bit for this code size and test it.
    var signBit = SignMask(codeSize);

    return (value & signBit) != 0;
}
480 |
/// <summary>
/// Gets the bitmask for the sign bit of an integer with the specified code size.
/// </summary>
/// <param name="codeSize">Specified code size; 0, 1, 2 and 3 select 8, 16, 32 and 64 bits respectively.</param>
/// <returns>Mask with only the most significant bit of that width set.</returns>
public static UInt64 SignMask(UInt64 codeSize)
{
    // 8 << codeSize is the operand width in bits; the sign bit sits one below it.
    var bitWidth = 8 << (UInt16)codeSize;

    return 1ul << (bitWidth - 1);
}
490 |
/// <summary>
/// Help method used to determine if array's bounds have been exceeded.
/// </summary>
/// <param name="position">Start index of array.</param>
/// <param name="size">Number of bytes used starting at the start index.</param>
/// <param name="arrayLimit">Length of the array.</param>
/// <returns>
/// true if <paramref name="position"/> lies outside the array or the range
/// [position, position + size) does not fit inside it, otherwise false.
/// </returns>
private static bool MemoryBoundsExceeded(UInt64 position, UInt64 size, UInt64 arrayLimit)
{
    // The start address itself has to be inside the array.
    if (position >= arrayLimit)
    {
        return true;
    }

    // position < arrayLimit here, so the subtraction cannot underflow.
    // Comparing against the remaining space avoids the wraparound that
    // position + size can suffer, and lets a range end on the last byte.
    return size > arrayLimit - position;
}
502 | }
503 | }
504 |
--------------------------------------------------------------------------------
/AMx64/amx64.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/amx64.ico
--------------------------------------------------------------------------------
/AMx64/asm_tests/add_test.asm:
--------------------------------------------------------------------------------
1 | section .text
2 | global main
3 | main:
4 | mov rax, 60 ; load the first operand into rax
5 | add rax, 30 ; rax = rax + 30
6 | ; exit system call (rax = 60, rdi = 0)
7 | mov rax, 60
8 | mov rdi, 0
9 | syscall
10 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/bss_section_test.asm:
--------------------------------------------------------------------------------
1 | section .bss
2 | word resb 2
3 |
4 | section .text
5 | global main
6 | main:
7 | ; the 2 bytes reserved in .bss are not referenced below; the program only exits
8 | mov rax, 60 ; exit system call
9 | mov rdi, 0
10 | syscall
11 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/cmp_jmp_test.asm:
--------------------------------------------------------------------------------
1 | section .text
2 | global main
3 | main:
4 | mov rax, 60 ; starting value of the counter
5 |
6 | loop:
7 | add rax, 1 ; increment the counter
8 | cmp rax, 70 ; compare the counter with 70
9 | jne loop ; jump back to loop while rax is not equal to 70
10 | ; exit system call (rax = 60, rdi = 0)
11 | mov rax, 60
12 | mov rdi, 0
13 | syscall
14 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/data_section_test -2.asm:
--------------------------------------------------------------------------------
1 | section .data
2 | string db "Hello", 10, 0
3 |
4 | section .text
5 | global main
6 | main:
7 | ; the string declared in .data is not referenced below; the program only exits
8 | mov rax, 60 ; exit system call
9 | mov rdi, 0
10 | syscall
11 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/data_section_test.asm:
--------------------------------------------------------------------------------
1 | section .data
2 | string db "This is a test string."
3 |
4 | section .text
5 | global main
6 | main:
7 | ; the string declared in .data is not referenced below; the program only exits
8 | mov rax, 60 ; exit system call
9 | mov rdi, 0
10 | syscall
11 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/stack_test.asm:
--------------------------------------------------------------------------------
1 | section .text
2 | global main
3 | main:
4 | push rax ; push rax, rbx, rcx and rdx onto the stack
5 | push rbx
6 | push rcx
7 | push rdx
8 | ; overwrite all four registers with zero
9 | mov rax, 0
10 | mov rbx, 0
11 | mov rcx, 0
12 | mov rdx, 0
13 | ; pop in the reverse order of the pushes
14 | pop rdx
15 | pop rcx
16 | pop rbx
17 | pop rax
18 | ; exit system call (rax = 60, rdi = 0)
19 | mov rax, 60
20 | mov rdi, 0
21 | syscall
22 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/sub_test.asm:
--------------------------------------------------------------------------------
1 | section .text
2 | global main
3 | main:
4 | mov rax, 60 ; load the first operand into rax
5 | sub rax, 30 ; rax = rax - 30
6 | ; exit system call (rax = 60, rdi = 0)
7 | mov rax, 60
8 | mov rdi, 0
9 | syscall
10 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/sys_read_and_write_test.asm:
--------------------------------------------------------------------------------
1 | section .data
2 | msg db "Enter you string: "
3 | msg2 db "Your string is: "
4 |
5 | section .bss
6 | var resb 11
7 |
8 | section .text
9 | global main
10 | main:
11 |
12 | ; show welcome message
13 | mov rax, 1 ; write system call
14 | mov rdi, 1 ; stdout
15 | mov rsi, msg ; address of the prompt, declared in section .data
16 | mov rdx, 18 ; length of msg in bytes
17 | syscall
18 |
19 | ; read up to 11 bytes of user input
20 | mov rax, 0 ; read system call
21 | mov rdi, 0 ; stdin
22 | mov rsi, var ; address for storage, declared in section .bss
23 | mov rdx, 11 ; size of the var buffer in bytes
24 | syscall
25 |
26 | ; show user the output
27 | mov rax, 1 ; write system call
28 | mov rdi, 1 ; stdout
29 | mov rsi, msg2 ; address of the label text, declared in section .data
30 | mov rdx, 16 ; length of msg2 in bytes
31 | syscall
32 | ; write back the contents of the input buffer
33 | mov rax, 1 ; write system call
34 | mov rdi, 1 ; stdout
35 | mov rsi, var ; address of the input buffer, declared in section .bss
36 | mov rdx, 11 ; size of the var buffer in bytes
37 | syscall
38 |
39 | ; exit system call
40 | mov rax, 60
41 | mov rdi, 0
42 | syscall
43 |
44 |
--------------------------------------------------------------------------------
/AMx64/asm_tests/sys_write_test.asm:
--------------------------------------------------------------------------------
1 | section .data
2 | hello db "Hello", 10, "World"
3 |
4 | section .bss
5 | word resb 2
6 |
7 | section .text
8 | global main
9 | main:
10 | ; print the hello buffer to the console
11 | mov rax, 1 ; write system call
12 | mov rdi, 1 ; stdout
13 | mov rsi, hello ; address of the data to write, declared in section .data
14 | mov rdx, 11 ; byte count: "Hello" (5) + value 10 (1) + "World" (5)
15 | syscall
16 |
17 | ; exit system call
18 | mov rax, 60
19 | mov rdi, 0
20 | syscall
--------------------------------------------------------------------------------
/AMx64/resources/amx64-help_command.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/amx64-help_command.jpg
--------------------------------------------------------------------------------
/AMx64/resources/amx64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/amx64.png
--------------------------------------------------------------------------------
/AMx64/resources/help_command.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/help_command.jpg
--------------------------------------------------------------------------------
/AMx64/resources/list_command.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/list_command.jpg
--------------------------------------------------------------------------------
/AMx64/resources/print_command.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/print_command.jpg
--------------------------------------------------------------------------------
/AMx64/resources/quit_command.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/quit_command.jpg
--------------------------------------------------------------------------------
/AMx64/resources/stack-in-program-memory.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/stack-in-program-memory.jpg
--------------------------------------------------------------------------------
/AMx64/resources/step_command.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AleksaMCode/AMx64/fea483d1e589e1cc47eac2179b3d9577505d5eb8/AMx64/resources/step_command.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # AMx64
4 | AMx64 is a simplified 64-bit processor simulator implemented in C#. It comes with a built-in assembly language loosely based on NASM. The processor acts as a 64-bit machine code interpreter with its own instruction set that includes integer computations. The motivation behind this project was to gain a better understanding of NASM and assembly language.
5 |
6 | ## Table of contents
7 | - [AMx64](#amx64)
8 | - [Table of contents](#table-of-contents)
9 | - [Usage](#usage)
10 | - [CPU details](#cpu-details)
11 | - [The AMASM Language (AMx64 Assembly Language)](#the-amasm-language-amx64-assembly-language)
12 | - [Sections](#sections)
13 | - [Data Section (.data)](#data-section-data)
14 | - [BSS Section (.bss)](#bss-section-bss)
15 | - [Text Section (.text)](#text-section-text)
16 | - [Layout of a AMASM Source Line](#layout-of-a-amasm-source-line)
17 | - [Pseudo-Instructions](#pseudo-instructions)
18 | - [DB and Friends: Declaring Initialized Data](#db-and-friends-declaring-initialized-data)
19 | - [RESB and Friends: Declaring Uninitialized Data](#resb-and-friends-declaring-uninitialized-data)
20 | - [Numeric Constants](#numeric-constants)
21 | - [Character Strings](#character-strings)
22 | - [Character Constants](#character-constants)
23 | - [String Constants](#string-constants)
24 | - [Comments](#comments)
25 | - [Labels](#labels)
26 | - [Operand/Address Size (Data Storage Sizes)](#operandaddress-size-data-storage-sizes)
27 | - [Supported instructions](#supported-instructions)
28 | - [ADD - Add](#add---add)
29 | - [SUB - Subtract](#sub---subtract)
30 | - [AND - Bitwise AND](#and---bitwise-and)
31 | - [OR - Bitwise OR](#or---bitwise-or)
32 | - [NOT - Bitwise NOT](#not---bitwise-not)
33 | - [MOV - Move](#mov---move)
34 | - [CMP - Compare](#cmp---compare)
35 | - [JMP - Unconditional Jump](#jmp---unconditional-jump)
36 | - [Jcc - Jump if Condition Is Met (Conditional Jump)](#jcc---jump-if-condition-is-met-conditional-jump)
37 | - [PUSH](#push)
38 | - [POP](#pop)
39 | - [Memory](#memory)
40 | - [Stack](#stack)
41 | - [Registers](#registers)
42 | - [General-Purpose Registers (GPRs)](#general-purpose-registers-gprs)
43 | - [FLAGS register](#flags-register)
44 | - [Addressing modes for data](#addressing-modes-for-data)
45 | - [Register (direct) Addressing](#register-direct-addressing)
46 | - [Immediate (literal) Addressing](#immediate-literal-addressing)
47 | - [Direct Memory Addressing](#direct-memory-addressing)
48 | - [Register Indirect Addressing](#register-indirect-addressing)
49 | - [Calling System Services](#calling-system-services)
50 | - [Return Codes](#return-codes)
51 | - [Console Output](#console-output)
52 | - [Console Input](#console-input)
53 | - [Proper way to end asm code](#proper-way-to-end-asm-code)
54 | - [Debug - AMDB](#debug---amdb)
55 | - [Getting Help](#getting-help)
56 | - [Setting Breakpoints](#setting-breakpoints)
57 | - [Deleting Breakpoints](#deleting-breakpoints)
58 | - [Display Source Code](#display-source-code)
59 | - [Examine Memory (Display Memory/Register Contents)](#examine-memory-display-memoryregister-contents)
60 | - [Continuing and Stepping](#continuing-and-stepping)
61 | - [Quitting](#quitting)
62 | - [References](#references)
63 | - [Books](#books)
64 | - [Links](#links)
65 | - [Github projects](#github-projects)
66 | - [To-Do List](#to-do-list)
67 |
68 | ## Usage
69 | To start the interpreter, all you need to do is run the following command:
70 |
71 | ```powershell
72 | .\AMx64.exe program.asm
73 | ```
74 |
75 | You can use `--help` (abbreviated `-h`) with no arguments to display a short list of commands available in AMx64,
76 |
77 |
78 | or you can run your program in debug mode by using the `--debug` or `-d` option.
79 |
80 | ```powershell
81 | .\AMx64.exe program.asm -d
82 | ```
83 |
84 | ## CPU details
85 | Registers are small storage cells built directly into a processor that are vastly faster than main memory (RAM) but are also more expensive per byte. Because of this price factor, there is not typically much room in a processor for storing data. The execution of a typical program is: move data from memory to registers, perform computations, move processed data from registers to memory and repeat.
86 | General-purpose registers are used for processing integral instructions (the most common type) and are under the complete control of the programmer.
87 |
88 | > **_NOTE:_**
89 | >
90 | > If you modify a subdivision of a register, the other subdivisions of that register will see the change.
91 |
92 | ## The AMASM Language (AMx64 Assembly Language)
93 | AMx64 comes with built-in assembly language loosely based around NASM or Intel syntax. Before we describe the syntax of operations and other utilities, we need to go over some of the basics of AMx64 assembly language.
94 |
95 | ### Sections
96 | In a typical assembly language, your program is broken up into several sections.
97 |
98 | > **_NOTE:_**
99 | >
100 | > The `.data` and `.bss` sections must come before the `.text` section in asm code.
101 | > The `.rodata` section isn't supported.
102 |
103 | #### Data Section (.data)
104 | The data section holds all variables that are initialized to specific values. This will typically be used only for global variables. The initialized data must be declared in the `section .data` section. There must be a space after the word `section`. All initialized variables and constants are placed in this section. Variable names must start with a letter, followed by letters, numbers, or the underscore character. Variable definitions must include the name, the data type, and the initial value for the variable.
105 |
106 | #### BSS Section (.bss)
107 | The BSS section (Block Started by Symbol) is a section that has no contents in terms of data or instructions. It consists only of a number that represents its length, which the operating system then expands upon program initialization to a zero-filled, contiguous block of memory of that length (hence the name). This is used when you would ordinarily put something in the data section, but you don’t care about initializing it to a specific value. Uninitialized data must be declared in the `section .bss` section. There must be a space after the word `section`. All uninitialized variables are declared in this section. Variable names must start with a letter, followed by letters, numbers, or the underscore character. Variable definitions must include the name, the data type, and the count.
108 |
109 | #### Text Section (.text)
110 | The text section holds all of your executable code and will typically dwarf the other sections in terms of size. The code is placed in the `section .text` section. There must be a space after the word `section`. The instructions are specified one per line, and each must be a valid instruction with the appropriate required operands. The text section will include some headers or labels that define the initial program entry point. The following declarations must be included.
111 |
112 | ```asm
113 | global main
114 | main:
115 | ```
116 |
117 | > **_NOTE:_**
118 | >
119 | > AMx64 requires the asm file to define the program entry point, where execution will begin when the program is run. In NASM you specify the entry point by declaring the special symbol `..start` at the point where you wish execution to begin. In AMASM you can use a user-defined entry point.
120 |
121 | ### Layout of a AMASM Source Line
122 | Like most assemblers, each AMASM source line contains some combination of the four fields
123 |
124 | `label: instruction operands ; comment`
125 |
126 | As usual, most of these fields are optional; the presence or absence of any combination of a label, an instruction and a comment is allowed. Of course, the operand field is either required or forbidden by the presence and nature of the instruction field. It doesn't support multiline commands that are available in NASM using the backslash character (\) as the line continuation character.
127 |
128 | AMASM places no restrictions on white space within a line: labels may have white space before them, or instructions may have no space before them, or anything. The colon after a label is also optional.
129 |
130 | ### Pseudo-Instructions
131 | Pseudo-instructions are things which, though not real x86 machine instructions, are used in the instruction
132 | field anyway because that’s the most convenient place to put them. The current pseudo-instructions are DB, DW, DD and DQ; their uninitialized counterparts RESB, RESW, RESD and RESQ.
133 |
134 | > **_NOTE:_**
135 | >
136 | > - The INCBIN command, the EQU command, and the TIMES prefix are not currently available.
137 | > - Pseudo-instructions DT, DO, DY, REST, RESO and RESY are also not available.
138 |
139 | #### DB and Friends: Declaring Initialized Data
140 | DB, DW, DD and DQ are used, much as in MASM, to declare initialized data in the output file. They can be invoked in a wide range of ways:
141 |
142 | ```asm
143 | db 0x55 ; just the byte 0x55
144 | db 0x55,0x56,0x57 ; three bytes in succession
145 | db 'a',0x55 ; character constants are OK
146 | db 'hello',13,10,'$' ; so are string constants
147 | dw 0x1234 ; 0x34 0x12
148 | dw 'a' ; 0x61 0x00 (it’s just a number)
149 | dw 'ab' ; 0x61 0x62 (character constant)
150 | dw 'abc' ; 0x61 0x62 0x63 0x00 (string)
151 | dd 0x12345678 ; 0x78 0x56 0x34 0x12
152 | dq 0x123456789abcdef0 ; eight byte constant
153 | ```
154 |
155 | #### RESB and Friends: Declaring Uninitialized Data
156 | RESB, RESW, RESD and RESQ are designed to be used in the BSS section of a module: they declare uninitialized storage space. Each takes a single operand, which is the number of bytes, words, doublewords or whatever to reserve. For example:
157 |
158 | ```asm
159 | buffer resb 64 ; reserve 64 bytes
160 | wordvar resw 1 ; reserve a word
161 | ```
162 |
163 | ### Numeric Constants
164 | A numeric constant is simply a number. Number values may be specified in decimal, hexadecimal, octal, or binary. AMASM allows you to specify numbers in a variety of number bases, in a variety of ways: you can suffix H or X, D or T, Q or O, and B or Y for hexadecimal, decimal, octal and binary respectively, or you can prefix 0x for hexadecimal in the style of C. In addition, AMASM accepts the prefix 0h for hexadecimal, 0d or 0t for decimal, 0o or 0q for octal, and 0b or 0y for binary. Please note that unlike C, a 0 prefix by itself does not imply an octal constant!
165 |
166 | Some examples (all producing exactly the same code):
167 | ```asm
168 | mov ax,200 ; decimal
169 | mov ax,0200 ; still decimal
170 | mov ax,0200d ; explicitly decimal
171 | mov ax,0d200 ; also decimal
172 | mov ax,0c8h ; hex
173 | mov ax,0xc8 ; hex yet again
174 | mov ax,0hc8 ; still hex
175 | mov ax,310q ; octal
176 | mov ax,310o ; octal again
177 | mov ax,0o310 ; octal yet again
178 | mov ax,0q310 ; octal yet again
179 | mov ax,11001000b ; binary
180 | mov ax,1100_1000b ; same binary constant
181 | mov ax,0b1100_1000 ; same binary constant yet again
182 | ```
183 | > **_NOTE:_**
184 | >
185 | > Numeric constants can have underscores ('_') interspersed to break up long strings.
186 | >
187 |
188 | ### Character Strings
189 | In addition to numeric data, symbolic (non-numeric) data is often required. Consequently, the symbols are represented by assigning numeric values to each symbol or character. A character is typically stored in a byte (8-bits) of space. This works well since memory is byte addressable. Examples of characters include letters, numerical digits, common punctuation marks (such as '.' or '!'), and whitespace.
A character string consists of up to eight characters enclosed in either single quotes ('...'), double quotes ("...") or backquotes (`...`). Single or double quotes are equivalent to NASM (except of course that surrounding the constant with single quotes allows double quotes to appear within it and vice versa); the contents of those are represented verbatim. The general concept also includes control characters, which do not correspond to symbols in a particular language, but to other information used to process text. Examples of control characters include carriage return or tab.
190 |
191 | Strings enclosed in backquotes support C−style \–escapes for
192 | special characters. The following escape sequences are recognized by backquoted strings:
193 |
194 |
195 | | Escape | Meaning |
196 | |--------|---------|
197 | | `\'` | single quote (`'`) |
198 | | `\"` | double quote (`"`) |
199 | | `` \` `` | backquote (`` ` ``) |
200 | | `\\` | backslash (`\`) |
201 | | `\?` | question mark (`?`) |
202 | | `\a` | BEL (ASCII 7) |
203 | | `\b` | BS (ASCII 8) |
204 | | `\t` | TAB (ASCII 9) |
205 | | `\n` | LF (ASCII 10) |
206 | | `\v` | VT (ASCII 11) |
207 | | `\f` | FF (ASCII 12) |
208 | | `\r` | CR (ASCII 13) |
209 | | `\e` | ESC (ASCII 27) |
210 | | `\377` | Up to 3 octal digits − literal byte |
211 | | `\xFF` | Up to 2 hexadecimal digits − literal byte |
212 |
257 |
258 | > **_NOTE:_**
259 | >
260 | > Character literals don't currently support quotes (`"`).
261 | > Unicode character escapes are not yet supported.
262 | > Characters can be displayed to the console, but cannot be used for calculations. Integers can be used for calculations, but cannot be displayed to the console (without changing the representation).
263 |
264 | ### Character Constants
265 | A character constant consists of a string up to eight bytes long, used in an expression context. It is treated as if
266 | it was an integer. A character constant with more than one byte will be arranged with little−endian order in mind: if you code
267 |
268 | ```asm
269 | mov eax,'abcd'
270 | ```
271 |
272 | then the constant generated is not 0x61626364, but 0x64636261, so that if you were then to store the value into memory, it would read abcd rather than dcba.
273 |
274 | ### String Constants
275 | String constants are character strings used in the context of some pseudo−instructions, namely the DB family. A string constant looks like a character constant, only longer. It is treated as a concatenation of maximum−size character constants for the conditions. So the following are equivalent:
276 |
277 | ```asm
278 | db 'hello' ; string constant
279 | db 'h','e','l','l','o' ; equivalent character constants
280 | ```
281 |
282 | > **_NOTE:_**
283 | >
284 | > When used in a string−supporting context, quoted strings are treated as string constants even if they are short enough to be a character constant, because otherwise `db 'ab'` would have the same effect as `db 'a'`, which would be silly.
285 |
286 | ### Comments
287 | The semicolon (';') is used to note program comments. Comments (using the ';') may be placed anywhere, including after an instruction. Any characters after the ';' are ignored by the interpreter. This can be used to explain steps taken in the code or to comment out sections of code.
288 |
289 | ### Labels
290 | A program label is the target, or a location to jump to, for control statements. For example, the start of a loop might be marked with a label such as “loopStart”. The code may be re-executed by jumping to the label. Generally, a label starts with a letter, followed by letters, numbers, or symbols (limited to '_'), terminated with a colon (':').
291 |
292 | > **_NOTE:_**
293 | >
294 | > - Local labels aren't available.
295 | > - Program labels may be defined only once.
296 |
297 | ### Operand/Address Size (Data Storage Sizes)
298 | The x86-64 architecture supports a specific set of data storage size elements, all based on powers of two. To specify the size of an operand, simply preface the operands or operand with a mnemonic for the size you want. In a situation where you have, for instance, `add qword rax, rbx`, the size is perfectly valid but redundant. These sizes are not case-sensitive. You should already be quite aware that addresses can have different sizes. Almost any instruction that references memory must use one of the prefixes BYTE, WORD, DWORD or QWORD to indicate what size of memory operand it refers to (e.g. `add byte rax, [rbx]`). The supported storage sizes are as follows:
299 |
300 |
301 | | Storage | Size (bits) | Size (bytes) |
302 | |---------|-------------|--------------|
303 | | BYTE | 8-bits | 1 byte |
304 | | WORD | 16-bits | 2 bytes |
305 | | DWORD (Double-word) | 32-bits | 4 bytes |
306 | | QWORD (Quadword) | 64-bits | 8 bytes |
307 |
326 |
327 | ### Supported instructions
328 | This chapter provides a basic overview of a simple subset of the x86-64 instruction set, focusing on integer operations. The notation used in this section is fairly common in the technical literature. In general, an instruction will consist of the instruction or operation itself (e.g., add, sub, etc.) and the operands. The operands refer to where the data (to be operated on) is coming from and/or where the result is to be placed.
329 |
330 | > **_NOTE:_**
331 | >
332 | > Instructions, register and variable names are case-insensitive.
333 |
334 | #### ADD - Add
335 | Adds the destination operand (first operand) and the source operand (second operand) and then stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, a register, or a memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.
336 |
337 | Usage:
338 | ```asm
339 | ADD r, imm/r/m
340 | ADD m, imm/r
341 | ```
342 |
343 | Format:
344 |
345 | ```asm
346 | DEST ← DEST + SRC;
347 | ```
348 |
349 | Flags affected:
350 | 1. **ZF** is set if the result is zero; it's cleared otherwise.
351 | 2. **SF** is set if the result is negative; it's cleared otherwise.
352 | 3. **PF** is set if the result has even parity in the low 8 bits; it's cleared otherwise.
353 | 4. **CF** is set if the addition caused a carry-out from the high bit; it's cleared otherwise.
354 | 5. **OF** is set if the addition resulted in arithmetic under/overflow; it's cleared otherwise.
355 |
356 | > **_NOTE:_**
357 | >
358 | > The ADD instruction performs integer addition.
359 |
360 | #### SUB - Subtract
361 | Subtracts the second operand (source operand) from the first operand (destination operand) and stores the result in the destination operand. The destination operand can be a register or a memory location; the source operand can be an immediate, register, or memory location. (However, two memory operands cannot be used in one instruction.) When an immediate value is used as an operand, it is sign-extended to the length of the destination operand format.
362 |
363 |
364 | Usage:
365 | ```asm
366 | SUB r, imm/r/m
367 | SUB m, imm/r
368 | ```
369 |
370 | Format:
371 |
372 | ```asm
373 | DEST ← (DEST – SRC);
374 | ```
375 |
376 | Flags affected:
377 | 1. **ZF** is set if the result is zero; it's cleared otherwise.
378 | 2. **SF** is set if the result is negative; it's cleared otherwise.
379 | 3. **PF** is set if the result has even parity in the low 8 bits; it's cleared otherwise.
380 | 4. **CF** is set if the subtraction required a borrow into the most significant bit; it's cleared otherwise.
381 | 5. **OF** is set if the subtraction resulted in arithmetic under/overflow; it's cleared otherwise.
382 |
383 | > **_NOTE:_**
384 | >
385 | > The SUB instruction performs integer subtraction.
386 | >
387 | #### AND - Bitwise AND
388 | Performs a bitwise AND operation on the destination (first) and source (second) operands and stores the result in the destination operand location. The source operand can be an immediate, a register, or a memory location; the destination operand can be a register or a memory location. (However, two memory operands cannot be used in one instruction.) Each bit of the result is set to 1 if both corresponding bits of the first and second operands are 1; otherwise, it is set to 0.
389 |
390 | Usage:
391 | ```asm
392 | AND r, imm/r/m
393 | AND m, imm/r
394 | ```
395 |
396 | Format:
397 |
398 | ```asm
399 | DEST ← DEST AND SRC;
400 | ```
401 |
402 | Flags affected:
403 | 1. **ZF** is set if the result is zero; it's cleared otherwise.
404 | 2. **SF** is set if the result is negative; it's cleared otherwise.
405 | 3. **PF** is set if the result has even parity in the low 8 bits; it's cleared otherwise.
406 | 4. **CF** and **OF** are cleared.
407 |
408 | #### OR - Bitwise OR
409 | Performs a bitwise inclusive OR operation between the destination (first) and source (second) operands and stores the result in the destination operand location. The source operand can be an immediate, a register, or a memory location; the destination operand can be a register or a memory location. (However, two memory operands cannot be used in one instruction.) Each bit of the result of the OR instruction is set to 0 if both corresponding bits of the first and second operands are 0; otherwise, each bit is set to 1.
410 |
411 | Usage:
412 |
413 | ```asm
414 | OR r, imm/r/m
415 | OR m, imm/r
416 | ```
417 |
418 | Format:
419 |
420 | ```asm
421 | DEST ← DEST OR SRC;
422 | ```
423 |
424 | Flags affected:
425 | 1. **ZF** is set if the result is zero; it's cleared otherwise.
426 | 2. **SF** is set if the result is negative; it's cleared otherwise.
427 | 3. **PF** is set if the result has even parity in the low 8 bits; it's cleared otherwise.
428 | 4. **CF** and **OF** are cleared.
429 |
430 | #### NOT - Bitwise NOT
431 | Performs a bitwise NOT operation (each 1 is set to 0, and each 0 is set to 1) on the destination operand and stores the result in the destination operand location. The destination operand can be a register or a memory location.
432 |
433 | Usage:
434 | ```asm
435 | NOT r/m
436 | ```
437 |
438 | Format:
439 |
440 | ```asm
441 | DEST ← NOT DEST;
442 | ```
443 |
444 | > **_NOTE:_**
445 | >
446 | > It doesn't affect flags.
447 |
448 | #### MOV - Move
449 | Copies the second operand (source operand) to the first operand (destination operand). The source operand can be an immediate value, general-purpose register or memory location; the destination operand can be a general-purpose register or memory location. Both operands must be the same size, which can be a byte, a word, a doubleword, or a quadword.
450 |
451 | Usage:
452 |
453 | ```asm
454 | MOV r, imm/r/m
455 | MOV m, imm/r
456 | ```
457 |
458 | > **_NOTE:_**
459 | >
460 | > It doesn't affect flags.
461 |
462 | #### CMP - Compare
463 | Compares the first source operand with the second source operand and sets the status flags in the EFLAGS register according to the results. The comparison is performed by subtracting the second operand from the first operand and then setting the status flags in the same manner as the SUB instruction. When an immediate value is used as an operand, it is sign-extended to the length of the first operand. SUB should be used in place of CMP when the result is needed. The condition codes used by the Jcc instructions are based on the results of a CMP instruction.
464 |
465 | Usage:
466 |
467 | ```asm
468 | CMP r, imm/r/m
469 | CMP m, imm/r
470 | ```
471 |
472 | Format:
473 |
474 | ```asm
475 | temp ← SRC1 − SignExtend(SRC2);
476 | ModifyStatusFlags;
477 | ```
478 |
479 | Flags affected:
480 | 1. **ZF** is set if the result is zero; it's cleared otherwise.
481 | 2. **SF** is set if the result is negative; it's cleared otherwise.
482 | 3. **PF** is set if the result has even parity in the low 8 bits; it's cleared otherwise.
483 | 4. **CF** is set if the subtraction required a borrow into the most significant bit; it's cleared otherwise.
484 | 5. **OF** is set if the subtraction resulted in arithmetic under/overflow; it's cleared otherwise.
485 |
486 | #### JMP - Unconditional Jump
487 |
Jumps execution to the provided location in a program, denoted with a program label. This instruction does not depend on the current conditions of the flag bits in the EFLAGS register. Transfer of control may be forward, to execute a new set of instructions, or backward, to re-execute the same steps.
488 | Usage:
489 |
490 | ```asm
491 | JMP label
492 | ```
493 |
494 | > **_NOTE:_**
495 | >
496 | > It doesn't affect flags.
497 |
498 | #### Jcc - Jump if Condition Is Met (Conditional Jump)
499 | Jcc is not a single instruction; it describes the family of jump mnemonics that check the condition code before jumping. If some specified condition is satisfied in a conditional jump, the control flow is transferred to a target instruction. These instructions form the basis for all conditional branching. There are numerous conditional jump instructions depending upon the condition and data.
500 |
501 | Two steps are required for a Jcc: the compare instruction and the conditional jump instruction. The conditional jump instruction will jump or not jump to the provided label based on the result of the previous comparison operation. The compare instruction will compare two operands and store the results of the comparison in the EFLAGS register. This requires that the compare instruction is immediately followed by the conditional jump instruction. If other instructions are placed between the compare and conditional jump, the EFLAGS register will be altered, and the conditional jump may not reflect the correct condition.
502 | Usage:
503 |
504 | ```asm
505 | Jcc label
506 | ```
507 |
508 | Instruction | Description | Flags tested | Condition
509 | | - | - | :-: | :-:
510 | JE | Jump Equal | ZF | ZF == 1
511 | JNE | Jump not Equal | ZF | ZF == 0
512 | JGE | Jump Greater/Equal | OF, SF | SF == 0
513 | JL | Jump Less | OF, SF | SF != 0
514 |
515 | > **_NOTE:_**
516 | >
517 | > It doesn't affect flags.
518 |
519 | #### PUSH
520 | Decrements the stack pointer and then stores the source operand on the top of the stack. The size parameter determines the size of the value that is pushed.
521 | Usage:
522 |
523 | ```asm
524 | PUSH imm/r/m
525 | ```
526 |
527 | > **_NOTE:_**
528 | >
529 | > - It doesn't affect flags.
530 | > - The operand size (16, 32, or 64 bits) determines the amount by which the stack pointer is decremented (2, 4 or 8).
531 | > - If the source operand is an immediate of size less than the operand size, a sign-extended value is pushed on the stack.
532 |
533 | #### POP
534 | Loads the value from the top of the stack to the location specified with the destination operand (or explicit opcode) and then increments the stack pointer. The destination operand can be a general-purpose register, memory location, or segment register.
535 | Usage:
536 |
537 | ```asm
538 | POP r/m
539 | ```
540 |
541 | Format:
542 |
543 | ```asm
544 | pop value;
545 | dest ← value;
546 | ```
547 |
548 | > **_NOTE:_**
549 | >
550 | > - It doesn't affect flags.
551 | > - The operand size (16, 32, or 64 bits) determines the amount by which the stack pointer is incremented (2, 4 or 8).
552 |
553 | ## Memory
554 | A memory value is an expression that evaluates to the address of some value in memory. In AMx64 assembly language, addresses are enclosed in brackets “[…]” with the address expression inside.
555 |
556 | > **_NOTE:_**
557 | >
558 | > Despite the fact that you can use 64-bit address, you only have 2 GB of memory available due to internal limits of C# in Visual Studio.
559 |
560 | ### Stack
561 |
In a computer, a stack is a type of data structure where items are added and then removed from the stack in reverse order. That is, the most recently added item is the very first one that is removed. This is often referred to as Last-In, First-Out (LIFO). A stack is heavily used in programming for the storage of information during procedure or function calls.
562 |
563 | In most languages (even low-level ones) the stack is completely hidden from the programmer. In these languages we can only indirectly impact it. We already know that declaring a variable sets aside space on the stack, and that calling a function uses the stack as well. The difference now is that in assembly language, the programmer is responsible for managing the stack.
564 |
565 | The stack is managed by RBP and RSP (base pointer and stack pointer). Upon program initialization, RBP and RSP are set to the address of the top of the stack, which begins at the high side of the program's available memory and grows downward. Because of this, RSP will always point to the most-recently-added item on the stack. To add an item to the stack you can use the PUSH instruction. To remove an item, you can use the POP instruction.
566 |
567 | > **_NOTE:_**
568 | > - You can't push an 8-bit value onto the stack.
569 | > - RSP can be modified directly without damaging the stack structure, but care should be taken when doing so.
570 |
571 | ### Registers
572 | A register operand refers to the contents of a register. AMx64 has a total of 16 registers, but not all of them are currently in use. To refer to one of the available registers, you simply need to designate the name of the partition you want to use (e.g. RAX, RBX, etc.). The register name you use indicates the size of the operand (i.e. how much data is moved, processed, etc.). For instance, using EAX to load a value from memory (e.g. mov eax, [var]
) loads a 32-bit value from memory into the 32-bit partition of RAX.
573 |
574 | AMx64 uses the following names for general-purpose registers in 64-bit mode. This is consistent with the AMD/Intel documentation and most other assemblers.
575 |
576 | #### General-Purpose Registers (GPRs)
577 | There are sixteen, 64-bit General Purpose Registers (GPRs). The currently available GPRs are described in the following table. A GPR register can be accessed with all 64-bits or some portion or subset accessed.
578 |
579 |
580 | Naming conventions |
581 | 64 bits |
582 | 32 bits |
583 | 16 bits |
584 | High 8 bits |
585 | Low 8 bits |
586 |
587 |
588 | Accumulator |
589 | RAX |
590 | EAX |
591 | AX |
592 | AH |
593 | AL |
594 |
595 |
596 | Base |
597 | RBX |
598 | EBX |
599 | BX |
600 | BH |
601 | BL |
602 |
603 |
604 | Counter |
605 | RCX |
606 | ECX |
607 | CX |
608 | CH |
609 | CL |
610 |
611 |
612 | Data |
613 | RDX |
614 | EDX |
615 | DX |
616 | DH |
617 | DL |
618 |
619 |
620 | Stack pointer |
621 | RSP |
622 | ESP |
623 | SP |
624 | |
625 |
626 |
627 | Stack base pointer |
628 | RBP |
629 | EBP |
630 | BP |
631 | |
632 |
633 |
634 | Source index |
635 | RSI |
636 | ESI |
637 | SI |
638 | |
639 |
640 |
641 | Destination index |
642 | RDI |
643 | EDI |
644 | DI |
645 | |
646 |
647 |
648 | When using data element sizes less than 64-bits (e.g. 32-bit, 16-bit, or 8-bit), the lower portion of the register can be accessed by using a different register name as shown in the table.
649 |
650 | > **_NOTE:_**
651 | >
652 | > Some of the GPR registers are used for dedicated purposes as described in the later sections.
653 | #### FLAGS register
654 | The status register contains the current state of the processor. This register stores status information about the instruction that was just executed. It's 16 bits wide. Its successors, the EFLAGS and RFLAGS registers, are 32 bits and 64 bits wide, respectively. The wider registers retain compatibility with their smaller predecessors, as is the case with the other registers. The AMx64 flags register conforms to the Intel x86_64 standard; not all bits are used in the current version.
655 |
656 |
657 |
658 | Bit |
659 | Mark |
660 | Abbreviation |
661 | Name |
662 | Description |
663 | =1 |
664 | =0 |
665 | Implementation status |
666 |
667 |
668 | 0 |
669 | 0x0001 |
670 | CF |
671 | Carry flag |
672 | Set if the last arithmetic operation carried (addition) or borrowed (subtraction) a bit beyond the size of the register. This is then checked when the operation is followed with an add-with-carry or subtract-with-borrow to deal with values too large for just one register to contain. |
673 | CY (Carry) |
674 | NC (No Carry) |
675 | ✅ |
676 |
677 |
678 | 2 |
679 | 0x0004 |
680 | PF |
681 | Parity flag |
682 | Set if the number of set bits in the least significant byte is a multiple of 2. |
683 | PE (Parity Even) |
684 | PO (Parity Odd) |
685 | ✅ |
686 |
687 |
688 | 4 |
689 | 0x0010 |
690 | AF |
691 | Adjust flag |
692 | Carry of Binary-Coded Decimal (BCD) numbers arithmetic operations. |
693 | AC (Auxiliary Carry) |
694 | NA (No Auxiliary Carry) |
695 | ❎ |
696 |
697 |
698 | 6 |
699 | 0x0040 |
700 | ZF |
701 | Zero flag |
702 | Set if the result of an operation is Zero (0). |
703 | ZR (Zero) |
704 | NZ (Not Zero) |
705 | ✅ |
706 |
707 |
708 | 7 |
709 | 0x0080 |
710 | SF |
711 | Sign flag |
712 | Set if the result of an operation is negative. |
713 | NG (Negative) |
714 | PL (Positive) |
715 | ✅ |
716 |
717 |
718 | 8 |
719 | 0x0100 |
720 | TF |
721 | Trap flag |
722 | Set if step by step debugging. |
723 | |
724 | ❎ |
725 |
726 |
727 | 9 |
728 | 0x0200 |
729 | IF |
730 | Interrupt enable flag |
731 | Set if interrupts are enabled. |
732 | EI (Enable Interrupt) |
733 | DI (Disable Interrupt) |
734 | ❎ |
735 |
736 |
737 | 10 |
738 | 0x0400 |
739 | DF |
740 | Direction flag |
741 | Stream direction. If set, string operations will decrement their pointer rather than incrementing it, reading memory backwards. |
742 | DN (Down) |
743 | UP (Up) |
744 | ❎ |
745 |
746 |
747 | 11 |
748 | 0x0800 |
749 | OF |
750 | Overflow flag |
751 | Set if signed arithmetic operations result in a value too large for the register to contain. |
752 | OV (Overflow) |
753 | NV (Not Overflow) |
754 | ❎ |
755 |
756 |
757 | 12-13 |
758 | 0x3000 |
759 | IOPL |
760 | I/O privilege level |
761 | I/O Privilege Level of the current process. |
762 | |
763 | ❎ |
764 |
765 |
766 |
767 | ### Addressing modes for data
768 | The addressing mode indicates the manner in which the operand is presented, or the addressing modes are the supported methods for accessing a value in memory using the address of a data item being accessed (read or written). This might include the name of a variable or the location in an array.
769 |
770 | > **_NOTE:_**
771 | >
772 | > The only way to access memory is with the brackets ("[]"). Omitting the brackets will not access memory and instead obtain the address of the item.
773 |
774 | #### Register (direct) Addressing
775 | ```
776 | +------+-----+-----+
777 | | mov | reg1| reg2| reg1:=reg2
778 | +------+-----+-----+
779 | ```
780 |
This "addressing mode" does not have an effective address and is not considered to be an addressing mode on some computers. In this example, all the operands are in registers, and the result is placed in a register. E.g.
781 |
782 | ```asm
783 | mov ax, bx ; moves contents of register bx into ax
784 | ```
785 |
786 | #### Immediate (literal) Addressing
787 | ```
788 | +------+-----+----------------+
789 | | add | reg1| constant | reg1 := reg1 + constant;
790 | +------+-----+----------------+
791 | ```
792 |
This "addressing mode" does not have an effective address, and is not considered to be an addressing mode on some computers. E.g.
793 |
794 | ```asm
795 | mov ax, 1 ; moves value of 1 into register ax
796 | ```
797 |
moves a value of 1 into register ax. Instead of using an operand from memory, the value of the operand is held within the instruction itself.
798 |
799 | #### Direct Memory Addressing
800 | Direct memory mode addressing means that the operand is a location in memory (accessed via an address). This is also referred to as indirection or dereferencing.
801 |
802 | ```asm
803 | mov qword rax, [var] ; copy var content into rax
804 | ```
805 | This instruction will access the memory location of the variable var and retrieve the value stored there. This requires that the CPU wait until the value is retrieved before completing the operation, and thus might take slightly longer to complete than a similar operation using an immediate value.
806 |
807 | > **_NOTE:_**
808 | >
809 | > Direct offset addressing is not currently supported.
810 | >
811 | #### Register Indirect Addressing
812 | For example, when accessing arrays, a more generalized method is usually required. Specifically, an address can be placed in a register and indirection performed using the register (instead of the variable name). E.g.
813 |
814 | ```asm
815 | mov rbx, var
816 | mov dword eax, [rbx]
817 | ```
818 |
819 | ## Calling System Services
820 | When calling system services, arguments are placed in the standard argument registers. System services do not typically use stack-based arguments. This limits the arguments of system services to six. To call a system service, the first step is to determine which system service is desired. The general process is that the system service call code is placed in the RAX register. The call code is a number that has been assigned for the specific system service being requested. These are assigned as part of the operating system and cannot be changed by application programs. AMx64 uses a very small subset of system service call codes as a set of constants. If any are needed, the arguments for system services are placed in the RDI, RSI, RDX, RCX, R8 and R9 registers (in that order). The following table shows the argument locations which are consistent with the standard calling convention.
821 |
822 |
823 |
824 | Register |
825 | Usage |
826 |
827 |
828 | RAX |
829 | Call code |
830 |
831 |
832 | RDI |
833 | 1st argument |
834 |
835 |
836 | RSI |
837 | 2nd argument |
838 |
839 |
840 | RDX |
841 | 3rd argument |
842 |
843 |
844 | RCX |
845 | 4th argument |
846 |
847 |
848 | R8 |
849 | 5th argument |
850 |
851 |
852 | R9 |
853 | 6th argument |
854 |
855 |
856 |
857 | Each system call will use a different number of arguments (from none up to 6). However, the system service call code is always required. After the call code and any arguments are set, the syscall instruction is executed. The syscall instruction will pause the interpretation process and will attempt to perform the service specified in the RAX register. When the system service returns, the interpretation process will be resumed.
858 |
859 | > **_NOTE:_**
860 | >
861 | > R8 and R9 registers are not currently available for usage.
862 |
863 | ### Return Codes
864 | The system call will return a code in the RAX register. If the value returned is less than 0, that is an indication that an error has occurred. If the operation is successful, the value returned will depend on the specific system service.
865 |
866 |
867 |
868 | Call Code (RAX) |
869 | System Service |
870 | Description |
871 |
872 |
873 | 0 |
874 | sys_read |
875 | Read characters - If unsuccessful, returns negative value. If successful, returns a count of characters actually read. RDI - file descriptor RSI - address of where to store characters RDX - number of characters to read |
876 |
877 |
878 | 1 |
879 | sys_write |
880 | Write characters - If unsuccessful, returns negative value. If successful, returns a count of characters actually written. RDI - file descriptor RSI - address of characters to write RDX - number of characters to write |
881 |
882 |
883 | 60 |
884 | sys_exit |
885 | Terminate executing process. RDI - exit status |
886 |
887 |
888 |
889 | ### Console Output
890 | The system service to output characters to the console is the system write (sys_write). Like a high-level language, characters are written to standard out (stdout) which is the console. The stdout is the default file descriptor for the console. The file descriptor is already opened and available for use in programs (assembly and high-level languages). The arguments for the write system service are as follows:
891 |
892 |
893 |
894 | Register |
895 | sys_write |
896 |
897 |
898 | RAX |
899 | Call code = sys_write (1) |
900 |
901 |
902 | RDI |
903 | Output location, stdout (1) |
904 |
905 |
906 | RSI |
907 | Address of characters to output |
908 |
909 |
910 | RDX |
911 | Number of characters to output |
912 |
913 |
914 |
915 | ### Console Input
916 | The system service to read characters from the console is the system read (sys_read). Like a high-level language, for the console, characters are read from standard input (stdin). The stdin is the default file descriptor for reading characters from the keyboard. The file descriptor is already opened and available for use in programs (assembly and high-level languages).
917 |
918 | When using the system service to read from the keyboard, much like the write system service, the number of characters to read is required. Of course, we will need to declare an appropriate amount of space to store the characters being read. If we request 10 characters to read and the user types more than 10, the additional characters will be lost.
919 |
920 |
921 |
922 | Register |
923 | sys_read |
924 |
925 |
926 | RAX |
927 | Call code = sys_read (0) |
928 |
929 |
930 | RDI |
931 | Input location, stdin (0) |
932 |
933 |
934 | RSI |
935 | Address of where to store characters read |
936 |
937 |
938 | RDX |
939 | Number of characters to read |
940 |
941 |
942 |
943 | ### Proper way to end asm code
944 | No special label or directives are required to terminate the program. However, to terminate asm code properly you should do the following:
945 |
946 | ```asm
947 | mov rax, 60
948 | mov rdi, 0
949 | syscall
950 | ```
951 | These instructions indicate that the program ends correctly. If the program terminates unsuccessfully, it should store value 1 inside the RDI register.
952 |
953 | ## Debug - AMDB
954 | A debugger allows the user to control execution of a program, examine variables and other memory. AMDB is loosely based on GDB. Once the debugger is started, in order to effectively use the debugger, an initial breakpoint must be set. Once a breakpoint is set, the run
(or r
) command can be performed. The breakpoint is indicated with a red line number on the left, and the current location is indicated with a green asm line (see example below). Specifically, the green line points to the next instruction to be executed. That is, the green asm line has not yet been executed.
955 | 
956 |
957 | ### Getting Help
958 | You can always ask amdb itself for information on its commands, using the command help
. You can use help
(abbreviated h
) with no arguments to display a short list of commands.
959 | 
960 |
961 | ### Setting Breakpoints
962 | Breakpoints are set with the break
command (abbreviated b
). This command tells amdb to pause interpretation of your program at some point to allow you to inspect the value of variables and other memory locations. It will pause interpretation just before the specified line number is interpreted.
963 |
964 | - break [breakpoints]
965 | Set a breakpoint(s) at the given location(s) (line number(s)). The breakpoint will stop your program just before it executes any of the code in the specified location. E.g. break 2 3 4
.
966 |
967 |
968 | As needed, additional breakpoints can be set. However, the run
command will re-start execution from the beginning and stop at the initial breakpoint.
969 |
970 | ### Deleting Breakpoints
971 | It is often necessary to eliminate a breakpoint once it has done its job and you no longer want your interpretation to stop there. This is called deleting the breakpoint. A breakpoint that has been deleted no longer exists. You can delete breakpoints using the d
(or delete
) command.
972 |
973 | - delete
974 | - Deletes all available breakpoints.
975 | - delete [breakpoints]
976 | - Deletes the breakpoints at the given line number(s). E.g.
delete 2 3 4
.
977 |
978 |
979 | ### Display Source Code
980 | You can display your source code inside amdb using the l
(or list
) command. amdb will print 7 lines of source code at a time, with a line number at the start of each line. The current line is always highlighted with a green color.
981 |
982 | ### Examine Memory (Display Memory/Register Contents)
983 | Once you have paused interpretation, you can use the p
(or print
) command to print the values of variables, specified memory locations or registers.
984 |
985 | - print
986 | - Shows internal state of all available registers as well as the values of flags inside of the FLAGS register.

987 | - print register
988 | - Shows value stored in a specified register. E.g.
print RAX
.
989 | - print size variable
990 | - Shows value stored in memory starting from the memory location which is referenced using a variable. E.g.
print word hello_msg
.
991 | - print size memory_location
992 | - Shows value stored in memory starting from the memory location. E.g.
print word 0x000000000000000A
. Memory location can be set in a hex format (e.g. 0x000000000000007B) or in a decimal format (e.g. 123).
993 |
994 |
995 | ### Continuing and Stepping
996 | Continuing means resuming file interpretation until it completes normally. In contrast, stepping means executing just one more “step” of your interpreter, where “step” means one line of source code. When continuing, the interpreter may stop even sooner, due to a breakpoint.
997 |
998 | - continue or c
999 | - Continues interpretation until the end of the file or until it reaches the next breakpoint.
1000 | - step or s
1001 | - Interprets the current line and stops interpretation on the next line.

1002 |
1003 |
1004 | ### Quitting
1005 | To quit from an amdb session, type q
(short for quit) or quit
. amdb will ask if you really want to quit. If you do, type y
followed by the Enter
key. This check may seem a little unnecessary, but it helps prevent people quitting accidentally at that crucial moment in a lengthy debugging session.
1006 | 
1007 |
1008 | ## References
1009 | ### Books
1010 |
1017 |
1018 | ### Links
1019 |
1028 |
1029 | ### Github projects
1030 | Some of the projects that helped me create my project.
1031 | - [NASM](https://github.com/netwide-assembler/nasm)
1032 | - [CSX64](https://github.com/dragazo/CSX64)
1033 | - [Asm-Dude](https://github.com/HJLebbink/asm-dude)
1034 | - [binutils-gdb](https://github.com/bminor/binutils-gdb)
1035 |
1036 | ## To-Do List
1037 | - [x] Add Direct memory addressing.
1038 | - [ ] Add Direct offset addressing.
1039 | - [x] Add Register indirect addressing.
1040 | - [x] Implement Stack memory structure.
1041 | - [x] Implement push and pop instructions.
1042 | - [x] Implement 64-bit addressable memory.
1043 | - [x] Implement assembler sections (.data, .bss, .text).
1044 | - [x] Implement C-style character escapes.
1045 | - [x] Implement character constants.
1046 | - [ ] Add pseudo-instruction EQU.
1047 | - [ ] Build an amdbui.
1048 | - [ ] Implement paging memory management.
1049 | - [ ] Implement Debugging with step-back.
--------------------------------------------------------------------------------