├── .gitignore
├── LICENSE
├── README.md
├── glyph.nim
├── glyph
    └── snes
    │   ├── addressing_modes.nim
    │   ├── cpu.nim
    │   ├── datatypes.nim
    │   ├── opcodes.nim
    │   └── private
    │       └── macros_opcodes.nim
├── resources
    ├── SNES_resources.md
    └── interpreter_optimizations.md
└── tests
    └── opcLength.nim


/.gitignore:
--------------------------------------------------------------------------------
1 | nimcache/
2 | 
3 | # Executables shall be put in an ignored build/ directory
4 | # Ignore dynamic, static libs and libtool archive files
5 | build/
6 | *.so
7 | *.dylib
8 | *.a


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2018 Mamy André-Ratsimbazafy
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Glyph
2 | 
3 | [![License: Apache](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) ![Stability: experimental](https://img.shields.io/badge/stability-experimental-orange.svg)
4 | 
5 | A Super NES emulator project written in Nim. Currently WIP.
6 | 


--------------------------------------------------------------------------------
/glyph.nim:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mratsim/glyph/8b278c5e76c3f1053a196173a93686afda0596cc/glyph.nim


--------------------------------------------------------------------------------
/glyph/snes/addressing_modes.nim:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2018 Mamy André-Ratsimbazafy
  2 | # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
  3 | 
  4 | import ./datatypes
  5 | 
  6 | # $number represents an address in hexadecimal representation
  7 | # $HHLL corresponds to high-high-low-low nibbles of a 16-bit address
  8 | # #$CONST represents a constant (prefix #)
  9 | 
 10 | # Note, for implementation we don't respect the:
 11 | #  - Ecc1_m16bit - Add 1 cycle if Accumulator is accessed in 16-bit mode
 12 | #  - Ecc2_m16bit - Add 2 cycles if Accumulator is accessed in 16-bit mode
 13 | #  - Ecc1_xy16bit - Add 1 cycle if Index Register is accessed in 16-bit mode
 14 | # We follow the rule of thumb: 8-bit (1 byte) access = 1 cpu cycle
 15 | # Opcode implementations/tests should take care that
 16 | # implementation cycles = theoretical cycles + modifiers
 17 | 
 18 | template readPC(): uint8 {.dirty.} =
 19 |   ## Calls Next() and CycleCPU() (program counter and cycle increment), then yields sys.mem[PB, PC].
 20 |   # Note: this template is dirty, so `sys` must exist in the scope where it is expanded.
 21 |   #   Program segments cannot cross bank boundaries;
 22 |   #   if the program counter increments past $FFFF,
 23 |   #   it rolls over to $0000 without incrementing the program counter bank register.
 24 |   Next()
 25 |   CycleCPU()
 26 |   sys.mem[PB, PC]
 27 | 
 28 | func readAddr(sys: Sys, isLong: static[bool] = false): Addr {.inline.} =
 29 |   ## Fetch the address operand found at the program counter, advancing it byte by byte.
 30 |   ## Yields a 24-bit address (bank, high byte, low byte).
 31 |   #  The 65816 is little-endian, so the bytes arrive in this order:
 32 |   #    low byte, high byte, then (long form only) the data bank byte
 33 |   result.lo = readPC()                                        # 1 cycle
 34 |   result.hi = readPC()                                        # 1 cycle
 35 |   when isLong: result.bank = readPC()                         # (+1 cycle if long addressing)
 36 |   else: result.bank = DB                                      # short form: bank defaults to DB
 37 | 
 38 | func readData(sys: Sys, T: typedesc[uint8 or uint16], adr: Addr, ecc: static[ExtraCycleCosts]): T {.inline.}=
 39 |   ## Read a uint8 or uint16 value at a specific data address.
 40 |   ## Crossing a bank boundary (0xFFFF) when reading data does not cost an extra cycle.
 41 |   ## `ecc` is accepted because every addressing mode forwards it; no cycle is derived from it here.
 42 |   #  Implementation - 65816 is little-endian: low byte then high byte
 43 |   when T is uint16:                                        # 2 cycles
 44 |     CycleCPU()
 45 |     result.lo = sys.mem[adr]
 46 | 
 47 |     CycleCPU()
 48 |     result.hi = sys.mem[adr + 1] # This may cross data banks. No extra cycle.
 49 |   else:                                                    # 1 cycle
 50 |     CycleCPU()
 51 |     result = sys.mem[adr]
 52 | 
 53 | func readIndirectAddr(sys: Sys, ptrAddr: uint16, isLong: static[bool] = false): Addr {.inline.}=
 54 |   ## Dereference a pointer stored in bank 0.
 55 |   ## `ptrAddr` locates the pointer; the address stored there is what gets returned.
 56 |   CycleCPU()                                               # 1 cycle
 57 |   result.lo = sys.mem[0, ptrAddr]
 58 |   CycleCPU()                                               # 1 cycle
 59 |   result.hi = sys.mem[0, ptrAddr + 1]
 60 |   when not isLong:
 61 |     result.bank = DB                                       # 16-bit pointer: bank comes from DB
 62 |   else:
 63 |     CycleCPU()                                             # (+1 cycle if long addressing)
 64 |     result.bank = sys.mem[0, ptrAddr + 2]
 65 | 
 66 | template crossBoundary(adr: Addr, dataBank: uint8) {.dirty.} =
 67 |   ## Charge 1 extra CPU cycle when the bank of `adr` differs from `dataBank`: indexing past
 68 |   ## a bank boundary physically needs an extra read of the data bank byte to increment it.
 69 |   when ecc.contains(EccCrossBoundary):
 70 |     if not (adr.bank == dataBank):
 71 |       CycleCPU()
 72 | 
 73 | template directLowNonZero(adr: Addr, d: uint16) {.dirty.} =
 74 |   ## Charge 1 extra CPU cycle when the low byte of the direct register `d` is non-zero,
 75 |   ## i.e. when the register is not page-aligned. `adr` is not examined by the body.
 76 |   when ecc.contains(EccDirectLowNonZero):
 77 |     if lo(d) != 0x00: CycleCPU()
 78 | 
 79 | func immediate*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
 80 |   ## Immediate addressing mode - $OP #$CONST
 81 |   ## Yields the 1 or 2-byte constant stored right after the opcode.
 82 |   ##  8-bit: inc   #$12 -- cycle: 1 -- length: 2
 83 |   ## 16-bit: inc #$1234 -- cycle: 2 -- length: 3
 84 |   when T is uint8:                                         # 1 cycle
 85 |     result = readPC()
 86 |   else:                                                    # 2 cycles, low byte first
 87 |     result.lo = readPC()
 88 |     result.hi = readPC()
 89 | 
 90 | func absolute*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
 91 |   ## Absolute addressing mode - $OP $HHLL
 92 |   ## Fetches the value stored at (current Data Bank, 16-bit operand address).
 93 |   ## The operand address is interpreted relative to the current data bank.
 94 |   ##  8-bit: and $1234 -- cycle: 3 -- length: 3
 95 |   ## 16-bit: and $1234 -- cycle: 4 -- length: 3
 96 | 
 97 |   let operandAddr = readAddr(sys)                          # 2 cycles
 98 |   result = readData(sys, T, operandAddr, ecc)              # 1 cycle (8-bit) or 2 cycles (16-bit)
 99 | 
100 | func absoluteLong*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
101 |   ## Absolute long addressing mode - $OP $DBHHLL
102 |   ## Loads and returns the value at the 24-bit address immediately after the opcode.
103 |   ## The most significant operand byte selects the data bank, so DB is not used.
104 |   ##  8-bit: and $123456 -- cycle: 4 -- length: 4
105 |   ## 16-bit: and $123456 -- cycle: 5 -- length: 4
106 | 
107 |   let adr = sys.readAddr(isLong = true)                    # 3 cycles (low, high, bank bytes)
108 |   result = sys.readData(T, adr, ecc)                       # 1 cycle (8-bit) or 2 cycles (16-bit)
109 | 
110 | func absoluteX*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
111 |   ## Absolute Indexed, X addressing mode - $OP $HHLL,X
112 |   ## Fetches the value stored at (current Data Bank, 16-bit operand address + X register).
113 |   ## The operand address is interpreted relative to the current data bank.
114 |   ##  8-bit: and $1234,X -- cycle: 3 -- length: 3
115 |   ## 16-bit: and $1234,X -- cycle: 4 -- length: 3
116 | 
117 |   let indexed = readAddr(sys) + X                          # 2 cycles
118 |   crossBoundary(indexed, DB)                               # (+1 if crossing data bank boundary)
119 |   result = readData(sys, T, indexed, ecc)                  # 1 cycle (8-bit) or 2 cycles (16-bit)
120 | 
121 | func absoluteY*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
122 |   ## Absolute Indexed, Y addressing mode - $OP $HHLL,Y
123 |   ## Fetches the value stored at (current Data Bank, 16-bit operand address + Y register).
124 |   ## The operand address is interpreted relative to the current data bank.
125 |   ##  8-bit: and $1234,Y -- cycle: 3 -- length: 3
126 |   ## 16-bit: and $1234,Y -- cycle: 4 -- length: 3
127 | 
128 |   let indexed = readAddr(sys) + Y                          # 2 cycles
129 |   crossBoundary(indexed, DB)                               # (+1 if crossing data bank boundary)
130 |   result = readData(sys, T, indexed, ecc)                  # 1 cycle (8-bit) or 2 cycles (16-bit)
131 | 
132 | func absoluteLongX*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
133 |   ## Absolute Long Indexed, X addressing mode - $OP $DBHHLL,X
134 |   ## Loads and returns the value at the (24-bit address + X register).
135 |   ## The most significant operand byte selects the data bank, so DB is not used.
136 |   ##  8-bit: and $123456,X -- cycle: 4 -- length: 4
137 |   ## 16-bit: and $123456,X -- cycle: 5 -- length: 4
138 | 
139 |   let adr = sys.readAddr(isLong = true)                    # 3 cycles (low, high, bank bytes)
140 |   let db = adr.bank                                        # bank named by the operand, before indexing
141 |   let effectiveAdr = adr + X
142 |   crossBoundary(effectiveAdr, db)                          # (+1 if crossing data bank boundary)
143 |   result = sys.readData(T, effectiveAdr, ecc)              # 1 cycle (8-bit) or 2 cycles (16-bit)
144 | 
145 | func direct*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
146 |   ## Direct addressing mode - $OP $LL
147 |   ## Fetches the value stored at (Bank 0, 8-bit operand + D register).
148 |   ##  8-bit: and $12 -- cycle: 3 -- length: 2
149 |   ## 16-bit: and $12 -- cycle: 4 -- length: 2
150 |   ## +1 cycle if Direct register is not page-aligned (low byte != 0)
151 | 
152 |   let target = toAddr(0, D + readPC())                     # 1 cycle
153 |   directLowNonZero(target, D)                              # (+1 Direct register low byte != 0)
154 |   result = readData(sys, T, target, ecc)                   # 1 cycle (8-bit) or 2 cycles (16-bit)
155 | 
156 | func directX*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
157 |   ## Direct Indexed with X addressing mode - $OP $LL,X
158 |   ## Fetches the value stored at (Bank 0, 8-bit operand + D register + X register).
159 |   ##  8-bit: and $12,X -- cycle: 6 -- length: 2
160 |   ## 16-bit: and $12,X -- cycle: 7 -- length: 2
161 |   ## +1 cycle if Direct register is not page-aligned (low byte != 0)
162 | 
163 |   let target = toAddr(0, D + readPC() + X)                 # 1 cycle
164 |   directLowNonZero(target, D)                              # (+1 Direct register low byte != 0)
165 | 
166 |   CycleCpu()                                               # 1 cycle (IO)
167 |   result = readData(sys, T, target, ecc)                   # 1 cycle (8-bit) or 2 cycles (16-bit)
168 | 
169 | func directY*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
170 |   ## Direct Indexed with Y addressing mode - $OP $LL,Y
171 |   ## Loads and returns the value at the (Bank 0, 8-bit address + D register + Y register).
172 |   ##  8-bit: and $12,Y -- cycle: 6 -- length: 2
173 |   ## 16-bit: and $12,Y -- cycle: 7 -- length: 2
174 |   ## +1 cycle if Direct register is not page-aligned (low byte != 0)
175 | 
176 |   let adr = toAddr(0, D + readPC() + Y)                    # 1 cycle - indexed by Y, not X
177 |   directLowNonZero(adr, D)                                 # (+1 Direct register low byte != 0)
178 | 
179 |   CycleCpu()                                               # 1 cycle (IO)
180 |   result = sys.readData(T, adr, ecc)                       # 1 cycle (8-bit) or 2 cycles (16-bit)
181 | 
182 | func directXindirect*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
183 |   ## Direct Indexed Indirect (with X) addressing mode - $OP ($LL,X)
184 |   ## Reads a 16-bit pointer at (Bank 0, 8-bit address + D register + X register) and
185 |   ## returns the value that pointer designates in the current Data Bank.
186 |   ##  8-bit: and ($12,X) -- cycle: 6 -- length: 2 / 16-bit: cycle: 7 -- length: 2
187 |   ## +1 cycle if Direct register is not page-aligned (low byte != 0)
188 | 
189 |   let offset = D + readPC() + X                            # 1 cycle
190 |   directLowNonZero(toAddr(0, offset), D)                   # (+1 Direct register low byte != 0)
191 | 
192 |   CycleCpu()                                               # 1 cycle (IO)
193 |   let adr = sys.readIndirectAddr(offset)                   # 2 cycles (pointer dereference)
194 |   result = sys.readData(T, adr, ecc)                       # 1 cycle (8-bit) or 2 cycles (16-bit)
195 | 
196 | func directIndirect*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
197 |   ## Direct Indirect addressing mode - $OP ($LL)
198 |   ## Reads a 16-bit pointer at (Bank 0, 8-bit address + D register) and returns the value it designates in the current Data Bank.
199 |   ##  8-bit: and ($12) -- cycle: 5 -- length: 2
200 |   ## 16-bit: and ($12) -- cycle: 6 -- length: 2
201 |   ## +1 cycle if Direct register is not page-aligned (low byte != 0)
202 | 
203 |   let offset = D + readPC()                                # 1 cycle
204 |   directLowNonZero(toAddr(0, offset), D)                   # (+1 Direct register low byte != 0)
205 | 
206 |   let adr = sys.readIndirectAddr(offset)                   # 2 cycles (pointer dereference)
207 |   result = sys.readData(T, adr, ecc)                       # 1 cycle (8-bit) or 2 cycles (16-bit)
208 | 
209 | func directIndirectLong*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
210 |   ## Direct Indirect Long addressing mode - $OP [$LL]
211 |   ## Reads a 24-bit pointer at (Bank 0, 8-bit address + D register) and returns the value it designates.
212 |   ##  8-bit: and [$12] -- cycle: 5 -- length: 2
213 |   ## 16-bit: and [$12] -- cycle: 6 -- length: 2
214 |   ## +1 cycle if Direct register is not page-aligned (low byte != 0)
215 | 
216 |   let offset = D + readPC()                                # 1 cycle
217 |   directLowNonZero(toAddr(0, offset), D)                   # (+1 Direct register low byte != 0)
218 | 
219 |   let adr = sys.readIndirectAddr(offset, isLong = true)    # 3 cycles (long pointer dereference)
220 |   result = sys.readData(T, adr, ecc)                       # 1 cycle (8-bit) or 2 cycles (16-bit)
221 | 
222 | func directIndirectY*(sys: Sys, T: typedesc[uint8 or uint16], ecc: static[ExtraCycleCosts]): T {.inline.}=
223 |   ## Direct Indirect Indexed addressing mode - $OP ($LL), Y
224 |   ## Reads a 16-bit pointer at (Bank 0, 8-bit address + D register), adds Y to the
225 |   ## address it designates in the current Data Bank, and returns the value found there.
226 |   ##  8-bit: and ($12),Y -- cycle: 5 -- length: 2 / 16-bit: cycle: 6 -- length: 2
227 |   ## +1 cycle if Direct register is not page-aligned (low byte != 0)
228 | 
229 |   let offset = D + readPC()                                # 1 cycle
230 |   directLowNonZero(toAddr(0, offset), D)                   # (+1 Direct register low byte != 0)
231 | 
232 |   let adr = sys.readIndirectAddr(offset)                   # 2 cycles (pointer dereference)
233 |   let effectiveAddr = adr + Y
234 |   crossBoundary(effectiveAddr, adr.bank)                   # (+1 if crossing data bank boundary)
235 | 
236 |   result = sys.readData(T, effectiveAddr, ecc)             # 1 cycle (8-bit) or 2 cycles (16-bit)
237 | 


--------------------------------------------------------------------------------
/glyph/snes/cpu.nim:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018 Mamy André-Ratsimbazafy
2 | # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
3 | 


--------------------------------------------------------------------------------
/glyph/snes/datatypes.nim:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2018 Mamy André-Ratsimbazafy
  2 | # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
  3 | 
  4 | import tables
  5 | 
  6 | ######################################################################
  7 | #
  8 | # Helpers
  9 | #
 10 | ######################################################################
 11 | 
 12 | # We don't use {.union.} types here for lo and hi uint8 access of uint16
 13 | # as it doesn't work with JS target.
 14 | # Ergonomically it also requires extra `u16`, `u8.lo`, `u8.hi` access.
 15 | 
 16 | template lo*(x: uint16): uint8 = uint8(x)                  # low byte of a 16-bit value
 17 | template `lo=`*(x: var uint16, data: uint8) =
 18 |   x = (x and 0xFF00) or uint16(data)                        # keep high byte, replace low byte
 19 | 
 20 | template hi*(x: uint16): uint8 = (x shr 8).uint8           # high byte of a 16-bit value
 21 | template `hi=`*(x: var uint16, data: uint8) =
 22 |   x = (x and 0x00FF) or (uint16(data) shl 8)                # keep low byte, replace high byte
 23 | 
 24 | func isMsbSet*[T: SomeUnsignedInt](n: T): bool {.inline.}=
 25 |   ## True when the top (most significant) bit of `n` is 1.
 26 |   const topBit = sizeof(T) * 8 - 1
 27 |   (n shr topBit) != T(0)
 28 | 
 29 | ######################################################################
 30 | #
 31 | # CPU
 32 | #
 33 | ######################################################################
 34 | 
 35 | type
 36 |   # Note using uint8 instead of machine word size will add zero-extending overhead at every load
 37 | 
 38 |   CPUStatusKind* = enum
 39 |     Carry                   ## C - 0b00000001
 40 |     Zero                    ## Z - 0b00000010
 41 |     IRQ_Disabled            ## I - 0b00000100
 42 |     Decimal_Mode            ## D - 0b00001000
 43 |     Index8bit               ## X - 0b00010000
 44 |     Accum8bit               ## M - 0b00100000
 45 |     Overflow                ## V - 0b01000000
 46 |     Negative                ## N - 0b10000000
 47 |     Emulation_mode          ## E - hidden / B - Break 0b00010000. Defines whether the CPU is in 6502 mode or 65816 mode
 48 | 
 49 |   CpuRegs* = object
 50 |     # General purpose registers
 51 |     A*: uint16              ## Accumulator - Math register. Stores operands or results of arithmetic operations.
 52 |     X*, Y*: uint16          ## Index registers. Reference memory, pass data, counters for loops ...
 53 |     # Addressing registers
 54 |     D*: uint16              ## Direct page register. Direct addressing modes add it to their 8-bit operand to form a bank-0 address.
 55 |     DB*: uint8              ## Data Bank. Holds the default bank for memory transfers.
 56 |     # Program control registers
 57 |     PB*: uint8              ## Program Bank. Holds the bank address of all instruction fetches.
 58 |     PC*: uint16             ## Program Counter. Address of the current memory instruction.
 59 |     SP*: uint16             ## Stack Pointer.
 60 |     # Status register
 61 |     P*: set[CPUStatusKind]  ## Processor status, stored as a set of CPUStatusKind flags
 62 | 
 63 |   AddressingMode* = enum
 64 |     # $number is a number in hexadecimal representation; a leading # marks an immediate constant
 65 |     # Name                  # Example assembly syntax
 66 |     Accumulator             # dec a
 67 |     Implied                 # clc
 68 |     Immediate               # inc #$12 or #$1234
 69 |     Absolute                # and $1234
 70 |     AbsoluteLong            # and $123456
 71 |     AbsoluteLongX           # and $123456,X
 72 |     AbsoluteX               # and $1234,X
 73 |     AbsoluteY               # and $1234,Y
 74 |     AbsoluteXIndirect       # jmp ($1234,X)
 75 |     AbsoluteIndirect        # jmp ($1234)
 76 |     AbsoluteIndirectLong    # jml [$1234]
 77 |     Direct                  # and $12
 78 |     DirectX                 # stz $12,X
 79 |     DirectY                 # stz $12,Y
 80 |     DirectXIndirect         # and ($12,X)
 81 |     DirectIndirect          # and ($12)
 82 |     DirectIndirectLong      # and [$12]
 83 |     DirectIndirectY         # and ($12),Y
 84 |     DirectIndirectLongY     # and [$12],Y
 85 |     ProgramCounterRelative  # beq $12
 86 |     ProgCountRelativeLong   # brl $1234
 87 |     StackRelative           # and $12,S
 88 |     StackRelativeIndirectY  # and ($12,S),Y
 89 |     BlockMove               # mvp $12, $34
 90 | 
 91 |   Cpu* = object             # CPU state: register file plus a running cycle counter
 92 |     regs*: CpuRegs
 93 |     cycles*: int            # Incremented through the CycleCPU templates
 94 | 
 95 | template genFlagAccessor(flag: CPUStatusKind, accessor: untyped) =
 96 |   # Declares an exported getter/setter pair named `accessor` operating on `flag` of a status set
 97 |   template `accessor`*(P: set[CPUStatusKind]): bool = contains(P, flag)
 98 | 
 99 |   template `accessor=`*(P: set[CPUStatusKind], val: bool) =
100 |     # Include the flag when `val` is true, exclude it otherwise
101 |     if val: incl(P, flag)
102 |     else: excl(P, flag)
103 | 
104 | 
105 | genFlagAccessor(Carry, carry)
106 | genFlagAccessor(Zero, zero)
107 | genFlagAccessor(IRQ_Disabled, irq_disabled)
108 | genFlagAccessor(Decimal_Mode, decimal_mode)
109 | genFlagAccessor(Index8bit, index8bit)
110 | genFlagAccessor(Accum8bit, accum8bit)
111 | genFlagAccessor(Overflow, overflow)
112 | genFlagAccessor(Negative, negative)
113 | genFlagAccessor(Emulation_mode, emulation_mode)
114 | 
115 | ######################################################################
116 | #
117 | # Opcodes
118 | #
119 | ######################################################################
120 | 
121 | type
122 |   ExtraCycleCost* = enum # Conditions that add cycles on top of an opcode's base cycle count
123 |     Ecc1_m16bit         # +1 cycle if access is done in 16-bit memory or accumulator
124 |     EccDirectLowNonZero # +1 cycle if low byte of Direct page register != 0
125 |     EccCrossBoundary    # +1 cycle if adding index crosses a page boundary
126 |     Ecc2_m16bit         # +2 cycles if access is done in 16-bit memory or accumulator (read-modify-write)
127 |     EccBranchTaken      # +1 cycle if branch taken
128 |     Ecc65C02BranchCross # +1 cycle if branch taken, crosses a page boundary and emulation mode
129 |     Ecc65816Native      # +1 cycle if 65816 mode (no emulation)
130 |     Ecc1_xy16bit        # +1 cycle if access is done in 16-bit index register
131 |     Ecc3_reset          # +3 cycles to shut CPU down: additional cycles required by reset for restart
132 |     Ecc3_interrupt      # +3 cycles to shut CPU down: additional cycles required by interrupt for restart
133 |   # Set of extra-cycle conditions attached to a single opcode entry
134 |   ExtraCycleCosts* = set[ExtraCycleCost]
135 | 
136 | type
137 |   OpcParams* = tuple[name: string, cycles: int, ecc: NimNode, addr_mode: NimNode, impl: NimNode] # One opcode entry; ecc, addr_mode and impl are kept as AST nodes
138 |   OpcTable* = OrderedTable[int, OpcParams] # NOTE(review): presumably keyed by the opcode byte - confirm in macros_opcodes
139 | 
140 | 
141 | ######################################################################
142 | #
143 | # Memory
144 | #
145 | ######################################################################
146 | type
147 |   Mem* = object # Placeholder: no fields declared yet
148 | 
149 |   Sys* = ref object # Whole system state, shared by reference: CPU plus memory
150 |     cpu*: Cpu
151 |     mem*: Mem
152 | 
153 |   Addr* = distinct range[0'u32..0xFFFFFF'u32]
154 |     ## 24-bit address
155 | 
156 | proc `shl`(x: Addr, y: int): Addr {.borrow, noSideEffect.} # Borrowed from the base integer type; module-private
157 | proc `or`(x, y: Addr): Addr {.borrow, noSideEffect.} # Borrowed from the base integer type; module-private
158 | proc `+`*(x, y: Addr): Addr {.borrow, noSideEffect.} # Borrowed and exported
159 | # Offsets an address by any integer, converting the offset to Addr first
160 | template `+`*(x: Addr, y: SomeInteger): Addr =
161 |   x + Addr(y)
162 | # Builds a 24-bit address: `bank` lands in bits 16..23, `adr` in bits 0..15
163 | template toAddr*(bank: uint8, adr: uint16): Addr =
164 |   Addr(bank) shl 16 or Addr(adr) # `shl` binds tighter than `or`
165 | 
166 | func `[]`*(mem: Mem, adr: Addr): uint8 {.inline.}=
167 |   # Stub: nothing is read yet, the default result (0) is returned for every address
168 |   discard
169 | 
170 | func `[]`*(mem: Mem, bank: uint8, adr: uint16): uint8 {.inline.}=
171 |   # Convenience overload: composes the 24-bit address and reads through the Addr overload
172 |   mem[toAddr(bank, adr)]
173 | 
174 | template bank*(adr: Addr): uint8 =
175 |   ## Extract the bank byte (bits 16..23) of a 24-bit address
176 |   (adr.uint32 shr 16).uint8
177 | template `bank=`*(adr: var Addr, bank: uint8) =
178 |   ## Set/overwrite the databank of a 24-bit address $DBHHLL. Masks the raw uint32 since `Addr` borrows no `and`
179 |   adr = Addr((uint32(adr) and 0x00FFFF'u32) or (uint32(bank) shl 16))
180 | 
181 | template `lo=`*(adr: var Addr, lo: uint8) =
182 |   ## Set/overwrite the low byte of a 24-bit address $DBHHLL. Masks the raw uint32 since `Addr` borrows no `and`
183 |   adr = Addr((uint32(adr) and 0xFFFF00'u32) or uint32(lo))
184 | 
185 | template `hi=`*(adr: var Addr, hi: uint8) =
186 |   ## Set/overwrite the hi byte of a 24-bit address $DBHHLL. Masks the raw uint32 since `Addr` borrows no `and`
187 |   adr = Addr((uint32(adr) and 0xFF00FF'u32) or (uint32(hi) shl 8))
188 | 
189 | ######################################################################
190 | #
191 | # Aliases
192 | #
193 | ######################################################################
194 | template DB*(): uint8 {.dirty.} = sys.cpu.regs.DB # dirty: `sys` is resolved where the alias is expanded
195 | template PB*(): uint8 {.dirty.} = sys.cpu.regs.PB # field is `PB`; the first letter of an identifier is case-sensitive
196 | template PC*(): uint16 {.dirty.} = sys.cpu.regs.PC
197 | template P*(): set[CPUStatusKind] {.dirty.} = sys.cpu.regs.P
198 | 
199 | template D*(): uint16 {.dirty.} = sys.cpu.regs.D
200 | 
201 | template X*(): uint16 {.dirty.} = sys.cpu.regs.X
202 | template Y*(): uint16 {.dirty.} = sys.cpu.regs.Y
203 | # Cycle accounting and program counter stepping
204 | template CycleCPU*() {.dirty.} = inc sys.cpu.cycles
205 | template CycleCPU*(n: int) {.dirty.} = inc sys.cpu.cycles, n
206 | template Next*()     {.dirty.} = inc PC
207 | 


--------------------------------------------------------------------------------
/glyph/snes/opcodes.nim:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2018 Mamy André-Ratsimbazafy
  2 | # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
  3 | 
  4 | import ./private/macros_opcodes, ./datatypes
  5 | 
  6 | template branch(condition: untyped) {.dirty.} = # Shared body of the conditional relative branches
  7 |   let jmpRelAddr = sys.immediate(uint8, `extraCycleCosts`{.inject.})
  8 |   if condition:
  9 |     CycleCPU() # +1 cycle when the branch is taken
 10 |     let jmpAddr = sys.cpu.regs.PC + cast[uint16](int16(cast[int8](jmpRelAddr))) # offset is a signed byte: sign-extend, uint16 add wraps
 11 |     if P.emulation_mode and (jmpAddr shr 8) != (sys.cpu.regs.PC shr 8):
 12 |       # Extra-cycle if we cross a 256-byte page boundary in emulation mode (target high byte differs from PC's)
 13 |       CycleCPU()
 14 |     sys.cpu.regs.PC = jmpAddr
 15 | 
 16 | genOpcTable:
 17 |   op ADC: # Add with Carry
 18 |     0x61: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
 19 |     0x63: cycles 4, {Ecc1_m16bit}                                       , StackRelative
 20 |     0x65: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
 21 |     0x67: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
 22 |     0x69: cycles 2, {Ecc1_m16bit}                                       , Immediate
 23 |     0x6D: cycles 4, {Ecc1_m16bit}                                       , Absolute
 24 |     0x6F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
 25 |     0x71: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero, EccCrossBoundary}, DirectIndirectY
 26 |     0x72: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
 27 |     0x73: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
 28 |     0x75: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
 29 |     0x77: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
 30 |     0x79: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteY
 31 |     0x7D: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteX
 32 |     0x7F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
 33 | 
 34 |     implementation:
 35 |       # ###################################################################################
 36 |       template adcImpl(sys: Sys, T: typedesc[uint8 or uint16], carryOut, overflowOut: var bool) =
 37 |         # Implement uint8 and uint16 mode. Output names differ from the `carry`/`overflow` flag accessors on purpose
 38 | 
 39 |         template A {.dirty.} =
 40 |           # Alias for accumulator depending on mode
 41 |           when T is uint16: sys.cpu.regs.A
 42 |           else: sys.cpu.regs.A.lo
 43 | 
 44 |         func add(x, y: T, carryIn: bool, carry, overflow: var bool): T {.inline.} =
 45 |           # Add function helper: result = x + y + carryIn, wrapping at the width of T
 46 |           # Carry edge cases on uint8:
 47 |           #   - x =   0, y =   0, P.carry = 0 --> result =   0, carry = 0
 48 |           #   - x = 255, y =   0, P.carry = 1 --> result =   0, carry = 1
 49 |           #   - x =   0, y = 255, P.carry = 1 --> result =   0, carry = 1
 50 |           #   - x = 127, y = 128, P.carry = 1 --> result =   0, carry = 1
 51 |           #   - x = 255, y = 255, P.carry = 1 --> result = 255, carry = 1
 52 |           #   - x = 255, y =   0, P.carry = 0 --> result = 255, carry = 0
 53 |           #   - x = 127, y = 128, P.carry = 0 --> result = 255, carry = 0
 54 |           # Overflow is signed: set only when x and y share a sign that the result lacks,
 55 |           #   e.g. 127 + 1 = 128 (-128) or 128 + 255 = 127 (-128 + -1); never for 127 + 128
 56 |           let partial = x + y
 57 |           result = partial + T(carryIn)
 58 |           # Unsigned wrap-around can happen in either of the two additions (never in both)
 59 |           carry = partial < x or result < partial
 60 |           overflow = ((x xor result) and (y xor result)).isMsbSet
 61 | 
 62 |         # Fetch data.
 63 |         # `addressingMode` and `extraCycleCosts` are injected by "implementation"
 64 |         let val = sys.`addressingMode`(T, `extraCycleCosts`{.inject.})
 65 | 
 66 |         # Computation
 67 |         # TODO: Decimal mode
 68 |         # Single pass with the incoming carry so both flags come from the full x + y + C sum
 69 |         A = add(A, val, P.carry, carryOut, overflowOut)
 70 |       # ###################################################################################
 71 | 
 72 |       var carry, overflow = false
 73 |       # Accumulator is 8-bit in emulation mode or when the M flag (Accum8bit) is set
 74 |       if P.emulation_mode or P.accum8bit:
 75 |         sys.adcImpl(uint8, carry, overflow)
 76 |       else:
 77 |         sys.adcImpl(uint16, carry, overflow)
 78 | 
 79 |       # Sets the flags. NOTE(review): N/Z must come from the low byte in 8-bit mode - confirm what `A` resolves to here
 80 |       P.carry    = carry
 81 |       P.overflow = overflow
 82 |       P.negative = A.isMsbSet
 83 |       P.zero     = A == 0
 84 | 
 85 |   op AND: # AND Accumulator with memory
 86 |     0x21: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
 87 |     0x23: cycles 4, {Ecc1_m16bit}                                       , StackRelative
 88 |     0x25: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
 89 |     0x27: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
 90 |     0x29: cycles 2, {Ecc1_m16bit}                                       , Immediate
 91 |     0x2D: cycles 4, {Ecc1_m16bit}                                       , Absolute
 92 |     0x2F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
 93 |     0x31: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero, EccCrossBoundary}, DirectIndirectY
 94 |     0x32: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
 95 |     0x33: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
 96 |     0x35: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
 97 |     0x37: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
 98 |     0x39: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteY
 99 |     0x3D: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteX
100 |     0x3F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
101 | 
102 |     implementation:
103 |       # ###################################################################################
104 |       template andImpl(sys: Sys, T: typedesc[uint8 or uint16]) =
105 |         # Implement uint8 and uint16 mode
106 | 
107 |         template A {.dirty.} =
108 |           # Alias for accumulator depending on mode
109 |           when T is uint16: sys.cpu.regs.A
110 |           else: sys.cpu.regs.A.lo
111 | 
112 |         # Fetch data.
113 |         # `addressingMode` and `extraCycleCosts` are injected by "implementation"
114 |         let val = sys.`addressingMode`(T, `extraCycleCosts`{.inject.})
115 | 
116 |         # Computation
117 |         A = A and val
118 |       # ###################################################################################
119 |       # Accumulator is 8-bit in emulation mode or when the M flag (Accum8bit) is set
120 |       if P.emulation_mode or P.accum8bit:
121 |         sys.andImpl(uint8)
122 |       else:
123 |         sys.andImpl(uint16)
124 | 
125 |       # Sets the flags
126 |       P.negative = A.isMsbSet
127 |       P.zero     = A == 0
128 | 
129 |   op ASL: # Arithmetic Shift Left
130 |     0x06: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                  , Direct
131 |     0x0A: cycles 2, {}                                                  , Accumulator
132 |     0x0E: cycles 6, {Ecc2_m16bit}                                       , Absolute
133 |     0x16: cycles 6, {EccDirectLowNonZero, Ecc2_m16bit}                  , DirectX
134 |     0x1E: cycles 7, {Ecc2_m16bit}                                       , AbsoluteX
135 | 
136 |     implementation:
137 |       # ###################################################################################
138 |       template aslImpl(sys: Sys, T: typedesc[uint8 or uint16], carry: var bool) =
139 |         # Implement uint8 and uint16 mode
140 | 
141 |         template A {.dirty.} =
142 |           # Alias for accumulator depending on mode
143 |           when T is uint16: sys.cpu.regs.A
144 |           else: sys.cpu.regs.A.lo
145 | 
146 |         # Fetch data.
147 |         # `addressingMode` and `extraCycleCosts` are injected by "implementation"
148 |         let val = sys.`addressingMode`(T, `extraCycleCosts`{.inject.})
149 | 
150 |         # Computation. NOTE(review): the result always lands in the accumulator, also for memory operands - confirm write-back
151 |         A = val shl 1
152 |         carry = val.isMsbSet
153 |       # ###################################################################################
154 |       # Operand is 8-bit in emulation mode or when the M flag (Accum8bit) is set
155 |       if P.emulation_mode or P.accum8bit:
156 |         sys.aslImpl(uint8, P.carry)
157 |       else:
158 |         sys.aslImpl(uint16, P.carry)
159 | 
160 |       # Sets the flags
161 |       P.negative = A.isMsbSet
162 |       P.zero     = A == 0
163 | 
164 |   op BCC: # Branch if Carry Clear
165 |     0x90: cycles 2, {EccBranchTaken, Ecc65C02BranchCross}              , ProgramCounterRelative
166 |     implementation:
167 |       branch(not P.carry)
168 | 
169 |   op BCS: # Branch if Carry Set
170 |     0xB0: cycles 2, {EccBranchTaken, Ecc65C02BranchCross}              , ProgramCounterRelative
171 |     implementation:
172 |       branch(P.carry)
173 | 
174 |   op BEQ: # Branch if Equal
175 |     0xF0: cycles 2, {EccBranchTaken, Ecc65C02BranchCross}              , ProgramCounterRelative
176 |     implementation:
177 |       branch(P.zero)
178 | 
179 |   op BIT: # Test Bits
180 |     0x24: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                 , Direct
181 |     0x2C: cycles 4, {Ecc1_m16bit}                                      , Absolute
182 |     0x34: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                 , DirectX
183 |     0x3C: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                    , AbsoluteX
184 |     0x89: cycles 2, {Ecc1_m16bit}                                      , Immediate
185 | 
186 |     implementation:
187 |       discard # Not implemented yet
188 | 
189 |   op BNE: # Branch if Not Equal (opcode 0xD0; 0x30 belongs to BMI)
190 |     0xD0: cycles 2, {EccBranchTaken, Ecc65C02BranchCross}              , ProgramCounterRelative
191 |     implementation:
192 |       branch(not P.zero)
193 | 
194 |   op BPL: # Branch if Plus (Negative flag clear)
195 |     0x10: cycles 2, {EccBranchTaken, Ecc65C02BranchCross}              , ProgramCounterRelative
196 |     implementation:
197 |       branch(not P.negative)
198 | 
199 |   op BRA: # Branch Always
200 |     0x80: cycles 3, {Ecc65C02BranchCross}                              , ProgramCounterRelative
201 |     implementation:
202 |       discard
203 | 
204 |   op BRK: # Break (software interrupt): +1 cycle in native mode, like COP; the branch costs do not apply
205 |     0x00: cycles 7, {Ecc65816Native}                                   , Stack # NOTE(review): `Stack` is not declared in AddressingMode - confirm
206 |     implementation:
207 |       discard
208 | 
209 |   op BRL: # Branch Long Always (16-bit relative offset, 4 cycles)
210 |     0x82: cycles 4, {}                                                 , ProgCountRelativeLong
211 |     implementation:
212 |       discard
213 | 
214 |   op BVC: # Branch if Overflow Clear
215 |     0x50: cycles 2, {EccBranchTaken, Ecc65C02BranchCross}              , ProgramCounterRelative
216 |     implementation:
217 |       branch(not P.overflow)
218 | 
219 |   op BVS: # Branch if Overflow Set
220 |     0x70: cycles 2, {EccBranchTaken, Ecc65C02BranchCross}              , ProgramCounterRelative
221 |     implementation:
222 |       branch(P.overflow)
223 | 
224 |   op CLC: # Clear Carry
225 |     0x18: cycles 2, {}                                                 , Implied
226 |     implementation:
227 |       discard
228 | 
229 |   op CLD: # Clear Decimal Mode Flag
230 |     0xD8: cycles 2, {}                                                 , Implied
231 |     implementation:
232 |       discard
233 | 
234 |   op CLI: # Clear Interrupt Disable Flag
235 |     0x58: cycles 2, {}                                                 , Implied
236 |     implementation:
237 |       discard
238 | 
239 |   op CLV: # Clear Overflow Flag
240 |     0xB8: cycles 2, {}                                                 , Implied
241 |     implementation:
242 |       discard
243 | 
244 |   op CMP: # Compare Accumulator with Memory
245 |     0xC1: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
246 |     0xC3: cycles 4, {Ecc1_m16bit}                                       , StackRelative
247 |     0xC5: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
248 |     0xC7: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
249 |     0xC9: cycles 2, {Ecc1_m16bit}                                       , Immediate
250 |     0xCD: cycles 4, {Ecc1_m16bit}                                       , Absolute
251 |     0xCF: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
252 |     0xD1: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero, EccCrossBoundary}, DirectIndirectY
253 |     0xD2: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
254 |     0xD3: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
255 |     0xD5: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
256 |     0xD7: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
257 |     0xD9: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteY
258 |     0xDD: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteX
259 |     0xDF: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
260 | 
261 |     implementation:
262 |       discard
263 | 
264 |   op COP: # Co-Processor Enable
265 |     0x02: cycles 7, {Ecc65816Native}                                    , Immediate
266 | 
267 |     implementation:
268 |       discard
269 | 
270 |   op CPX: # Compare Index Register X with Memory
271 |     0xE0: cycles 2, {Ecc1_xy16bit}                                      , Immediate
272 |     0xE4: cycles 3, {EccDirectLowNonZero, Ecc1_xy16bit}                 , Direct
273 |     0xEC: cycles 4, {Ecc1_xy16bit}                                      , Absolute
274 | 
275 |     implementation:
276 |       discard
277 | 
278 |   op CPY: # Compare Index Register Y with Memory
279 |     0xC0: cycles 2, {Ecc1_xy16bit}                                      , Immediate
280 |     0xC4: cycles 3, {EccDirectLowNonZero, Ecc1_xy16bit}                 , Direct
281 |     0xCC: cycles 4, {Ecc1_xy16bit}                                      , Absolute
282 | 
283 |     implementation:
284 |       discard
285 | 
286 |   op DEC: # Decrement
287 |     0x3A: cycles 2, {}                                                  , Accumulator
288 |     0xC6: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                  , Direct
289 |     0xCE: cycles 6, {Ecc2_m16bit}                                       , Absolute
290 |     0xD6: cycles 6, {EccDirectLowNonZero, Ecc2_m16bit}                  , DirectX
291 |     0xDE: cycles 7, {Ecc2_m16bit}                                       , AbsoluteX
292 |     # Direct page read-modify-write costs 5 base cycles, same as ASL/LSR/ROL/ROR
293 |     implementation:
294 |       discard
295 | 
296 |   op DEX: # Decrement Index Register X
297 |     0xCA: cycles 2, {}                                                  , Implied
298 |     implementation:
299 |       discard
300 | 
301 |   op DEY: # Decrement Index Register Y
302 |     0x88: cycles 2, {}                                                  , Implied
303 |     implementation:
304 |       discard
305 | 
306 |   op EOR: # Exclusive OR
307 |     0x41: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
308 |     0x43: cycles 4, {Ecc1_m16bit}                                       , StackRelative
309 |     0x45: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
310 |     0x47: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
311 |     0x49: cycles 2, {Ecc1_m16bit}                                       , Immediate
312 |     0x4D: cycles 4, {Ecc1_m16bit}                                       , Absolute
313 |     0x4F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
314 |     0x51: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero, EccCrossBoundary}, DirectIndirectY
315 |     0x52: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
316 |     0x53: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
317 |     0x55: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
318 |     0x57: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
319 |     0x59: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteY
320 |     0x5D: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteX
321 |     0x5F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
322 | 
323 |     implementation:
324 |       discard
325 | 
326 |   op INC: # Increment
327 |     0x1A: cycles 2, {}                                                  , Accumulator
328 |     0xE6: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                  , Direct
329 |     0xEE: cycles 6, {Ecc2_m16bit}                                       , Absolute
330 |     0xF6: cycles 6, {EccDirectLowNonZero, Ecc2_m16bit}                  , DirectX
331 |     0xFE: cycles 7, {Ecc2_m16bit}                                       , AbsoluteX
332 |     # Direct page read-modify-write costs 5 base cycles, same as ASL/LSR/ROL/ROR
333 |     implementation:
334 |       discard
335 | 
336 |   op INX: # Increment Index Register X
337 |     0xE8: cycles 2, {}                                                  , Implied
338 |     implementation:
339 |       discard
340 | 
341 |   op INY: # Increment Index Register Y
342 |     0xC8: cycles 2, {}                                                  , Implied
343 |     implementation:
344 |       discard
345 | 
346 |   op JMP: # Jump
347 |     0x4C: cycles 3, {}                                                  , Absolute
348 |     0x5C: cycles 4, {}                                                  , AbsoluteLong
349 |     0x6C: cycles 5, {}                                                  , AbsoluteIndirect
350 |     0x7C: cycles 6, {}                                                  , AbsoluteXIndirect
351 |     0xDC: cycles 6, {}                                                  , AbsoluteIndirectLong
352 | 
353 |     implementation:
354 |       discard # Not implemented yet
355 | 
356 |   op JSR: # Jump to Subroutine
357 |     0x20: cycles 6, {}                                                  , Absolute
358 |     0x22: cycles 8, {}                                                  , AbsoluteLong
359 |     0xFC: cycles 8, {}                                                  , AbsoluteXIndirect
360 | 
361 |     implementation:
362 |       discard
363 | 
364 |   op LDA: # Load Accumulator from Memory
365 |     0xA1: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
366 |     0xA3: cycles 4, {Ecc1_m16bit}                                       , StackRelative
367 |     0xA5: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
368 |     0xA7: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
369 |     0xA9: cycles 2, {Ecc1_m16bit}                                       , Immediate
370 |     0xAD: cycles 4, {Ecc1_m16bit}                                       , Absolute
371 |     0xAF: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
372 |     0xB1: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero, EccCrossBoundary}, DirectIndirectY
373 |     0xB2: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
374 |     0xB3: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
375 |     0xB5: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
376 |     0xB7: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
377 |     0xB9: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteY
378 |     0xBD: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteX
379 |     0xBF: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
380 | 
381 |     implementation:
382 |       discard
383 | 
384 |   op LDX: # Load Index Register X from Memory
385 |     0xA2: cycles 2, {Ecc1_xy16bit}                                      , Immediate
386 |     0xA6: cycles 3, {EccDirectLowNonZero, Ecc1_xy16bit}                 , Direct
387 |     0xAE: cycles 4, {Ecc1_xy16bit}                                      , Absolute
388 |     0xB6: cycles 4, {EccDirectLowNonZero, Ecc1_xy16bit}                 , DirectY
389 |     0xBE: cycles 4, {EccCrossBoundary, Ecc1_xy16bit}                    , AbsoluteY
390 | 
391 |     implementation:
392 |       discard
393 | 
394 |   op LDY: # Load Index Register Y from Memory
395 |     0xA0: cycles 2, {Ecc1_xy16bit}                                      , Immediate
396 |     0xA4: cycles 3, {EccDirectLowNonZero, Ecc1_xy16bit}                 , Direct
397 |     0xAC: cycles 4, {Ecc1_xy16bit}                                      , Absolute
398 |     0xB4: cycles 4, {EccDirectLowNonZero, Ecc1_xy16bit}                 , DirectX
399 |     0xBC: cycles 4, {EccCrossBoundary, Ecc1_xy16bit}                    , AbsoluteX
400 | 
401 |     implementation:
402 |       discard
403 | 
404 |   op LSR: # Logical Shift Memory or Accumulator Right
405 |     0x46: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                  , Direct
406 |     0x4A: cycles 2, {}                                                  , Accumulator
407 |     0x4E: cycles 6, {Ecc2_m16bit}                                       , Absolute
408 |     0x56: cycles 6, {EccDirectLowNonZero, Ecc2_m16bit}                  , DirectX
409 |     0x5E: cycles 7, {Ecc2_m16bit}                                       , AbsoluteX
410 | 
411 |     implementation:
412 |       discard
413 | 
414 |   op MVN: # Block Move Negative
415 |     0x54: cycles 1, {EccDirectLowNonZero}                               , BlockMove
416 |     implementation:
417 |       discard
418 | 
419 |   op MVP: # Block Move Positive
420 |     0x44: cycles 1, {EccDirectLowNonZero}                               , BlockMove
421 |     implementation:
422 |       discard
423 | 
424 |   op NOP: # No Operation
425 |     0xEA: cycles 2, {}                                                  , Implied
426 |     implementation:
427 |       discard
428 | 
429 |   op ORA: # OR Accumulator with Memory
430 |     0x01: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
431 |     0x03: cycles 4, {Ecc1_m16bit}                                       , StackRelative
432 |     0x05: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
433 |     0x07: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
434 |     0x09: cycles 2, {Ecc1_m16bit}                                       , Immediate
435 |     0x0D: cycles 4, {Ecc1_m16bit}                                       , Absolute
436 |     0x0F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
437 |     0x11: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero, EccCrossBoundary}, DirectIndirectY
438 |     0x12: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
439 |     0x13: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
440 |     0x15: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
441 |     0x17: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
442 |     0x19: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteY
443 |     0x1D: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteX
444 |     0x1F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
445 | 
446 |     implementation:
447 |       discard
448 | 
449 |   op PEA: # Push Effective Absolute Address
450 |     0xF4: cycles 5, {}                                                  , Immediate
451 |     implementation:
452 |       discard
453 | 
454 |   op PEI: # Push Effective Indirect Address
455 |     0xD4: cycles 6, {EccDirectLowNonZero}                               , Direct
456 |     implementation:
457 |       discard
458 | 
459 |   op PER: # Push Effective PC Relative Indirect Address
460 |     0x62: cycles 6, {}                                                  , Immediate
461 |     implementation:
462 |       discard
463 | 
464 |   op PHA: # Push Accumulator (no operand, like the other push/pull opcodes)
465 |     0x48: cycles 3, {Ecc1_m16bit}                                       , Implied
466 |     implementation:
467 |       discard
468 | 
469 |   op PHB: # Push Data Bank Register
470 |     0x8B: cycles 3, {}                                                  , Implied
471 |     implementation:
472 |       discard
473 | 
474 |   op PHD: # Push Direct Page Register
475 |     0x0B: cycles 4, {}                                                  , Implied
476 |     implementation:
477 |       discard
478 | 
479 |   op PHK: # Push Program Bank Register
480 |     0x4B: cycles 3, {}                                                  , Implied
481 |     implementation:
482 |       discard
483 | 
484 |   op PHP: # Push Processor Status Register
485 |     0x08: cycles 3, {}                                                  , Implied
486 |     implementation:
487 |       discard
488 | 
489 |   op PHX: # Push Index Register X
490 |     0xDA: cycles 3, {Ecc1_xy16bit}                                      , Implied
491 |     implementation:
492 |       discard
493 | 
494 |   op PHY: # Push Index Register Y
495 |     0x5A: cycles 3, {Ecc1_xy16bit}                                      , Implied
496 |     implementation:
497 |       discard
498 | 
499 |   op PLA: # Pull Accumulator
500 |     0x68: cycles 4, {Ecc1_m16bit}                                       , Implied
501 |     implementation:
502 |       discard
503 | 
504 |   op PLB: # Pull Data Bank Register
505 |     0xAB: cycles 4, {}                                                  , Implied
506 |     implementation:
507 |       discard
508 | 
509 |   op PLD: # Pull Direct Page Register
510 |     0x2B: cycles 5, {}                                                  , Implied
511 |     implementation:
512 |       discard
513 | 
514 |   op PLP: # Pull Processor Status Register
515 |     0x28: cycles 4, {}                                                  , Implied
516 |     implementation:
517 |       discard
518 | 
519 |   op PLX: # Pull Index Register X
520 |     0xFA: cycles 4, {Ecc1_xy16bit}                                      , Implied
521 |     implementation:
522 |       discard
523 | 
524 |   op PLY: # Pull Index Register Y
525 |     0x7A: cycles 4, {Ecc1_xy16bit}                                      , Implied
526 |     implementation:
527 |       discard
528 | 
529 |   op REP: # Reset Processor Status Bits
530 |     0xC2: cycles 3, {}                                                  , Immediate
531 |     implementation:
532 |       discard
533 | 
534 |   op ROL: # Rotate Memory or Accumulator Left
535 |     0x26: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                  , Direct
536 |     0x2A: cycles 2, {}                                                  , Accumulator
537 |     0x2E: cycles 6, {Ecc2_m16bit}                                       , Absolute
538 |     0x36: cycles 6, {EccDirectLowNonZero, Ecc2_m16bit}                  , DirectX
539 |     0x3E: cycles 7, {Ecc2_m16bit}                                       , AbsoluteX
540 | 
541 |     implementation:
542 |       discard
543 | 
544 |   op ROR: # Rotate Memory or Accumulator Right
545 |     0x66: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                  , Direct
546 |     0x6A: cycles 2, {}                                                  , Accumulator
547 |     0x6E: cycles 6, {Ecc2_m16bit}                                       , Absolute
548 |     0x76: cycles 6, {EccDirectLowNonZero, Ecc2_m16bit}                  , DirectX
549 |     0x7E: cycles 7, {Ecc2_m16bit}                                       , AbsoluteX
550 | 
551 |     implementation:
552 |       discard
553 | 
554 |   op RTI: # Return from Interrupt
555 |     0x40: cycles 6, {Ecc65816Native}                                    , Implied
556 |     implementation:
557 |       discard
558 | 
559 |   op RTL: # Return from Subroutine Long
560 |     0x6B: cycles 6, {}                                                  , Implied
561 |     implementation:
562 |       discard
563 | 
564 |   op RTS: # Return from Subroutine
565 |     0x60: cycles 6, {}                                                  , Implied
566 |     implementation:
567 |       discard
568 | 
569 |   op SBC: # Subtract with Borrow from Accumulator
570 |     0xE1: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
571 |     0xE3: cycles 4, {Ecc1_m16bit}                                       , StackRelative
572 |     0xE5: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
573 |     0xE7: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
574 |     0xE9: cycles 2, {Ecc1_m16bit}                                       , Immediate
575 |     0xED: cycles 4, {Ecc1_m16bit}                                       , Absolute
576 |     0xEF: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
577 |     0xF1: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero, EccCrossBoundary}, DirectIndirectY
578 |     0xF2: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
579 |     0xF3: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
580 |     0xF5: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
581 |     0xF7: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
582 |     0xF9: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteY
583 |     0xFD: cycles 4, {Ecc1_m16bit, EccCrossBoundary}                     , AbsoluteX
584 |     0xFF: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
585 | 
586 |     implementation:
587 |       discard
588 | 
589 |   op SEC: ## Set Carry Flag
590 |     0x38: cycles 2, {}                                                  , Implied
591 |     implementation:
592 |       discard
593 | 
594 |   op SED: ## Set Decimal Flag
595 |     0xF8: cycles 2, {}                                                  , Implied
596 |     implementation:
597 |       discard
598 | 
599 |   op SEI: ## Set Interrupt Flag
600 |     0x78: cycles 2, {}                                                  , Implied
601 |     implementation:
602 |       discard
603 | 
604 |   op SEP: ## Set Processor Status Bits
605 |     0xE2: cycles 3, {}                                                  , Immediate
606 |     implementation:
607 |       discard
608 | 
609 |   op STA: # Store Accumulator to Memory (indexed stores always pay the extra indexing cycle, hence no EccCrossBoundary)
610 |     0x81: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectXIndirect
611 |     0x83: cycles 4, {Ecc1_m16bit}                                       , StackRelative
612 |     0x85: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                  , Direct
613 |     0x87: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLong
614 |     0x8D: cycles 4, {Ecc1_m16bit}                                       , Absolute
615 |     0x8F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLong
616 |     0x91: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectY
617 |     0x92: cycles 5, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirect
618 |     0x93: cycles 7, {Ecc1_m16bit}                                       , StackRelativeIndirectY
619 |     0x95: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectX
620 |     0x97: cycles 6, {Ecc1_m16bit, EccDirectLowNonZero}                  , DirectIndirectLongY
621 |     0x99: cycles 5, {Ecc1_m16bit}                                       , AbsoluteY
622 |     0x9D: cycles 5, {Ecc1_m16bit}                                       , AbsoluteX
623 |     0x9F: cycles 5, {Ecc1_m16bit}                                       , AbsoluteLongX
624 | 
625 |     implementation:
626 |       discard
627 | 
628 |   op STP: ## Stop Processor
629 |     0xDB: cycles 3, {Ecc3_reset}                                        , Implied
630 |     implementation:
631 |       discard
632 | 
633 |   op STX: ## Store Index Register X to Memory
634 |     0x86: cycles 3, {EccDirectLowNonZero, Ecc1_xy16bit}                 , Direct
635 |     0x8E: cycles 4, {Ecc1_xy16bit}                                      , Absolute
636 |     0x96: cycles 4, {EccDirectLowNonZero, Ecc1_xy16bit}                 , DirectY
637 | 
638 |     implementation:
639 |       discard
640 | 
641 |   op STY: ## Store Index Register Y to Memory (0x94 is indexed by X: Y cannot index its own store)
642 |     0x84: cycles 3, {EccDirectLowNonZero, Ecc1_xy16bit}                 , Direct
643 |     0x8C: cycles 4, {Ecc1_xy16bit}                                      , Absolute
644 |     0x94: cycles 4, {EccDirectLowNonZero, Ecc1_xy16bit}                 , DirectX
645 | 
646 |     implementation:
647 |       discard
648 | 
649 |   op STZ: ## Store Zero to Memory
650 |     0x64: cycles 3, {Ecc1_m16bit, EccDirectLowNonZero}                 , Direct
651 |     0x74: cycles 4, {Ecc1_m16bit, EccDirectLowNonZero}                 , DirectX
652 |     0x9C: cycles 4, {Ecc1_m16bit}                                      , Absolute
653 |     0x9E: cycles 5, {Ecc1_m16bit}                                      , AbsoluteX
654 | 
655 |     implementation:
656 |       discard
657 | 
658 |   op TAX: ## Transfer Accumulator to Index Register X
659 |     0xAA: cycles 2, {}                                                 , Implied
660 |     implementation:
661 |       discard
662 | 
663 |   op TAY: ## Transfer Accumulator to Index Register Y
664 |     0xA8: cycles 2, {}                                                 , Implied
665 |     implementation:
666 |       discard
667 | 
668 |   op TCD: ## Transfer 16-bit Accumulator to Direct Page Register
669 |     0x5B: cycles 2, {}                                                 , Implied
670 |     implementation:
671 |       discard
672 | 
673 |   op TCS: ## Transfer 16-bit Accumulator to Stack Pointer
674 |     0x1B: cycles 2, {}                                                 , Implied
675 |     implementation:
676 |       discard
677 | 
678 |   op TDC: ## Transfer Direct Page Register to 16-bit Accumulator
679 |     0x7B: cycles 2, {}                                                 , Implied
680 |     implementation:
681 |       discard
682 | 
683 |   op TRB: ## Test and Reset Memory Bits Against Accumulator (read-modify-write: +2 cycles when m is 16-bit, same as TSB)
684 |     0x14: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                 , Direct
685 |     0x1C: cycles 6, {Ecc2_m16bit}                                      , Absolute
686 |     implementation:
687 |       discard
688 | 
689 |   op TSB: ## Test and Set Memory Bits Against Accumulator
690 |     0x04: cycles 5, {EccDirectLowNonZero, Ecc2_m16bit}                 , Direct
691 |     0x0C: cycles 6, {Ecc2_m16bit}                                      , Absolute
692 |     implementation:
693 |       discard
694 | 
695 |   op TSC: ## Transfer Stack Pointer to 16-bit Accumulator
696 |     0x3B: cycles 2, {}                                                 , Implied
697 |     implementation:
698 |       discard
699 | 
700 |   op TSX: ## Transfer Stack pointer to Index Register X
701 |     0xBA: cycles 2, {}                                                 , Implied
702 |     implementation:
703 |       discard
704 | 
705 |   op TXA: ## Transfer Index Register X to Accumulator
706 |     0x8A: cycles 2, {}                                                 , Implied
707 |     implementation:
708 |       discard
709 | 
710 |   op TXS: ## Transfer Index Register X to Stack pointer
711 |     0x9A: cycles 2, {}                                                 , Implied
712 |     implementation:
713 |       discard
714 | 
715 |   op TXY: ## Transfer Index Register X to Index Register Y
716 |     0x9B: cycles 2, {}                                                 , Implied
717 |     implementation:
718 |       discard
719 | 
720 |   op TYA: ## Transfer Index Register Y to Accumulator
721 |     0x98: cycles 2, {}                                                 , Implied
722 |     implementation:
723 |       discard
724 | 
725 |   op TYX: ## Transfer Index Register Y to Index Register X
726 |     0xBB: cycles 2, {}                                                 , Implied
727 |     implementation:
728 |       discard
729 | 
730 |   op WAI: ## Wait for Interrupt
731 |     0xCB: cycles 3, {Ecc3_interrupt}                                   , Implied
732 |     implementation:
733 |       discard
734 | 
735 |   op WDM: ## Reserved for Future Expansion
736 |     0x42: cycles 2, {}                                                 , Immediate
737 |     implementation:
738 |       discard
739 | 
740 |   op XBA: ## Exchange B and A 8-bit Accumulators
741 |     0xEB: cycles 3, {}                                                 , Implied
742 |     implementation:
743 |       discard
744 | 
745 |   op XCE: ## Exchange Carry and Emulation Flags
746 |     0xFB: cycles 2, {}                                                 , Implied
747 |     implementation:
748 |       discard
749 | 


--------------------------------------------------------------------------------
/glyph/snes/private/macros_opcodes.nim:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2018 Mamy André-Ratsimbazafy
  2 | # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
  3 | 
  4 | import macros, strformat, strutils, tables, ../datatypes
  5 | 
  6 | when defined(glyphdebug):
  7 |   import strutils
  8 | 
  9 | macro genOpcTable*(opcs: untyped): untyped =
 10 |   ## Parses the opcode DSL and collects every opcode in a compile-time
 11 |   ## table keyed (and finally sorted) by opcode value.
 12 |   ##
 13 |   ## Each `op` section holds one or more
 14 |   ## `opcode: cycles N, {extra cycle costs}, AddressingMode` lines and must
 15 |   ## end with one `implementation:` section shared by all of them.
 16 |   ## Malformed input or an opcode declared twice aborts compilation.
 17 |   #
 18 |   # Usage:
 19 |   # ------
 20 |   #   genOpcTable:
 21 |   #     op lda: # LDA Load the Accumulator with Memory
 22 |   #       0xA1: cycles 6, {Ecc1_m16bit, EccCrossBoundary}, DirectXIndirect
 23 |   #       0xA3: cycles 4, {Ecc1_m16bit}                  , StackRelative
 24 |   #
 25 |   #       implementation:
 26 |   #         cpu.A = foobar
 27 | 
 28 |   # Parsed AST
 29 |   # ----------
 30 |   #   StmtList
 31 |   #     Command
 32 |   #       Ident "op"
 33 |   #       Ident "lda"
 34 |   #       StmtList
 35 |   #         Call
 36 |   #           IntLit 161
 37 |   #           StmtList
 38 |   #             Command
 39 |   #               Ident "cycles"
 40 |   #               IntLit 6
 41 |   #               Curly
 42 |   #                 Ident "Ecc1_m16bit"
 43 |   #                 Ident "EccCrossBoundary"
 44 |   #               Ident "DirectXIndirect"
 45 |   #         Call
 46 |   #           IntLit 163
 47 |   #           StmtList
 48 |   #             Command
 49 |   #               Ident "cycles"
 50 |   #               IntLit 4
 51 |   #               Curly
 52 |   #                 Ident "Ecc1_m16bit"
 53 |   #               Ident "StackRelative"
 54 |   #         Call
 55 |   #           Ident "implementation"
 56 |   #           StmtList
 57 |   #             Asgn
 58 |   #               DotExpr
 59 |   #                 Ident "cpu"
 60 |   #                 Ident "A"
 61 |   #               Ident "foobar"
 62 | 
 63 |   var opcTable = initOrderedTable[int, OpcParams]()
 64 | 
 65 |   for op in opcs:
 66 |     # Sanity checks. `expectLen`/`error` replace `assert` so that the DSL is
 67 |     # still validated when assertions are disabled, and so that failures
 68 |     # point at the offending node. Arity is checked before indexing.
 69 |     op.expectKind nnkCommand
 70 |     op.expectLen 3
 71 |     if not op[0].eqIdent("op"):
 72 |       error("Expected an `op` section.", op[0])
 73 |     op[1].expectKind nnkIdent
 74 |     op[2].expectKind nnkStmtList
 75 |     op[2].expectMinLen 1
 76 | 
 77 |     # Get name and implementation (always the last statement of the section)
 78 |     let name = op[1].strVal
 79 |     let implSection = op[2][op[2].len - 1]
 80 | 
 81 |     implSection.expectKind nnkCall
 82 |     implSection.expectLen 2
 83 |     if not implSection[0].eqIdent("implementation"):
 84 |       error("An `op` section must end with an `implementation` section.", implSection[0])
 85 |     implSection[1].expectKind nnkStmtList
 86 |     let impl = implSection[1]
 87 | 
 88 |     # Iterate over instruction params
 89 |     # and stop at the implementation, which comes last
 90 |     for instruction in op[2]:
 91 |       if instruction.len > 0 and instruction[0].kind == nnkIdent and instruction[0].eqIdent("implementation"):
 92 |         break
 93 | 
 94 |       # Sanity checks
 95 |       instruction.expectKind nnkCall
 96 |       instruction.expectLen 2
 97 |       instruction[0].expectKind nnkIntLit
 98 |       instruction[1].expectKind nnkStmtList
 99 |       instruction[1].expectLen 1
100 | 
101 |       let params = instruction[1][0]
102 |       params.expectKind nnkCommand
103 |       params.expectLen 4
104 |       if not params[0].eqIdent("cycles"):
105 |         error("Expected `cycles` after the opcode.", params[0])
106 |       params[1].expectKind nnkIntLit
107 |       params[2].expectKind nnkCurly
108 |       params[3].expectKind nnkIdent
109 | 
110 |       # Get the values
111 |       let
112 |         opcode    = instruction[0].intVal.int
113 |         cycles    = params[1].intVal.int
114 |         ecc       = params[2]
115 |         addr_mode = params[3]
116 | 
117 |         opcParams: OpcParams = (name, cycles, ecc, addr_mode, impl)
118 | 
119 |       # Add to the table
120 |       let hasKey = opcTable.hasKeyOrPut(opcode, opcParams)
121 |       if hasKey:
122 |         let usedBy = opcTable[opcode].name
123 |         # Report the duplicate at the opcode literal that caused it
124 |         error(&"Tried to insert opcode 0x{opcode.toHex(2)} for {name}. It is already used by {usedBy} instruction.", instruction[0])
125 | 
126 |   # Reorder by opcode value
127 |   opcTable.sort(proc(x, y: tuple[key: int, val: OpcParams]):int = cmp(x.key, y.key))
128 | 
129 |   when defined(glyphdebug):
130 |     for k, v in opcTable.pairs:
131 |       echo "0x" & k.toHex(2) & " - " & v.name
116 | 


--------------------------------------------------------------------------------
/resources/SNES_resources.md:
--------------------------------------------------------------------------------
 1 | # SNES resources
 2 | 
 3 | | Description                                                                         | Link                                                                                                         |
 4 | | ----------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------ |
 5 | | SFDW - instructions reference   best presentation                                   | https://wiki.superfamicom.org/65816-reference                                                                |
 6 | | Similar table from Commodore World magazine, Issue #16                              | http://www.defence-force.org/computing/oric/coding/annexe_2/index.htm                                        |
 7 | | Various grouping of opcodes (logical, alphabetical, hex, cycle)                     | http://www.thealmightyguru.com/Games/Hacking/Wiki/index.php/6502_Opcodes                                     |
 8 | | Simple overview of opcodes, registers and addressing modes                          | https://en.wikibooks.org/wiki/Super_NES_Programming/65c816_reference                                         |
 9 | | Kafuka board - 65816 ASM                                                            | http://acmlm.kafuka.org/board/thread.php?id=99.                                                              |
10 | | 6502.org - opcode list                                                              | http://6502.org/tutorials/65c816opcodes.html                                                                 |
11 | | Emulator 101 - Opcode, addressing and timing reference                              | https://github.com/kpmiller/emulator101/blob/master/Generate6502Reference/6502ops.csv                        |
12 | | Nesdev - Programmer's manual                                                        | https://wiki.nesdev.com/w/images/7/76/Programmanual.pdf                                                      |
13 | | Excellent short overview                                                            | https://github.com/michielvoo/SNES/wiki/CPU                                                                  |
14 | | Good display of cycle cost                                                          | http://softpixel.com/~cwright/sianse/docs/65816NFO.HTM                                                       |
15 | |                                                                                     | https://github.com/andrew-jacobs/emu816                                                                      |
16 | | The best opcode table                                                               | http://oxyron.de/html/opcodes816.html                                                                        |
17 | | Excellent thread with byuu (higan) and phire (dolphin) discussing subcycle accuracy | https://www.reddit.com/r/emulation/comments/53jdqj/what_exactly_is_a_cycleaccurate_emulator/                 |
18 | | Good overview of addressing modes and cycle costs                                   | https://github.com/gilligan/snesdev/blob/master/docs/65816.txt                                               |
19 | | SNES timings                                                                        | http://folk.uio.no/sigurdkn/snes/timing.txt                                                                  |
20 | | SNES Graphics                                                                       | https://emu-docs.org/Super%20NES/General/snesdoc.html                                                        |
21 | | Official DataSheet with cycle accurate timings                                      | http://datasheets.chipdb.org/Western%20Design/w65c816s.pdf                                                   |
22 | | Memory mapping                                                                      | https://wiki.superfamicom.org/memory-mapping                                                                 |
23 | | LoROM detail                                                                        | https://www.cs.umb.edu/~bazz/snes/cartridges/lorom.html                                                      |
24 | | Database of games  ROM, RAM and PCB                                                 | https://jensma.de/snes/index.php                                                                             |
25 | | In-depth Memory Mapping                                                             | http://problemkaputt.de/fullsnes.htm                                                                         |
26 | | SNES tests                                                                          | https://docs.google.com/spreadsheets/d/11AxbMohEzBab1LBbq7t-BQBdpGMERoWv-ibaXVLBDrE/edit#gid=2067633561      |
27 | | SNES Accuracy tests android                                                         | https://www.reddit.com/r/emulation/comments/2nv30w/android_snes_emulator_accuracy_testing_results/           |
28 | | More SNES tests                                                                     | https://forums.bannister.org/ubbthreads.php?ubb=showflat&Number=59965                                        |
29 | | More recent accuracy tests                                                          | http://tasvideos.org/EmulatorResources/SNESAccuracyTests.html                                                |
30 | | Programming the SNES (in-depth, French)                                             | https://jeux.developpez.com/tutoriels/SNES/debuter-programmation-super-nintendo/                             |
31 | | SNES Dev Manual by Nintendo                                                         | https://ia801905.us.archive.org/13/items/SNESDevManual/book1.pdf                                             |
32 | | Exhaustive memory, DMA, PPU, APU, Controllers, Cartridges, Timings                  | https://problemkaputt.de/fullsnes.htm                                                                        |
33 | | Apple software development guide for 65816                                          | http://ae.applearchives.com/apple_e/65816/65816_16_bit_software_devel.pdf                                    |
34 | | Opcode read/write/exec cycle by cycle detail                                        | http://fdwr.tripod.com/docs/65c816.txt                                                                       |
35 | | In-depth guide to 6502                                                              | ftp://public.asimov.net/pub/apple_II/documentation/programming/6502assembly/Programming%20the%206502_OCR.pdf |
36 | | Official doc from 2016 (in color)                                                   | https://www.mouser.co.uk/ds/2/436/w65c816s-1062580.pdf                                                       |
37 | | Crossing bank boundaries                                                            | http://board.zsnes.com/phpBB3/viewtopic.php?f=6&t=1299                                                       |
38 | | Instruction wrapping                                                                | https://wiki.superfamicom.org/instruction-wrapping                                                           |
39 | | 6502 - cycle accurate steps                                                         | http://atarihq.com/danb/files/64doc.txt                                                                      |
40 | |                                                                                     |                                                                                                              |
41 | 


--------------------------------------------------------------------------------
/resources/interpreter_optimizations.md:
--------------------------------------------------------------------------------
  1 | # Interpreter optimization
  2 | 
  3 | You will find the latest research in [Nimbus interpreter optimization wiki.](https://github.com/status-im/nimbus/wiki/Interpreter-optimization-resources)
  4 | 
  5 | ## Pure interpreter
  6 | 
  7 | * Threading techniques for Forth (indirect, Direct, Token, Switch, Call, Segment threading)                                                                                                                                   - [link](http://www.complang.tuwien.ac.at/forth/threaded-code.html#call-threading)
  8 | * Benchmark of interpreter dispatch techniques for Forth on x86, PPC, MIPS, SPARC, Itanium and ARM                                                                                                                            - [link](http://www.complang.tuwien.ac.at/forth/threading/)
  9 | * PhD Thesis: Virtual machine Showdown: Stack vs Registers, with review of ALL interpreter dispatch techniques                                                                                                                - [link](https://www.scss.tcd.ie/publications/tech-reports/reports.07/TCD-CS-2007-49.pdf)
 10 | * Basic overview of computed gotos                                                                                                                                                                                            - [link](https://eli.thegreenplace.net/2012/07/12/computed-goto-for-efficient-dispatch-tables)
 11 | * Optimizing direct threaded code by selective inlining (Paper from 1998 which includes JIT introduction with code!)                                                                                                          - [link](http://flint.cs.yale.edu/jvmsem/doc/threaded.ps)
 12 | * Design of a bytecode interpreter, including Stack vs Register, how to represent values (single type, tagged unions, untagged union, interface/virtual function)                                                             - [link](http://gameprogrammingpatterns.com/bytecode.html)
 13 | * Writing a fast interpreter: control-flow graph optimization from LuaJIT author                                                                                                                                              - [link](http://lua-users.org/lists/lua-l/2011-02/msg00742.html)
 14 | * In-depth dive on how to write an emulator                                                                                                                                                                                   - [link](http://fms.komkon.org/EMUL8/HOWTO.html)
 15 | * Review of interpreter dispatch strategies to limit branch mispredictions: direct threaded code vs indirect threaded code vs token threaded code vs switch based dispatching vs replicated switch dispatching + Bibliography - [link](http://realityforge.org/code/virtual-machines/2011/05/19/interpreters.html)
 16 | * Fast VMs without assembly - speeding up the interpreter loop: threaded interpreter, duff's device, JIT, Nostradamus distributor by the author of Bochs x86 emulator                                                         - [link](http://www.emulators.com/docs/nx25_nostradamus.htm)
 17 | * Switch case vs Table vs Function caching/dynarec                                                                                                                                                                            - [link](http://ngemu.com/threads/switch-case-vs-function-table.137562/)
 18 | * Jump tables vs Switch                                                                                                                                                                                                       - [link](http://www.cipht.net/2017/10/03/are-jump-tables-always-fastest.html)
 19 | * Paper: branch prediction and the performance of Interpreters - Don't trust the folklore                                                                                                                                     - [link](https://hal.inria.fr/hal-01100647/document)
 20 | * Paper by author of ANTLR: The Structure and Performance of Efficient Interpreters                                                                                                                                           - [link](https://www.jilp.org/vol5/v5paper12.pdf)
 21 | * Paper by author of ANTLR introducing dynamic replication: Optimizing Indirect Branch Prediction Accuracy in Virtual Machine Interpreter                                                                                     - [link](https://www.scss.tcd.ie/David.Gregg/papers/toplas05.pdf)
 22 | * Benchmarking VM Dispatch strategies in Rust: Switch vs unrolled switch vs tail call dispatch vs Computed Gotos                                                                                                              - [link](https://pliniker.github.io/post/dispatchers/)
 23 | * Computed Gotos for fast dispatching in the official CPython codebase                                                                                                                                                                               - [link](https://github.com/python/cpython/blob/9d6171ded5c56679bc295bacffc718472bcb706b/Python/ceval.c#L571-L608)
 24 | 
 25 | ## JIT / Dynamic recompilation
 26 | 
 27 | * Simple portable JIT (x86, x64, ARM, PowerPC and MIPS) for Brainfuck using DynASM (by LuaJIT author) - [Link](http://blog.reverberate.org/2012/12/hello-jit-world-joy-of-simple-jits.html)
 28 | * Optimizing direct threaded code by selective inlining                                   - [link](http://flint.cs.yale.edu/jvmsem/doc/threaded.ps)
 29 | * Dynamic recompilation introduction                                                      - [link](http://ngemu.com/threads/dynamic-recompilation-an-introduction.20491/)
 30 | * Dynamic recompilation guide with Chip8                                                  - [link](https://github.com/marco9999/Dynarec_Guide/blob/master/Introduction%20to%20Dynamic%20Recompilation%20in%20Emulation.pdf)
 31 | * Dynamic recompilation - accompanying source code                                        - [link](https://github.com/marco9999/Super8_jitcore/)
 32 | * Presentation: Interpretation (basic indirect and direct threaded) vs binary translation - [link](http://www.ittc.ku.edu/~kulkarni/teaching/EECS768/slides/chapter2.pdf)
 33 | * Threaded interpretation vs Dynarec                                                      - [link](http://www.emutalk.net/threads/55275-Threaded-interpretation-vs-Dynamic-Binary-Translation)
 34 | * Dynamic recompilation wiki                                                              - [link](http://emulation.gametechwiki.com/index.php/Dynamic_recompilation)
 35 | 
 36 | ## Context Threading
 37 | 
 38 | Context threading is a promising alternative to Direct/Indirect/Call/Token/Subroutine/Switch threading
 39 | that makes interpretation nice with the hardware branch predictor. Practical implementation wanted:
 40 | 
 41 |   - [Web version of the thesis by Zalewski](http://www.cs.toronto.edu/~matz/dissertation/matzDissertation-latex2html/node7.html)
 42 |   - [Paper](http://www.cs.toronto.edu/~matz/pubs/demkea_context.pdf)
 43 |   - [Powerpoint](https://webdocs.cs.ualberta.ca/~amaral/cascon/CDP05/slides/CDP05-berndl.pdf)
 44 |   - [Review / Critique](https://www.complang.tuwien.ac.at/anton/lvas/sem06w/fest.pdf)
 45 |   - Cited and reviewed in [Virtual Machine Showdown PhD Thesis](https://www.scss.tcd.ie/publications/tech-reports/reports.07/TCD-CS-2007-49.pdf)
 46 | 
 47 | Basically, instead of computed goto, you have computed "call" and each section called is ended by
 48 | the ret (return) instruction. Note that the address called is still inline, there is no parameter pushed on the stack.
 49 | 
 50 | The trick is that CPU has the following types of predictors:
 51 | 
 52 | - Linear or straight-line code
 53 | - Conditional branches
 54 | - Calls and Returns
 55 | - Indirect branches
 56 | 
 57 | But direct threaded code / computed goto only makes use of indirect branches (goto). Context Threading is reported to reduce
 58 | mispredicted branches by up to 95% by exploiting all those predictors. However it requires assembly, as there is no way to generate
 59 | arbitrary call and ret instructions from a high-level language.
 60 | 
 61 | ## Codebases
 62 | 
 63 | - [Bochs x86 emulator](https://sourceforge.net/projects/bochs/)
 64 |   - [Virtualization without Execution: Designing a portable VM - Powerpoint](http://bochs.sourceforge.net/VirtNoJit.pdf)
 65 |   - [Virtualization without Execution - Paper](http://bochs.sourceforge.net/Virtualization_Without_Hardware_Final.pdf)
 66 |   - Author is also the author of the Nostradamus Distributor linked in pure interpreter optimizations
 67 | - MorphoVM
 68 |   - Thesis: [Morpho VM: An Indirect Threaded Stackless
 69 | Virtual Machine](https://skemman.is/bitstream/1946/4809/1/hhg-bs.pdf)
 70 | 
 71 | ## Nim implementation benchmark
 72 | 
 73 | ```Nim
 74 | import random, sequtils, times
 75 | 
 76 | type
 77 |   Op = enum
 78 |     Halt # = 0x0000
 79 |     Inc  # = 0x0100
 80 |     Dec  # = 0x0110
 81 |     Mul2 # = 0x0230
 82 |     Div2 # = 0x0240
 83 |     Add7 # = 0x0307
 84 |     Neg  # = 0x0400
 85 | 
 86 | func interp_switch(code: seq[Op], initVal: int): int =
 87 |   ## Switch-dispatch interpreter: starts from `initVal`, applies each opcode of `code` in order and returns the value reached at `Halt`.
 88 |   var
 89 |     pc = 0
 90 |   result = initVal
 91 |   # The loop has no exit other than the `Halt` branch, so `code` must contain a `Halt`.
 92 |   while true:
 93 |     case code[pc]:
 94 |     of Halt:
 95 |       return
 96 |     of Inc:
 97 |       inc pc
 98 |       inc result
 99 |     of Dec:
100 |       inc pc
101 |       dec result
102 |     of Mul2:
103 |       inc pc
104 |       result *= 2
105 |     of Div2:
106 |       inc pc
107 |       result = result div 2
108 |     of Add7:
109 |       inc pc
110 |       inc result, 7
111 |     of Neg:
112 |       inc pc
113 |       result = -result
114 | 
115 | #################################################################################################################
116 | 
117 | func interp_cgoto(code: seq[Op], initVal: int): int =
118 |   # Same opcode semantics as the switch version, but the loop is annotated with {.computedGoto.}. Requires a dense enum (no holes).
119 |   var
120 |     pc = 0
121 |   result = initVal
122 | 
123 |   while true:
124 |     {.computedGoto.}
125 |     let instr = code[pc]
126 |     case instr:
127 |     of Halt:
128 |       return
129 |     of Inc:
130 |       inc pc
131 |       inc result
132 |     of Dec:
133 |       inc pc
134 |       dec result
135 |     of Mul2:
136 |       inc pc
137 |       result *= 2
138 |     of Div2:
139 |       inc pc
140 |       result = result div 2
141 |     of Add7:
142 |       inc pc
143 |       inc result, 7
144 |     of Neg:
145 |       inc pc
146 |       result = -result
147 | 
148 | #################################################################################################################
149 | 
150 | func halt(result: var int, stop: var bool) {.inline, nimcall.}=
151 |   stop = true # the only handler that touches `stop`; the dispatch loop tests it to exit
152 | 
153 | func inc(result: var int, stop: var bool) {.inline, nimcall.}=
154 |   inc result # single-argument call: cannot match this two-parameter overload, so it is system.inc
155 | 
156 | func dec(result: var int, stop: var bool) {.inline, nimcall.}=
157 |   dec result # single-argument call: resolves to system.dec, not to this overload
158 | 
159 | func mul2(result: var int, stop: var bool) {.inline, nimcall.}=
160 |   result *= 2
161 | 
162 | func div2(result: var int, stop: var bool) {.inline, nimcall.}=
163 |   result = result div 2
164 | 
165 | func add7(result: var int, stop: var bool) {.inline, nimcall.}=
166 |   inc result, 7 # second argument is an int, not a `var bool`, so this is system.inc with a step of 7
167 | 
168 | func neg(result: var int, stop: var bool) {.inline, nimcall.}=
169 |   result = -result
170 | 
171 | # Requires dense enum
172 | type InstrF = proc (result: var int, stop: var bool){.inline, nimcall, noSideEffect, gcsafe, locks: 0.}
173 | 
174 | type FuncTable = array[Op, InstrF]
175 | 
176 | const funcTable: FuncTable = [
177 |   Halt: halt,
178 |   Inc: inc,
179 |   Dec: dec,
180 |   Mul2: mul2,
181 |   Div2: div2,
182 |   Add7: add7,
183 |   Neg: neg
184 | ]
185 | 
186 | proc interp_ftable(code: seq[Op], initVal: int): int =
187 |   # Function-table dispatch: each opcode indexes funcTable and the handler found there is called. Requires dense enum
188 |   var
189 |     pc = 0
190 |     stop = false
191 |   result = initVal
192 |   # NOTE(review): the recorded run at the end of this listing shows a different result for this variant; cause not identified.
193 |   while not stop:
194 |     funcTable[code[pc]](result, stop)
195 |     inc pc
196 | 
197 | #################################################################################################################
198 | 
199 | type
200 |   InstrNext = proc (val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
201 | 
202 |   OpH = ref object
203 |     handler: InstrNext
204 | 
205 |   FuncTableNext = array[Op, OpH]
206 | 
207 | proc halt(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
208 | proc inc(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
209 | proc dec(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
210 | proc mul2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
211 | proc div2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
212 | proc add7(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
213 | proc neg(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}
214 | 
215 | let funcTableNext: FuncTableNext = [
216 |   Halt: OpH(handler: halt),
217 |   Inc: OpH(handler: inc),
218 |   Dec: OpH(handler: dec),
219 |   Mul2: OpH(handler: mul2),
220 |   Div2: OpH(handler: div2),
221 |   Add7: OpH(handler: add7),
222 |   Neg: OpH(handler: neg)
223 | ]
224 | 
225 | proc halt(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}=
226 |   stop = true # `result` is left at its default (nil); the caller's loop checks `stop` before using it
227 | 
228 | proc inc(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}=
229 |   # Pattern shared by every non-halt handler: apply the operation, advance pc, return the OpH of the next opcode.
230 |   inc val
231 |   inc pc
232 |   result = funcTableNext[code[pc]]
233 | 
234 | proc dec(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}=
235 |   dec val
236 |   inc pc
237 |   result = funcTableNext[code[pc]]
238 | 
239 | proc mul2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}=
240 |   val *= 2
241 |   inc pc
242 |   result = funcTableNext[code[pc]]
243 | 
244 | proc div2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}=
245 |   val = val div 2
246 |   inc pc
247 |   result = funcTableNext[code[pc]]
248 | 
249 | proc add7(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}=
250 |   inc val, 7
251 |   inc pc
252 |   result = funcTableNext[code[pc]]
253 | 
254 | proc neg(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}=
255 |   val = -val
256 |   inc pc
257 |   result = funcTableNext[code[pc]]
258 | 
259 | proc interp_handlers(code: seq[Op], initVal: int): int =
260 |   # Handler-chaining dispatch: only the first handler is looked up here. Requires dense enum
261 |   var
262 |     pc = 0
263 |     stop = false
264 |     oph = funcTableNext[code[pc]]
265 |   result = initVal
266 |   # Each call returns the OpH for the following instruction, so the loop body never indexes funcTableNext itself.
267 |   while not stop:
268 |     oph = oph.handler(result, code, pc, stop)
269 | 
270 | #################################################################################################################
271 | 
272 | type
273 |   OpD = ref object of RootObj # inheritable root of the opcode hierarchy that `execute` dispatches on
274 | 
275 |   Ohalt {.final.}= ref object of OpD
276 |   Oinc {.final.}= ref object of OpD
277 |   Odec {.final.}= ref object of OpD
278 |   Omul2 {.final.}= ref object of OpD
279 |   Odiv2 {.final.}= ref object of OpD
280 |   Oadd7 {.final.}= ref object of OpD
281 |   Oneg {.final.}= ref object of OpD
282 | 
283 |   FuncTableToken = array[Op, OpD] # maps each opcode to the object whose dynamic type selects the method
284 | 
285 | method execute(op: OpD, result: var int, stop: var bool) {.base, inline, noSideEffect.} =
286 |   raise newException(ValueError, "To override") # base placeholder: every concrete op type below supplies its own `execute`
287 | 
288 | method execute(op: Ohalt, result: var int, stop: var bool) {.inline, noSideEffect.}=
289 |   stop = true # ends the dispatch loop, which runs `while not stop`
290 | 
291 | method execute(op: Oinc, result: var int, stop: var bool) {.inline, noSideEffect.}=
292 |   inc result
293 | 
294 | method execute(op: Odec, result: var int, stop: var bool) {.inline, noSideEffect.}=
295 |   dec result
296 | 
297 | method execute(op: Omul2, result: var int, stop: var bool) {.inline, noSideEffect.}=
298 |   result *= 2
299 | 
300 | method execute(op: Odiv2, result: var int, stop: var bool) {.inline, noSideEffect.}=
301 |   result = result div 2
302 | 
303 | method execute(op: Oadd7, result: var int, stop: var bool) {.inline, noSideEffect.}=
304 |   inc result, 7
305 | 
306 | method execute(op: Oneg, result: var int, stop: var bool) {.inline, noSideEffect.}=
307 |   result = -result
308 | 
309 | let funcTableToken: FuncTableToken = [
310 |   Halt: Ohalt(),
311 |   Inc: Oinc(),
312 |   Dec: Odec(),
313 |   Mul2: Omul2(),
314 |   Div2: Odiv2(),
315 |   Add7: Oadd7(),
316 |   Neg: Oneg()
317 | ]
318 | 
319 | proc interp_methods(code: seq[Op], initVal: int): int =
320 |   # Method dispatch: each opcode is mapped to an object and `execute` is selected by that object's runtime type. Requires dense enum
321 |   var
322 |     pc = 0
323 |     stop = false
324 |     opt: OpD
325 |   result = initVal
326 |   # Unlike the handler-chaining variant, pc is advanced here in the loop, not inside the methods.
327 |   while not stop:
328 |     opt = funcTableToken[code[pc]]
329 |     opt.execute(result, stop)
330 |     inc pc
331 | 
332 | #################################################################################################################
333 | 
334 | import random, sequtils, times, os, strutils, strformat
335 | 
336 | const Nb_Instructions = 1_000_000_000
337 | 
338 | template bench(impl: untyped) =
339 |   let start = cpuTime() # times one run of `impl` and prints its result and throughput
340 |   let r = impl(instructions, n) # `instructions` and `n` are not parameters: they must exist where the template is expanded
341 |   let stop = cpuTime()
342 |   let elapsed = stop - start
343 |   echo "result: " & $r
344 |   let procname = impl.astToStr
345 |   let mips = (Nb_Instructions.float / (1_000_000.0 * elapsed))
346 |   echo procname & " took " & $elapsed & "s for " & $Nb_Instructions & " instructions: " & $mips & " Mips (M instructions/s)"
347 | 
348 | proc main(n: int)=
349 |   randomize(42) # fixed seed
350 |   # The program is drawn from every opcode except Halt, then terminated by a single Halt appended at the end.
351 |   let ops = [Inc, Dec, Mul2, Div2, Add7, Neg]
352 |   let instructions = newSeqWith(Nb_Instructions, rand(ops)) & Halt
353 |   # `bench` picks up `instructions` and `n` from this scope; every variant runs the same program.
354 |   bench(interp_switch)
355 |   bench(interp_cgoto) # requires dense enum (no holes)
356 |   bench(interp_ftable) # requires dense enum (no holes) or tables (instead of arrays)
357 |   bench(interp_handlers) # requires dense enum (no holes) or tables (instead of arrays)
358 |   bench(interp_methods) # requires dense enum (no holes) or tables (instead of arrays)
359 | 
360 | # Warmup
361 | var start = cpuTime()
362 | block:
363 |   var foo = 123
364 |   for i in 0 ..< 1_000_000_000:
365 |     foo += i*i mod 456
366 |     foo = foo mod 789
367 | 
368 | # The compiler shouldn't optimize away the results, as cpuTime relies on side effects
369 | var stop = cpuTime()
370 | echo "Warmup: " & $(stop - start) & "s"
371 | 
372 | # Main loop
373 | let arguments = commandLineParams()
374 | let initial = if arguments.len > 0: parseInt($arguments[0])
375 |               else: 1
376 | 
377 | main(initial)
378 | 
379 | ## Results on i5-5257U (Broadwell mobile dual core 2.7 GHz, turbo 3.1 GHz)
380 | # Note that since Haswell, Intel CPUs have significantly improved switch prediction
381 | # This probably won't carry over to ARM devices
382 | 
383 | # Warmup: 4.081501s
384 | # result: -14604293096444
385 | # interp_switch took 8.604712000000003s for 1000000000 instructions: 116.2153945419672 Mips (M instructions/s)
386 | # result: -14604293096444
387 | # interp_cgoto took 7.367597000000004s for 1000000000 instructions: 135.7294651159665 Mips (M instructions/s)
388 | # result: -201628509198920 <--- some bug here to fix
389 | # interp_ftable took 8.957571000000002s for 1000000000 instructions: 111.6374070604631 Mips (M instructions/s)
390 | # result: -14604293096444
391 | # interp_handlers took 11.039072s for 1000000000 instructions: 90.58732473164413 Mips (M instructions/s)
392 | # result: -14604293096444
393 | # interp_methods took 23.359635s for 1000000000 instructions: 42.80888806695823 Mips (M instructions/s)
394 | ```
395 | 


--------------------------------------------------------------------------------
/tests/opcLength.nim:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2018 Mamy André-Ratsimbazafy
 2 | # Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
 3 | 
 4 | import ../glyph/snes/datatypes
 5 | 
 6 | const OpcLength* = [ # NOTE(review): presumably the instruction length in bytes per addressing mode; confirm against datatypes
 7 |     Accumulator            : 1,
 8 |     Implied                : 1,
 9 |     Immediate              : 2, # 3 if 16-bit mode
10 |     Absolute               : 3,
11 |     AbsoluteLong           : 4,
12 |     AbsoluteLongX          : 4,
13 |     AbsoluteX              : 3,
14 |     AbsoluteY              : 3,
15 |     AbsoluteXIndirect      : 3,
16 |     AbsoluteIndirect       : 3,
17 |     AbsoluteIndirectLong   : 3,
18 |     Direct                 : 2,
19 |     DirectX                : 2,
20 |     DirectY                : 2,
21 |     DirectXIndirect        : 2,
22 |     DirectIndirect         : 2,
23 |     DirectIndirectLong     : 2,
24 |     DirectIndirectY        : 2,
25 |     DirectIndirectLongY    : 2,
26 |     ProgramCounterRelative : 2,
27 |     ProgCountRelativeLong  : 3,
28 |     StackRelative          : 2,
29 |     StackRelativeIndirectY : 2,
30 |     BlockMove              : 3,
31 | ]
32 | 


--------------------------------------------------------------------------------