├── .github
├── FUNDING.yml
├── pull_request_template.md
└── workflows
│ └── autobuild.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── rom-converter.py
└── src
├── apu.S
├── apu_address.S
├── apu_alu.S
├── apu_control.S
├── apu_emitter.S
├── apu_transfer.S
├── cpu.S
├── cpu_address.S
├── cpu_alu.S
├── cpu_control.S
├── cpu_transfer.S
├── defines.h
├── dma.S
├── dsp.S
├── font.S
├── input.S
├── macros.h
├── main.S
├── memory.S
├── menu.S
├── mul_div.S
├── ppu.S
├── rsp_main.S
└── xcop_dsp1.S
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | patreon: Hydr8gon
2 | custom: paypal.me/Hydr8gon
3 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | Pull requests are not accepted for this project; see the contributing section of the readme for more details.
2 |
--------------------------------------------------------------------------------
/.github/workflows/autobuild.yml:
--------------------------------------------------------------------------------
1 | name: Automatic Builds
2 |
3 | on: # Run on every push to the master branch
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | build: # Builds the ROM with libdragon and uploads it as an artifact
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - name: Setup Node.js # Needed for the npm install in the next step
14 | uses: actions/setup-node@v4
15 | with:
16 | node-version: latest
17 | - name: Install libdragon # Provides the libdragon command used by the Compile step
18 | run: npm install -g libdragon
19 | - name: Checkout
20 | uses: actions/checkout@v4
21 | - name: Compile
22 | run: |
23 | libdragon init
24 | libdragon make -j$(nproc)
25 | - name: Upload
26 | uses: actions/upload-artifact@v4
27 | with: # The "sodium64" artifact holds the ROM and the converter script
28 | name: sodium64
29 | path: |
30 | sodium64.z64
31 | rom-converter.py
32 |
33 | update-release: # Replaces the "release" tag and release with a freshly packaged build
34 | runs-on: ubuntu-latest
35 | needs: build # Wait for the build job so its artifact is available
36 |
37 | steps:
38 | - name: Delete old release
39 | uses: dev-drprasad/delete-tag-and-release@v0.2.1
40 | with:
41 | delete_release: true
42 | tag_name: release
43 | env:
44 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45 | - name: Get artifacts # No artifact name is given; the next step expects the files under sodium64/
46 | uses: actions/download-artifact@v4
47 | - name: Package artifacts
48 | run: |
49 | mv sodium64/sodium64.z64 .
50 | mv sodium64/rom-converter.py .
51 | zip -r sodium64.zip sodium64.z64 rom-converter.py
52 | - name: Create new release
53 | uses: ncipollo/release-action@v1
54 | with:
55 | name: Rolling Release
56 | body: These are automatically updated builds of the latest commit.
57 | artifacts: sodium64.zip
58 | tag: release # Same tag that the "Delete old release" step removes
59 | token: ${{ secrets.GITHUB_TOKEN }}
60 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | /out
3 | /sodium64.z64
4 | /*.sfc
5 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Based on n64.mk from libdragon
2 | #
3 | # Builds sodium64.z64 from the assembly sources in SRC_DIRS. Requires N64_INST to
4 | # point at a libdragon toolchain installation (only "make clean" works without it).
5 |
6 | PROJ_NAME := sodium64
7 | BUILD_DIR := build
8 | SRC_DIRS := src
9 |
10 | # Every assembly source and header found directly inside the source directories
11 | SFILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.S))
12 | HFILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.h))
13 | # One object per source, mirrored under the build directory (src/x.S -> build/src/x.o)
14 | OFILES := $(patsubst %.S,$(BUILD_DIR)/%.o,$(SFILES))
15 |
16 | N64_ROM_TITLE = "$(PROJ_NAME)"
17 | N64_ROM_SAVETYPE = sram256k
18 |
19 | # Every tool path below derives from N64_INST, so fail early with a clear message
20 | # when it is unset. Cleaning needs no toolchain, so a lone "make clean" is allowed.
21 | ifeq ($(strip $(N64_INST)),)
22 | ifneq ($(MAKECMDGOALS),clean)
23 | $(error N64_INST is not set; it must point to the libdragon toolchain installation)
24 | endif
25 | endif
26 |
27 | # Override this to use a toolchain installed separately from libdragon
28 | N64_GCCPREFIX ?= $(N64_INST)
29 | N64_ROOTDIR = $(N64_INST)
30 | N64_BINDIR = $(N64_ROOTDIR)/bin
31 | N64_INCLUDEDIR = $(N64_ROOTDIR)/mips64-elf/include
32 | N64_LIBDIR = $(N64_ROOTDIR)/mips64-elf/lib
33 | N64_GCCPREFIX_TRIPLET = $(N64_GCCPREFIX)/bin/mips64-elf-
34 |
35 | # A literal comma, for use inside function calls where a bare comma would
36 | # separate arguments (see the link rule)
37 | COMMA := ,
38 |
39 | N64_CC = $(N64_GCCPREFIX_TRIPLET)gcc
40 | N64_CXX = $(N64_GCCPREFIX_TRIPLET)g++
41 | N64_AS = $(N64_GCCPREFIX_TRIPLET)as
42 | N64_AR = $(N64_GCCPREFIX_TRIPLET)ar
43 | N64_LD = $(N64_GCCPREFIX_TRIPLET)ld
44 | N64_OBJCOPY = $(N64_GCCPREFIX_TRIPLET)objcopy
45 | N64_OBJDUMP = $(N64_GCCPREFIX_TRIPLET)objdump
46 | N64_SIZE = $(N64_GCCPREFIX_TRIPLET)size
47 | N64_NM = $(N64_GCCPREFIX_TRIPLET)nm
48 | N64_STRIP = $(N64_GCCPREFIX_TRIPLET)strip
49 |
50 | N64_CHKSUM = $(N64_BINDIR)/chksum64
51 | N64_ED64ROMCONFIG = $(N64_BINDIR)/ed64romconfig
52 | N64_MKDFS = $(N64_BINDIR)/mkdfs
53 | N64_TOOL = $(N64_BINDIR)/n64tool
54 | N64_SYM = $(N64_BINDIR)/n64sym
55 | N64_ELFCOMPRESS = $(N64_BINDIR)/n64elfcompress
56 | N64_AUDIOCONV = $(N64_BINDIR)/audioconv64
57 | N64_MKSPRITE = $(N64_BINDIR)/mksprite
58 |
59 | N64_ASFLAGS = -mtune=vr4300 -march=vr4300 -Wa,--fatal-warnings
60 | N64_RSPASFLAGS = -march=mips1 -mabi=32 -Wa,--fatal-warnings
61 | N64_LDFLAGS = -L$(N64_LIBDIR) -Tn64.ld --gc-sections
62 | N64_TOOLFLAGS = --title $(N64_ROM_TITLE)
63 | N64_ED64ROMCONFIGFLAGS = --savetype $(N64_ROM_SAVETYPE) --regionfree
64 |
65 | # Change the dependency chain of .z64 ROMs to use the N64 toolchain.
66 | # Target-specific variables also apply to the prerequisites built for the ROM.
67 | %.z64: CC=$(N64_CC)
68 | %.z64: CXX=$(N64_CXX)
69 | %.z64: AS=$(N64_AS)
70 | %.z64: LD=$(N64_LD)
71 | %.z64: ASFLAGS+=$(N64_ASFLAGS)
72 | %.z64: RSPASFLAGS+=$(N64_RSPASFLAGS)
73 | %.z64: LDFLAGS+=$(N64_LDFLAGS)
74 | # ROM rule: generate a symbol file, strip and compress a copy of the ELF, pack both
75 | # (plus an optional .dfs prerequisite) with n64tool, then apply the ED64 configuration.
76 | %.z64: $(BUILD_DIR)/%.elf
77 | 	@echo " [Z64] $@"
78 | 	$(N64_SYM) $< $<.sym
79 | 	cp $< $<.stripped
80 | 	$(N64_STRIP) -s $<.stripped
81 | 	$(N64_ELFCOMPRESS) -o $(dir $<) -c 1 $<.stripped
82 | 	@rm -f $@
83 | 	DFS_FILE="$(filter %.dfs, $^)"; \
84 | 	if [ -z "$$DFS_FILE" ]; then \
85 | 		$(N64_TOOL) $(N64_TOOLFLAGS) --toc --output $@ --align 256 $<.stripped --align 8 $<.sym; \
86 | 	else \
87 | 		$(N64_TOOL) $(N64_TOOLFLAGS) --toc --output $@ --align 256 $<.stripped --align 8 $<.sym --align 16 "$$DFS_FILE"; \
88 | 	fi
89 | 	if [ ! -z "$(strip $(N64_ED64ROMCONFIGFLAGS))" ]; then \
90 | 		$(N64_ED64ROMCONFIG) $(N64_ED64ROMCONFIGFLAGS) $@; \
91 | 	fi
92 |
93 | # Assembly rule. We use .S for both RSP and MIPS assembly code, and we differentiate
94 | # using the prefix of the filename: if it starts with "rsp", it is RSP ucode, otherwise
95 | # it's a standard MIPS assembly file.
96 | # For RSP ucode, the linked binary's .text and .data sections are extracted, wrapped
97 | # as data objects with <name>_text_*/<name>_data_* symbols, and merged into the object.
98 | $(BUILD_DIR)/%.o: %.S
99 | 	@mkdir -p $(dir $@)
100 | 	set -e; \
101 | 	FILENAME="$(notdir $(basename $@))"; \
102 | 	if case "$$FILENAME" in "rsp"*) true;; *) false;; esac; then \
103 | 		SYMPREFIX="$(subst .,_,$(subst /,_,$(basename $@)))"; \
104 | 		TEXTSECTION="$(basename $@).text"; \
105 | 		DATASECTION="$(basename $@).data"; \
106 | 		BINARY="$(basename $@).elf"; \
107 | 		echo " [RSP] $<"; \
108 | 		$(N64_CC) $(RSPASFLAGS) -L$(N64_LIBDIR) -nostartfiles -Wl,-Trsp.ld -Wl,--gc-sections -Wl,-Map=$(BUILD_DIR)/$(notdir $(basename $@)).map -o $@ $<; \
109 | 		mv "$@" $$BINARY; \
110 | 		$(N64_OBJCOPY) -O binary -j .text $$BINARY $$TEXTSECTION.bin; \
111 | 		$(N64_OBJCOPY) -O binary -j .data $$BINARY $$DATASECTION.bin; \
112 | 		$(N64_OBJCOPY) -I binary -O elf32-bigmips -B mips4300 \
113 | 			--redefine-sym _binary_$${SYMPREFIX}_text_bin_start=$${FILENAME}_text_start \
114 | 			--redefine-sym _binary_$${SYMPREFIX}_text_bin_end=$${FILENAME}_text_end \
115 | 			--redefine-sym _binary_$${SYMPREFIX}_text_bin_size=$${FILENAME}_text_size \
116 | 			--set-section-alignment .data=8 \
117 | 			--rename-section .text=.data $$TEXTSECTION.bin $$TEXTSECTION.o; \
118 | 		$(N64_OBJCOPY) -I binary -O elf32-bigmips -B mips4300 \
119 | 			--redefine-sym _binary_$${SYMPREFIX}_data_bin_start=$${FILENAME}_data_start \
120 | 			--redefine-sym _binary_$${SYMPREFIX}_data_bin_end=$${FILENAME}_data_end \
121 | 			--redefine-sym _binary_$${SYMPREFIX}_data_bin_size=$${FILENAME}_data_size \
122 | 			--set-section-alignment .data=8 \
123 | 			--rename-section .text=.data $$DATASECTION.bin $$DATASECTION.o; \
124 | 		$(N64_SIZE) -G $$BINARY; \
125 | 		$(N64_LD) -relocatable $$TEXTSECTION.o $$DATASECTION.o -o $@; \
126 | 		rm $$TEXTSECTION.bin $$DATASECTION.bin $$TEXTSECTION.o $$DATASECTION.o; \
127 | 	else \
128 | 		echo " [AS] $<"; \
129 | 		$(CC) -c $(ASFLAGS) -o $@ $<; \
130 | 	fi
131 |
132 | # Link rule. The linker script is a prerequisite so a changed script relinks, but it
133 | # is filtered out of the inputs; each LDFLAGS word is passed through with -Wl,.
134 | %.elf: $(N64_LIBDIR)/n64.ld
135 | 	@mkdir -p $(dir $@)
136 | 	@echo " [LD] $@"
137 | 	$(CXX) -o $@ $(filter-out $(N64_LIBDIR)/n64.ld,$^) -lc $(patsubst %,-Wl$(COMMA)%,$(LDFLAGS)) -Wl,-Map=$(BUILD_DIR)/$(notdir $(basename $@)).map
138 | 	$(N64_SIZE) -G $@
139 |
140 | # Do not echo recipe commands; the rules print their own short status lines
141 | .SILENT:
142 |
143 | # Remove a target whose recipe failed so a partial file is never treated as up to date
144 | .DELETE_ON_ERROR:
145 |
146 | all: $(PROJ_NAME).z64
147 |
148 | $(BUILD_DIR)/$(PROJ_NAME).elf: $(OFILES)
149 |
150 | # Any header change rebuilds every object
151 | $(OFILES): $(HFILES)
152 |
153 | clean:
154 | 	rm -rf $(BUILD_DIR) $(PROJ_NAME).z64
155 |
156 | .PHONY: all clean
157 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # sodium64
2 | A SNES emulator for the N64, written in assembly.
3 |
4 | ### Overview
5 | The goal of sodium64 is to be fast and accurate enough to at least make some SNES games playable on the N64. It handles
6 | rendering entirely on the RSP in order to reduce load on the main CPU. I thought it would be fun to write something
7 | specifically for older hardware in assembly, so sodium64 was born!
8 |
9 | ### Downloads
10 | The latest build of sodium64 is automatically provided via GitHub Actions, and can be downloaded from the
11 | [releases page](https://github.com/Hydr8gon/sodium64/releases).
12 |
13 | ### Usage
14 | Place SNES ROMs with extension `.sfc`/`.smc` in the same folder as `sodium64.z64` and `rom-converter.py`. Run
15 | `rom-converter.py` using [Python](https://www.python.org) to convert the SNES ROMs to N64 ROMs. The output ROMs will be
16 | in a new folder called `out`.
17 |
18 | Alternatively, some flashcarts support loading ROMs directly with a supplied emulator. If you have an EverDrive, copy
19 | `sodium64.z64` to the `ED64/emu` folder on your SD card and rename it to `smc.v64`. SNES ROMs must be in headerless
20 | `.smc` format to work this way; `rom-converter.py` can optionally convert input ROMs for this.
21 |
22 | ### Controls
23 | | **N64** | **SNES** |
24 | |:---------:|:------------:|
25 | | C-Buttons | ABXY |
26 | | D-Pad | D-Pad |
27 | | L/R | L/R |
28 | | A/B | Start/Select |
29 | | Start | Settings |
30 |
31 | ### Contributing
32 | This is a personal project, and I've decided to not review or accept pull requests for it. If you want to help, you can
33 | test things and report issues or provide feedback. If you can afford it, you can also donate to motivate me and allow me
34 | to spend more time on things like this. Nothing is mandatory, and I appreciate any interest in my projects, even if
35 | you're just a user!
36 |
37 | ### Building
38 | Although sodium64 is written in assembly, it relies on [libdragon](https://github.com/DragonMinded/libdragon.git) for
39 | its build system. With that set up, run `make` in the project root directory to start building.
40 |
41 | ### Hardware References
42 | * [Fullsnes](https://problemkaputt.de/fullsnes.htm) - The main source of information on SNES hardware
43 | * [Anomie Docs](https://www.romhacking.net/community/548) - More detailed documentation for certain components
44 | * [6502 Tutorials](http://6502.org/tutorials/) - Has articles thoroughly covering the CPU and its quirks
45 | * [bsnes](https://github.com/bsnes-emu/bsnes) - Reference for the HLE DSP-1 coprocessor commands
46 |
47 | ### Other Links
48 | * [Hydra's Lair](https://hydr8gon.github.io) - Blog where I may or may not write about things
49 | * [Discord Server](https://discord.gg/JbNz7y4) - A place to chat about my projects and stuff
50 |
--------------------------------------------------------------------------------
/rom-converter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import os
4 |
5 | convert = 0
6 |
7 | print("sodium64 ROM converter")
8 | print("Place sodium64.z64 and SNES ROMs with extension .sfc/.smc in the same folder as this script.")
9 | print("The out folder will contain the ROMs converted to .z64 format, ready to play.\n")
10 |
11 | if not os.path.isfile("sodium64.z64"):
12 | print("Error: sodium64.z64 not found!")
13 | exit()
14 |
15 | if not os.path.isdir("out"):
16 | os.mkdir("out")
17 |
18 | for filename in os.listdir("."):
19 | ext = filename[-4:].lower()
20 | if (ext == ".sfc" or ext == ".smc") and os.path.isfile(filename):
21 | print("Converting " + filename + "...")
22 | baseFile = open("sodium64.z64", "rb")
23 | inFile = open(filename, "rb")
24 | if ext == ".sfc":
25 | convert = 1
26 | if os.stat(filename).st_size & 0x3FF == 0x200:
27 | inFile.seek(0x200) # Skip header
28 | convert = 1
29 | outFile = open("out/" + filename[:-4] + ".z64", "wb")
30 | outFile.write(baseFile.read())
31 | outFile.seek(0x104000, 0)
32 | outFile.write(inFile.read())
33 | inFile.close()
34 |
35 | if convert == 1:
36 | print("\nA header or .sfc extension was detected in one or more ROMs.")
37 | print("Some flashcarts support loading ROMs directly with a supplied emulator.")
38 | print("To be compatible with this, ROMs must be converted to headerless .smc files.")
39 | print("Would you like to convert the input ROMs to this format? (y/N)")
40 |
41 | if input() == "y":
42 | for filename in os.listdir("."):
43 | ext = filename[-4:].lower()
44 | if (ext == ".sfc" or ext == ".smc") and os.path.isfile(filename):
45 | print("Converting " + filename + "...")
46 | file = open(filename, "rb")
47 | if os.stat(filename).st_size & 0x3FF == 0x200:
48 | file.seek(0x200) # Skip header
49 | data = file.read()
50 | file.close()
51 | os.remove(filename)
52 | file = open(filename[:-4] + ".smc", "wb")
53 | file.write(data)
54 |
55 | print("Done!")
56 |
--------------------------------------------------------------------------------
/src/apu.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 | #include "defines.h"
22 |
23 | .globl apu_map
24 | .globl apu_ram
25 | .globl apu_count
26 | .globl apu_reg_x
27 | .globl apu_reg_y
28 | .globl apu_accum
29 | .globl apu_stack
30 | .globl apu_flags
31 | .globl apu_clock
32 |
33 | .globl apu_execute
34 | .globl apu_read8
35 | .globl apu_write8
36 | .globl offset_timers
37 | .globl read_apuio0
38 | .globl read_apuio1
39 | .globl read_apuio2
40 | .globl read_apuio3
41 | .globl write_apuio0
42 | .globl write_apuio1
43 | .globl write_apuio2
44 | .globl write_apuio3
45 |
46 | .data
47 |
48 | .align 4
49 | apu_map: .byte 0:0x3FF, 0x40 // Read offset per 64-byte block; the last entry (0xFFC0-0xFFFF) moves reads past apu_ram into apu_rom
50 | apu_ram: .byte 0:0x10000 // 64KB of APU memory
51 | apu_rom: // Read in place of 0xFFC0-0xFFFF while mapped; allows uploading code from the CPU
52 | .byte 0xCD, 0xEF, 0xBD, 0xE8, 0x00, 0xC6, 0x1D, 0xD0 // 0xFFC0-0xFFC7
53 | .byte 0xFC, 0x8F, 0xAA, 0xF4, 0x8F, 0xBB, 0xF5, 0x78 // 0xFFC8-0xFFCF
54 | .byte 0xCC, 0xF4, 0xD0, 0xFB, 0x2F, 0x19, 0xEB, 0xF4 // 0xFFD0-0xFFD7
55 | .byte 0xD0, 0xFC, 0x7E, 0xF4, 0xD0, 0x0B, 0xE4, 0xF5 // 0xFFD8-0xFFDF
56 | .byte 0xCB, 0xF4, 0xD7, 0x00, 0xFC, 0xD0, 0xF3, 0xAB // 0xFFE0-0xFFE7
57 | .byte 0x01, 0x10, 0xEF, 0x7E, 0xF4, 0x10, 0xEB, 0xBA // 0xFFE8-0xFFEF
58 | .byte 0xF6, 0xDA, 0x00, 0xBA, 0xF4, 0xC4, 0xF4, 0xDD // 0xFFF0-0xFFF7
59 | .byte 0x5D, 0xD0, 0xDB, 0x1F, 0x00, 0x00, 0xC0, 0xFF // 0xFFF8-0xFFFF
60 |
61 | .align 4
62 | apu_cycle1: .word 341 * 4 // Cycle timestamp of the last timer 0/1 update
63 | apu_cycle2: .word 341 * 4 // Cycle timestamp of the last timer 2 update
64 | apu_ocycles: .word INT_MIN:3 // Overflow timestamps for timers 0-2; INT_MIN marks a disabled timer
65 | apu_inputs: .hword 0:2 // Four bytes written by write_apuio0-3 and read by read_cpuio0-3
66 | apu_outputs: .hword 0:2 // Four bytes written by write_cpuio0-3 and read by read_apuio0-3
67 | apu_timers: .byte 0:3 // Current counter values of timers 0-2
68 | apu_tmdivs: .byte 0:3 // Divider values of timers 0-2
69 | apu_tmouts: .byte 0:3 // 4-bit overflow counts of timers 0-2; cleared when read
70 | apu_control: .byte 0xB0 // Control register; the initial value has bit 7 (ROM enabled) set
71 |
72 | .align 4
73 | apu_count: .hword 0xFFC0 // Loaded by apu_execute as the current PC; starts at the first apu_rom address
74 | apu_reg_x: .byte 0
75 | apu_reg_y: .byte 0
76 | apu_accum: .byte 0
77 | apu_stack: .byte 0xFF
78 | apu_flags: .byte 0
79 | apu_clock: .byte APU_CYCLE * 2 // Cycles subtracted from s3 on every apu_read8/apu_write8
80 |
81 | .align 4
82 | read_iomap: // Read handlers for 0xF0-0xFF, indexed by the low 4 address bits in io_read8
83 | .word read_unk, read_unk, read_dspaddr, read_dspdata // 0xF0-0xF3
84 | .word read_cpuio0, read_cpuio1, read_cpuio2, read_cpuio3 // 0xF4-0xF7
85 | .word read_unk, read_unk, read_unk, read_unk // 0xF8-0xFB
86 | .word read_unk, read_t0out, read_t1out, read_t2out // 0xFC-0xFF
87 |
88 | .align 4
89 | write_iomap: // Write handlers for 0xF0-0xFF, indexed by the low 4 address bits in io_write8
90 | .word write_unk, write_control, write_dspaddr, write_dspdata // 0xF0-0xF3
91 | .word write_cpuio0, write_cpuio1, write_cpuio2, write_cpuio3 // 0xF4-0xF7
92 | .word write_unk, write_unk, write_t0div, write_t1div // 0xF8-0xFB
93 | .word write_t2div, write_unk, write_unk, write_unk // 0xFC-0xFF
94 |
95 | .text
96 | .set noreorder
97 |
98 | .align 5
99 | apu_execute: // Runs the cached JIT block for the current PC, or branches away to compile one
100 | // Compile a new JIT block if one doesn't exist for the current PC
101 | lhu s0, apu_count // s0: current PC
102 | ble s3, a3, dsp_sample // Check if the DSP should run
103 | sll t0, s0, 2 // Word offset into jit_lookup
104 | lw t0, jit_lookup(t0) // t0: pointer to the block, or zero if there is none
105 | beqz t0, compile_block
106 |
107 | // Compile a new JIT block if memory changed at the start
108 | lhu t1, 0(t0) // Block header +0: jit_tags offset for the start (also runs in the delay slot above)
109 | lw t1, jit_tags(t1)
110 | lw t2, 4(t0) // Block header +4: tag value to compare against
111 | bne t1, t2, compile_block
112 |
113 | // Compile a new JIT block if memory changed at the end
114 | lhu t2, 2(t0) // Block header +2: jit_tags offset for the end
115 | lw t2, jit_tags(t2)
116 | lw t1, 8(t0) // Block header +8: tag value to compare against
117 | bne t1, t2, compile_block
118 |
119 | // Jump to a cached JIT block's code
120 | addi t0, t0, 12 // Code starts after the 12-byte header
121 | jr t0
122 | nop
123 |
124 | .align 5
125 | apu_read8: // a0: address - v0: value
126 | // Read a byte from APU memory
127 | srl t0, a0, 6 // Index of the address's 64-byte block
128 | lbu t1, apu_map(t0) // Read offset for that block
129 | andi t2, a0, 0xFFF0 // Equals 0xF0 only for addresses 0xF0-0xFF
130 | add t0, a0, t1
131 | lbu v0, apu_ram(t0)
132 |
133 | // Check if the address is an I/O port and decrease the cycle count
134 | lbu t0, apu_clock
135 | beq t2, 0xF0, io_read8
136 | sub s3, s3, t0 // Delay slot: runs whether or not the branch is taken
137 | read_unk: // Unknown I/O register read; do nothing
138 | jr ra // Delay slot is the andi below, which only changes t0
139 |
140 | io_read8:
141 | // Read from an I/O register in the lookup table
142 | andi t0, a0, 0xF
143 | sll t0, t0, 2 // Word offset
144 | lw t0, read_iomap(t0)
145 | jr t0 // The handler returns to the original caller through ra
146 | nop
147 |
148 | .align 5
149 | apu_write8: // a0: address, a1: value
150 | // Write a byte to APU memory
151 | sb a1, apu_ram(a0)
152 |
153 | // Increment the memory block's tag to indicate change
154 | srl t0, a0, 6 // Index of the address's 64-byte block
155 | sll t0, t0, 2 // Word offset into jit_tags
156 | lw t1, jit_tags(t0)
157 | addi t1, t1, 1
158 | sw t1, jit_tags(t0)
159 |
160 | // Check if the address is an I/O port and decrease the cycle count
161 | lbu t0, apu_clock
162 | andi t2, a0, 0xFFF0 // Equals 0xF0 only for addresses 0xF0-0xFF
163 | beq t2, 0xF0, io_write8
164 | sub s3, s3, t0 // Delay slot: runs whether or not the branch is taken
165 | write_unk:// Unknown I/O register write; do nothing
166 | jr ra // Delay slot is the andi below, which only changes t0
167 |
168 | io_write8:
169 | // Write to an I/O register in the lookup table
170 | andi t0, a0, 0xF
171 | sll t0, t0, 2 // Word offset
172 | lw t0, write_iomap(t0)
173 | jr t0 // The handler returns to the original caller through ra
174 | nop
175 |
176 | .align 5
177 | update_overflows: // Recomputes apu_ocycles for all three timers from the current control, counter and divider values
178 | // Never overflow timer 0 if it's disabled
179 | lbu t0, apu_control // t0: control value, kept for all three timers
180 | andi t1, t0, 0x1
181 | bnez t1, stamp_timer0
182 | li t1, INT_MIN // Delay slot: only stored when the timer is disabled
183 | sw t1, apu_ocycles + 0
184 | b overflow_timer1
185 | nop
186 |
187 | stamp_timer0:
188 | // Update the overflow timestamp for timer 0
189 | lbu t1, apu_timers + 0
190 | lbu t2, apu_tmdivs + 0
191 | addi t1, t1, 1
192 | sub t1, t2, t1 // divider - (counter + 1)
193 | andi t1, t1, 0xFF // Wrap to 8 bits
194 | addi t1, t1, 1 // Ticks until overflow
195 | li t2, APU_TIMER1 // Cycles per timer tick
196 | mult t1, t2 // Cycles until overflow
197 | lw t1, apu_cycle1
198 | mflo t2
199 | sub t1, t1, t2 // Timestamps decrease, so subtract from the last update time
200 | sw t1, apu_ocycles + 0
201 |
202 | overflow_timer1:
203 | // Never overflow timer 1 if it's disabled
204 | andi t1, t0, 0x2
205 | bnez t1, stamp_timer1
206 | li t1, INT_MIN // Delay slot: only stored when the timer is disabled
207 | sw t1, apu_ocycles + 4
208 | b overflow_timer2
209 | nop
210 |
211 | stamp_timer1:
212 | // Update the overflow timestamp for timer 1
213 | lbu t1, apu_timers + 1
214 | lbu t2, apu_tmdivs + 1
215 | addi t1, t1, 1
216 | sub t1, t2, t1 // divider - (counter + 1)
217 | andi t1, t1, 0xFF // Wrap to 8 bits
218 | addi t1, t1, 1 // Ticks until overflow
219 | li t2, APU_TIMER1 // Cycles per timer tick
220 | mult t1, t2 // Cycles until overflow
221 | lw t1, apu_cycle1 // Timer 1 shares timer 0's update timestamp
222 | mflo t2
223 | sub t1, t1, t2
224 | sw t1, apu_ocycles + 4
225 |
226 | overflow_timer2:
227 | // Never overflow timer 2 if it's disabled
228 | andi t1, t0, 0x4
229 | bnez t1, stamp_timer2
230 | li t1, INT_MIN // Delay slot: only stored when the timer is disabled
231 | sw t1, apu_ocycles + 8
232 | jr ra
233 | nop
234 |
235 | stamp_timer2:
236 | // Update the overflow timestamp for timer 2
237 | lbu t1, apu_timers + 2
238 | lbu t2, apu_tmdivs + 2
239 | addi t1, t1, 1
240 | sub t1, t2, t1 // divider - (counter + 1)
241 | andi t1, t1, 0xFF // Wrap to 8 bits
242 | addi t1, t1, 1 // Ticks until overflow
243 | li t2, APU_TIMER2 // Cycles per timer tick
244 | mult t1, t2 // Cycles until overflow
245 | lw t1, apu_cycle2
246 | mflo t2
247 | sub t1, t1, t2
248 | sw t1, apu_ocycles + 8
249 | jr ra
250 | nop
251 |
252 | .align 5
253 | update_timers: // Advances the enabled timers by the ticks elapsed since their last update; uses t0-t2 and t4-t6
254 | // Get how many timer 2 ticks passed since the last update
255 | lw t1, apu_cycle2
256 | sub t0, t1, s3 // Cycles since last update
257 | li t1, APU_TIMER2 // Cycles per timer tick
258 | blt t0, t1, early_exit // Less than one tick elapsed; leave everything untouched
259 | nop
260 | div t0, t1
261 | mflo t6 // Ticks passed
262 | mfhi t1 // Cycles since latest tick
263 | add t1, t1, s3 // New update timestamp: the time of the latest whole tick
264 | sw t1, apu_cycle2
265 |
266 | // Check if timer 2 is enabled
267 | lbu t5, apu_control // t5: control value, reused for timers 0 and 1 below
268 | andi t0, t5, 0x4
269 | beqz t0, update_timer0
270 | move t4, t6 // Ticks left
271 |
272 | loop_timer2:
273 | // Check if timer 2 will overflow
274 | lw t0, apu_ocycles + 8
275 | bgt s3, t0, inc_timer2 // Overflow timestamp not reached yet
276 | nop
277 |
278 | // Adjust the ticks left for after overflow
279 | lbu t1, apu_timers + 2
280 | lbu t2, apu_tmdivs + 2
281 | addi t1, t1, 1
282 | sub t1, t2, t1
283 | andi t1, t1, 0xFF
284 | addi t1, t1, 1 // Ticks until overflow
285 | sub t4, t4, t1
286 |
287 | // Overflow timer 2
288 | lbu t1, apu_tmouts + 2
289 | addi t1, t1, 1
290 | andi t1, t1, 0xF // The overflow count wraps at 4 bits
291 | sb t1, apu_tmouts + 2
292 | sb zero, apu_timers + 2
293 |
294 | // Update the overflow timestamp for next overflow
295 | addi t1, t2, -1 // t2 still holds the divider
296 | andi t1, t1, 0xFF
297 | addi t1, t1, 1 // Ticks until overflow
298 | li t2, APU_TIMER2 // Cycles per timer tick
299 | mult t1, t2 // Cycles until overflow
300 | mflo t2
301 | sub t0, t0, t2 // t0 still holds the previous overflow timestamp
302 | sw t0, apu_ocycles + 8
303 | b loop_timer2
304 | nop
305 |
306 | inc_timer2:
307 | // Increment timer 2 without overflow
308 | lbu t0, apu_timers + 2
309 | add t0, t0, t4
310 | sb t0, apu_timers + 2
311 |
312 | update_timer0:
313 | // Get how many timer 0/1 ticks passed since the last update
314 | lw t1, apu_cycle1
315 | sub t0, t1, s3 // Cycles since last update
316 | li t1, APU_TIMER1 // Cycles per timer tick
317 | blt t0, t1, early_exit // Less than one tick elapsed; timers 0 and 1 stay untouched
318 | nop
319 | div t0, t1
320 | mflo t6 // Ticks passed
321 | mfhi t1 // Cycles since latest tick
322 | add t1, t1, s3 // New update timestamp: the time of the latest whole tick
323 | sw t1, apu_cycle1
324 |
325 | // Check if timer 0 is enabled
326 | andi t0, t5, 0x1
327 | beqz t0, update_timer1
328 | move t4, t6 // Ticks left
329 |
330 | loop_timer0:
331 | // Check if timer 0 will overflow
332 | lw t0, apu_ocycles + 0
333 | bgt s3, t0, inc_timer0 // Overflow timestamp not reached yet
334 | nop
335 |
336 | // Adjust the ticks left for after overflow
337 | lbu t1, apu_timers + 0
338 | lbu t2, apu_tmdivs + 0
339 | addi t1, t1, 1
340 | sub t1, t2, t1
341 | andi t1, t1, 0xFF
342 | addi t1, t1, 1 // Ticks until overflow
343 | sub t4, t4, t1
344 |
345 | // Overflow timer 0
346 | lbu t1, apu_tmouts + 0
347 | addi t1, t1, 1
348 | andi t1, t1, 0xF // The overflow count wraps at 4 bits
349 | sb t1, apu_tmouts + 0
350 | sb zero, apu_timers + 0
351 |
352 | // Update the overflow timestamp for next overflow
353 | addi t1, t2, -1 // t2 still holds the divider
354 | andi t1, t1, 0xFF
355 | addi t1, t1, 1 // Ticks until overflow
356 | li t2, APU_TIMER1 // Cycles per timer tick
357 | mult t1, t2 // Cycles until overflow
358 | mflo t2
359 | sub t0, t0, t2 // t0 still holds the previous overflow timestamp
360 | sw t0, apu_ocycles + 0
361 | b loop_timer0
362 | nop
363 |
364 | inc_timer0:
365 | // Increment timer 0 without overflow
366 | lbu t0, apu_timers + 0
367 | add t0, t0, t4
368 | sb t0, apu_timers + 0
369 |
370 | update_timer1:
371 | // Check if timer 1 is enabled
372 | andi t0, t5, 0x2
373 | beqz t0, early_exit
374 | move t4, t6 // Ticks left
375 |
376 | loop_timer1:
377 | // Check if timer 1 will overflow
378 | lw t0, apu_ocycles + 4
379 | bgt s3, t0, inc_timer1 // Overflow timestamp not reached yet
380 | nop
381 |
382 | // Adjust the ticks left for after overflow
383 | lbu t1, apu_timers + 1
384 | lbu t2, apu_tmdivs + 1
385 | addi t1, t1, 1
386 | sub t1, t2, t1
387 | andi t1, t1, 0xFF
388 | addi t1, t1, 1 // Ticks until overflow
389 | sub t4, t4, t1
390 |
391 | // Overflow timer 1
392 | lbu t1, apu_tmouts + 1
393 | addi t1, t1, 1
394 | andi t1, t1, 0xF // The overflow count wraps at 4 bits
395 | sb t1, apu_tmouts + 1
396 | sb zero, apu_timers + 1
397 |
398 | // Update the overflow timestamp for next overflow
399 | addi t1, t2, -1 // t2 still holds the divider
400 | andi t1, t1, 0xFF
401 | addi t1, t1, 1 // Ticks until overflow
402 | li t2, APU_TIMER1 // Cycles per timer tick
403 | mult t1, t2 // Cycles until overflow
404 | mflo t2
405 | sub t0, t0, t2 // t0 still holds the previous overflow timestamp
406 | sw t0, apu_ocycles + 4
407 | b loop_timer1
408 | nop
409 |
410 | inc_timer1:
411 | // Increment timer 1 without overflow
412 | lbu t0, apu_timers + 1
413 | add t0, t0, t4
414 | sb t0, apu_timers + 1
415 | early_exit:
416 | jr ra
417 | nop
418 |
419 | .align 5
420 | offset_timers: // a0: cycles
421 | // Offset timer update timestamps for the next event
422 | lw t0, apu_cycle1
423 | lw t1, apu_cycle2
424 | add t0, t0, a0
425 | add t1, t1, a0
426 | sw t0, apu_cycle1
427 | sw t1, apu_cycle2
428 |
429 | // Offset timer 0's overflow timestamp if enabled
430 | la t0, apu_ocycles
431 | li t1, INT_MIN // Value that marks a disabled timer
432 | lw t2, 0(t0)
433 | beq t2, t1, offset1 // Disabled: skip the store so the marker stays intact
434 | add t2, t2, a0 // Delay slot: the sum is discarded when the branch is taken
435 | sw t2, 0(t0)
436 |
437 | offset1:
438 | // Offset timer 1's overflow timestamp if enabled
439 | lw t3, 4(t0)
440 | beq t3, t1, offset2 // Disabled: skip the store so the marker stays intact
441 | add t3, t3, a0 // Delay slot: the sum is discarded when the branch is taken
442 | sw t3, 4(t0)
443 |
444 | offset2:
445 | // Offset timer 2's overflow timestamp if enabled
446 | lw t4, 8(t0)
447 | beq t4, t1, offset3 // Disabled: skip the store so the marker stays intact
448 | add t4, t4, a0 // Delay slot: the sum is discarded when the branch is taken
449 | sw t4, 8(t0)
450 | offset3:
451 | jr ra
452 | nop
453 |
454 | .align 5
455 | read_cpuio0: // v0: value (read_iomap handler for 0xF4)
456 | // Read APU input communication value 0
457 | lbu v0, apu_inputs + 0
458 | jr ra
459 | nop
460 |
461 | .align 5
462 | read_cpuio1: // v0: value (read_iomap handler for 0xF5)
463 | // Read APU input communication value 1
464 | lbu v0, apu_inputs + 1
465 | jr ra
466 | nop
467 |
468 | .align 5
469 | read_cpuio2: // v0: value (read_iomap handler for 0xF6)
470 | // Read APU input communication value 2
471 | lbu v0, apu_inputs + 2
472 | jr ra
473 | nop
474 |
475 | .align 5
476 | read_cpuio3: // v0: value (read_iomap handler for 0xF7)
477 | // Read APU input communication value 3
478 | lbu v0, apu_inputs + 3
479 | jr ra
480 | nop
481 |
482 | .align 5
483 | read_t0out: // v0: value (read_iomap handler for 0xFD)
484 | // Update timers if timer 0 should overflow
485 | lw t0, apu_ocycles + 0
486 | bgt s3, t0, end_t0out // Overflow timestamp not reached; skip the update
487 | move gp, ra // Delay slot: keep the return address in gp across the call
488 | jal update_timers
489 | nop
490 |
491 | end_t0out:
492 | // Read timer 0's overflow count and reset it
493 | lbu v0, apu_tmouts + 0
494 | sb zero, apu_tmouts + 0
495 | jr gp // Return through the saved address
496 | nop
497 |
498 | .align 5
499 | read_t1out: // v0: value (read_iomap handler for 0xFE)
500 | // Update timers if timer 1 should overflow
501 | lw t0, apu_ocycles + 4
502 | bgt s3, t0, end_t1out // Overflow timestamp not reached; skip the update
503 | move gp, ra // Delay slot: keep the return address in gp across the call
504 | jal update_timers
505 | nop
506 |
507 | end_t1out:
508 | // Read timer 1's overflow count and reset it
509 | lbu v0, apu_tmouts + 1
510 | sb zero, apu_tmouts + 1
511 | jr gp // Return through the saved address
512 | nop
513 |
514 | .align 5
515 | read_t2out: // v0: value (read_iomap handler for 0xFF)
516 | // Update timers if timer 2 should overflow
517 | lw t0, apu_ocycles + 8
518 | bgt s3, t0, end_t2out // Overflow timestamp not reached; skip the update
519 | move gp, ra // Delay slot: keep the return address in gp across the call
520 | jal update_timers
521 | nop
522 |
523 | end_t2out:
524 | // Read timer 2's overflow count and reset it
525 | lbu v0, apu_tmouts + 2
526 | sb zero, apu_tmouts + 2
527 | jr gp // Return through the saved address
528 | nop
529 |
530 | .align 5
531 | write_control: // a1: value
532 | // Update timers and write to the APU control register
533 | move gp, ra // Keep the return address in gp across the call
534 | jal update_timers
535 | nop
536 | lbu t0, apu_control // t0: previous control value
537 | move ra, gp
538 | sb a1, apu_control
539 |
540 | // Update the APU ROM mapping
541 | andi t1, a1, 0x80 // ROM enabled
542 | srl t1, t1, 1 // 0x80 becomes the 0x40 read offset that reaches apu_rom
543 | sb t1, apu_map + 0x3FF // Entry for the block at 0xFFC0-0xFFFF
544 | lw t1, jit_tags + 0xFFC // Tag of the same block (0x3FF * 4)
545 | addi t1, t1, 1
546 | sw t1, jit_tags + 0xFFC
547 |
548 | // Reset timers 0-2 if bit 0-2 turns from 0 to 1
549 | xor t1, t0, a1 // Bits that changed
550 | and t1, t1, a1 // Of those, the bits that are now set
551 | andi t0, t1, 0x1
552 | beqz t0, timer1
553 | nop
554 | sb zero, apu_timers + 0
555 | sb zero, apu_tmouts + 0
556 | timer1:
557 | andi t0, t1, 0x2
558 | beqz t0, timer2
559 | nop
560 | sb zero, apu_timers + 1
561 | sb zero, apu_tmouts + 1
562 | timer2:
563 | andi t0, t1, 0x4
564 | beqz t0, reset0
565 | nop
566 | sb zero, apu_timers + 2
567 | sb zero, apu_tmouts + 2
568 |
569 | reset0:
570 | // Clear the first/last 2 input latches if bit 4/5 is set
571 | andi t0, a1, 0x10
572 | beqz t0, reset1
573 | nop
574 | sh zero, apu_inputs + 0
575 | reset1:
576 | andi t0, a1, 0x20
577 | beqz t0, reset2
578 | nop
579 | sh zero, apu_inputs + 2
580 | reset2:
581 | j update_overflows // NOTE(review): no explicit delay slot follows; whatever is assembled next (padding or the next routine) runs there - confirm this is intended
582 |
583 | .align 5
584 | write_cpuio0: // a1: value (write_iomap handler for 0xF4)
585 | // Write APU output communication value 0
586 | sb a1, apu_outputs + 0
587 | jr ra
588 | nop
589 |
590 | .align 5
591 | write_cpuio1: // a1: value (write_iomap handler for 0xF5)
592 | // Write APU output communication value 1
593 | sb a1, apu_outputs + 1
594 | jr ra
595 | nop
596 |
597 | .align 5
598 | write_cpuio2: // a1: value (write_iomap handler for 0xF6)
599 | // Write APU output communication value 2
600 | sb a1, apu_outputs + 2
601 | jr ra
602 | nop
603 |
604 | .align 5
605 | write_cpuio3: // a1: value (write_iomap handler for 0xF7)
606 | // Write APU output communication value 3
607 | sb a1, apu_outputs + 3
608 | jr ra
609 | nop
610 |
611 | .align 5
612 | write_t0div: // a1: value (write_iomap handler for 0xFA)
613 | // Update timers and write timer 0's divider value
614 | move gp, ra // Keep the return address in gp across the call
615 | jal update_timers
616 | nop
617 | sb a1, apu_tmdivs + 0
618 | j update_overflows // Recompute the overflow timestamps for the new divider
619 | move ra, gp // Delay slot: update_overflows returns straight to our caller
620 |
621 | .align 5
622 | write_t1div: // a1: value (write_iomap handler for 0xFB)
623 | // Update timers and write timer 1's divider value
624 | move gp, ra // Keep the return address in gp across the call
625 | jal update_timers
626 | nop
627 | sb a1, apu_tmdivs + 1
628 | j update_overflows // Recompute the overflow timestamps for the new divider
629 | move ra, gp // Delay slot: update_overflows returns straight to our caller
630 |
631 | .align 5
632 | write_t2div: // a1: value (write_iomap handler for 0xFC)
633 | // Update timers and write timer 2's divider value
634 | move gp, ra // Keep the return address in gp across the call
635 | jal update_timers
636 | nop
637 | sb a1, apu_tmdivs + 2
638 | j update_overflows // Recompute the overflow timestamps for the new divider
639 | move ra, gp // Delay slot: update_overflows returns straight to our caller
640 |
641 | .align 5
642 | read_apuio0: // v0: value
643 | // Read APU output communication value 0
644 | lbu v0, apu_outputs + 0
645 | jr ra
646 | nop
647 |
648 | .align 5
649 | read_apuio1: // v0: value
650 | // Read APU output communication value 1
651 | lbu v0, apu_outputs + 1
652 | jr ra
653 | nop
654 |
655 | .align 5
656 | read_apuio2: // v0: value
657 | // Read APU output communication value 2
658 | lbu v0, apu_outputs + 2
659 | jr ra
660 | nop
661 |
662 | .align 5
663 | read_apuio3: // v0: value
664 | // Read APU output communication value 3
665 | lbu v0, apu_outputs + 3
666 | jr ra
667 | nop
668 |
669 | .align 5
670 | write_apuio0: // a1: value (later returned by read_cpuio0)
671 | // Write APU input communication value 0
672 | sb a1, apu_inputs + 0
673 | jr ra
674 | nop
675 |
676 | .align 5
677 | write_apuio1: // a1: value (later returned by read_cpuio1)
678 | // Write APU input communication value 1
679 | sb a1, apu_inputs + 1
680 | jr ra
681 | nop
682 |
683 | .align 5
684 | write_apuio2: // a1: value (later returned by read_cpuio2)
685 | // Write APU input communication value 2
686 | sb a1, apu_inputs + 2
687 | jr ra
688 | nop
689 |
690 | .align 5
691 | write_apuio3: // a1: value (later returned by read_cpuio3)
692 | // Write APU input communication value 3
693 | sb a1, apu_inputs + 3
694 | jr ra
695 | nop
696 |
--------------------------------------------------------------------------------
/src/apu_address.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl apu_imm
23 | .globl apu_dira
24 | .globl apu_dirr
25 | .globl apu_drxa
26 | .globl apu_drxr
27 | .globl apu_drya
28 | .globl apu_dryr
29 | .globl apu_brxa
30 | .globl apu_brxr
31 | .globl apu_bxpa
32 | .globl apu_bxpr
33 | .globl apu_absa
34 | .globl apu_absr
35 | .globl apu_abxa
36 | .globl apu_abxr
37 | .globl apu_abya
38 | .globl apu_abyr
39 | .globl apu_idxa
40 | .globl apu_idxr
41 | .globl apu_idya
42 | .globl apu_idyr
43 | .globl apu_drb
44 | .globl apu_dri
45 | .globl apu_dr2
46 | .globl apu_bxy
47 |
48 | .text
49 | .set noreorder
50 |
51 | .align 5
52 | apu_imm: // #nn
53 | // Get the 8-bit immediate value (returned in v0)
54 | jal jit_read8
55 | nop
56 |
57 | // Emit code to load a value into the emitted V0 register
58 | li t0, ORI(V0, ZERO, 0) // Opcode template with an empty immediate field
59 | or t0, t0, v0 // Merge the byte just read into the immediate field
60 | j emit_op
61 | move ra, gp // Delay slot: emit_op returns to the address held in gp
62 |
63 | .align 5
64 | apu_dira: // aa (address only)
65 | // Choose whether to emit a memory read or just provide an address
66 | b apu_dir
67 | li t8, 0 // Delay slot: t8 = 0 skips the apu_read8 call below
68 | apu_dirr: // aa (address plus emitted read)
69 | li t8, 1 // t8 = 1 emits a call to apu_read8
70 |
71 | apu_dir:
72 | // Get the 8-bit immediate address
73 | jal jit_read8
74 | nop
75 |
76 | // Update the JIT register state
77 | jal load_flags
78 | nop
79 |
80 | // Emit code to load a value from a zero page address
81 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
82 | EMIT_OP ANDI(A0, A0, 0x100)
83 | beqz t8, dir_skip
84 | nop
85 | la t0, apu_read8
86 | jal emit_jal
87 | nop
88 | dir_skip:
89 | li t0, ORI(A0, A0, 0) // Emitted after the JAL (if any); presumably lands in its delay slot
90 | or t0, t0, v0 // Merge the address byte from jit_read8 into the immediate field
91 | j emit_op
92 | move ra, gp // Delay slot: emit_op returns to the address held in gp
93 |
94 | .align 5
95 | apu_drxa: // aa+X (address only)
96 | // Choose whether to emit a memory read or just provide an address
97 | b apu_drx
98 | li t8, 0 // Delay slot: t8 = 0 skips the apu_read8 call below
99 | apu_drxr: // aa+X (address plus emitted read)
100 | li t8, 1 // t8 = 1 emits a call to apu_read8
101 |
102 | apu_drx:
103 | // Get the 8-bit immediate address
104 | jal jit_read8
105 | nop
106 |
107 | // Update the JIT register state
108 | jal load_reg_x
109 | nop
110 | jal load_flags
111 | nop
112 |
113 | // Emit code to load a value from a zero page address plus register X
114 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
115 | EMIT_OP ANDI(A0, A0, 0x100)
116 | li t0, ADDI(T0, T9, 0) // Emitted T0 = T9 (register X) + address byte
117 | jal emit_op
118 | or t0, t0, v0 // Delay slot: merge the address byte into the immediate field
119 | EMIT_OP ANDI(T0, T0, 0xFF) // Keep the sum within the 256-byte page
120 | beqz t8, drx_skip
121 | nop
122 | la t0, apu_read8
123 | jal emit_jal
124 | nop
125 | drx_skip:
126 | li t0, OR(A0, A0, T0) // Emitted after the JAL (if any); presumably lands in its delay slot
127 | j emit_op
128 | move ra, gp // Delay slot: emit_op returns to the address held in gp
129 |
130 | .align 5
131 | apu_drya: // aa+Y (address only)
132 | // Choose whether to emit a memory read or just provide an address
133 | b apu_dry
134 | li t8, 0 // Delay slot: t8 = 0 skips the apu_read8 call below
135 | apu_dryr: // aa+Y (address plus emitted read)
136 | li t8, 1 // t8 = 1 emits a call to apu_read8
137 |
138 | apu_dry:
139 | // Get the 8-bit immediate address
140 | jal jit_read8
141 | nop
142 |
143 | // Update the JIT register state
144 | jal load_reg_y
145 | nop
146 | jal load_flags
147 | nop
148 |
149 | // Emit code to load a value from a zero page address plus register Y
150 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
151 | EMIT_OP ANDI(A0, A0, 0x100)
152 | li t0, ADDI(T0, T8, 0) // Emitted T0 = T8 (register Y) + address byte; T8 is an emitted operand, not the host t8 selector
153 | jal emit_op
154 | or t0, t0, v0 // Delay slot: merge the address byte into the immediate field
155 | EMIT_OP ANDI(T0, T0, 0xFF) // Keep the sum within the 256-byte page
156 | beqz t8, dry_skip
157 | nop
158 | la t0, apu_read8
159 | jal emit_jal
160 | nop
161 | dry_skip:
162 | li t0, OR(A0, A0, T0) // Emitted after the JAL (if any); presumably lands in its delay slot
163 | j emit_op
164 | move ra, gp // Delay slot: emit_op returns to the address held in gp
165 |
166 | .align 5
167 | apu_brxa: // (X) (address only)
168 | // Choose whether to emit a memory read or just provide an address
169 | b apu_brx
170 | li t8, 0 // Delay slot: t8 = 0 skips the apu_read8 call below
171 | apu_brxr: // (X) (address plus emitted read)
172 | li t8, 1 // t8 = 1 emits a call to apu_read8
173 |
174 | apu_brx:
175 | // Update the JIT register state
176 | jal load_reg_x
177 | nop
178 | jal load_flags
179 | nop
180 |
181 | // Emit code to load a value from register X as a zero page address
182 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
183 | EMIT_OP ANDI(A0, A0, 0x100)
184 | EMIT_OP ANDI(T9, T9, 0xFF) // Mask register X to 8 bits before using it as an address
185 | beqz t8, brx_skip
186 | nop
187 | la t0, apu_read8
188 | jal emit_jal
189 | nop
190 | brx_skip:
191 | li t0, OR(A0, A0, T9) // Emitted after the JAL (if any); presumably lands in its delay slot
192 | j emit_op
193 | move ra, gp // Delay slot: emit_op returns to the address held in gp
194 |
195 | .align 5
196 | apu_bxpa: // (X)+ (address only)
197 | // Choose whether to emit a memory read or just provide an address
198 | b apu_bxp
199 | li t8, 0 // Delay slot: t8 = 0 skips the apu_read8 call below
200 | apu_bxpr: // (X)+ (address plus emitted read)
201 | li t8, 1 // t8 = 1 emits a call to apu_read8
202 |
203 | apu_bxp:
204 | // Update the JIT register state
205 | jal load_reg_x
206 | ori s1, s1, FLAG_SX // Delay slot: set FLAG_SX (X is modified by the post-increment)
207 | jal load_flags
208 | nop
209 |
210 | // Emit code to load a value from register X as a zero page address with post-increment
211 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
212 | EMIT_OP ANDI(A0, A0, 0x100)
213 | EMIT_OP ANDI(T9, T9, 0xFF) // Mask register X to 8 bits before using it as an address
214 | EMIT_OP OR(A0, A0, T9) // Address is fully formed before the optional read
215 | beqz t8, bxp_skip
216 | nop
217 | la t0, apu_read8
218 | jal emit_jal
219 | nop
220 | bxp_skip:
221 | li t0, ADDI(T9, T9, 1) // Post-increment X; result is not re-masked here
222 | j emit_op
223 | move ra, gp // Delay slot: emit_op returns to the address held in gp
224 |
225 | .align 5
226 | apu_absa: // aaaa (address only)
227 | // Get the 16-bit immediate address
228 | jal jit_read8
229 | nop
230 | jal jit_read8
231 | move t7, v0 // Delay slot: keep the first byte (low half) in t7
232 | sll t0, v0, 8 // Second byte is the high half
233 | or t7, t7, t0
234 |
235 | // End the JIT block early if a self-modifying write is detected
236 | sgt t0, t7, a0 // t0 = address > a0 (a0 is presumably the current compile PC -- confirm)
237 | addi t2, t7, -2
238 | slt t1, t2, t9 // t1 = (address - 2) < t9 (t9 is presumably the block's end bound -- confirm)
239 | and t0, t0, t1
240 | beqz t0, abs_skip
241 | nop
242 | move t9, t2 // Both conditions hold: lower t9 to address - 2
243 |
244 | abs_skip:
245 | // Emit code to load an absolute address
246 | li t0, ORI(A0, ZERO, 0)
247 | or t0, t0, t7 // Merge the 16-bit address into the immediate field
248 | j emit_op
249 | move ra, gp // Delay slot: emit_op returns to the address held in gp
250 |
251 | .align 5
252 | apu_absr: // aaaa (address plus emitted read)
253 | // Get the 16-bit immediate address
254 | jal jit_read8
255 | nop
256 | jal jit_read8
257 | move t7, v0 // Delay slot: keep the first byte (low half) in t7
258 | sll t0, v0, 8 // Second byte is the high half
259 | or t7, t7, t0
260 |
261 | // Emit code to load a value from an absolute address
262 | la t0, apu_read8
263 | jal emit_jal
264 | nop
265 | li t0, ORI(A0, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
266 | or t0, t0, t7 // Merge the 16-bit address into the immediate field
267 | j emit_op
268 | move ra, gp // Delay slot: emit_op returns to the address held in gp
269 |
270 | .align 5
271 | apu_abxa: // aaaa+X (address only)
272 | // Choose whether to emit a memory read or just provide an address
273 | b apu_abx
274 | li t8, 0 // Delay slot: t8 = 0 skips the apu_read8 call below
275 | apu_abxr: // aaaa+X (address plus emitted read)
276 | li t8, 1 // t8 = 1 emits a call to apu_read8
277 |
278 | apu_abx:
279 | // Get the 16-bit immediate address
280 | jal jit_read8
281 | nop
282 | jal jit_read8
283 | move t7, v0 // Delay slot: keep the first byte (low half) in t7
284 | sll t0, v0, 8 // Second byte is the high half
285 |
286 | // Update the JIT register state
287 | jal load_reg_x
288 | or t7, t7, t0 // Delay slot: finish combining the 16-bit address
289 |
290 | // Emit code to load a value from an absolute address plus register X
291 | EMIT_OP ANDI(T9, T9, 0xFF) // Mask register X to 8 bits before adding
292 | li t0, ADDI(A0, T9, 0)
293 | jal emit_op
294 | or t0, t0, t7 // Delay slot: merge the 16-bit address into the immediate field
295 | beqz t8, abx_skip
296 | nop
297 | la t0, apu_read8
298 | jal emit_jal
299 | nop
300 | abx_skip:
301 | li t0, ANDI(A0, A0, 0xFFFF) // Wrap the sum to 16 bits; emitted after the JAL (if any), presumably its delay slot
302 | j emit_op
303 | move ra, gp // Delay slot: emit_op returns to the address held in gp
304 |
305 | .align 5
306 | apu_abya: // aaaa+Y (address only)
307 | // Choose whether to emit a memory read or just provide an address
308 | b apu_aby
309 | li t8, 0 // Delay slot: t8 = 0 skips the apu_read8 call below
310 | apu_abyr: // aaaa+Y (address plus emitted read)
311 | li t8, 1 // t8 = 1 emits a call to apu_read8
312 |
313 | apu_aby:
314 | // Get the 16-bit immediate address
315 | jal jit_read8
316 | nop
317 | jal jit_read8
318 | move t7, v0 // Delay slot: keep the first byte (low half) in t7
319 | sll t0, v0, 8 // Second byte is the high half
320 |
321 | // Update the JIT register state
322 | jal load_reg_y
323 | or t7, t7, t0 // Delay slot: finish combining the 16-bit address
324 |
325 | // Emit code to load a value from an absolute address plus register Y
326 | EMIT_OP ANDI(T8, T8, 0xFF) // Mask register Y to 8 bits; T8 is an emitted operand, not the host t8 selector
327 | li t0, ADDI(A0, T8, 0)
328 | jal emit_op
329 | or t0, t0, t7 // Delay slot: merge the 16-bit address into the immediate field
330 | beqz t8, aby_skip
331 | nop
332 | la t0, apu_read8
333 | jal emit_jal
334 | nop
335 | aby_skip:
336 | li t0, ANDI(A0, A0, 0xFFFF) // Wrap the sum to 16 bits; emitted after the JAL (if any), presumably its delay slot
337 | j emit_op
338 | move ra, gp // Delay slot: emit_op returns to the address held in gp
339 |
340 | .align 5
341 | apu_idxa: // [aa+X] (address only)
342 | // Choose whether to emit a memory read or just provide an address
343 | b apu_idx
344 | li t8, 0 // Delay slot: t8 = 0 skips the final apu_read8 call below
345 | apu_idxr: // [aa+X] (address plus emitted read)
346 | li t8, 1 // t8 = 1 emits the final call to apu_read8
347 |
348 | apu_idx:
349 | // Get the 8-bit immediate address
350 | jal jit_read8
351 | nop
352 |
353 | // Update the JIT register state
354 | jal load_reg_x
355 | nop
356 | jal load_flags
357 | nop
358 |
359 | // Emit code to form a zero page address plus register X
360 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
361 | EMIT_OP ANDI(A0, A0, 0x100)
362 | li t0, ADDI(T0, T9, 0)
363 | jal emit_op
364 | or t0, t0, v0 // Delay slot: merge the address byte into the immediate field
365 | EMIT_OP ANDI(T0, T0, 0xFF) // Keep the sum within the 256-byte page
366 | EMIT_OP OR(A0, A0, T0)
367 |
368 | // Emit code to load a value from a 16-bit address in the zero page
369 | la t0, apu_read8
370 | jal emit_jal
371 | nop
372 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot, so this first read fetches the pointer's high byte
373 | EMIT_OP SLL(S0, V0, 8) // Emitted S0 is used as scratch for the high half
374 | la t0, apu_read8
375 | jal emit_jal
376 | nop
377 | EMIT_OP ADDI(A0, A0, -1) // Presumably the JAL's delay slot: step back to read the low byte
378 | beqz t8, idx_skip
379 | nop
380 | la t0, apu_read8
381 | jal emit_jal
382 | nop
383 | idx_skip:
384 | li t0, OR(A0, S0, V0) // Combine high and low bytes into the final address
385 | j emit_op
386 | move ra, gp // Delay slot: emit_op returns to the address held in gp
387 |
388 | .align 5
389 | apu_idya: // [aa]+Y (address only)
390 | // Choose whether to emit a memory read or just provide an address
391 | b apu_idy
392 | li t8, 0 // Delay slot: t8 = 0 skips the final apu_read8 call below
393 | apu_idyr: // [aa]+Y (address plus emitted read)
394 | li t8, 1 // t8 = 1 emits the final call to apu_read8
395 |
396 | apu_idy:
397 | // Get the 8-bit immediate address
398 | jal jit_read8
399 | nop
400 |
401 | // Update the JIT register state
402 | jal load_reg_y
403 | nop
404 | jal load_flags
405 | nop
406 |
407 | // Emit code to form a zero page address
408 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
409 | EMIT_OP ANDI(A0, A0, 0x100)
410 | li t0, ORI(A0, A0, 0)
411 | jal emit_op
412 | or t0, t0, v0 // Delay slot: merge the address byte into the immediate field
413 |
414 | // Emit code to load a 16-bit address in the zero page
415 | la t0, apu_read8
416 | jal emit_jal
417 | nop
418 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot, so this first read fetches the pointer's high byte
419 | EMIT_OP SLL(S0, V0, 8) // Emitted S0 is used as scratch for the high half
420 | la t0, apu_read8
421 | jal emit_jal
422 | nop
423 | EMIT_OP ADDI(A0, A0, -1) // Presumably the JAL's delay slot: step back to read the low byte
424 | EMIT_OP OR(A0, S0, V0) // Combine high and low bytes into the base address
425 |
426 | // Emit code to load a value from an absolute address plus register Y
427 | EMIT_OP ANDI(T8, T8, 0xFF) // Mask register Y to 8 bits; T8 is an emitted operand, not the host t8 selector
428 | EMIT_OP ADD(A0, A0, T8)
429 | beqz t8, idy_skip
430 | nop
431 | la t0, apu_read8
432 | jal emit_jal
433 | nop
434 | idy_skip:
435 | li t0, ANDI(A0, A0, 0xFFFF) // Wrap the sum to 16 bits; emitted after the JAL (if any), presumably its delay slot
436 | j emit_op
437 | move ra, gp // Delay slot: emit_op returns to the address held in gp
438 |
439 | .align 5
440 | apu_drb: // aaa.b
441 | // Get the 16-bit immediate value
442 | jal jit_read8
443 | nop
444 | jal jit_read8
445 | move t7, v0 // Delay slot: keep the first byte (low half) in t7
446 | sll t0, v0, 8 // Second byte is the high half
447 | or t7, t7, t0
448 |
449 | // Get aaa as an address and b as a bitmask
450 | andi t2, t7, 0x1FFF // Low 13 bits: address
451 | li t0, 1
452 | srl t7, t7, 13 // High 3 bits: bit index
453 | sll t7, t0, t7 // t7 = 1 << bit index
454 |
455 | // Emit code to load a bitmask and a value from an address
456 | la t0, apu_read8
457 | jal emit_jal
458 | nop
459 | li t0, ORI(A0, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
460 | jal emit_op
461 | or t0, t0, t2 // Delay slot: merge the 13-bit address into the immediate field
462 | li t0, ORI(A1, ZERO, 0)
463 | or t0, t0, t7 // Merge the bitmask into the immediate field
464 | j emit_op
465 | move ra, gp // Delay slot: emit_op returns to the address held in gp
466 |
467 | .align 5
468 | apu_dri: // aa,#nn
469 | // Get the 8-bit immediate value and address
470 | jal jit_read8
471 | nop
472 | jal jit_read8
473 | move t7, v0 // Delay slot: the first byte read is the immediate value; v0 then holds the address byte
474 |
475 | // Update the JIT register state
476 | jal load_flags
477 | nop
478 |
479 | // Emit code to load a value
480 | li t0, ORI(V0, ZERO, 0)
481 | jal emit_op
482 | or t0, t0, t7 // Delay slot: merge the immediate value into the opcode
483 |
484 | // Emit code to form a zero page address
485 | EMIT_OP SLL(A0, S1, 3) // Together with the ANDI below: bit 0x20 of S1 becomes bit 0x100 (page base)
486 | EMIT_OP ANDI(A0, A0, 0x100)
487 | li t0, ORI(A0, A0, 0)
488 | or t0, t0, v0 // Merge the address byte into the immediate field
489 | j emit_op
490 | move ra, gp // Delay slot: emit_op returns to the address held in gp
491 |
492 | .align 5
493 | apu_dr2: // aa,bb
494 | // Get the two 8-bit immediate addresses
495 | jal jit_read8
496 | nop
497 | jal jit_read8
498 | move t7, v0 // Delay slot: keep the first address byte in t7; v0 then holds the second
499 |
500 | // Update the JIT register state
501 | jal load_flags
502 | nop
503 |
504 | // Emit code to load a value from a zero page address
505 | EMIT_OP SLL(S0, S1, 3) // Page base is kept in emitted S0 so it can be reused for the second address
506 | EMIT_OP ANDI(S0, S0, 0x100)
507 | la t0, apu_read8
508 | jal emit_jal
509 | nop
510 | li t0, ORI(A0, S0, 0) // Emitted after the JAL; presumably lands in its delay slot
511 | jal emit_op
512 | or t0, t0, t7 // Delay slot: merge the first address byte (the one that is read)
513 |
514 | // Emit code to form another zero page address
515 | li t0, ORI(A0, S0, 0)
516 | or t0, t0, v0 // Merge the second address byte
517 | j emit_op
518 | move ra, gp // Delay slot: emit_op returns to the address held in gp
519 |
520 | .align 5
521 | apu_bxy: // (X),(Y)
522 | // Update the JIT register state
523 | jal load_reg_x
524 | nop
525 | jal load_reg_y
526 | nop
527 | jal load_flags
528 | nop
529 |
530 | // Emit code to load a value from register Y as a zero page address
531 | EMIT_OP SLL(S0, S1, 3) // Page base is kept in emitted S0 so it can be reused for the X address
532 | EMIT_OP ANDI(S0, S0, 0x100)
533 | EMIT_OP ANDI(T8, T8, 0xFF) // Mask register Y to 8 bits before using it as an address
534 | la t0, apu_read8
535 | jal emit_jal
536 | nop
537 | EMIT_OP OR(A0, S0, T8) // Emitted after the JAL; presumably lands in its delay slot
538 | // Emit code to form a zero page address with register X, masked to 8 bits like Y above
539 | EMIT_OP ANDI(T9, T9, 0xFF)
540 | li t0, OR(A0, S0, T9)
541 | j emit_op
542 | move ra, gp // Delay slot: emit_op returns to the address held in gp
543 |
--------------------------------------------------------------------------------
/src/apu_control.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl apu_bpl
23 | .globl apu_bmi
24 | .globl apu_bvc
25 | .globl apu_bvs
26 | .globl apu_bcc
27 | .globl apu_bcs
28 | .globl apu_bne
29 | .globl apu_beq
30 | .globl apu_bbc1
31 | .globl apu_bbs1
32 | .globl apu_cbne
33 | .globl apu_dbnzy
34 | .globl apu_dbnzm
35 | .globl apu_bra
36 | .globl apu_jmp
37 | .globl apu_jmpm
38 | .globl apu_call
39 | .globl apu_tcall1
40 | .globl apu_pcall
41 | .globl apu_ret
42 | .globl apu_ret1
43 | .globl apu_brk
44 | .globl apu_clrp
45 | .globl apu_setp
46 | .globl apu_ei
47 | .globl apu_di
48 |
49 | .text
50 | .set noreorder
51 |
52 | .align 5
53 | apu_bpl: // BPL dest
54 | // Sign-extend the offset value and add it to the PC
55 | jal jit_read8
56 | nop
57 | sll v0, v0, 24
58 | sra v0, v0, 24
59 |
60 | // Update the JIT register state
61 | jal load_flags
62 | add v0, v0, a0 // Delay slot: v0 = branch target (a0 + signed offset)
63 | jal update_nz
64 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
65 |
66 | // Emit code to branch if the N flag is clear
67 | EMIT_OP ANDI(T0, S1, 0x80) // N
68 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when N is clear; the first sits in the delay slot
69 | li t0, ORI(S0, ZERO, 0)
70 | jal emit_op
71 | or t0, t0, v0 // Delay slot: first ORI loads the branch target
72 | li t0, ORI(S0, ZERO, 0)
73 | la ra, finish_block // emit_op returns straight into finish_block
74 | j emit_op
75 | or t0, t0, a0 // Delay slot: second ORI loads the fall-through address from a0
76 |
77 | .align 5
78 | apu_bmi: // BMI dest
79 | // Sign-extend the offset value and add it to the PC
80 | jal jit_read8
81 | nop
82 | sll v0, v0, 24
83 | sra v0, v0, 24
84 |
85 | // Update the JIT register state
86 | jal load_flags
87 | add v0, v0, a0 // Delay slot: v0 = branch target (a0 + signed offset)
88 | jal update_nz
89 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
90 |
91 | // Emit code to branch if the N flag is set
92 | EMIT_OP ANDI(T0, S1, 0x80) // N
93 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when N is clear; the first sits in the delay slot
94 | li t0, ORI(S0, ZERO, 0)
95 | jal emit_op
96 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
97 | li t0, ORI(S0, ZERO, 0)
98 | la ra, finish_block // emit_op returns straight into finish_block
99 | j emit_op
100 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
101 |
102 | .align 5
103 | apu_bvc: // BVC dest
104 | // Sign-extend the offset value and add it to the PC
105 | jal jit_read8
106 | nop
107 | sll v0, v0, 24
108 | sra v0, v0, 24
109 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
110 |
111 | // Update the JIT register state
112 | jal load_flags
113 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
114 |
115 | // Emit code to branch if the V flag is clear
116 | EMIT_OP ANDI(T0, S1, 0x40) // V
117 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when V is clear; the first sits in the delay slot
118 | li t0, ORI(S0, ZERO, 0)
119 | jal emit_op
120 | or t0, t0, v0 // Delay slot: first ORI loads the branch target
121 | li t0, ORI(S0, ZERO, 0)
122 | la ra, finish_block // emit_op returns straight into finish_block
123 | j emit_op
124 | or t0, t0, a0 // Delay slot: second ORI loads the fall-through address from a0
125 |
126 | .align 5
127 | apu_bvs: // BVS dest
128 | // Sign-extend the offset value and add it to the PC
129 | jal jit_read8
130 | nop
131 | sll v0, v0, 24
132 | sra v0, v0, 24
133 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
134 |
135 | // Update the JIT register state
136 | jal load_flags
137 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
138 |
139 | // Emit code to branch if the V flag is set
140 | EMIT_OP ANDI(T0, S1, 0x40) // V
141 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when V is clear; the first sits in the delay slot
142 | li t0, ORI(S0, ZERO, 0)
143 | jal emit_op
144 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
145 | li t0, ORI(S0, ZERO, 0)
146 | la ra, finish_block // emit_op returns straight into finish_block
147 | j emit_op
148 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
149 |
150 | .align 5
151 | apu_bcc: // BCC dest
152 | // Sign-extend the offset value and add it to the PC
153 | jal jit_read8
154 | nop
155 | sll v0, v0, 24
156 | sra v0, v0, 24
157 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
158 |
159 | // Update the JIT register state
160 | jal load_flags
161 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
162 |
163 | // Emit code to branch if the C flag is clear
164 | EMIT_OP ANDI(T0, S1, 0x1) // C
165 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when C is clear; the first sits in the delay slot
166 | li t0, ORI(S0, ZERO, 0)
167 | jal emit_op
168 | or t0, t0, v0 // Delay slot: first ORI loads the branch target
169 | li t0, ORI(S0, ZERO, 0)
170 | la ra, finish_block // emit_op returns straight into finish_block
171 | j emit_op
172 | or t0, t0, a0 // Delay slot: second ORI loads the fall-through address from a0
173 |
174 | .align 5
175 | apu_bcs: // BCS dest
176 | // Sign-extend the offset value and add it to the PC
177 | jal jit_read8
178 | nop
179 | sll v0, v0, 24
180 | sra v0, v0, 24
181 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
182 |
183 | // Update the JIT register state
184 | jal load_flags
185 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
186 |
187 | // Emit code to branch if the C flag is set
188 | EMIT_OP ANDI(T0, S1, 0x1) // C
189 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when C is clear; the first sits in the delay slot
190 | li t0, ORI(S0, ZERO, 0)
191 | jal emit_op
192 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
193 | li t0, ORI(S0, ZERO, 0)
194 | la ra, finish_block // emit_op returns straight into finish_block
195 | j emit_op
196 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
197 |
198 | .align 5
199 | apu_bne: // BNE dest
200 | // Sign-extend the offset value and add it to the PC
201 | jal jit_read8
202 | nop
203 | sll v0, v0, 24
204 | sra v0, v0, 24
205 |
206 | // Update the JIT register state
207 | jal load_flags
208 | add v0, v0, a0 // Delay slot: v0 = branch target (a0 + signed offset)
209 | jal update_nz
210 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
211 |
212 | // Emit code to branch if the Z flag is clear
213 | EMIT_OP ANDI(T0, S1, 0x2) // Z
214 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when Z is clear; the first sits in the delay slot
215 | li t0, ORI(S0, ZERO, 0)
216 | jal emit_op
217 | or t0, t0, v0 // Delay slot: first ORI loads the branch target
218 | li t0, ORI(S0, ZERO, 0)
219 | la ra, finish_block // emit_op returns straight into finish_block
220 | j emit_op
221 | or t0, t0, a0 // Delay slot: second ORI loads the fall-through address from a0
222 |
223 | .align 5
224 | apu_beq: // BEQ dest
225 | // Sign-extend the offset value and add it to the PC
226 | jal jit_read8
227 | nop
228 | sll v0, v0, 24
229 | sra v0, v0, 24
230 |
231 | // Update the JIT register state
232 | jal load_flags
233 | add v0, v0, a0 // Delay slot: v0 = branch target (a0 + signed offset)
234 | jal update_nz
235 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
236 |
237 | // Emit code to branch if the Z flag is set
238 | EMIT_OP ANDI(T0, S1, 0x2) // Z
239 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when Z is clear; the first sits in the delay slot
240 | li t0, ORI(S0, ZERO, 0)
241 | jal emit_op
242 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
243 | li t0, ORI(S0, ZERO, 0)
244 | la ra, finish_block // emit_op returns straight into finish_block
245 | j emit_op
246 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
247 |
248 | .align 5
249 | apu_bbc1: // BBC op.b,dest
250 | // Create a mask based on the opcode number
251 | srl t0, v1, 8 // NOTE(review): v1 presumably holds the opcode scaled by the table stride -- confirm
252 | li t1, 1
253 | sll t2, t1, t0 // t2 = 1 << bit index
254 |
255 | // Sign-extend the offset value and add it to the PC
256 | jal jit_read8
257 | ori s1, s1, FLAG_PC // Update the JIT register state
258 | sll v0, v0, 24
259 | sra v0, v0, 24
260 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
261 |
262 | // Emit code to branch if a bit in a memory value is clear
263 | li t0, ANDI(T0, V0, 0) // Emitted V0 presumably holds the operand read by the addressing handler
264 | jal emit_op
265 | or t0, t0, t2 // Delay slot: merge the bitmask into the immediate field
266 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when the bit is clear
267 | li t0, ORI(S0, ZERO, 0)
268 | jal emit_op
269 | or t0, t0, v0 // Delay slot: first ORI loads the branch target
270 | li t0, ORI(S0, ZERO, 0)
271 | la ra, finish_block // emit_op returns straight into finish_block
272 | j emit_op
273 | or t0, t0, a0 // Delay slot: second ORI loads the fall-through address from a0
274 |
275 | .align 5
276 | apu_bbs1: // BBS op.b,dest
277 | // Create a mask based on the opcode number
278 | srl t0, v1, 8 // NOTE(review): v1 presumably holds the opcode scaled by the table stride -- confirm
279 | li t1, 1
280 | sll t2, t1, t0 // t2 = 1 << bit index
281 |
282 | // Sign-extend the offset value and add it to the PC
283 | jal jit_read8
284 | ori s1, s1, FLAG_PC // Update the JIT register state
285 | sll v0, v0, 24
286 | sra v0, v0, 24
287 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
288 |
289 | // Emit code to branch if a bit in a memory value is set
290 | li t0, ANDI(T0, V0, 0) // Emitted V0 presumably holds the operand read by the addressing handler
291 | jal emit_op
292 | or t0, t0, t2 // Delay slot: merge the bitmask into the immediate field
293 | EMIT_OP BEQ(T0, ZERO, 2) // Presumably skips the second ORI when the bit is clear
294 | li t0, ORI(S0, ZERO, 0)
295 | jal emit_op
296 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
297 | li t0, ORI(S0, ZERO, 0)
298 | la ra, finish_block // emit_op returns straight into finish_block
299 | j emit_op
300 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
301 |
302 | .align 5
303 | apu_cbne: // CBNE op,dest
304 | // Sign-extend the offset value and add it to the PC
305 | jal jit_read8
306 | nop
307 | sll v0, v0, 24
308 | sra v0, v0, 24
309 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
310 |
311 | // Update the JIT register state
312 | jal load_accum
313 | ori s1, s1, FLAG_PC // Delay slot: set FLAG_PC in the host-side JIT state
314 |
315 | // Emit code to branch if the accumulator doesn't equal a memory value
316 | EMIT_OP ANDI(T7, T7, 0xFF) // Mask the accumulator to 8 bits before comparing
317 | EMIT_OP BEQ(T7, V0, 2) // Presumably skips the second ORI when the values are equal
318 | li t0, ORI(S0, ZERO, 0)
319 | jal emit_op
320 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
321 | li t0, ORI(S0, ZERO, 0)
322 | la ra, finish_block // emit_op returns straight into finish_block
323 | j emit_op
324 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
325 |
326 | .align 5
327 | apu_dbnzy: // DBNZ Y,dest
328 | // Sign-extend the offset value and add it to the PC
329 | jal jit_read8
330 | nop
331 | sll v0, v0, 24
332 | sra v0, v0, 24
333 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
334 |
335 | // Update the JIT register state
336 | jal load_reg_y
337 | ori s1, s1, FLAG_SY | FLAG_PC // Delay slot: set FLAG_SY (Y is modified) and FLAG_PC
338 |
339 | // Emit code to decrement register Y and branch if it's not zero
340 | EMIT_OP ANDI(T8, T8, 0xFF) // Mask register Y to 8 bits so the zero test is exact
341 | EMIT_OP ADDI(T8, T8, -1)
342 | EMIT_OP BEQ(T8, ZERO, 2) // Presumably skips the second ORI when Y reached zero
343 | li t0, ORI(S0, ZERO, 0)
344 | jal emit_op
345 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
346 | li t0, ORI(S0, ZERO, 0)
347 | la ra, finish_block // emit_op returns straight into finish_block
348 | j emit_op
349 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
350 |
351 | .align 5
352 | apu_dbnzm: // DBNZ op,dest
353 | // Sign-extend the offset value and add it to the PC
354 | jal jit_read8
355 | ori s1, s1, FLAG_PC // Update the JIT register state
356 | sll v0, v0, 24
357 | sra v0, v0, 24
358 | add v0, v0, a0 // v0 = branch target (a0 + signed offset)
359 |
360 | // Emit code to decrement a memory value and branch if it's not zero
361 | la t0, apu_write8
362 | jal emit_jal
363 | nop
364 | EMIT_OP ADDI(A1, V0, -1) // Presumably the JAL's delay slot: A1 = operand - 1 is the value written back
365 | EMIT_OP BEQ(A1, ZERO, 2) // NOTE(review): relies on A1 surviving apu_write8 -- confirm
366 | li t0, ORI(S0, ZERO, 0)
367 | jal emit_op
368 | or t0, t0, a0 // Delay slot: first ORI loads the fall-through address from a0
369 | li t0, ORI(S0, ZERO, 0)
370 | la ra, finish_block // emit_op returns straight into finish_block
371 | j emit_op
372 | or t0, t0, v0 // Delay slot: second ORI loads the branch target
373 |
374 | .align 5
375 | apu_bra: // BRA dest
376 | // Sign-extend the offset value and add it to the PC
377 | jal jit_read8
378 | ori s1, s1, FLAG_PC // Update the JIT register state
379 | sll t2, v0, 24
380 | sra t2, t2, 24
381 | add t2, t2, a0 // t2 = branch target (a0 + signed offset)
382 |
383 | // Emit code to branch unconditionally
384 | li t0, ORI(S0, ZERO, 0)
385 | la ra, finish_block // emit_op returns straight into finish_block
386 | j emit_op
387 | or t0, t0, t2 // Delay slot: merge the target into the immediate field
388 |
389 | .align 5
390 | apu_jmp: // JMP op
391 | // Emit code to jump to an address (copies emitted A0 into emitted S0)
392 | EMIT_OP OR(S0, ZERO, A0)
393 | j finish_block
394 | ori s1, s1, FLAG_PC // Update the JIT register state
395 |
396 | .align 5
397 | apu_jmpm: // JMP [op]
398 | // Emit code to jump to an address from memory
399 | EMIT_OP OR(S0, ZERO, V0) // Low byte: presumably already read into V0 by the addressing handler
400 | la t0, apu_read8
401 | jal emit_jal
402 | ori s1, s1, FLAG_PC // Update the JIT register state
403 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot: read the high byte at the next address
404 | EMIT_OP SLL(T0, V0, 8)
405 | EMIT_OP OR(S0, S0, T0) // S0 = (high << 8) | low
406 | j finish_block
407 | nop
408 |
409 | .align 5
410 | apu_call: // CALL op
411 | // Update the JIT register state
412 | jal load_stack
413 | ori s1, s1, FLAG_SS | FLAG_PC // Delay slot: set FLAG_SS (stack pointer is modified) and FLAG_PC
414 |
415 | // Emit code to jump and push the old program counter to the stack
416 | EMIT_OP OR(S0, ZERO, A0) // New PC comes from emitted A0
417 | EMIT_OP ANDI(A0, S2, 0xFF) // Stack address = 0x100 | (S2 & 0xFF)
418 | EMIT_OP ORI(A0, A0, 0x100)
419 | la t0, apu_write8
420 | jal emit_jal
421 | nop
422 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
423 | srl t2, a0, 8 // High byte of host a0 (presumably the return PC -- confirm)
424 | jal emit_op
425 | or t0, t0, t2
426 | EMIT_OP ADDI(A0, A0, -1)
427 | la t0, apu_write8
428 | jal emit_jal
429 | nop
430 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
431 | andi t2, a0, 0xFF // Low byte of host a0
432 | jal emit_op
433 | or t0, t0, t2
434 | EMIT_OP ADDI(S2, S2, -2) // Two bytes were pushed
435 | j finish_block
436 | nop
437 |
438 | .align 5
439 | apu_tcall1: // TCALL n
440 | // Create an address based on the opcode number
441 | srl t0, v1, 7 // NOTE(review): v1 presumably holds the opcode scaled by the table stride -- confirm
442 | sll t0, t0, 1 // Two bytes per vector entry
443 | li t1, 0xFFDE
444 | sub t7, t1, t0 // t7 = 0xFFDE - 2 * n
445 |
446 | // Update the JIT register state
447 | jal load_stack
448 | ori s1, s1, FLAG_SS | FLAG_PC // Delay slot: set FLAG_SS (stack pointer is modified) and FLAG_PC
449 |
450 | // Emit code to jump to an address from memory
451 | la t0, apu_read8
452 | jal emit_jal
453 | nop
454 | li t0, ORI(A0, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
455 | jal emit_op
456 | or t0, t0, t7 // Delay slot: merge the vector address into the immediate field
457 | EMIT_OP OR(S0, ZERO, V0) // Low byte of the new PC
458 | la t0, apu_read8
459 | jal emit_jal
460 | nop
461 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot: read the high byte next
462 | EMIT_OP SLL(T0, V0, 8)
463 | EMIT_OP OR(S0, S0, T0)
464 |
465 | // Emit code to push the old program counter to the stack
466 | EMIT_OP ANDI(A0, S2, 0xFF) // Stack address = 0x100 | (S2 & 0xFF)
467 | EMIT_OP ORI(A0, A0, 0x100)
468 | la t0, apu_write8
469 | jal emit_jal
470 | nop
471 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
472 | srl t2, a0, 8 // High byte of host a0 (presumably the return PC -- confirm)
473 | jal emit_op
474 | or t0, t0, t2
475 | EMIT_OP ADDI(A0, A0, -1)
476 | la t0, apu_write8
477 | jal emit_jal
478 | nop
479 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
480 | andi t2, a0, 0xFF // Low byte of host a0
481 | jal emit_op
482 | or t0, t0, t2
483 | EMIT_OP ADDI(S2, S2, -2) // Two bytes were pushed
484 | j finish_block
485 | nop
486 |
487 | .align 5
488 | apu_pcall: // PCALL uu
489 | // Create an address based on the parameter
490 | jal jit_read8
491 | nop
492 | ori t2, v0, 0xFF00 // t2 = 0xFF00 | uu
493 |
494 | // Update the JIT register state
495 | jal load_stack
496 | ori s1, s1, FLAG_SS | FLAG_PC // Delay slot: set FLAG_SS (stack pointer is modified) and FLAG_PC
497 |
498 | // Emit code to jump to an address in the highest page of RAM
499 | li t0, ORI(S0, ZERO, 0)
500 | jal emit_op
501 | or t0, t0, t2 // Delay slot: merge the target into the immediate field
502 |
503 | // Emit code to push the old program counter to the stack
504 | EMIT_OP ANDI(A0, S2, 0xFF) // Stack address = 0x100 | (S2 & 0xFF)
505 | EMIT_OP ORI(A0, A0, 0x100)
506 | la t0, apu_write8
507 | jal emit_jal
508 | nop
509 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
510 | srl t2, a0, 8 // High byte of host a0 (presumably the return PC -- confirm)
511 | jal emit_op
512 | or t0, t0, t2
513 | EMIT_OP ADDI(A0, A0, -1)
514 | la t0, apu_write8
515 | jal emit_jal
516 | nop
517 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
518 | andi t2, a0, 0xFF // Low byte of host a0
519 | jal emit_op
520 | or t0, t0, t2
521 | EMIT_OP ADDI(S2, S2, -2) // Two bytes were pushed
522 | j finish_block
523 | nop
524 |
525 | .align 5
526 | apu_ret: // RET
527 | // Update the JIT register state
528 | jal load_stack
529 | ori s1, s1, FLAG_SS | FLAG_PC // Delay slot: set FLAG_SS (stack pointer is modified) and FLAG_PC
530 |
531 | // Emit code to pop the program counter from the stack
532 | EMIT_OP ADDI(A0, S2, 1)
533 | EMIT_OP ANDI(A0, A0, 0xFF)
534 | la t0, apu_read8 // Load the routine's address with la, as every other emit_jal call site does
535 | jal emit_jal
536 | nop
537 | EMIT_OP ORI(A0, A0, 0x100) // Emitted after the JAL; presumably lands in its delay slot
538 | EMIT_OP OR(S0, ZERO, V0) // Low byte of the new PC
539 | la t0, apu_read8
540 | jal emit_jal
541 | nop
542 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot: read the high byte next
543 | EMIT_OP SLL(T0, V0, 8)
544 | EMIT_OP OR(S0, S0, T0)
545 | EMIT_OP ADDI(S2, S2, 2) // Two bytes were popped
546 | j finish_block
547 | nop
548 |
549 | .align 5
550 | apu_ret1: // RET1
551 | // Update the JIT register state
552 | jal load_stack
553 | ori s1, s1, FLAG_SS | FLAG_SF | FLAG_LF | FLAG_PC // Delay slot: set the stack, flags and PC state bits
554 | li t0, ~FLAG_NZ
555 | and s1, s1, t0 // Clear FLAG_NZ in the host-side state; the flags are replaced from the stack below
556 |
557 | // Emit code to pop the flags from the stack
558 | EMIT_OP ADDI(A0, S2, 1)
559 | EMIT_OP ANDI(A0, A0, 0xFF)
560 | la t0, apu_read8
561 | jal emit_jal
562 | nop
563 | EMIT_OP ORI(A0, A0, 0x100) // Emitted after the JAL; presumably lands in its delay slot
564 | EMIT_OP OR(S1, ZERO, V0) // Emitted S1 (flags) = popped byte
565 |
566 | // Emit code to pop the program counter from the stack
567 | la t0, apu_read8
568 | jal emit_jal
569 | nop
570 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot: advance to the PC's low byte
571 | EMIT_OP OR(S0, ZERO, V0)
572 | la t0, apu_read8
573 | jal emit_jal
574 | nop
575 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot: advance to the PC's high byte
576 | EMIT_OP SLL(T0, V0, 8)
577 | EMIT_OP OR(S0, S0, T0)
578 | EMIT_OP ADDI(S2, S2, 3) // Three bytes were popped
579 | j finish_block
580 | nop
581 |
582 | .align 5
583 | apu_brk: // BRK
584 | // Update the JIT register state
585 | jal load_stack
586 | ori s1, s1, FLAG_SS | FLAG_SF | FLAG_PC // Delay slot: set the stack, flags and PC state bits
587 | jal load_flags
588 | nop
589 | jal update_nz
590 | nop
591 |
592 | // Emit code to jump to an address stored at 0xFFDE
593 | la t0, apu_read8
594 | jal emit_jal
595 | nop
596 | EMIT_OP ORI(A0, ZERO, 0xFFDE) // Emitted after the JAL; presumably lands in its delay slot
597 | EMIT_OP OR(S0, ZERO, V0) // Low byte of the new PC
598 | la t0, apu_read8
599 | jal emit_jal
600 | nop
601 | EMIT_OP ADDI(A0, A0, 1) // Presumably the JAL's delay slot: read the high byte next
602 | EMIT_OP SLL(T0, V0, 8)
603 | EMIT_OP OR(S0, S0, T0)
604 |
605 | // Emit code to push the old program counter to the stack
606 | EMIT_OP ANDI(A0, S2, 0xFF) // Stack address = 0x100 | (S2 & 0xFF)
607 | EMIT_OP ORI(A0, A0, 0x100)
608 | la t0, apu_write8
609 | jal emit_jal
610 | nop
611 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
612 | srl t2, a0, 8 // High byte of host a0 (presumably the return PC -- confirm)
613 | jal emit_op
614 | or t0, t0, t2
615 | EMIT_OP ADDI(A0, A0, -1)
616 | la t0, apu_write8
617 | jal emit_jal
618 | nop
619 | li t0, ORI(A1, ZERO, 0) // Emitted after the JAL; presumably lands in its delay slot
620 | andi t2, a0, 0xFF // Low byte of host a0
621 | jal emit_op
622 | or t0, t0, t2
623 |
624 | // Emit code to push the flags to the stack and modify current ones
625 | EMIT_OP ADDI(A0, A0, -1)
626 | la t0, apu_write8
627 | jal emit_jal
628 | nop
629 | EMIT_OP OR(A1, ZERO, S1) // Flags are pushed before I and B are changed below
630 | EMIT_OP ANDI(S1, S1, 0xFB) // Clear I
631 | EMIT_OP ORI(S1, S1, 0x10) // Set B
632 | EMIT_OP ADDI(S2, S2, -3) // Three bytes were pushed
633 | j finish_block
634 | nop
635 |
636 | .align 5
637 | apu_clrp: // CLRP
638 | // Update the JIT register state
639 | jal load_flags
640 | ori s1, s1, FLAG_SF // Delay slot: set FLAG_SF (the flags are modified)
641 |
642 | // Emit code to clear the P flag (bit 0x20 of emitted S1)
643 | EMIT_OP ANDI(S1, S1, 0xDF)
644 | j finish_opcode
645 | nop
646 |
647 |
648 | .align 5
649 | apu_setp: // SETP
650 | // Update the JIT register state
651 | jal load_flags
652 | ori s1, s1, FLAG_SF // Delay slot: set FLAG_SF (the flags are modified)
653 |
654 | // Emit code to set the P flag (bit 0x20 of emitted S1)
655 | EMIT_OP ORI(S1, S1, 0x20)
656 | j finish_opcode
657 | nop
658 |
659 |
660 | .align 5
661 | apu_di: // DI
662 | // Update the JIT register state
663 | jal load_flags
664 | ori s1, s1, FLAG_SF // Delay slot: set FLAG_SF (the flags are modified)
665 |
666 | // Emit code to clear the I flag (bit 0x04 of emitted S1)
667 | EMIT_OP ANDI(S1, S1, 0xFB)
668 | j finish_opcode
669 | nop
670 |
671 |
672 | .align 5
673 | apu_ei: // EI
674 | // Update the JIT register state
675 | jal load_flags
676 | ori s1, s1, FLAG_SF // Delay slot: set FLAG_SF (the flags are modified)
677 |
678 | // Emit code to set the I flag (bit 0x04 of emitted S1)
679 | EMIT_OP ORI(S1, S1, 0x4)
680 | j finish_opcode
681 | nop
682 |
--------------------------------------------------------------------------------
/src/apu_emitter.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl jit_tags
23 | .globl jit_lookup
24 | .globl jit_pointer
25 |
26 | .globl jit_read8
27 | .globl compile_block
28 | .globl finish_opcode
29 | .globl finish_block
30 | .globl load_reg_x
31 | .globl load_reg_y
32 | .globl load_accum
33 | .globl load_stack
34 | .globl load_flags
35 | .globl queue_nz
36 | .globl update_nz
37 | .globl emit_op
38 | .globl emit_j
39 | .globl emit_jal
40 |
41 | .data
42 |
43 | .align 4
44 | jit_tags: .word 0:0x400 // 0x400 zero-initialized words
45 | jit_lookup: .word 0:0x10000 // 0x10000 zero-initialized words
46 | jit_pointer: .word JIT_BUFFER // Starts at JIT_BUFFER; presumably the next free emit position -- confirm
47 |
48 | .align 4
49 | jit_opcodes: // Lookup table indexed by opcode * 8: word 0 is the function next_opcode jumps to (addressing or operation), word 1 is loaded into gp (operation, or 0)
50 | .word next_opcode, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x00-0x03
51 | .word apu_dirr, apu_ora, apu_absr, apu_ora, apu_brxr, apu_ora, apu_idxr, apu_ora // 0x04-0x07
52 | .word apu_imm, apu_ora, apu_dr2, apu_orm, apu_drb, apu_or1a, apu_dirr, apu_aslm // 0x08-0x0B
53 | .word apu_absr, apu_aslm, apu_php, 0, apu_absr, apu_tset1, apu_brk, 0 // 0x0C-0x0F
54 | .word apu_bpl, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x10-0x13
55 | .word apu_drxr, apu_ora, apu_abxr, apu_ora, apu_abyr, apu_ora, apu_idyr, apu_ora // 0x14-0x17
56 | .word apu_dri, apu_orm, apu_bxy, apu_orm, apu_dirr, apu_decw, apu_drxr, apu_aslm // 0x18-0x1B
57 | .word apu_asla, 0, apu_decx, 0, apu_absr, apu_cmpx, apu_abxr, apu_jmpm // 0x1C-0x1F
58 | .word apu_clrp, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x20-0x23
59 | .word apu_dirr, apu_anda, apu_absr, apu_anda, apu_brxr, apu_anda, apu_idxr, apu_anda // 0x24-0x27
60 | .word apu_imm, apu_anda, apu_dr2, apu_andm, apu_drb, apu_or1b, apu_dirr, apu_rolm // 0x28-0x2B
61 | .word apu_absr, apu_rolm, apu_pha, 0, apu_dirr, apu_cbne, apu_bra, 0 // 0x2C-0x2F
62 | .word apu_bmi, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x30-0x33
63 | .word apu_drxr, apu_anda, apu_abxr, apu_anda, apu_abyr, apu_anda, apu_idyr, apu_anda // 0x34-0x37
64 | .word apu_dri, apu_andm, apu_bxy, apu_andm, apu_dirr, apu_incw, apu_drxr, apu_rolm // 0x38-0x3B
65 | .word apu_rola, 0, apu_incx, 0, apu_dirr, apu_cmpx, apu_absa, apu_call // 0x3C-0x3F
66 | .word apu_setp, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x40-0x43
67 | .word apu_dirr, apu_eora, apu_absr, apu_eora, apu_brxr, apu_eora, apu_idxr, apu_eora // 0x44-0x47
68 | .word apu_imm, apu_eora, apu_dr2, apu_eorm, apu_drb, apu_and1a, apu_dirr, apu_lsrm // 0x48-0x4B
69 | .word apu_absr, apu_lsrm, apu_phx, 0, apu_absr, apu_tclr1, apu_pcall, 0 // 0x4C-0x4F
70 | .word apu_bvc, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x50-0x53
71 | .word apu_drxr, apu_eora, apu_abxr, apu_eora, apu_abyr, apu_eora, apu_idyr, apu_eora // 0x54-0x57
72 | .word apu_dri, apu_eorm, apu_bxy, apu_eorm, apu_dirr, apu_cmpw, apu_drxr, apu_lsrm // 0x58-0x5B
73 | .word apu_lsra, 0, apu_movxa, 0, apu_absr, apu_cmpy, apu_absa, apu_jmp // 0x5C-0x5F
74 | .word apu_clrc, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x60-0x63
75 | .word apu_dirr, apu_cmpa, apu_absr, apu_cmpa, apu_brxr, apu_cmpa, apu_idxr, apu_cmpa // 0x64-0x67
76 | .word apu_imm, apu_cmpa, apu_dr2, apu_cmpm, apu_drb, apu_and1b, apu_dirr, apu_rorm // 0x68-0x6B
77 | .word apu_absr, apu_rorm, apu_phy, 0, apu_dirr, apu_dbnzm, apu_ret, 0 // 0x6C-0x6F
78 | .word apu_bvs, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x70-0x73
79 | .word apu_drxr, apu_cmpa, apu_abxr, apu_cmpa, apu_abyr, apu_cmpa, apu_idyr, apu_cmpa // 0x74-0x77
80 | .word apu_dri, apu_cmpm, apu_bxy, apu_cmpm, apu_dirr, apu_addw, apu_drxr, apu_rorm // 0x78-0x7B
81 | .word apu_rora, 0, apu_movax, 0, apu_dirr, apu_cmpy, apu_ret1, 0 // 0x7C-0x7F
82 | .word apu_setc, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x80-0x83
83 | .word apu_dirr, apu_adca, apu_absr, apu_adca, apu_brxr, apu_adca, apu_idxr, apu_adca // 0x84-0x87
84 | .word apu_imm, apu_adca, apu_dr2, apu_adcm, apu_drb, apu_eor1, apu_dirr, apu_decm // 0x88-0x8B
85 | .word apu_absr, apu_decm, apu_imm, apu_movy, apu_ppp, 0, apu_dri, apu_movm // 0x8C-0x8F
86 | .word apu_bcc, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x90-0x93
87 | .word apu_drxr, apu_adca, apu_abxr, apu_adca, apu_abyr, apu_adca, apu_idyr, apu_adca // 0x94-0x97
88 | .word apu_dri, apu_adcm, apu_bxy, apu_adcm, apu_dirr, apu_subw, apu_drxr, apu_decm // 0x98-0x9B
89 | .word apu_deca, 0, apu_movxs, 0, apu_div, 0, apu_xcn, 0 // 0x9C-0x9F
90 | .word apu_ei, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0xA0-0xA3
91 | .word apu_dirr, apu_sbca, apu_absr, apu_sbca, apu_brxr, apu_sbca, apu_idxr, apu_sbca // 0xA4-0xA7
92 | .word apu_imm, apu_sbca, apu_dr2, apu_sbcm, apu_drb, apu_mov1b, apu_dirr, apu_incm // 0xA8-0xAB
93 | .word apu_absr, apu_incm, apu_imm, apu_cmpy, apu_ppa, 0, apu_bxpa, apu_amov // 0xAC-0xAF
94 | .word apu_bcs, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0xB0-0xB3
95 | .word apu_drxr, apu_sbca, apu_abxr, apu_sbca, apu_abyr, apu_sbca, apu_idyr, apu_sbca // 0xB4-0xB7
96 | .word apu_dri, apu_sbcm, apu_bxy, apu_sbcm, apu_dirr, apu_movwya, apu_drxr, apu_incm // 0xB8-0xBB
97 | .word apu_inca, 0, apu_movsx, 0, apu_unk, 0, apu_bxpr, apu_mova // 0xBC-0xBF
98 | .word apu_di, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0xC0-0xC3
99 | .word apu_dira, apu_amov, apu_absa, apu_amov, apu_brxa, apu_amov, apu_idxa, apu_amov // 0xC4-0xC7
100 | .word apu_imm, apu_cmpx, apu_absa, apu_xmov, apu_drb, apu_mov1a, apu_dira, apu_ymov // 0xC8-0xCB
101 | .word apu_absa, apu_ymov, apu_imm, apu_movx, apu_ppx, 0, apu_mul, 0 // 0xCC-0xCF
102 | .word apu_bne, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0xD0-0xD3
103 | .word apu_drxa, apu_amov, apu_abxa, apu_amov, apu_abya, apu_amov, apu_idya, apu_amov // 0xD4-0xD7
104 | .word apu_dira, apu_xmov, apu_drya, apu_xmov, apu_dira, apu_movway, apu_drxa, apu_ymov // 0xD8-0xDB
105 | .word apu_decy, 0, apu_movay, 0, apu_drxr, apu_cbne, apu_unk, 0 // 0xDC-0xDF
106 | .word apu_clrv, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0xE0-0xE3
107 | .word apu_dirr, apu_mova, apu_absr, apu_mova, apu_brxr, apu_mova, apu_idxr, apu_mova // 0xE4-0xE7
108 | .word apu_imm, apu_mova, apu_absr, apu_movx, apu_drb, apu_not1, apu_dirr, apu_movy // 0xE8-0xEB
109 | .word apu_absr, apu_movy, apu_notc, 0, apu_ppy, 0, apu_unk, 0 // 0xEC-0xEF
110 | .word apu_beq, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0xF0-0xF3
111 | .word apu_drxr, apu_mova, apu_abxr, apu_mova, apu_abyr, apu_mova, apu_idyr, apu_mova // 0xF4-0xF7
112 | .word apu_dirr, apu_movx, apu_dryr, apu_movx, apu_dr2, apu_movm, apu_drxr, apu_movy // 0xF8-0xFB
113 | .word apu_incy, 0, apu_movya, 0, apu_dbnzy, 0, apu_unk, 0 // 0xFC-0xFF
114 |
115 | .text
116 | .set noreorder
117 |
118 | .align 5
119 | jit_read8: // a0: address (incremented on return) - v0: value; clobbers t0-t1 and subtracts apu_clock from s2
120 | // Read a byte from memory at compile time, count cycles, and increment the address
121 | lbu v0, apu_clock // Byte subtracted from the block's cycle counter for each access
122 | srl t0, a0, 6 // Index of the 64-byte memory block containing the address
123 | lbu t1, apu_map(t0) // Per-block map byte, used below as an offset into apu_ram
124 | sub s2, s2, v0
125 | add t0, a0, t1
126 | lbu v0, apu_ram(t0)
127 | jr ra
128 | addi a0, a0, 1 // Delay slot: advance to the next byte
129 |
130 | .align 5
131 | compile_block: // s0: PC to compile from - sets up a0 (read address), a1 (JIT write pointer), a2 (bounds), s1, s2, t9, then falls into next_opcode
132 | // Round the JIT pointer up to a 32-byte boundary that leaves room for the 12-byte header before the code
133 | lw t0, jit_pointer
134 | addi t0, t0, 0x1F + 12
135 | andi t1, t0, 0x1F
136 | sub a1, t0, t1 // Pointer
137 | addi t0, a1, -12 // Header starts 12 bytes before the code
138 | sw t0, jit_pointer
139 |
140 | // Initialize values for tracking JIT block state
141 | move s1, zero // Status
142 | move s2, zero // Cycles
143 | move a0, s0 // PC
144 | addi t9, a0, BLOCK_SIZE // Limit
145 |
146 | // Check JIT bounds (restarting via reset_buffer if exceeded) and invalidate data cache for the header
147 | li a2, ROM_BUFFER // Bounds
148 | bgeu a1, a2, reset_buffer
149 | srl t1, a0, 6 // Delay slot: index of the 64-byte memory block holding the PC
150 | cache 0x11, CACHED(0)(t0)
151 |
152 | // Set the start memory block and tag in the JIT header
153 | sll t1, t1, 2 // Block index * 4, the byte offset into jit_tags
154 | sh t1, 0(t0) // Header +0: start block offset
155 | lw t1, jit_tags(t1)
156 | sw t1, 4(t0) // Header +4: tag of the start block
157 |
158 | next_opcode: // Also the table entry for opcode 0x00, which therefore emits nothing
159 | // Read an opcode from memory and jump to its functions
160 | jal jit_read8
161 | nop
162 | sll v1, v0, 3 // Each table entry is two words (8 bytes)
163 | lw t1, jit_opcodes + 0(v1) // First function, jumped to below
164 | lw gp, jit_opcodes + 4(v1) // Second function (or 0), left in gp for the first one to use
165 | jr t1
166 | nop
167 |
168 | .align 5
169 | finish_opcode: // a0: address after the compiled opcode, t9: block limit - falls through to finish_block at the limit
170 | // Compile another opcode if the limit hasn't been reached
171 | blt a0, t9, next_opcode
172 | nop
173 |
174 | finish_block: // Emits the block epilogue, registers the block in jit_lookup, and jumps into the new code
175 | // Update NZ flags at the end of a block
176 | jal update_nz
177 | nop
178 |
179 | // Emit code to store register X if enabled
180 | andi t0, s1, FLAG_SX
181 | beqz t0, skip_sx
182 | li t3, SB(T9, 0, 0) // Delay slot: store opcode template, harmless when skipped
183 | la t4, apu_reg_x
184 | jal full_address
185 | nop
186 |
187 | skip_sx:
188 | // Emit code to store register Y if enabled
189 | andi t0, s1, FLAG_SY
190 | beqz t0, skip_sy
191 | li t3, SB(T8, 0, 0)
192 | la t4, apu_reg_y
193 | jal full_address
194 | nop
195 |
196 | skip_sy:
197 | // Emit code to store the accumulator if enabled
198 | andi t0, s1, FLAG_SA
199 | beqz t0, skip_sa
200 | li t3, SB(T7, 0, 0)
201 | la t4, apu_accum
202 | jal full_address
203 | nop
204 |
205 | skip_sa:
206 | // Emit code to store the stack pointer if enabled
207 | andi t0, s1, FLAG_SS
208 | beqz t0, skip_ss
209 | li t3, SB(S2, 0, 0)
210 | la t4, apu_stack
211 | jal full_address
212 | nop
213 |
214 | skip_ss:
215 | // Emit code to store the flags if enabled
216 | andi t0, s1, FLAG_SF
217 | beqz t0, skip_sf
218 | li t3, SB(S1, 0, 0)
219 | la t4, apu_flags
220 | jal full_address
221 | nop
222 |
223 | skip_sf:
224 | // Emit code to load the program counter value unless disabled (FLAG_PC means the block already set S0 itself)
225 | andi t0, s1, FLAG_PC
226 | bnez t0, skip_pc
227 | li t0, ORI(S0, ZERO, 0)
228 | jal emit_op
229 | or t0, t0, a0 // Delay slot: immediate is the address after the last compiled opcode
230 |
231 | skip_pc:
232 | // Emit code to store the program counter (a 16-bit store of S0 to apu_count)
233 | la t4, apu_count
234 | jal full_address
235 | li t3, SH(S0, 0, 0)
236 |
237 | // Emit code to adjust APU cycle count and return to the main loop (the ADDI lands in the emitted jump's delay slot)
238 | la t0, cpu_execute
239 | jal emit_j
240 | andi s2, s2, 0xFFFF // Delay slot: truncate the cycle total to a 16-bit immediate
241 | li t0, ADDI(S3, S3, 0)
242 | jal emit_op
243 | or t0, t0, s2
244 |
245 | // Update the JIT pointer and use its old value for block lookup
246 | lw t0, jit_pointer // Header address of the block just compiled
247 | sw a1, jit_pointer // Next block starts after the emitted code
248 | sll t1, s0, 2 // jit_lookup holds one word per starting PC
249 | lui t2, 0x2000
250 | sub t2, t0, t2 // Cached
251 | sw t2, jit_lookup(t1)
252 |
253 | // Set the end memory block and tag in the JIT header
254 | srl t1, a0, 6
255 | sll t1, t1, 2
256 | sh t1, 2(t0) // Header +2: end block offset
257 | lw t1, jit_tags(t1)
258 |
259 | // Jump to the finished JIT block's code, which starts after the 12-byte header
260 | addi t2, t2, 12
261 | jr t2
262 | sw t1, 8(t0) // Delay slot: header +8 is the tag of the end block
263 |
264 | .align 5
265 | reset_buffer: // Reached when the JIT buffer is full; discards every compiled block and recompiles from s0
266 | // Clear all block lookup pointers: zero every word from jit_lookup up to, but not including, jit_pointer
267 | la t0, jit_lookup
268 | la t1, jit_pointer // Loop end; relies on jit_pointer being placed after jit_lookup in memory
269 | reset_loop:
270 | addi t0, t0, 4
271 | bne t0, t1, reset_loop
272 | sw zero, -4(t0) // Delay slot: clear the word just stepped over
273 |
274 | // Reset the JIT pointer and restart compilation
275 | li t0, JIT_BUFFER
276 | sw t0, jit_pointer
277 | j compile_block
278 | nop
279 |
280 | .align 5
281 | apu_unk: // Handler for opcodes without an implementation; ends the block
282 | // Emit code to loop infinitely for unimplemented opcodes by setting the PC back to the opcode itself
283 | ori s1, s1, FLAG_PC // Tell finish_block not to emit its own PC load
284 | li t0, ORI(S0, ZERO, 0)
285 | addi t2, a0, -1 // a0 is already past the opcode byte
286 | andi t2, t2, 0xFFFF
287 | la ra, finish_block // emit_op returns into finish_block
288 | j emit_op
289 | or t0, t0, t2 // Delay slot: insert the opcode's address as the immediate
290 |
291 | .align 5
292 | full_address: // t3: opcode, t4: address - clobbers t0-t2, t4, t6; returns to the caller through emit_op
293 | // Adjust the address for signed offsets: the emitted 16-bit offset is sign-extended, so add 0x10000 when bit 15 is set
294 | andi t0, t4, 0x8000
295 | sll t0, t0, 1
296 | add t4, t4, t0
297 |
298 | // Emit code for accessing a 32-bit memory address: a LUI of the upper half into AT, then the opcode with AT as base
299 | move t6, ra // Preserve the return address across the emit_op call
300 | li t0, LUI(AT_, 0)
301 | srl t1, t4, 16
302 | jal emit_op
303 | or t0, t0, t1 // Delay slot: insert the upper half as the LUI immediate
305 | li t1, AT_ << 21 // Base register field
306 | andi t2, t4, 0xFFFF // Lower half as the offset field
307 | or t0, t1, t2
308 | move ra, t6
309 | j emit_op
310 | or t0, t0, t3 // Delay slot: merge in the caller's opcode template
311 |
312 | .align 5
313 | load_reg_x: // Emits a load of register X into T9 unless FLAG_LX is already set in s1
314 | // Check if register X has already been loaded
315 | andi t0, s1, FLAG_LX
316 | beqz t0, do_lx
317 | ori s1, s1, FLAG_LX // Delay slot: mark X as loaded either way
318 | jr ra // No explicit delay slot: the first instruction assembled at do_lx runs in it, so t3 may be overwritten on this path too
319 |
320 | do_lx:
321 | // Emit code to load register X if needed
322 | li t3, LBU(T9, 0, 0)
323 | la t4, apu_reg_x
324 | j full_address // Tail call: full_address returns to our caller
325 | nop
326 |
327 | .align 5
328 | load_reg_y: // Emits a load of register Y into T8 unless FLAG_LY is already set in s1
329 | // Check if register Y has already been loaded
330 | andi t0, s1, FLAG_LY
331 | beqz t0, do_ly
332 | ori s1, s1, FLAG_LY // Delay slot: mark Y as loaded either way
333 | jr ra // No explicit delay slot: the first instruction assembled at do_ly runs in it, so t3 may be overwritten on this path too
334 |
335 | do_ly:
336 | // Emit code to load register Y if needed
337 | li t3, LBU(T8, 0, 0)
338 | la t4, apu_reg_y
339 | j full_address // Tail call: full_address returns to our caller
340 | nop
341 |
342 | .align 5
343 | load_accum: // Emits a load of the accumulator into T7 unless FLAG_LA is already set in s1
344 | // Check if the accumulator has already been loaded
345 | andi t0, s1, FLAG_LA
346 | beqz t0, do_la
347 | ori s1, s1, FLAG_LA // Delay slot: mark the accumulator as loaded either way
348 | jr ra // No explicit delay slot: the first instruction assembled at do_la runs in it, so t3 may be overwritten on this path too
349 |
350 | do_la:
351 | // Emit code to load the accumulator if needed
352 | li t3, LBU(T7, 0, 0)
353 | la t4, apu_accum
354 | j full_address // Tail call: full_address returns to our caller
355 | nop
356 |
357 | .align 5
358 | load_stack: // Emits a load of the stack pointer into S2 unless FLAG_LS is already set in s1
359 | // Check if the stack pointer has already been loaded
360 | andi t0, s1, FLAG_LS
361 | beqz t0, do_ls
362 | ori s1, s1, FLAG_LS // Delay slot: mark the stack pointer as loaded either way
363 | jr ra // No explicit delay slot: the first instruction assembled at do_ls runs in it, so t3 may be overwritten on this path too
364 |
365 | do_ls:
366 | // Emit code to load the stack pointer if needed
367 | li t3, LBU(S2, 0, 0)
368 | la t4, apu_stack
369 | j full_address // Tail call: full_address returns to our caller
370 | nop
371 |
372 | .align 5
373 | load_flags: // Emits a load of the flags into S1 unless FLAG_LF is already set in s1
374 | // Check if the flags have already been loaded
375 | andi t0, s1, FLAG_LF
376 | beqz t0, do_lf
377 | ori s1, s1, FLAG_LF // Delay slot: mark the flags as loaded either way
378 | jr ra // No explicit delay slot: the first instruction assembled at do_lf runs in it, so t3 may be overwritten on this path too
379 |
380 | do_lf:
381 | // Emit code to load the flags if needed
382 | li t3, LBU(S1, 0, 0)
383 | la t4, apu_flags
384 | j full_address // Tail call: full_address returns to our caller
385 | nop
386 |
387 | .align 5
388 | queue_nz: // t1: number of the emitted-code register holding the value
389 | // Emit code to save a value (masked to 8 bits, into A2) for setting NZ flags later
390 | ori s1, s1, FLAG_NZ // Remember that update_nz has work to do
391 | li t0, ANDI(A2, 0, 0xFF)
392 | sll t1, t1, 21 // Move the register number into the source register field
393 | j emit_op // Tail call: emit_op returns to our caller
394 | or t0, t0, t1
395 |
396 | .align 5
397 | update_nz: // Emits the deferred NZ flag update if queue_nz recorded one; clobbers t7 and whatever load_flags and emit_op clobber
398 | // Check if a value is queued for setting NZ flags
399 | andi t0, s1, FLAG_NZ
400 | bnez t0, do_nz
401 | move t7, ra // Delay slot: keep the return address for the do_nz path
402 | jr ra
403 | nop
404 |
405 | do_nz:
406 | // Emit code to update NZ flags if needed: clear bits 0x80 and 0x02, set 0x02 when A2 is zero, and copy bit 7 of A2
407 | xori s1, s1, FLAG_NZ // Clear the pending bit (known to be set here)
408 | jal load_flags
409 | ori s1, s1, FLAG_SF // Delay slot: flags are modified, so they must be stored at block end
410 | EMIT_OP ANDI(S1, S1, 0x7D)
411 | EMIT_OP SLT(T6, ZERO, A2)
412 | EMIT_OP XORI(T6, T6, 0x1)
413 | EMIT_OP SLL(T6, T6, 1)
414 | EMIT_OP OR(S1, S1, T6)
415 | EMIT_OP ANDI(T6, A2, 0x80)
416 | li t0, OR(S1, S1, T6)
417 | j emit_op // Tail call: emit_op returns to our caller
418 | move ra, t7
419 |
420 | .align 5
421 | emit_op: // t0: opcode - a1: JIT write pointer (advanced by 4), a2: buffer bounds
422 | // Write to the JIT buffer, invalidate caches, and reset on overflow
423 | sw t0, (a1)
424 | cache 0x10, CACHED(0)(a1) // Instruction cache hit-invalidate for the written word
425 | cache 0x11, CACHED(0)(a1) // Data cache hit-invalidate for the written word
426 | addi a1, a1, 4
427 | beq a1, a2, reset_buffer // Out of space: discard all blocks and restart compilation (does not return to the caller)
428 | nop
429 | jr ra
430 | nop
431 |
432 | .align 5
433 | emit_j: // t0: target - clobbers t0-t1
434 | // Emit a "j" opcode for the JIT: 26-bit word index of the target with major opcode 0x02
435 | srl t0, t0, 2
436 | li t1, 0x3FFFFFF
437 | and t0, t0, t1
438 | lui t1, 0x0800 // 0x02 << 26
439 | j emit_op // Tail call: emit_op returns to our caller
440 | or t0, t0, t1
441 |
442 | .align 5
443 | emit_jal: // t0: target - clobbers t0-t1
444 | // Emit a "jal" opcode for the JIT: 26-bit word index of the target with major opcode 0x03
445 | srl t0, t0, 2
446 | li t1, 0x3FFFFFF
447 | and t0, t0, t1
448 | lui t1, 0x0C00 // 0x03 << 26
449 | j emit_op // Tail call: emit_op returns to our caller
450 | or t0, t0, t1
451 |
--------------------------------------------------------------------------------
/src/apu_transfer.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see https://www.gnu.org/licenses/.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl apu_movax
23 | .globl apu_movxa
24 | .globl apu_movay
25 | .globl apu_movya
26 | .globl apu_movxs
27 | .globl apu_movsx
28 | .globl apu_mova
29 | .globl apu_movx
30 | .globl apu_movy
31 | .globl apu_movwya
32 | .globl apu_movm
33 | .globl apu_amov
34 | .globl apu_xmov
35 | .globl apu_ymov
36 | .globl apu_movway
37 | .globl apu_pha
38 | .globl apu_phx
39 | .globl apu_phy
40 | .globl apu_php
41 | .globl apu_ppa
42 | .globl apu_ppx
43 | .globl apu_ppy
44 | .globl apu_ppp
45 |
46 | .text
47 | .set noreorder
48 |
49 | .align 5
50 | apu_movax: // MOV A,X
51 | // Update the JIT register state: X must be loaded; A is overwritten, so mark it loaded and needing a store
52 | jal load_reg_x
53 | ori s1, s1, FLAG_SA | FLAG_LA
54 |
55 | // Emit code to move a value to the accumulator from register X and set flags
56 | EMIT_OP OR(T7, ZERO, T9)
57 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
58 | j queue_nz
59 | li t1, T7 // Delay slot: register whose value determines NZ
60 |
61 | .align 5
62 | apu_movxa: // MOV X,A
63 | // Update the JIT register state: A must be loaded; X is overwritten, so mark it loaded and needing a store
64 | jal load_accum
65 | ori s1, s1, FLAG_SX | FLAG_LX
66 |
67 | // Emit code to move a value to register X from the accumulator and set flags
68 | EMIT_OP OR(T9, ZERO, T7)
69 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
70 | j queue_nz
71 | li t1, T9 // Delay slot: register whose value determines NZ
72 |
73 | .align 5
74 | apu_movay: // MOV A,Y
75 | // Update the JIT register state: Y must be loaded; A is overwritten, so mark it loaded and needing a store
76 | jal load_reg_y
77 | ori s1, s1, FLAG_SA | FLAG_LA
78 |
79 | // Emit code to move a value to the accumulator from register Y and set flags
80 | EMIT_OP OR(T7, ZERO, T8)
81 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
82 | j queue_nz
83 | li t1, T7 // Delay slot: register whose value determines NZ
84 |
85 | .align 5
86 | apu_movya: // MOV Y,A
87 | // Update the JIT register state: A must be loaded; Y is overwritten, so mark it loaded and needing a store
88 | jal load_accum
89 | ori s1, s1, FLAG_SY | FLAG_LY
90 |
91 | // Emit code to move a value to register Y from the accumulator and set flags
92 | EMIT_OP OR(T8, ZERO, T7)
93 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
94 | j queue_nz
95 | li t1, T8 // Delay slot: register whose value determines NZ
96 |
97 | .align 5
98 | apu_movxs: // MOV X,SP
99 | // Update the JIT register state: SP must be loaded; X is overwritten, so mark it loaded and needing a store
100 | jal load_stack
101 | ori s1, s1, FLAG_SX | FLAG_LX
102 |
103 | // Emit code to move a value to register X from the stack pointer and set flags
104 | EMIT_OP OR(T9, ZERO, S2)
105 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
106 | j queue_nz
107 | li t1, T9 // Delay slot: register whose value determines NZ
108 |
109 | .align 5
110 | apu_movsx: // MOV SP,X
111 | // Update the JIT register state: X must be loaded; SP is overwritten, so mark it loaded and needing a store
112 | jal load_reg_x
113 | ori s1, s1, FLAG_SS | FLAG_LS
114 |
115 | // Emit code to move a value to the stack pointer from register X (no NZ update is queued here)
116 | EMIT_OP OR(S2, ZERO, T9)
117 | j finish_opcode
118 | nop
119 |
120 | .align 5
121 | apu_mova: // MOV A,op
122 | // Update the JIT register state: A is overwritten, so mark it loaded and needing a store
123 | ori s1, s1, FLAG_SA | FLAG_LA
124 |
125 | // Emit code to move a value to the accumulator and set flags; the operand is taken from V0 (presumably left there by the emitted addressing code)
126 | EMIT_OP OR(T7, ZERO, V0)
127 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
128 | j queue_nz
129 | li t1, T7 // Delay slot: register whose value determines NZ
130 |
131 | .align 5
132 | apu_movx: // MOV X,op
133 | // Update the JIT register state: X is overwritten, so mark it loaded and needing a store
134 | ori s1, s1, FLAG_SX | FLAG_LX
135 |
136 | // Emit code to move a value to register X and set flags; the operand is taken from V0 (presumably left there by the emitted addressing code)
137 | EMIT_OP OR(T9, ZERO, V0)
138 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
139 | j queue_nz
140 | li t1, T9 // Delay slot: register whose value determines NZ
141 |
142 | .align 5
143 | apu_movy: // MOV Y,op
144 | // Update the JIT register state: Y is overwritten, so mark it loaded and needing a store
145 | ori s1, s1, FLAG_SY | FLAG_LY
146 |
147 | // Emit code to move a value to register Y and set flags; the operand is taken from V0 (presumably left there by the emitted addressing code)
148 | EMIT_OP OR(T8, ZERO, V0)
149 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
150 | j queue_nz
151 | li t1, T8 // Delay slot: register whose value determines NZ
152 |
153 | .align 5
154 | apu_movwya: // MOVW YA,op
155 | // Update the JIT register state: both Y and A are overwritten, so mark them loaded and needing a store
156 | ori s1, s1, FLAG_SY | FLAG_SA | FLAG_LY | FLAG_LA
157 |
158 | // Emit code to move a 16-bit value to register YA: low byte (already in V0) to A, then read address + 1 into Y
159 | EMIT_OP OR(T7, ZERO, V0)
160 | la t0, apu_read8
161 | jal emit_jal
162 | nop
163 | EMIT_OP ADDI(A0, A0, 1)
164 | EMIT_OP OR(T8, ZERO, V0)
165 |
166 | // Emit code to set flags for the 16-bit value: T0 = (A != 0) | Y, so it is zero only if both are and keeps bit 7 of Y
167 | EMIT_OP SLT(T0, ZERO, T7)
168 | EMIT_OP OR(T0, T0, T8)
169 | la ra, finish_opcode // queue_nz returns straight to finish_opcode
170 | j queue_nz
171 | li t1, T0 // Delay slot: register whose value determines NZ
172 |
173 | .align 5
174 | apu_movm: // MOV op
175 | // Emit code to move a value to a memory address: call apu_write8 with the value from V0 copied to A1 after the emitted jal
176 | la t0, apu_write8
177 | jal emit_jal
178 | nop
179 | EMIT_OP OR(A1, ZERO, V0)
180 | j finish_opcode
181 | nop
182 |
183 | .align 5
184 | apu_amov: // MOV op,A
185 | // Update the JIT register state: the accumulator must be loaded before it can be written out
186 | jal load_accum
187 | nop
188 |
189 | // Emit code to move a value to memory from the accumulator: call apu_write8 with A copied to A1 after the emitted jal
190 | la t0, apu_write8
191 | jal emit_jal
192 | nop
193 | EMIT_OP OR(A1, ZERO, T7)
194 | j finish_opcode
195 | nop
196 |
197 | .align 5
198 | apu_xmov: // MOV op,X
199 | // Update the JIT register state: register X must be loaded before it can be written out
200 | jal load_reg_x
201 | nop
202 |
203 | // Emit code to move a value to memory from register X: call apu_write8 with X copied to A1 after the emitted jal
204 | la t0, apu_write8
205 | jal emit_jal
206 | nop
207 | EMIT_OP OR(A1, ZERO, T9)
208 | j finish_opcode
209 | nop
210 |
211 | .align 5
212 | apu_ymov: // MOV op,Y
213 | // Update the JIT register state: register Y must be loaded before it can be written out
214 | jal load_reg_y
215 | nop
216 |
217 | // Emit code to move a value to memory from register Y: call apu_write8 with Y copied to A1 after the emitted jal
218 | la t0, apu_write8
219 | jal emit_jal
220 | nop
221 | EMIT_OP OR(A1, ZERO, T8)
222 | j finish_opcode
223 | nop
224 |
225 | .align 5
226 | apu_movway: // MOVW op,YA
227 | // Update the JIT register state: both Y and A must be loaded before they can be written out
228 | jal load_reg_y
229 | nop
230 | jal load_accum
231 | nop
232 |
233 | // Emit code to move a 16-bit value to memory from register YA: write A at the address, then Y at address + 1
234 | la t0, apu_write8
235 | jal emit_jal
236 | nop
237 | EMIT_OP OR(A1, ZERO, T7)
238 | EMIT_OP ADDI(A0, A0, 1)
239 | la t0, apu_write8 // NOTE(review): the second write assumes apu_write8 leaves A0 intact - confirm
240 | jal emit_jal
241 | nop
242 | EMIT_OP OR(A1, ZERO, T8)
243 | j finish_opcode
244 | nop
245 |
246 | .align 5
247 | apu_pha: // PUSH A
248 | // Update the JIT register state: SP is loaded and modified (needs a store); A must be loaded
249 | jal load_stack
250 | ori s1, s1, FLAG_SS
251 | jal load_accum
252 | nop
253 |
254 | // Emit code to push the accumulator to the stack: write A to 0x100 | (SP & 0xFF), then decrement SP after the emitted jal
255 | EMIT_OP ANDI(A0, S2, 0xFF)
256 | EMIT_OP ORI(A0, A0, 0x100)
257 | EMIT_OP OR(A1, ZERO, T7)
258 | la t0, apu_write8
259 | jal emit_jal
260 | nop
261 | EMIT_OP ADDI(S2, S2, -1)
262 | j finish_opcode
263 | nop
264 |
265 | .align 5
266 | apu_phx: // PUSH X
267 | // Update the JIT register state: SP is loaded and modified (needs a store); X must be loaded
268 | jal load_stack
269 | ori s1, s1, FLAG_SS
270 | jal load_reg_x
271 | nop
272 |
273 | // Emit code to push register X to the stack: write X to 0x100 | (SP & 0xFF), then decrement SP after the emitted jal
274 | EMIT_OP ANDI(A0, S2, 0xFF)
275 | EMIT_OP ORI(A0, A0, 0x100)
276 | EMIT_OP OR(A1, ZERO, T9)
277 | la t0, apu_write8
278 | jal emit_jal
279 | nop
280 | EMIT_OP ADDI(S2, S2, -1)
281 | j finish_opcode
282 | nop
283 |
284 | .align 5
285 | apu_phy: // PUSH Y
286 | // Update the JIT register state: SP is loaded and modified (needs a store); Y must be loaded
287 | jal load_stack
288 | ori s1, s1, FLAG_SS
289 | jal load_reg_y
290 | nop
291 |
292 | // Emit code to push register Y to the stack: write Y to 0x100 | (SP & 0xFF), then decrement SP after the emitted jal
293 | EMIT_OP ANDI(A0, S2, 0xFF)
294 | EMIT_OP ORI(A0, A0, 0x100)
295 | EMIT_OP OR(A1, ZERO, T8)
296 | la t0, apu_write8
297 | jal emit_jal
298 | nop
299 | EMIT_OP ADDI(S2, S2, -1)
300 | j finish_opcode
301 | nop
302 |
303 | .align 5
304 | apu_php: // PUSH PSW
305 | // Update the JIT register state: SP is loaded and modified; flags must be loaded and any queued NZ update applied first
306 | jal load_stack
307 | ori s1, s1, FLAG_SS
308 | jal load_flags
309 | nop
310 | jal update_nz
311 | nop
312 |
313 | // Emit code to push the flags to the stack: write them to 0x100 | (SP & 0xFF), then decrement SP after the emitted jal
314 | EMIT_OP ANDI(A0, S2, 0xFF)
315 | EMIT_OP ORI(A0, A0, 0x100)
316 | EMIT_OP OR(A1, ZERO, S1)
317 | la t0, apu_write8
318 | jal emit_jal
319 | nop
320 | EMIT_OP ADDI(S2, S2, -1)
321 | j finish_opcode
322 | nop
323 |
324 | .align 5
325 | apu_ppa: // POP A
326 | // Update the JIT register state: SP is loaded and modified; A is overwritten, so mark it loaded and needing a store
327 | jal load_stack
328 | ori s1, s1, FLAG_SS | FLAG_SA | FLAG_LA
329 |
330 | // Emit code to pop the accumulator from the stack: increment SP, then read from 0x100 | (SP & 0xFF) into A
331 | EMIT_OP ADDI(S2, S2, 1)
332 | EMIT_OP ANDI(A0, S2, 0xFF)
333 | la t0, apu_read8
334 | jal emit_jal
335 | nop
336 | EMIT_OP ORI(A0, A0, 0x100)
337 | EMIT_OP OR(T7, ZERO, V0)
338 | j finish_opcode
339 | nop
340 |
341 | .align 5
342 | apu_ppx: // POP X
343 | // Update the JIT register state: SP is loaded and modified; X is overwritten, so mark it loaded and needing a store
344 | jal load_stack
345 | ori s1, s1, FLAG_SS | FLAG_SX | FLAG_LX
346 |
347 | // Emit code to pop register X from the stack: increment SP, then read from 0x100 | (SP & 0xFF) into X
348 | EMIT_OP ADDI(S2, S2, 1)
349 | EMIT_OP ANDI(A0, S2, 0xFF)
350 | la t0, apu_read8
351 | jal emit_jal
352 | nop
353 | EMIT_OP ORI(A0, A0, 0x100)
354 | EMIT_OP OR(T9, ZERO, V0)
355 | j finish_opcode
356 | nop
357 |
358 | .align 5
359 | apu_ppy: // POP Y
360 | // Update the JIT register state: SP is loaded and modified; Y is overwritten, so mark it loaded and needing a store
361 | jal load_stack
362 | ori s1, s1, FLAG_SS | FLAG_SY | FLAG_LY
363 |
364 | // Emit code to pop register Y from the stack: increment SP, then read from 0x100 | (SP & 0xFF) into Y
365 | EMIT_OP ADDI(S2, S2, 1)
366 | EMIT_OP ANDI(A0, S2, 0xFF)
367 | la t0, apu_read8
368 | jal emit_jal
369 | nop
370 | EMIT_OP ORI(A0, A0, 0x100)
371 | EMIT_OP OR(T8, ZERO, V0)
372 | j finish_opcode
373 | nop
374 |
375 | .align 5
376 | apu_ppp: // POP PSW
377 | // Update the JIT register state: SP is loaded and modified; flags are fully overwritten, so drop any queued NZ update
378 | jal load_stack
379 | ori s1, s1, FLAG_SS | FLAG_SF | FLAG_LF
380 | li t0, ~FLAG_NZ
381 | and s1, s1, t0
382 |
383 | // Emit code to pop the flags from the stack: increment SP, then read from 0x100 | (SP & 0xFF) into the flags
384 | EMIT_OP ADDI(S2, S2, 1)
385 | EMIT_OP ANDI(A0, S2, 0xFF)
386 | la t0, apu_read8
387 | jal emit_jal
388 | nop
389 | EMIT_OP ORI(A0, A0, 0x100)
390 | EMIT_OP OR(S1, ZERO, V0)
391 | j finish_opcode
392 | nop
393 |
--------------------------------------------------------------------------------
/src/cpu_address.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see https://www.gnu.org/licenses/.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl cpu_im8
23 | .globl cpu_i16
24 | .globl cpu_dir
25 | .globl cpu_drx
26 | .globl cpu_dry
27 | .globl cpu_drs
28 | .globl cpu_abs
29 | .globl cpu_abx
30 | .globl cpu_aby
31 | .globl cpu_lng
32 | .globl cpu_lnx
33 | .globl cpu_ind
34 | .globl cpu_idx
35 | .globl cpu_idy
36 | .globl cpu_isy
37 | .globl cpu_idl
38 | .globl cpu_ily
39 |
40 | .text
41 | .set noreorder
42 |
43 | .align 5
44 | cpu_im8: // #nn
45 | // Get the address of the 8-bit immediate value (the byte after the opcode) in s0
46 | addi s0, s7, 1
47 | jr gp // Continue at the function held in gp
48 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
49 |
50 | cpu_i16: // #nnnn
51 | // Get the address of the 16-bit immediate value (the bytes after the opcode) in s0
52 | addi s0, s7, 1
53 | jr gp // Continue at the function held in gp
54 | addi s7, s7, 3 // Delay slot: advance the PC past the opcode and operand
55 |
56 | .align 5
57 | cpu_dir: // nn
58 | // Get the 8-bit immediate value added to the direct offset (s8) as an address in s0
59 | MEM_READ8 1(s7)
60 | add s0, s8, v0
61 | andi s0, s0, 0xFFFF // Wrap the address to 16 bits
62 | jr gp // Continue at the function held in gp
63 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
64 |
65 | .align 5
66 | cpu_drx: // nn,X
67 | // Get the 8-bit immediate value plus register X added to the direct offset (s8) as an address in s0
68 | MEM_READ8 1(s7)
69 | lhu t0, register_x
70 | add t0, t0, v0
71 | add s0, s8, t0
72 | andi s0, s0, 0xFFFF // Wrap the address to 16 bits
73 | jr gp // Continue at the function held in gp
74 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
75 |
76 | .align 5
77 | cpu_dry: // nn,Y
78 | // Get the 8-bit immediate value plus register Y added to the direct offset (s8) as an address in s0
79 | MEM_READ8 1(s7)
80 | lhu t0, register_y
81 | add t0, t0, v0
82 | add s0, s8, t0
83 | andi s0, s0, 0xFFFF // Wrap the address to 16 bits
84 | jr gp // Continue at the function held in gp
85 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
86 |
87 | .align 5
88 | cpu_drs: // nn,S
89 | // Get the 8-bit immediate value added to the stack pointer as an address in s0 (not wrapped to 16 bits here)
90 | MEM_READ8 1(s7)
91 | lhu s0, stack_ptr
92 | add s0, s0, v0
93 | jr gp // Continue at the function held in gp
94 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
95 |
96 | .align 5
97 | cpu_abs: // nnnn
98 | // Get the 16-bit immediate value added to the data bank as an address in s0
99 | MEM_READ16 1(s7)
100 | lw s0, data_bank
101 | add s0, s0, v0
102 | jr gp // Continue at the function held in gp
103 | addi s7, s7, 3 // Delay slot: advance the PC past the opcode and operand
104 |
105 | .align 5
106 | cpu_abx: // nnnn,X
107 | // Get the 16-bit immediate value plus register X added to the data bank as an address in s0
108 | MEM_READ16 1(s7)
109 | lhu t0, register_x
110 | add t0, t0, v0
111 | lw s0, data_bank
112 | add s0, s0, t0 // The index sum is not wrapped, so it can carry into the bank bits
113 | jr gp // Continue at the function held in gp
114 | addi s7, s7, 3 // Delay slot: advance the PC past the opcode and operand
115 |
116 | .align 5
117 | cpu_aby: // nnnn,Y
118 | // Get the 16-bit immediate value plus register Y added to the data bank as an address in s0
119 | MEM_READ16 1(s7)
120 | lhu t0, register_y
121 | add t0, t0, v0
122 | lw s0, data_bank
123 | add s0, s0, t0 // The index sum is not wrapped, so it can carry into the bank bits
124 | jr gp // Continue at the function held in gp
125 | addi s7, s7, 3 // Delay slot: advance the PC past the opcode and operand
126 |
127 | .align 5
128 | cpu_lng: // nnnnnn
129 | // Get the 24-bit immediate value as an address in s0: bank byte first, then the low 16 bits
130 | MEM_READ8 3(s7)
131 | sll s0, v0, 16
132 | MEM_READ16 1(s7)
133 | or s0, s0, v0
134 | jr gp // Continue at the function held in gp
135 | addi s7, s7, 4 // Delay slot: advance the PC past the opcode and operand
136 |
137 | .align 5
138 | cpu_lnx: // nnnnnn,X
139 | // Get the 24-bit immediate value plus register X as an address in s0
140 | MEM_READ8 3(s7)
141 | sll s0, v0, 16
142 | MEM_READ16 1(s7)
143 | lhu t0, register_x
144 | add t0, t0, v0
145 | add s0, s0, t0 // The index sum is not wrapped, so it can carry into the bank bits
146 | jr gp // Continue at the function held in gp
147 | addi s7, s7, 4 // Delay slot: advance the PC past the opcode and operand
148 |
149 | .align 5
150 | cpu_ind: // (nn)
151 | // Get the 8-bit immediate value added to the direct offset (s8) as a pointer address in s0
152 | MEM_READ8 1(s7)
153 | add s0, s8, v0
154 | andi s0, s0, 0xFFFF
155 |
156 | // Get a 16-bit value from memory added to the data bank as an address (MEM_READ16 without an operand presumably reads at s0 - confirm in macros.h)
157 | MEM_READ16
158 | lw s0, data_bank
159 | add s0, s0, v0
160 | jr gp // Continue at the function held in gp
161 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
162 |
163 | .align 5
164 | cpu_idx: // (nn,X)
165 | // Get the 8-bit immediate value plus register X added to the direct offset (s8) as a pointer address in s0
166 | MEM_READ8 1(s7)
167 | lhu t0, register_x
168 | add t0, t0, v0
169 | add s0, s8, t0
170 | andi s0, s0, 0xFFFF
171 |
172 | // Get a 16-bit value from memory added to the data bank as an address (MEM_READ16 without an operand presumably reads at s0 - confirm in macros.h)
173 | MEM_READ16
174 | lw s0, data_bank
175 | add s0, s0, v0
176 | jr gp // Continue at the function held in gp
177 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
178 |
179 | .align 5
180 | cpu_idy: // (nn),Y
181 | // Get the 8-bit immediate value added to the direct offset (s8) as a pointer address in s0
182 | MEM_READ8 1(s7)
183 | add s0, s8, v0
184 | andi s0, s0, 0xFFFF
185 |
186 | // Get a 16-bit value from memory plus register Y added to the data bank as an address (MEM_READ16 without an operand presumably reads at s0 - confirm in macros.h)
187 | MEM_READ16
188 | lw s0, data_bank
189 | lhu t0, register_y
190 | add s0, s0, v0
191 | add s0, s0, t0
192 | jr gp // Continue at the function held in gp
193 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
194 |
195 | .align 5
196 | cpu_isy: // (nn,S),Y
197 | // Get the 8-bit immediate value added to the stack pointer as a pointer address in s0
198 | MEM_READ8 1(s7)
199 | lhu s0, stack_ptr
200 | add s0, s0, v0
201 |
202 | // Get a 16-bit value from memory plus register Y added to the data bank as an address (MEM_READ16 without an operand presumably reads at s0 - confirm in macros.h)
203 | MEM_READ16
204 | lw s0, data_bank
205 | lhu t0, register_y
206 | add s0, s0, v0
207 | add s0, s0, t0
208 | jr gp // Continue at the function held in gp
209 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
210 |
211 | .align 5
212 | cpu_idl: // [nn]
213 | // Get the 8-bit immediate value added to the direct offset (s8) as a pointer address in s0
214 | MEM_READ8 1(s7)
215 | add s0, s8, v0
216 | andi s0, s0, 0xFFFF
217 |
218 | // Read a 24-bit value from memory as an address: bank byte at pointer + 2 (kept in s1), then the low 16 bits
219 | MEM_READ8 2(s0)
220 | sll s1, v0, 16
221 | MEM_READ16
222 | or s0, s1, v0
223 | jr gp // Continue at the function held in gp
224 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
225 |
226 | .align 5
227 | cpu_ily: // [nn],Y
228 | // Get the 8-bit immediate value added to the direct offset (s8) as a pointer address in s0
229 | MEM_READ8 1(s7)
230 | add s0, s8, v0
231 | andi s0, s0, 0xFFFF
232 |
233 | // Read a 24-bit value from memory plus register Y as an address: bank byte at pointer + 2 (kept in s1), then the low 16 bits
234 | MEM_READ8 2(s0)
235 | sll s1, v0, 16
236 | MEM_READ16
237 | lhu t0, register_y
238 | add t0, t0, v0
239 | add s0, s1, t0 // The index sum is not wrapped, so it can carry into the bank bits
240 | jr gp // Continue at the function held in gp
241 | addi s7, s7, 2 // Delay slot: advance the PC past the opcode and operand
242 |
--------------------------------------------------------------------------------
/src/cpu_control.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see https://www.gnu.org/licenses/.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl cpu_bra
23 | .globl cpu_brl
24 | .globl cpu_jmp
25 | .globl cpu_jml
26 | .globl cpu_jmpi
27 | .globl cpu_jmpx
28 | .globl cpu_jmli
29 | .globl cpu_jsr
30 | .globl cpu_jsle
31 | .globl cpu_jsl
32 | .globl cpu_jsrx
33 | .globl cpu_rtie
34 | .globl cpu_rti
35 | .globl cpu_rtle
36 | .globl cpu_rtl
37 | .globl cpu_rts
38 | .globl cpu_bpl
39 | .globl cpu_bmi
40 | .globl cpu_bvc
41 | .globl cpu_bvs
42 | .globl cpu_bcc
43 | .globl cpu_bcs
44 | .globl cpu_bne
45 | .globl cpu_beq
46 | .globl cpu_brke
47 | .globl cpu_brk
48 | .globl cpu_cope
49 | .globl cpu_cop
50 | .globl cpu_clc
51 | .globl cpu_cli
52 | .globl cpu_cld
53 | .globl cpu_clv
54 | .globl cpu_sec
55 | .globl cpu_sei
56 | .globl cpu_sed
57 | .globl cpu_rep
58 | .globl cpu_sep
59 | .globl cpu_xce
60 | .globl cpu_stp
61 | .globl cpu_xba
62 | .globl cpu_wai
63 | .globl cpu_wdm
64 | .globl cpu_nop
65 |
66 | .text
67 | .set noreorder
68 |
69 | .align 5
70 | cpu_bra: // BRA disp8
71 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits so the upper bits of s7 are kept
72 | MEM_READ8 1(s7), lb
73 | addi s7, s7, 2 // The displacement is relative to the next instruction
74 | add t0, s7, v0
75 | andi t0, t0, 0xFFFF // New 16-bit PC
76 | andi t1, s7, 0xFFFF
77 | sub s7, s7, t1 // Clear the old 16-bit PC
78 | j cpu_execute
79 | add s7, s7, t0 // Delay slot: insert the new 16-bit PC
80 |
81 | .align 5
82 | cpu_brl: // BRL disp16
83 | // Add the 16-bit immediate value to the PC, wrapping within the low 16 bits so the upper bits of s7 are kept
84 | MEM_READ16 1(s7)
85 | addi s7, s7, 3 // The displacement is relative to the next instruction
86 | add t0, s7, v0
87 | andi t0, t0, 0xFFFF // New 16-bit PC; the wrap makes the displacement's sign irrelevant
88 | andi t1, s7, 0xFFFF
89 | sub s7, s7, t1 // Clear the old 16-bit PC
90 | j cpu_execute
91 | add s7, s7, t0 // Delay slot: insert the new 16-bit PC
92 |
93 | .align 5
94 | cpu_jmp: // JMP nnnn
95 | // Set the PC to the 16-bit immediate value, keeping the upper bits of s7
96 | MEM_READ16 1(s7)
97 | lui t0, 0xFFFF // Mask for the bits of s7 above the 16-bit PC
98 | and s7, s7, t0
99 | j cpu_execute
100 | add s7, s7, v0 // Delay slot: insert the new 16-bit PC
101 |
102 | .align 5
103 | cpu_jml: // JML nnnnnn
104 | // Set the PC and bank to the 24-bit immediate value
105 | MEM_READ16 1(s7)
106 | move s0, v0 // Hold the low 16 bits while the bank byte is read
107 | MEM_READ8 3(s7)
108 | sll t0, v0, 16
109 | j cpu_execute
110 | or s7, s0, t0 // Delay slot: combine bank and 16-bit PC
111 |
112 | .align 5
113 | cpu_jmpi: // JMP (nnnn)
114 | // Set the PC to the value at the 16-bit immediate address, keeping the upper bits of s7
115 | MEM_READ16 1(s7)
116 | move s0, v0 // The pointer address has no bank bits added
117 | MEM_READ16
118 | lui t0, 0xFFFF // Mask for the bits of s7 above the 16-bit PC
119 | and s7, s7, t0
120 | j cpu_execute
121 | add s7, s7, v0 // Delay slot: insert the new 16-bit PC
122 |
123 | .align 5
124 | cpu_jmpx: // JMP (nnnn,X)
125 | // Get the 16-bit immediate value plus register X added to the program bank as an address
126 | MEM_READ16 1(s7)
127 | lhu t0, register_x
128 | add t0, t0, v0
129 | lui s1, 0xFFFF // Mask for the bits of s7 above the 16-bit PC, kept in s1 for reuse below
130 | and t1, s7, s1
131 | add s0, t0, t1
132 |
133 | // Set the PC to the memory value at the address, keeping the upper bits of s7
134 | MEM_READ16
135 | and s7, s7, s1
136 | j cpu_execute
137 | add s7, s7, v0 // Delay slot: insert the new 16-bit PC
138 |
139 | .align 5
140 | cpu_jmli: // JML [nnnn]
141 | // Set the PC and bank to the 24-bit value at the 16-bit immediate address
142 | MEM_READ16 1(s7)
143 | move s0, v0 // The pointer address has no bank bits added
144 | MEM_READ16
145 | move s7, v0 // Low 16 bits of the new PC
146 | MEM_READ8 2(s0)
147 | sll t0, v0, 16
148 | j cpu_execute
149 | or s7, s7, t0 // Delay slot: merge in the bank byte
150 |
151 | .align 5
152 | cpu_jsr: // JSR nnnn (call within the current bank)
153 | // Update the stack pointer (reserve two bytes)
154 | lhu s0, stack_ptr
155 | addi s0, s0, -2
156 | sh s0, stack_ptr
157 |
158 | // Push the PC+2 to the stack (the last byte of this 3-byte instruction; cpu_rts adds 1 on return)
159 | addi a1, s7, 2 // a1 is the value operand of the write macro
160 | MEM_WRITE16 1(s0) // Written just above the new stack pointer
161 |
162 | // Set the PC to the 16-bit immediate value
163 | MEM_READ16 1(s7)
164 | lui t0, 0xFFFF
165 | and s7, s7, t0 // Keep only the upper (bank) bits of s7
166 | j cpu_execute
167 | add s7, s7, v0 // Delay slot: install the new 16-bit PC
168 |
169 | .align 5
170 | cpu_jsle: // JSL nnnnnn (emulation mode)
171 | // Update the stack pointer (only two bytes are reserved here, unlike cpu_jsl which reserves three)
172 | lhu s0, stack_ptr
173 | addi s0, s0, -2
174 | sh s0, stack_ptr
175 |
176 | // Push the PC+3 to the stack (no bank byte is pushed; cpu_rtle pops the matching two bytes)
177 | addi a1, s7, 3 // Last byte of this 4-byte instruction
178 | MEM_WRITE16 1(s0)
179 |
180 | // Set the PC and bank to the 24-bit immediate value
181 | MEM_READ16 1(s7) // Low 16 bits of the target
182 | move s0, v0
183 | MEM_READ8 3(s7) // Bank byte of the target
184 | sll t0, v0, 16
185 | j cpu_execute
186 | or s7, s0, t0 // Delay slot: s7 = (bank << 16) | PC
187 |
188 | .align 5
189 | cpu_jsl: // JSL nnnnnn (long call)
190 | // Update the stack pointer (reserve three bytes)
191 | lhu s0, stack_ptr
192 | addi s0, s0, -3
193 | sh s0, stack_ptr
194 |
195 | // Push the PC+3 and bank to the stack (bank in the highest slot, 16-bit address below it)
196 | srl a1, s7, 16 // Current program bank
197 | MEM_WRITE8 3(s0)
198 | addi a1, s7, 3 // Last byte of this 4-byte instruction; cpu_rtl adds 1 on return
199 | MEM_WRITE16 1(s0)
200 |
201 | // Set the PC and bank to the 24-bit immediate value
202 | MEM_READ16 1(s7) // Low 16 bits of the target
203 | move s0, v0
204 | MEM_READ8 3(s7) // Bank byte of the target
205 | sll t0, v0, 16
206 | j cpu_execute
207 | or s7, s0, t0 // Delay slot: s7 = (bank << 16) | PC
208 |
209 | .align 5
210 | cpu_jsrx: // JSR (nnnn,X) (indexed indirect call)
211 | // Update the stack pointer (reserve two bytes)
212 | lhu s0, stack_ptr
213 | addi s0, s0, -2
214 | sh s0, stack_ptr
215 |
216 | // Push the PC+2 to the stack (the last byte of this 3-byte instruction)
217 | addi a1, s7, 2
218 | MEM_WRITE16 1(s0)
219 |
220 | // Get the 16-bit immediate value plus register X added to the program bank as an address
221 | MEM_READ16 1(s7)
222 | lhu t0, register_x
223 | add t0, t0, v0 // NOTE(review): the sum is not masked to 16 bits, so it can carry into the bank bits
224 | lui s1, 0xFFFF // s1 = 0xFFFF0000, reused below to strip the old PC
225 | and t1, s7, s1
226 | add s0, t0, t1 // s0 = pointer address in the current program bank
227 |
228 | // Set the PC to the memory value at the address
229 | MEM_READ16 // No operand given: presumably reads at the address in s0 (see macros.h)
230 | and s7, s7, s1 // Relies on s1 surviving the read macro
231 | j cpu_execute
232 | add s7, s7, v0 // Delay slot: install the new 16-bit PC
233 |
234 | .align 5
235 | cpu_rtie: // RTI (emulation mode): pops the flags and a 16-bit PC (3 bytes, no bank byte)
236 | // Pop the flags from the stack
237 | la s1, stack_ptr // Keep the variable's address so the final store fits in the delay slot
238 | lhu s0, (s1)
239 | MEM_READ8 1(s0)
240 | andi t0, v0, 0xFFEF // Clear B
241 | andi s4, s4, 0xFF00 // Keep the upper flag bits, replace the low byte
242 | or s4, s4, t0
243 |
244 | // Pop the PC from the stack (the upper bits of s7 become zero)
245 | MEM_READ16 2(s0)
246 | move s7, v0
247 |
248 | // Update the stack pointer; leave through update_mode like cpu_rti and cpu_plp, because the popped byte can change D
249 | addi t0, s0, 3
250 | j update_mode
251 | sh t0, (s1) // Delay slot: commit the new stack pointer
252 |
253 | .align 5
254 | cpu_rti: // RTI: pops the flags, a 16-bit PC and the bank (4 bytes)
255 | // Pop the flags from the stack
256 | la s1, stack_ptr // Keep the variable's address so the final store fits in the delay slot
257 | lhu s0, (s1)
258 | MEM_READ8 1(s0)
259 | andi s4, s4, 0xFF00 // Keep the upper flag bits, replace the low byte
260 | or s4, s4, v0
261 |
262 | // Pop the PC and bank from the stack
263 | MEM_READ16 2(s0)
264 | move s7, v0
265 | MEM_READ8 4(s0)
266 | sll t0, v0, 16
267 | add s7, s7, t0 // s7 = (bank << 16) + PC
268 |
269 | // Update the stack pointer, then leave through update_mode because the flags changed
270 | addi t0, s0, 4
271 | j update_mode
272 | sh t0, (s1) // Delay slot: commit the new stack pointer
273 |
274 | .align 5
275 | cpu_rtle: // RTL (emulation mode): counterpart of cpu_jsle, pops two bytes only
276 | // Pop the PC+1 from the stack (the upper bits of s7 are replaced, not preserved)
277 | la s1, stack_ptr // Keep the variable's address so the final store fits in the delay slot
278 | lhu s0, (s1)
279 | MEM_READ16 1(s0)
280 | addi s7, v0, 1 // The stacked address is the last byte of the call instruction
281 |
282 | // Update the stack pointer
283 | addi t0, s0, 2
284 | j cpu_execute
285 | sh t0, (s1) // Delay slot: commit the new stack pointer
286 |
287 | .align 5
288 | cpu_rtl: // RTL: counterpart of cpu_jsl, pops a 16-bit address and the bank
289 | // Pop the PC+1 and bank from the stack
290 | la s1, stack_ptr // Keep the variable's address so the final store fits in the delay slot
291 | lhu s0, (s1)
292 | MEM_READ16 1(s0)
293 | addi s7, v0, 1 // NOTE(review): a popped 0xFFFF gives 0x10000 here, which carries into the bank added below -- confirm
294 | MEM_READ8 3(s0)
295 | sll t0, v0, 16
296 | add s7, s7, t0 // s7 = (bank << 16) + PC
297 |
298 | // Update the stack pointer
299 | addi t0, s0, 3
300 | j cpu_execute
301 | sh t0, (s1) // Delay slot: commit the new stack pointer
302 |
303 | .align 5
304 | cpu_rts: // RTS: counterpart of cpu_jsr, returns within the current bank
305 | // Pop the PC+1 from the stack
306 | la s1, stack_ptr // Keep the variable's address so the final store fits in the delay slot
307 | lhu s0, (s1)
308 | MEM_READ16 1(s0)
309 | addi t0, v0, 1 // NOTE(review): a popped 0xFFFF gives 0x10000 here, which carries into the bank bits below -- confirm
310 | lui t1, 0xFFFF
311 | and s7, s7, t1 // Keep only the upper (bank) bits of s7
312 | add s7, s7, t0
313 |
314 | // Update the stack pointer
315 | addi t0, s0, 2
316 | j cpu_execute
317 | sh t0, (s1) // Delay slot: commit the new stack pointer
318 |
319 | .align 5
320 | cpu_bpl: // BPL disp8 (branch if N is clear)
321 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
322 | andi t0, s4, 0x80 // N
323 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
324 | bnez t0, cpu_execute // N set: not taken, continue at the next instruction
325 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
326 |
327 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
328 | add t0, s7, v0
329 | andi t0, t0, 0xFFFF
330 | andi t1, s7, 0xFFFF
331 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
332 | j cpu_execute
333 | add s7, s7, t0 // Delay slot: merge in the target PC
334 |
335 | .align 5
336 | cpu_bmi: // BMI disp8 (branch if N is set)
337 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
338 | andi t0, s4, 0x80 // N
339 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
340 | beqz t0, cpu_execute // N clear: not taken, continue at the next instruction
341 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
342 |
343 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
344 | add t0, s7, v0
345 | andi t0, t0, 0xFFFF
346 | andi t1, s7, 0xFFFF
347 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
348 | j cpu_execute
349 | add s7, s7, t0 // Delay slot: merge in the target PC
350 |
351 | .align 5
352 | cpu_bvc: // BVC disp8 (branch if V is clear)
353 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
354 | andi t0, s4, 0x40 // V
355 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
356 | bnez t0, cpu_execute // V set: not taken, continue at the next instruction
357 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
358 |
359 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
360 | add t0, s7, v0
361 | andi t0, t0, 0xFFFF
362 | andi t1, s7, 0xFFFF
363 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
364 | j cpu_execute
365 | add s7, s7, t0 // Delay slot: merge in the target PC
366 |
367 | .align 5
368 | cpu_bvs: // BVS disp8 (branch if V is set)
369 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
370 | andi t0, s4, 0x40 // V
371 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
372 | beqz t0, cpu_execute // V clear: not taken, continue at the next instruction
373 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
374 |
375 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
376 | add t0, s7, v0
377 | andi t0, t0, 0xFFFF
378 | andi t1, s7, 0xFFFF
379 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
380 | j cpu_execute
381 | add s7, s7, t0 // Delay slot: merge in the target PC
382 |
383 | .align 5
384 | cpu_bcc: // BCC disp8 (branch if C is clear)
385 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
386 | andi t0, s4, 0x1 // C
387 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
388 | bnez t0, cpu_execute // C set: not taken, continue at the next instruction
389 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
390 |
391 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
392 | add t0, s7, v0
393 | andi t0, t0, 0xFFFF
394 | andi t1, s7, 0xFFFF
395 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
396 | j cpu_execute
397 | add s7, s7, t0 // Delay slot: merge in the target PC
398 |
399 | .align 5
400 | cpu_bcs: // BCS disp8 (branch if C is set)
401 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
402 | andi t0, s4, 0x1 // C
403 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
404 | beqz t0, cpu_execute // C clear: not taken, continue at the next instruction
405 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
406 |
407 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
408 | add t0, s7, v0
409 | andi t0, t0, 0xFFFF
410 | andi t1, s7, 0xFFFF
411 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
412 | j cpu_execute
413 | add s7, s7, t0 // Delay slot: merge in the target PC
414 |
415 | .align 5
416 | cpu_bne: // BNE disp8 (branch if Z is clear)
417 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
418 | andi t0, s4, 0x2 // Z
419 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
420 | bnez t0, cpu_execute // Z set: not taken, continue at the next instruction
421 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
422 |
423 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
424 | add t0, s7, v0
425 | andi t0, t0, 0xFFFF
426 | andi t1, s7, 0xFFFF
427 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
428 | j cpu_execute
429 | add s7, s7, t0 // Delay slot: merge in the target PC
430 |
431 | .align 5
432 | cpu_beq: // BEQ disp8 (branch if Z is set), with detection of one idle-loop pattern
433 | // Check the condition; the displacement is fetched before the test so v0 is ready if the branch is taken
434 | andi t0, s4, 0x2 // Z
435 | MEM_READ8 1(s7), lb // Relies on t0 surviving the read macro
436 | beqz t0, cpu_execute // Z clear: not taken, continue at the next instruction
437 | addi s7, s7, 2 // Delay slot: runs on both paths, stepping over the 2-byte instruction
438 |
439 | // Add the signed 8-bit immediate value to the PC, wrapping within the low 16 bits
440 | add t0, s7, v0
441 | andi t0, t0, 0xFFFF
442 | andi t1, s7, 0xFFFF
443 | sub s7, s7, t1 // Keep only the upper (bank) bits of s7
444 | add s7, s7, t0 // s7 now holds the branch target
445 |
446 | // Check for a basic type of idle loop: a taken BEQ -4 whose target byte is 0xA5 and which is followed by 0x58
447 | addi t0, v0, 4 // Offset: zero only when the displacement is -4
448 | bnez t0, cpu_execute
449 | li t0, 0x58 // CLI (delay slot: loaded on both paths)
450 | lbu v0, 4(s7) // Byte after the BEQ, loaded directly through s7 instead of the MEM_READ8 macro
451 | bne v0, t0, cpu_execute
452 | li t0, 0xA5 // LDA (delay slot: loaded on both paths)
453 | lbu v0, 0(s7) // Opcode byte at the branch target
454 | bne v0, t0, cpu_execute
455 | nop
456 |
457 | // Halt the CPU until the next interrupt (same state change as cpu_wai, but the PC stays at the loop start)
458 | ori s4, s4, 0x200 // Halted
459 | j cpu_execute
460 | li s5, 0 // Delay slot: zero s5 (presumably the remaining cycle budget -- confirm in cpu.S)
461 |
462 | .align 5
463 | cpu_brke: // BRK (emulation mode): pushes a 16-bit return address and the flags (3 bytes)
464 | // Update the stack pointer
465 | lhu s0, stack_ptr
466 | addi s0, s0, -3
467 | sh s0, stack_ptr
468 |
469 | // Push the PC+2 to the stack (BRK is treated as a 2-byte instruction)
470 | addi a1, s7, 2
471 | MEM_WRITE16 2(s0)
472 |
473 | // Push the flags to the stack and modify them
474 | ori a1, s4, 0x10 // Set B (only in the pushed copy)
475 | MEM_WRITE8 1(s0)
476 | ori s4, s4, 0x4 // Set I
477 | andi s4, s4, 0xFFF7 // Clear D
478 |
479 | // Jump to the BRK vector (emulation mode); leave through update_mode like cpu_cld, because D was just cleared
480 | li s0, 0xFFFE
481 | MEM_READ16 // No operand given: presumably reads at the address in s0 (see macros.h)
482 | j update_mode
483 | move s7, v0 // Delay slot: the new PC has no bank bits, so execution continues in bank 0
484 |
485 | .align 5
486 | cpu_brk: // BRK: pushes the bank, a 16-bit return address and the flags (4 bytes)
487 | // Update the stack pointer
488 | lhu s0, stack_ptr
489 | addi s0, s0, -4
490 | sh s0, stack_ptr
491 |
492 | // Push the PC+2 and bank to the stack (BRK is treated as a 2-byte instruction)
493 | srl a1, s7, 16 // Current program bank
494 | MEM_WRITE8 4(s0)
495 | addi a1, s7, 2
496 | MEM_WRITE16 2(s0)
497 |
498 | // Push the flags to the stack and modify them
499 | move a1, s4 // Pushed unmodified, unlike the emulation-mode variant which sets B
500 | MEM_WRITE8 1(s0)
501 | ori s4, s4, 0x4 // Set I
502 | andi s4, s4, 0xFFF7 // Clear D
503 |
504 | // Jump to the BRK vector; leave through update_mode like cpu_cld, because D was just cleared
505 | li s0, 0xFFE6
506 | MEM_READ16 // No operand given: presumably reads at the address in s0 (see macros.h)
507 | j update_mode
508 | move s7, v0 // Delay slot: the new PC has no bank bits, so execution continues in bank 0
509 |
510 | .align 5
511 | cpu_cope: // COP (emulation mode): pushes a 16-bit return address and the flags (3 bytes)
512 | // Update the stack pointer
513 | lhu s0, stack_ptr
514 | addi s0, s0, -3
515 | sh s0, stack_ptr
516 |
517 | // Push the PC+2 to the stack (COP is a 2-byte instruction)
518 | addi a1, s7, 2
519 | MEM_WRITE16 2(s0)
520 |
521 | // Push the flags to the stack and modify them
522 | ori a1, s4, 0x10 // Set B (only in the pushed copy)
523 | MEM_WRITE8 1(s0)
524 | ori s4, s4, 0x4 // Set I
525 | andi s4, s4, 0xFFF7 // Clear D
526 |
527 | // Jump to the COP vector (emulation mode); leave through update_mode like cpu_cld, because D was just cleared
528 | li s0, 0xFFF4
529 | MEM_READ16 // No operand given: presumably reads at the address in s0 (see macros.h)
530 | j update_mode
531 | move s7, v0 // Delay slot: the new PC has no bank bits, so execution continues in bank 0
532 |
533 | .align 5
534 | cpu_cop: // COP: pushes the bank, a 16-bit return address and the flags (4 bytes)
535 | // Update the stack pointer
536 | lhu s0, stack_ptr
537 | addi s0, s0, -4
538 | sh s0, stack_ptr
539 |
540 | // Push the PC+2 and bank to the stack (COP is a 2-byte instruction)
541 | srl a1, s7, 16 // Current program bank
542 | MEM_WRITE8 4(s0)
543 | addi a1, s7, 2
544 | MEM_WRITE16 2(s0)
545 |
546 | // Push the flags to the stack and modify them
547 | move a1, s4 // Pushed unmodified, unlike the emulation-mode variant which sets B
548 | MEM_WRITE8 1(s0)
549 | ori s4, s4, 0x4 // Set I
550 | andi s4, s4, 0xFFF7 // Clear D
551 |
552 | // Jump to the COP vector; leave through update_mode like cpu_cld, because D was just cleared
553 | li s0, 0xFFE4
554 | MEM_READ16 // No operand given: presumably reads at the address in s0 (see macros.h)
555 | j update_mode
556 | move s7, v0 // Delay slot: the new PC has no bank bits, so execution continues in bank 0
557 |
558 | .align 5
559 | cpu_clc: // CLC (single-flag handlers: each edits one bit of s4 and steps over its 1-byte opcode)
560 | // Clear the carry flag
561 | andi s4, s4, 0xFFFE // Bit 0
562 | j cpu_execute
563 | addi s7, s7, 1 // Delay slot: advance the PC
564 |
565 | .align 5
566 | cpu_cli: // CLI
567 | // Clear the interrupt flag
568 | andi s4, s4, 0xFFFB // Bit 2
569 | j cpu_execute
570 | addi s7, s7, 1 // Delay slot: advance the PC
571 |
572 | .align 5
573 | cpu_cld: // CLD
574 | // Clear the decimal flag; unlike the C/I/V handlers this one leaves through update_mode
575 | andi s4, s4, 0xFFF7 // Bit 3
576 | j update_mode
577 | addi s7, s7, 1 // Delay slot: advance the PC
578 |
579 | .align 5
580 | cpu_clv: // CLV
581 | // Clear the overflow flag
582 | andi s4, s4, 0xFFBF // Bit 6
583 | j cpu_execute
584 | addi s7, s7, 1 // Delay slot: advance the PC
585 |
586 | .align 5
587 | cpu_sec: // SEC
588 | // Set the carry flag
589 | ori s4, s4, 0x1 // Bit 0
590 | j cpu_execute
591 | addi s7, s7, 1 // Delay slot: advance the PC
592 |
593 | .align 5
594 | cpu_sei: // SEI
595 | // Set the interrupt flag
596 | ori s4, s4, 0x4 // Bit 2
597 | j cpu_execute
598 | addi s7, s7, 1 // Delay slot: advance the PC
599 |
600 | .align 5
601 | cpu_sed: // SED
602 | // Set the decimal flag; unlike the C/I handlers this one leaves through update_mode
603 | ori s4, s4, 0x8 // Bit 3
604 | j update_mode
605 | addi s7, s7, 1 // Delay slot: advance the PC
606 |
607 | .align 5
608 | cpu_rep: // REP #nn (clear every flag whose bit is set in the operand)
609 | // Clear flags from the 8-bit immediate value
610 | MEM_READ8 1(s7) // v0 = mask byte
611 | not v0, v0 // Inverting the byte leaves all higher bits set, so only the low flag byte can be cleared
612 | and s4, s4, v0
613 | j update_mode // Any flag may have changed, so leave through update_mode
614 | addi s7, s7, 2 // Delay slot: step over the 2-byte instruction
615 |
616 | .align 5
617 | cpu_sep: // SEP #nn (set every flag whose bit is set in the operand)
618 | // Set flags from the 8-bit immediate value
619 | MEM_READ8 1(s7) // v0 = mask byte
620 | or s4, s4, v0
621 | j update_mode // Any flag may have changed, so leave through update_mode
622 | addi s7, s7, 2 // Delay slot: step over the 2-byte instruction
623 |
624 | .align 5
625 | cpu_xce: // XCE (exchange bit 0 and bit 8 of s4)
626 | // Swap the carry and emulation flags
627 | sll t0, s4, 8 // Moves bit 0 (carry) up to bit 8
628 | srl t1, s4, 8 // Moves bit 8 (emulation) down to bit 0
629 | or t0, t0, t1
630 | andi t0, t0, 0x101 // Keep just the two swapped bits
631 | andi s4, s4, 0xFEFE // Clear both positions in the flags
632 | or s4, s4, t0
633 | j update_mode // The emulation bit may have changed, so leave through update_mode
634 | addi s7, s7, 1 // Delay slot: advance the PC
635 |
636 | .align 5
637 | cpu_stp: // STP
638 | // Stop execution: this jumps to itself forever and never returns to cpu_execute
639 | j cpu_stp
640 | nop // Delay slot
641 |
642 | .align 5
643 | cpu_xba: // XBA (exchange the two accumulator bytes)
644 | // Swap the accumulator bytes and set flags for the low byte
645 | lhu t0, accumulator
646 | sll t1, t0, 8 // Old low byte moved up; bits above 15 are dropped by the halfword store
647 | srl t0, t0, 8 // Old high byte moved down
648 | or t0, t0, t1
649 | sh t0, accumulator
650 | andi a0, t0, 0xFF // a0 = new low byte, the argument for set_nz8
651 | j set_nz8
652 | addi s7, s7, 1 // Delay slot: advance the PC
653 |
654 | .align 5
655 | cpu_wai: // WAI
656 | // Halt the CPU until the next interrupt
657 | ori s4, s4, 0x200 // Halted (bit 9, above the architectural flag bits)
658 | li s5, 0 // Presumably zeroes the remaining cycle budget -- confirm in cpu.S
659 | j cpu_execute
660 | addi s7, s7, 1 // Delay slot: the PC already points past WAI when execution resumes
661 |
662 | .align 5
663 | cpu_wdm: // WDM #nn (treated as a 2-byte no-op)
664 | // Do nothing except step over the opcode and its operand byte
665 | j cpu_execute
666 | addi s7, s7, 2 // Delay slot: advance the PC by two
667 |
668 | .align 5
669 | cpu_nop: // NOP (1-byte no-op)
670 | // Do nothing except step over the opcode
671 | j cpu_execute
672 | addi s7, s7, 1 // Delay slot: advance the PC by one
673 |
--------------------------------------------------------------------------------
/src/cpu_transfer.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl cpu_tay8
23 | .globl cpu_tay16
24 | .globl cpu_tax8
25 | .globl cpu_tax16
26 | .globl cpu_tsx8
27 | .globl cpu_tsx16
28 | .globl cpu_tya8
29 | .globl cpu_tya16
30 | .globl cpu_txa8
31 | .globl cpu_txa16
32 | .globl cpu_txse
33 | .globl cpu_txs
34 | .globl cpu_txy8
35 | .globl cpu_txy16
36 | .globl cpu_tyx8
37 | .globl cpu_tyx16
38 | .globl cpu_tdc
39 | .globl cpu_tcd
40 | .globl cpu_tsc
41 | .globl cpu_tcse
42 | .globl cpu_tcs
43 | .globl cpu_lda8
44 | .globl cpu_lda16
45 | .globl cpu_ldx8
46 | .globl cpu_ldx16
47 | .globl cpu_ldy8
48 | .globl cpu_ldy16
49 | .globl cpu_stz8
50 | .globl cpu_stz16
51 | .globl cpu_sta8
52 | .globl cpu_sta16
53 | .globl cpu_stx8
54 | .globl cpu_stx16
55 | .globl cpu_sty8
56 | .globl cpu_sty16
57 | .globl cpu_pha8
58 | .globl cpu_pha16
59 | .globl cpu_phx8
60 | .globl cpu_phx16
61 | .globl cpu_phy8
62 | .globl cpu_phy16
63 | .globl cpu_php
64 | .globl cpu_phb
65 | .globl cpu_phk
66 | .globl cpu_phd
67 | .globl cpu_pei
68 | .globl cpu_pea
69 | .globl cpu_per
70 | .globl cpu_pla8
71 | .globl cpu_pla16
72 | .globl cpu_plx8
73 | .globl cpu_plx16
74 | .globl cpu_ply8
75 | .globl cpu_ply16
76 | .globl cpu_pld
77 | .globl cpu_plb
78 | .globl cpu_plp
79 | .globl cpu_mvp8
80 | .globl cpu_mvp16
81 | .globl cpu_mvn8
82 | .globl cpu_mvn16
83 |
84 | .text
85 | .set noreorder
86 |
87 | .align 5
88 | cpu_tay8: // TAY (8-bit)
89 | // Transfer the accumulator to register Y and set flags (8-bit)
90 | lbu a0, accumulator + 1 // 8-bit forms access the byte at offset +1 of the 16-bit variable
91 | sb a0, register_y + 1
92 | j set_nz8 // a0 carries the transferred value
93 | addi s7, s7, 1 // Delay slot: advance the PC
94 |
95 | .align 5
96 | cpu_tay16: // TAY (16-bit)
97 | // Transfer the accumulator to register Y and set flags (16-bit)
98 | lhu a0, accumulator
99 | sh a0, register_y
100 | j set_nz16 // a0 carries the transferred value
101 | addi s7, s7, 1 // Delay slot: advance the PC
102 |
103 | .align 5
104 | cpu_tax8: // TAX (8-bit)
105 | // Transfer the accumulator to register X and set flags (8-bit)
106 | lbu a0, accumulator + 1
107 | sb a0, register_x + 1
108 | j set_nz8
109 | addi s7, s7, 1 // Delay slot: advance the PC
110 |
111 | .align 5
112 | cpu_tax16: // TAX (16-bit)
113 | // Transfer the accumulator to register X and set flags (16-bit)
114 | lhu a0, accumulator
115 | sh a0, register_x
116 | j set_nz16
117 | addi s7, s7, 1 // Delay slot: advance the PC
118 |
119 | .align 5
120 | cpu_tsx8: // TSX (8-bit)
121 | // Transfer the stack pointer to register X and set flags (8-bit)
122 | lbu a0, stack_ptr + 1 // Only the byte at +1 of the stack pointer is copied
123 | sb a0, register_x + 1
124 | j set_nz8
125 | addi s7, s7, 1 // Delay slot: advance the PC
126 |
127 | .align 5
128 | cpu_tsx16: // TSX (16-bit)
129 | // Transfer the stack pointer to register X and set flags (16-bit)
130 | lhu a0, stack_ptr
131 | sh a0, register_x
132 | j set_nz16
133 | addi s7, s7, 1 // Delay slot: advance the PC
134 |
135 | .align 5
136 | cpu_tya8: // TYA (8-bit)
137 | // Transfer register Y to the accumulator and set flags (8-bit)
138 | lbu a0, register_y + 1
139 | sb a0, accumulator + 1 // Only the byte at +1 is written; the other accumulator byte is untouched
140 | j set_nz8
141 | addi s7, s7, 1 // Delay slot: advance the PC
142 |
143 | .align 5
144 | cpu_tya16: // TYA (16-bit)
145 | // Transfer register Y to the accumulator and set flags (16-bit)
146 | lhu a0, register_y
147 | sh a0, accumulator
148 | j set_nz16
149 | addi s7, s7, 1 // Delay slot: advance the PC
150 |
151 | .align 5
152 | cpu_txa8: // TXA (8-bit)
153 | // Transfer register X to the accumulator and set flags (8-bit)
154 | lbu a0, register_x + 1
155 | sb a0, accumulator + 1 // Only the byte at +1 is written; the other accumulator byte is untouched
156 | j set_nz8
157 | addi s7, s7, 1 // Delay slot: advance the PC
158 |
159 | .align 5
160 | cpu_txa16: // TXA (16-bit)
161 | // Transfer register X to the accumulator and set flags (16-bit)
162 | lhu a0, register_x
163 | sh a0, accumulator
164 | j set_nz16
165 | addi s7, s7, 1 // Delay slot: advance the PC
166 |
167 | .align 5
168 | cpu_txse: // TXS (emulation mode)
169 | // Transfer register X to the stack pointer (emulation mode); flags are not touched
170 | lbu a0, register_x + 1
171 | sb a0, stack_ptr + 1 // Only the byte at +1 is replaced; the other stack pointer byte keeps its value
172 | j cpu_execute
173 | addi s7, s7, 1 // Delay slot: advance the PC
174 |
175 | .align 5
176 | cpu_txs: // TXS
177 | // Transfer register X to the stack pointer (all 16 bits); flags are not touched
178 | lhu a0, register_x
179 | sh a0, stack_ptr
180 | j cpu_execute
181 | addi s7, s7, 1 // Delay slot: advance the PC
182 |
183 | .align 5
184 | cpu_txy8: // TXY (8-bit)
185 | // Transfer register X to register Y and set flags (8-bit)
186 | lbu a0, register_x + 1
187 | sb a0, register_y + 1
188 | j set_nz8 // a0 carries the transferred value
189 | addi s7, s7, 1 // Delay slot: advance the PC
190 |
191 | .align 5
192 | cpu_txy16: // TXY (16-bit)
193 | // Transfer register X to register Y and set flags (16-bit)
194 | lhu a0, register_x
195 | sh a0, register_y
196 | j set_nz16 // a0 carries the transferred value
197 | addi s7, s7, 1 // Delay slot: advance the PC
198 |
199 | .align 5
200 | cpu_tyx8: // TYX (8-bit)
201 | // Transfer register Y to register X and set flags (8-bit)
202 | lbu a0, register_y + 1
203 | sb a0, register_x + 1
204 | j set_nz8
205 | addi s7, s7, 1 // Delay slot: advance the PC
206 |
207 | .align 5
208 | cpu_tyx16: // TYX (16-bit)
209 | // Transfer register Y to register X and set flags (16-bit)
210 | lhu a0, register_y
211 | sh a0, register_x
212 | j set_nz16
213 | addi s7, s7, 1 // Delay slot: advance the PC
214 |
215 | .align 5
216 | cpu_tdc: // TDC (always 16-bit)
217 | // Transfer the direct offset to the accumulator and set flags
218 | move a0, s8 // The direct offset lives in host register s8, not in memory
219 | sh a0, accumulator
220 | j set_nz16
221 | addi s7, s7, 1 // Delay slot: advance the PC
222 |
223 | .align 5
224 | cpu_tcd: // TCD (always 16-bit)
225 | // Transfer the accumulator to the direct offset and set flags
226 | lhu a0, accumulator
227 | move s8, a0
228 | j set_nz16
229 | addi s7, s7, 1 // Delay slot: advance the PC
230 |
231 | .align 5
232 | cpu_tsc: // TSC (always 16-bit)
233 | // Transfer the stack pointer to the accumulator and set flags
234 | lhu a0, stack_ptr
235 | sh a0, accumulator
236 | j set_nz16
237 | addi s7, s7, 1 // Delay slot: advance the PC
238 |
239 | .align 5
240 | cpu_tcse: // TCS (emulation mode)
241 | // Transfer the accumulator to the stack pointer (emulation mode); flags are not touched
242 | lbu a0, accumulator + 1
243 | sb a0, stack_ptr + 1 // Only the byte at +1 is replaced; the other stack pointer byte keeps its value
244 | j cpu_execute
245 | addi s7, s7, 1 // Delay slot: advance the PC
246 |
247 | .align 5
248 | cpu_tcs: // TCS
249 | // Transfer the accumulator to the stack pointer (all 16 bits); flags are not touched
250 | lhu a0, accumulator
251 | sh a0, stack_ptr
252 | j cpu_execute
253 | addi s7, s7, 1 // Delay slot: advance the PC
254 |
255 | .align 5
256 | cpu_lda8: // LDA op (8-bit)
257 | // Load a value to the accumulator and set flags (8-bit)
258 | MEM_READ8 // No operand given: presumably reads at an address left in s0 by the addressing-mode code (see macros.h)
259 | sb v0, accumulator + 1
260 | j set_nz8
261 | move a0, v0 // Delay slot: pass the loaded value; the PC is not advanced in this handler
262 |
263 | .align 5
264 | cpu_lda16: // LDA op (16-bit)
265 | // Load a value to the accumulator and set flags (16-bit)
266 | MEM_READ16 // Operand address presumably prepared by the caller -- confirm
267 | sh v0, accumulator
268 | j set_nz16
269 | move a0, v0 // Delay slot: pass the loaded value
270 |
271 | .align 5
272 | cpu_ldx8: // LDX op (8-bit)
273 | // Load a value to register X and set flags (8-bit)
274 | MEM_READ8 // Operand address presumably prepared by the caller -- confirm
275 | sb v0, register_x + 1
276 | j set_nz8
277 | move a0, v0 // Delay slot: pass the loaded value
278 |
279 | .align 5
280 | cpu_ldx16: // LDX op (16-bit)
281 | // Load a value to register X and set flags (16-bit)
282 | MEM_READ16 // Operand address presumably prepared by the caller -- confirm
283 | sh v0, register_x
284 | j set_nz16
285 | move a0, v0 // Delay slot: pass the loaded value
286 |
287 | .align 5
288 | cpu_ldy8: // LDY op (8-bit)
289 | // Load a value to register Y and set flags (8-bit)
290 | MEM_READ8 // Operand address presumably prepared by the caller -- confirm
291 | sb v0, register_y + 1
292 | j set_nz8
293 | move a0, v0 // Delay slot: pass the loaded value
294 |
295 | .align 5
296 | cpu_ldy16: // LDY op (16-bit)
297 | // Load a value to register Y and set flags (16-bit)
298 | MEM_READ16 // Operand address presumably prepared by the caller -- confirm
299 | sh v0, register_y
300 | j set_nz16
301 | move a0, v0 // Delay slot: pass the loaded value
302 |
303 | .align 5
304 | cpu_stz8: // STZ op (8-bit)
305 | // Store zero to memory (8-bit); the store handlers leave the flags and the PC untouched
306 | li a1, 0 // a1 is the value operand of the write macro
307 | MEM_WRITE8 // No operand given: presumably writes at an address left in s0 by the addressing-mode code (see macros.h)
308 | j cpu_execute
309 | nop // Delay slot
310 |
311 | .align 5
312 | cpu_stz16: // STZ op (16-bit)
313 | // Store zero to memory (16-bit)
314 | li a1, 0
315 | MEM_WRITE16 // Operand address presumably prepared by the caller -- confirm
316 | j cpu_execute
317 | nop // Delay slot
318 |
319 | .align 5
320 | cpu_sta8: // STA op (8-bit)
321 | // Store the accumulator to memory (8-bit)
322 | lbu a1, accumulator + 1
323 | MEM_WRITE8 // Operand address presumably prepared by the caller -- confirm
324 | j cpu_execute
325 | nop // Delay slot
326 |
327 | .align 5
328 | cpu_sta16: // STA op (16-bit)
329 | // Store the accumulator to memory (16-bit)
330 | lhu a1, accumulator
331 | MEM_WRITE16 // Operand address presumably prepared by the caller -- confirm
332 | j cpu_execute
333 | nop // Delay slot
334 |
335 | .align 5
336 | cpu_stx8: // STX op (8-bit)
337 | // Store register X to memory (8-bit)
338 | lbu a1, register_x + 1
339 | MEM_WRITE8 // Operand address presumably prepared by the caller -- confirm
340 | j cpu_execute
341 | nop // Delay slot
342 |
343 | .align 5
344 | cpu_stx16: // STX op (16-bit)
345 | // Store register X to memory (16-bit)
346 | lhu a1, register_x
347 | MEM_WRITE16 // Operand address presumably prepared by the caller -- confirm
348 | j cpu_execute
349 | nop // Delay slot
350 |
351 | .align 5
352 | cpu_sty8: // STY op (8-bit)
353 | // Store register Y to memory (8-bit)
354 | lbu a1, register_y + 1
355 | MEM_WRITE8 // Operand address presumably prepared by the caller -- confirm
356 | j cpu_execute
357 | nop // Delay slot
358 |
359 | .align 5
360 | cpu_sty16: // STY op (16-bit)
361 | // Store register Y to memory (16-bit)
362 | lhu a1, register_y
363 | MEM_WRITE16 // Operand address presumably prepared by the caller -- confirm
364 | j cpu_execute
365 | nop // Delay slot
366 |
367 | .align 5
368 | cpu_pha8: // PHA (8-bit)
369 | // Push the accumulator to the stack (8-bit): decrement the pointer, then write just above it
370 | lhu s0, stack_ptr
371 | lbu a1, accumulator + 1 // a1 is the value operand of the write macro
372 | addi s0, s0, -1
373 | MEM_WRITE8 1(s0) // This is the slot the old stack pointer addressed
374 | sh s0, stack_ptr // Relies on s0 surviving the write macro
375 | j cpu_execute
376 | addi s7, s7, 1 // Delay slot: advance the PC
377 |
378 | .align 5
379 | cpu_pha16: // PHA (16-bit)
380 | // Push the accumulator to the stack (16-bit)
381 | lhu s0, stack_ptr
382 | lhu a1, accumulator
383 | addi s0, s0, -2
384 | MEM_WRITE16 1(s0)
385 | sh s0, stack_ptr
386 | j cpu_execute
387 | addi s7, s7, 1 // Delay slot: advance the PC
388 |
389 | .align 5
390 | cpu_phx8: // PHX (8-bit)
391 | // Push register X to the stack (8-bit)
392 | lhu s0, stack_ptr
393 | lbu a1, register_x + 1
394 | addi s0, s0, -1
395 | MEM_WRITE8 1(s0)
396 | sh s0, stack_ptr
397 | j cpu_execute
398 | addi s7, s7, 1 // Delay slot: advance the PC
399 |
400 | .align 5
401 | cpu_phx16: // PHX (16-bit)
402 | // Push register X to the stack (16-bit)
403 | lhu s0, stack_ptr
404 | lhu a1, register_x
405 | addi s0, s0, -2
406 | MEM_WRITE16 1(s0)
407 | sh s0, stack_ptr
408 | j cpu_execute
409 | addi s7, s7, 1 // Delay slot: advance the PC
410 |
411 | .align 5
412 | cpu_phy8: // PHY (8-bit)
413 | // Push register Y to the stack (8-bit)
414 | lhu s0, stack_ptr
415 | lbu a1, register_y + 1
416 | addi s0, s0, -1
417 | MEM_WRITE8 1(s0)
418 | sh s0, stack_ptr
419 | j cpu_execute
420 | addi s7, s7, 1 // Delay slot: advance the PC
421 |
422 | .align 5
423 | cpu_phy16: // PHY (16-bit)
424 | // Push register Y to the stack (16-bit)
425 | lhu s0, stack_ptr
426 | lhu a1, register_y
427 | addi s0, s0, -2
428 | MEM_WRITE16 1(s0)
429 | sh s0, stack_ptr
430 | j cpu_execute
431 | addi s7, s7, 1 // Delay slot: advance the PC
432 |
433 | .align 5
434 | cpu_php: // PHP
435 | // Push the flags to the stack
436 | lhu s0, stack_ptr
437 | andi a1, s4, 0xFF // Only the low flag byte is pushed; the bits above it stay internal
438 | addi s0, s0, -1
439 | MEM_WRITE8 1(s0)
440 | sh s0, stack_ptr
441 | j cpu_execute
442 | addi s7, s7, 1 // Delay slot: advance the PC
443 |
444 | .align 5
445 | cpu_phb: // PHB
446 | // Push the data bank to the stack
447 | lhu s0, stack_ptr
448 | lbu a1, data_bank + 1 // The bank byte is kept at offset +1 of the data_bank variable
449 | addi s0, s0, -1
450 | MEM_WRITE8 1(s0)
451 | sh s0, stack_ptr
452 | j cpu_execute
453 | addi s7, s7, 1 // Delay slot: advance the PC
454 |
455 | .align 5
456 | cpu_phk: // PHK
457 | // Push the PC bank to the stack
458 | lhu s0, stack_ptr
459 | srl a1, s7, 16 // The program bank sits above the 16-bit PC in s7
460 | addi s0, s0, -1
461 | MEM_WRITE8 1(s0)
462 | sh s0, stack_ptr
463 | j cpu_execute
464 | addi s7, s7, 1 // Delay slot: advance the PC
465 |
466 | .align 5
467 | cpu_phd: // PHD
468 | // Push the direct offset to the stack (16-bit)
469 | lhu s0, stack_ptr
470 | move a1, s8 // The direct offset lives in host register s8
471 | addi s0, s0, -2
472 | MEM_WRITE16 1(s0)
473 | sh s0, stack_ptr
474 | j cpu_execute
475 | addi s7, s7, 1 // Delay slot: advance the PC
476 |
477 | .align 5
478 | cpu_pei: // PEI nn (push the 16-bit value stored at a direct address)
479 | // Get the 8-bit immediate value added to the direct offset as an address
480 | MEM_READ8 1(s7)
481 | add s0, s8, v0 // NOTE(review): the sum is not masked to 16 bits -- confirm the intended wrap behavior
482 |
483 | // Load a 16-bit value from memory and push it to the stack
484 | MEM_READ16 // No operand given: presumably reads at the address in s0 (see macros.h)
485 | lhu s0, stack_ptr // s0 is reused for the stack pointer from here on
486 | move a1, v0 // a1 is the value operand of the write macro
487 | addi s0, s0, -2
488 | MEM_WRITE16 1(s0)
489 | sh s0, stack_ptr
490 | j cpu_execute
491 | addi s7, s7, 2 // Delay slot: step over the 2-byte instruction
492 |
493 | .align 5
494 | cpu_pea: // PEA #nnnn (push the operand itself)
495 | // Push a 16-bit immediate value to the stack
496 | MEM_READ16 1(s7) // v0 = the value to push
497 | lhu s0, stack_ptr
498 | move a1, v0 // a1 is the value operand of the write macro
499 | addi s0, s0, -2
500 | MEM_WRITE16 1(s0)
501 | sh s0, stack_ptr
502 | j cpu_execute
503 | addi s7, s7, 3 // Delay slot: step over the 3-byte instruction
504 |
505 | .align 5
506 | cpu_per: // PER disp16 (push a PC-relative address)
507 | // Push the PC plus a 16-bit immediate value to the stack
508 | MEM_READ16 1(s7)
509 | addi s7, s7, 3 // Advance first: the displacement is relative to the next instruction
510 | add a1, s7, v0 // Bits above 15 are presumably dropped by the 16-bit write -- confirm in macros.h
511 | la s1, stack_ptr // Keep the variable's address so the final store fits in the delay slot
512 | lhu s0, (s1)
513 | addi s0, s0, -2
514 | MEM_WRITE16 1(s0)
515 | j cpu_execute
516 | sh s0, (s1) // Delay slot: commit the new stack pointer
517 |
518 | .align 5
519 | cpu_pla8: // PLA (8-bit)
520 | // Pop the accumulator from the stack and set flags (8-bit): read just above the pointer, then increment it
521 | lhu s0, stack_ptr
522 | MEM_READ8 1(s0)
523 | addi t0, s0, 1 // Relies on s0 surviving the read macro
524 | sh t0, stack_ptr
525 | move a0, v0 // a0 is the argument for set_nz8
526 | sb a0, accumulator + 1
527 | j set_nz8
528 | addi s7, s7, 1 // Delay slot: advance the PC
529 |
530 | .align 5
531 | cpu_pla16: // PLA (16-bit)
532 | // Pop the accumulator from the stack and set flags (16-bit)
533 | lhu s0, stack_ptr
534 | MEM_READ16 1(s0)
535 | addi t0, s0, 2
536 | sh t0, stack_ptr
537 | move a0, v0 // a0 is the argument for set_nz16
538 | sh a0, accumulator
539 | j set_nz16
540 | addi s7, s7, 1 // Delay slot: advance the PC
541 |
542 | .align 5
543 | cpu_plx8: // PLX (8-bit)
544 | // Pop register X from the stack and set flags (8-bit)
545 | lhu s0, stack_ptr
546 | MEM_READ8 1(s0)
547 | addi t0, s0, 1
548 | sh t0, stack_ptr
549 | move a0, v0
550 | sb a0, register_x + 1
551 | j set_nz8
552 | addi s7, s7, 1 // Delay slot: advance the PC
553 |
554 | .align 5
555 | cpu_plx16: // PLX (16-bit)
556 | // Pop register X from the stack and set flags (16-bit)
557 | lhu s0, stack_ptr
558 | MEM_READ16 1(s0)
559 | addi t0, s0, 2
560 | sh t0, stack_ptr
561 | move a0, v0
562 | sh a0, register_x
563 | j set_nz16
564 | addi s7, s7, 1 // Delay slot: advance the PC
565 |
566 | .align 5
567 | cpu_ply8: // PLY (8-bit)
568 | // Pop register Y from the stack and set flags (8-bit)
569 | lhu s0, stack_ptr
570 | MEM_READ8 1(s0)
571 | addi t0, s0, 1
572 | sh t0, stack_ptr
573 | move a0, v0
574 | sb a0, register_y + 1
575 | j set_nz8
576 | addi s7, s7, 1 // Delay slot: advance the PC
577 |
578 | .align 5
579 | cpu_ply16: // PLY (16-bit)
580 | // Pop register Y from the stack and set flags (16-bit)
581 | lhu s0, stack_ptr
582 | MEM_READ16 1(s0)
583 | addi t0, s0, 2
584 | sh t0, stack_ptr
585 | move a0, v0
586 | sh a0, register_y
587 | j set_nz16
588 | addi s7, s7, 1 // Delay slot: advance the PC
589 |
590 | .align 5
591 | cpu_pld: // PLD (always 16-bit)
592 | // Pop the direct offset from the stack and set flags
593 | lhu s0, stack_ptr
594 | MEM_READ16 1(s0)
595 | addi t0, s0, 2
596 | sh t0, stack_ptr
597 | move a0, v0 // a0 is the argument for set_nz16
598 | move s8, a0 // The direct offset lives in host register s8
599 | j set_nz16
600 | addi s7, s7, 1 // Delay slot: advance the PC
601 |
602 | .align 5
603 | cpu_plb: // PLB (always 8-bit)
604 | // Pop the data bank from the stack and set flags
605 | lhu s0, stack_ptr
606 | MEM_READ8 1(s0)
607 | addi t0, s0, 1
608 | sh t0, stack_ptr
609 | move a0, v0 // a0 is the argument for set_nz8
610 | sb a0, data_bank + 1 // The bank byte is kept at offset +1 of the data_bank variable
611 | j set_nz8
612 | addi s7, s7, 1 // Delay slot: advance the PC
613 |
614 | .align 5
615 | cpu_plp: // PLP
616 | // Pop the flags from the stack
617 | lhu s0, stack_ptr
618 | MEM_READ8 1(s0)
619 | addi t0, s0, 1
620 | sh t0, stack_ptr
621 | andi s4, s4, 0xFF00 // Keep the upper flag bits, replace the low byte
622 | or s4, s4, v0
623 | j update_mode // Any flag may have changed, so leave through update_mode
624 | addi s7, s7, 1 // Delay slot: advance the PC
625 |
626 | .align 5
627 | cpu_mvp8: // MVP ss,dd (8-bit): moves one byte per dispatch and walks X/Y downwards
628 | // Set the data bank and add register Y to get the destination address (8-bit)
629 | MEM_READ8 1(s7) // First operand byte = destination bank
630 | sb v0, data_bank + 1
631 | sll s0, v0, 16
632 | lbu t0, register_y + 1
633 | add s0, s0, t0 // s0 = (destination bank << 16) + Y
634 |
635 | // Add register X to the source bank to get the source address (8-bit)
636 | MEM_READ8 2(s7) // Second operand byte = source bank
637 | sll s1, v0, 16
638 | lbu t0, register_x + 1
639 | add s1, s1, t0 // s1 = (source bank << 16) + X
640 |
641 | // Transfer an 8-bit value from the source to the destination
642 | MEM_READ8 0(s1)
643 | move a1, v0 // a1 is the value operand of the write macro
644 | MEM_WRITE8 // No operand given: presumably writes at the address in s0 (see macros.h)
645 |
646 | // Decrement the registers (8-bit); the byte stores drop the bank bits and wrap within 8 bits
647 | addi t0, s1, -1
648 | sb t0, register_x + 1
649 | addi t0, s0, -1
650 | sb t0, register_y + 1
651 |
652 | // Decrement the accumulator and don't increment the PC until it underflows
653 | la t2, accumulator
654 | lhu t0, (t2) // t0 = count before the decrement (always the full 16-bit accumulator)
655 | addi t1, t0, -1
656 | bnez t0, cpu_execute // Nonzero: the PC is unchanged, so this instruction runs again for the next byte
657 | sh t1, (t2) // Delay slot: store the decremented count on both paths (0 becomes 0xFFFF)
658 | j cpu_execute
659 | addi s7, s7, 3 // Delay slot: finished, step over the 3-byte instruction
660 |
661 | .align 5
662 | cpu_mvp16: // MVP ss,dd (16-bit): moves one byte per dispatch and walks X/Y downwards
663 | // Set the data bank and add register Y to get the destination address (16-bit)
664 | MEM_READ8 1(s7) // First operand byte = destination bank
665 | sb v0, data_bank + 1
666 | sll s0, v0, 16
667 | lhu t0, register_y
668 | add s0, s0, t0 // s0 = (destination bank << 16) + Y
669 |
670 | // Add register X to the source bank to get the source address (16-bit)
671 | MEM_READ8 2(s7) // Second operand byte = source bank
672 | sll s1, v0, 16
673 | lhu t0, register_x
674 | add s1, s1, t0 // s1 = (source bank << 16) + X
675 |
676 | // Transfer an 8-bit value from the source to the destination
677 | MEM_READ8 0(s1)
678 | move a1, v0 // a1 is the value operand of the write macro
679 | MEM_WRITE8 // No operand given: presumably writes at the address in s0 (see macros.h)
680 |
681 | // Decrement the registers (16-bit); the halfword stores drop the bank bits and wrap within 16 bits
682 | addi t0, s1, -1
683 | sh t0, register_x
684 | addi t0, s0, -1
685 | sh t0, register_y
686 |
687 | // Decrement the accumulator and don't increment the PC until it underflows
688 | la t2, accumulator
689 | lhu t0, (t2) // t0 = count before the decrement
690 | addi t1, t0, -1
691 | bnez t0, cpu_execute // Nonzero: the PC is unchanged, so this instruction runs again for the next byte
692 | sh t1, (t2) // Delay slot: store the decremented count on both paths (0 becomes 0xFFFF)
693 | j cpu_execute
694 | addi s7, s7, 3 // Delay slot: finished, step over the 3-byte instruction
695 |
696 | .align 5
697 | cpu_mvn8: // MVN ss,dd (8-bit): moves one byte per dispatch and walks X/Y upwards
698 | // Set the data bank and add register Y to get the destination address (8-bit)
699 | MEM_READ8 1(s7) // First operand byte = destination bank
700 | sb v0, data_bank + 1
701 | sll s0, v0, 16
702 | lbu t0, register_y + 1
703 | add s0, s0, t0 // s0 = (destination bank << 16) + Y
704 |
705 | // Add register X to the source bank to get the source address (8-bit)
706 | MEM_READ8 2(s7) // Second operand byte = source bank
707 | sll s1, v0, 16
708 | lbu t0, register_x + 1
709 | add s1, s1, t0 // s1 = (source bank << 16) + X
710 |
711 | // Transfer an 8-bit value from the source to the destination
712 | MEM_READ8 0(s1)
713 | move a1, v0 // a1 is the value operand of the write macro
714 | MEM_WRITE8 // No operand given: presumably writes at the address in s0 (see macros.h)
715 |
716 | // Increment the registers (8-bit); the byte stores drop the bank bits and wrap within 8 bits
717 | addi t0, s1, 1
718 | sb t0, register_x + 1
719 | addi t0, s0, 1
720 | sb t0, register_y + 1
721 |
722 | // Decrement the accumulator and don't increment the PC until it underflows
723 | la t2, accumulator
724 | lhu t0, (t2) // t0 = count before the decrement (always the full 16-bit accumulator)
725 | addi t1, t0, -1
726 | bnez t0, cpu_execute // Nonzero: the PC is unchanged, so this instruction runs again for the next byte
727 | sh t1, (t2) // Delay slot: store the decremented count on both paths (0 becomes 0xFFFF)
728 | j cpu_execute
729 | addi s7, s7, 3 // Delay slot: finished, step over the 3-byte instruction
730 |
731 | .align 5
732 | cpu_mvn16: // MVN ss,dd (16-bit): moves one byte per dispatch and walks X/Y upwards
733 | // Set the data bank and add register Y to get the destination address (16-bit)
734 | MEM_READ8 1(s7) // First operand byte = destination bank
735 | sb v0, data_bank + 1
736 | sll s0, v0, 16
737 | lhu t0, register_y
738 | add s0, s0, t0 // s0 = (destination bank << 16) + Y
739 |
740 | // Add register X to the source bank to get the source address (16-bit)
741 | MEM_READ8 2(s7) // Second operand byte = source bank
742 | sll s1, v0, 16
743 | lhu t0, register_x
744 | add s1, s1, t0 // s1 = (source bank << 16) + X
745 |
746 | // Transfer an 8-bit value from the source to the destination
747 | MEM_READ8 0(s1)
748 | move a1, v0 // a1 is the value operand of the write macro
749 | MEM_WRITE8 // No operand given: presumably writes at the address in s0 (see macros.h)
750 |
751 | // Increment the registers (16-bit); the halfword stores drop the bank bits and wrap within 16 bits
752 | addi t0, s1, 1
753 | sh t0, register_x
754 | addi t0, s0, 1
755 | sh t0, register_y
756 |
757 | // Decrement the accumulator and don't increment the PC until it underflows
758 | la t2, accumulator
759 | lhu t0, (t2) // t0 = count before the decrement
760 | addi t1, t0, -1
761 | bnez t0, cpu_execute // Nonzero: the PC is unchanged, so this instruction runs again for the next byte
762 | sh t1, (t2) // Delay slot: store the decremented count on both paths (0 becomes 0xFFFF)
763 | j cpu_execute
764 | addi s7, s7, 3 // Delay slot: finished, step over the 3-byte instruction
765 |
--------------------------------------------------------------------------------
/src/defines.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | // Master cycle counts for CPU memory accesses
23 | #define RAM_CYCLE 8 // NOTE(review): presumably charged per RAM access -- confirm at the use sites
24 | #define IO_CYCLE 6 // NOTE(review): presumably charged per I/O access -- confirm at the use sites
25 |
26 | // Approximate master cycle counts for APU components (all rounded to whole multiples of APU_CYCLE)
27 | #define APU_CYCLE 21 // 21477000Hz / 1024000Hz = 20.973632812
28 | #define APU_TIMER1 2688 // 21477000Hz / 8000Hz = 2684.625 (2688 = 128 * 21)
29 | #define APU_TIMER2 336 // 21477000Hz / 64000Hz = 335.578125 (336 = 16 * 21)
30 |
31 | // Approximate master cycle count for DSP samples
32 | #define DSP_SAMPLE 672 // 21477000Hz / 32000Hz = 671.15625 (672 = 32 * 21)
33 |
34 | // Maximum opcode bytes that can be compiled in an APU JIT block
35 | #define BLOCK_SIZE 16
36 |
37 | // The frame section data structure's size in bytes
38 | #define SECTION_SIZE 0x40 // 64 bytes
39 |
40 | // Flags for tracking JIT block state; one bit each (bits 0-11). NOTE(review): the S*/L* pairs presumably cover X, Y, A, SP and the flags register - confirm their exact meaning in the APU emitter
41 | #define FLAG_SX (1 << 0)
42 | #define FLAG_SY (1 << 1)
43 | #define FLAG_SA (1 << 2)
44 | #define FLAG_SS (1 << 3)
45 | #define FLAG_SF (1 << 4)
46 | #define FLAG_LX (1 << 5)
47 | #define FLAG_LY (1 << 6)
48 | #define FLAG_LA (1 << 7)
49 | #define FLAG_LS (1 << 8)
50 | #define FLAG_LF (1 << 9)
51 | #define FLAG_PC (1 << 10)
52 | #define FLAG_NZ (1 << 11)
53 |
54 | // Register values for emitting JIT code: register numbers 0-31, meant for the register fields of the opcode macros below
55 | #define ZERO 0
56 | #define AT_ 1 // NOTE(review): trailing underscore presumably avoids a clash with another AT name - confirm
57 | #define V0 2
58 | #define V1 3
59 | #define A0 4
60 | #define A1 5
61 | #define A2 6
62 | #define A3 7
63 | #define T0 8
64 | #define T1 9
65 | #define T2 10
66 | #define T3 11
67 | #define T4 12
68 | #define T5 13
69 | #define T6 14
70 | #define T7 15
71 | #define S0 16
72 | #define S1 17
73 | #define S2 18
74 | #define S3 19
75 | #define S4 20
76 | #define S5 21
77 | #define S6 22
78 | #define S7 23
79 | #define T8 24
80 | #define T9 25
81 | #define K0 26
82 | #define K1 27
83 | #define GP 28
84 | #define SP 29
85 | #define S8 30
86 | #define RA 31
87 |
88 | // Opcode macros for emitting JIT code: each yields one instruction word with rs/rb at bit 21, rt at bit 16, rd at bit 11, sa at bit 6, and imm/ofs masked to the low 16 bits. Register arguments are not masked, so pass values 0-31
89 | #define ADD(rd, rs, rt) (0x00000020 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16))
90 | #define ADDI(rt, rs, imm) (0x20000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF))
91 | #define AND(rd, rs, rt) (0x00000024 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16))
92 | #define ANDI(rt, rs, imm) (0x30000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF))
93 | #define BEQ(rt, rs, ofs) (0x10000000 | ((rt) << 16) | ((rs) << 21) | ((ofs) & 0xFFFF))
94 | #define DIV(rs, rt) (0x0000001A | ((rs) << 21) | ((rt) << 16))
95 | #define LBU(rt, rb, ofs) (0x90000000 | ((rt) << 16) | ((rb) << 21) | ((ofs) & 0xFFFF))
96 | #define LUI(rt, imm) (0x3C000000 | ((rt) << 16) | ((imm) & 0xFFFF))
97 | #define MFHI(rd) (0x00000010 | ((rd) << 11))
98 | #define MFLO(rd) (0x00000012 | ((rd) << 11))
99 | #define MULT(rs, rt) (0x00000018 | ((rs) << 21) | ((rt) << 16))
100 | #define OR(rd, rs, rt) (0x00000025 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16))
101 | #define ORI(rt, rs, imm) (0x34000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF))
102 | #define SB(rt, rb, ofs) (0xA0000000 | ((rt) << 16) | ((rb) << 21) | ((ofs) & 0xFFFF))
103 | #define SH(rt, rb, ofs) (0xA4000000 | ((rt) << 16) | ((rb) << 21) | ((ofs) & 0xFFFF))
104 | #define SLL(rd, rt, sa) (0x00000000 | ((rd) << 11) | ((rt) << 16) | ((sa) << 6)) // sa is not masked; pass 0-31
105 | #define SLT(rd, rs, rt) (0x0000002A | ((rd) << 11) | ((rs) << 21) | ((rt) << 16))
106 | #define SLTU(rd, rs, rt) (0x0000002B | ((rd) << 11) | ((rs) << 21) | ((rt) << 16))
107 | #define SRL(rd, rt, sa) (0x00000002 | ((rd) << 11) | ((rt) << 16) | ((sa) << 6)) // sa is not masked; pass 0-31
108 | #define SUB(rd, rs, rt) (0x00000022 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16))
109 | #define XOR(rd, rs, rt) (0x00000026 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16))
110 | #define XORI(rt, rs, imm) (0x38000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF))
111 |
112 | // Addresses of data in RDRAM that are shared between CPU and RSP; each region starts directly below the previous one, and the constant subtracted is the size in bytes reserved for the region being defined
113 | #define ROM_BUFFER 0xA0200000
114 | #define JIT_BUFFER (ROM_BUFFER - 0x40000)
115 | #define TILE_CACHE_BG (JIT_BUFFER - 0x40000)
116 | #define TILE_CACHE_OBJ (TILE_CACHE_BG - 0x8000)
117 | #define TILE_STATS_BG (TILE_CACHE_OBJ - 0x1000)
118 | #define TILE_STATS_OBJ (TILE_STATS_BG - 0x200)
119 | #define OBJECT_CACHE (TILE_STATS_OBJ - 0x800)
120 | #define SECTION_QUEUE2 (OBJECT_CACHE - 0x5000)
121 | #define SECTION_QUEUE1 (SECTION_QUEUE2 - 0x5000)
122 | #define DIRTY_QUEUE2 (SECTION_QUEUE1 - 0x400)
123 | #define DIRTY_QUEUE1 (DIRTY_QUEUE2 - 0x400)
124 | #define OAM_QUEUE2 (DIRTY_QUEUE1 - 0x2200)
125 | #define OAM_QUEUE1 (OAM_QUEUE2 - 0x2200)
126 | #define PALETTE_QUEUE2 (OAM_QUEUE1 - 0x800)
127 | #define PALETTE_QUEUE1 (PALETTE_QUEUE2 - 0x800)
128 | #define VRAM_BUFFER (PALETTE_QUEUE1 - 0x10000)
129 | #define MODE7_TEXTURE (VRAM_BUFFER - 0x2000)
130 | #define FRAMEBUFFER3 (MODE7_TEXTURE - 0x20D00)
131 | #define FRAMEBUFFER2 (FRAMEBUFFER3 - 0x20D00)
132 | #define FRAMEBUFFER1 (FRAMEBUFFER2 - 0x20D00)
133 |
134 | // RSP addresses of data in DMEM; used to avoid setting the upper address. Each offset is the previous symbol plus the size in bytes of that previous symbol
135 | #define TEXTURE 0x000
136 | #define TILE_TABLE (TEXTURE + 0x40)
137 | #define VRAM_TABLE (TILE_TABLE + 0x400)
138 | #define OAM (VRAM_TABLE + 0x400)
139 | #define SCRN_DATA (OAM + 0x220)
140 | #define CHAR_DATA (SCRN_DATA + 0x80)
141 | #define MODE7_BOUNDS (CHAR_DATA + 0x80)
142 | #define CACHE_BASES (MODE7_BOUNDS + 0x10)
143 | // Start of section values (BGHOFS through SPLIT_LINE add up to 0x40 bytes, the value of SECTION_SIZE)
144 | #define BGHOFS (CACHE_BASES + 0x10)
145 | #define BGVOFS (BGHOFS + 0x8)
146 | #define BGNBA (BGVOFS + 0x8)
147 | #define OAMADD (BGNBA + 0x2)
148 | #define M7HOFS (OAMADD + 0x2)
149 | #define M7VOFS (M7HOFS + 0x2)
150 | #define M7A (M7VOFS + 0x2)
151 | #define M7B (M7A + 0x2)
152 | #define M7C (M7B + 0x2)
153 | #define M7D (M7C + 0x2)
154 | #define M7X (M7D + 0x2)
155 | #define M7Y (M7X + 0x2)
156 | #define WBGSEL (M7Y + 0x2)
157 | #define SUB_COLOR (WBGSEL + 0x2)
158 | #define MAIN_COLOR (SUB_COLOR + 0x2)
159 | #define BGXSC (MAIN_COLOR + 0x2)
160 | #define WHX (BGXSC + 0x4)
161 | #define OBSEL (WHX + 0x4)
162 | #define M7SEL (OBSEL + 0x1)
163 | #define WOBJSEL (M7SEL + 0x1)
164 | #define WBGLOG (WOBJSEL + 0x1)
165 | #define WOBJLOG (WBGLOG + 0x1)
166 | #define CGWSEL (WOBJLOG + 0x1)
167 | #define CGADSUB (CGWSEL + 0x1)
168 | #define TS (CGADSUB + 0x1)
169 | #define TM (TS + 0x1)
170 | #define TSW (TM + 0x1)
171 | #define TMW (TSW + 0x1)
172 | #define BG_MODE (TMW + 0x1)
173 | #define STAT_FLAGS (BG_MODE + 0x1)
174 | #define SPLIT_LINE (STAT_FLAGS + 0x1)
175 | // Start of frame values (MASK_SEL begins right after the one-byte SPLIT_LINE)
176 | #define MASK_SEL (SPLIT_LINE + 0x1)
177 | #define FB_OFFSET (MASK_SEL + 0x8)
178 | #define FRAME_END (FB_OFFSET + 0x8)
179 | #define VRAM_ADDRS (FRAME_END + 0x8)
180 | #define PALETTE_PTR (VRAM_ADDRS + 0x8)
181 | #define OAM_PTR (PALETTE_PTR + 0x8)
182 | #define DIRTY_PTR (OAM_PTR + 0x8)
183 | #define SECTION_PTR (DIRTY_PTR + 0x8)
184 | #define FRAMEBUFFER (SECTION_PTR + 0x8)
185 | // End of frame values; the symbols below continue the same offset chain
186 | #define RDP_INIT (FRAMEBUFFER + 0x8)
187 | #define RDP_FRAME (RDP_INIT + 0x40)
188 | #define RDP_FILL (RDP_FRAME + 0x18)
189 | #define RDP_WINDOW (RDP_FILL + 0x38)
190 | #define RDP_TILE (RDP_WINDOW + 0x10)
191 | #define RDP_TILE7 (RDP_TILE + 0x28)
192 | #define TILE_PARAMS (RDP_TILE7 + 0x78)
193 | #define LAYER_CHART (TILE_PARAMS + 0x20)
194 | #define OBJ_SIZES (LAYER_CHART + 0x90)
195 | #define FILL_JUMPS (OBJ_SIZES + 0x20)
196 | #define TILE_JUMPS (FILL_JUMPS + 0x10)
197 | #define SHARED_JUMPS (TILE_JUMPS + 0xC)
198 | #define CACHE_RETS (SHARED_JUMPS + 0xC)
199 | #define CACHE_PTRS (CACHE_RETS + 0x8)
200 | #define SCRN_OFSV (CACHE_PTRS + 0x14)
201 | #define SHIFT_TABLE (SCRN_OFSV + 0x8)
202 | #define TEXREC_OFS (SHIFT_TABLE + 0x8)
203 | #define FILLREC_MASK (TEXREC_OFS + 0x4)
204 | #define LDBLK_BITS (FILLREC_MASK + 0x4)
205 | #define MODE7_MASK (LDBLK_BITS + 0x4)
206 | #define PRIO_CHECKS (MODE7_MASK + 0x4)
207 | #define WIN_BOUNDS (PRIO_CHECKS + 0x8)
208 | #define VEC_DATA 0xF70 // Fixed offset; not chained from WIN_BOUNDS
209 |
210 | // Macros that convert addresses between cached and uncached; the two views differ by a fixed 0x20000000, and neither macro checks which view its argument is already in
211 | #define CACHED(addr) ((addr) - 0x20000000)
212 | #define UNCACHED(addr) ((addr) + 0x20000000)
213 |
214 | // Macro that converts an RSP DMEM address to a CPU address by adding the 0xA4000000 base
215 | #define DMEM(addr) (0xA4000000 + (addr))
216 |
--------------------------------------------------------------------------------
/src/dma.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "macros.h"
21 |
22 | .globl reload_hdma
23 | .globl trigger_hdma
24 | .globl write_mdmaen
25 | .globl read_dmapx
26 | .globl read_bbadx
27 | .globl read_a1txl
28 | .globl read_a1txh
29 | .globl read_a1bx
30 | .globl read_dasxl
31 | .globl read_dasxh
32 | .globl read_dasbx
33 | .globl read_a2axl
34 | .globl read_a2axh
35 | .globl read_ntrlx
36 | .globl read_unusedx
37 | .globl write_hdmaen
38 | .globl write_dmapx
39 | .globl write_bbadx
40 | .globl write_a1txl
41 | .globl write_a1txh
42 | .globl write_a1bx
43 | .globl write_dasxl
44 | .globl write_dasxh
45 | .globl write_dasbx
46 | .globl write_a2axl
47 | .globl write_a2axh
48 | .globl write_ntrlx
49 | .globl write_unusedx
50 |
51 | .data
52 |
53 | .align 4
54 | dma_stack: .hword 0 // Holds a1 while write_mdmaen runs
55 | hdma_mask: .byte 0 // HDMA channels still to be processed (one bit per channel)
56 | end_mask: .byte 0 // HDMA channels disabled by hdma_disable since the last reload_hdma
57 | hdmaen: .byte 0 // Last value written through write_hdmaen
58 |
59 | .align 4
60 | a1tbx: .word 0:8 // One word per channel: bank in byte 1, CPU address high/low in bytes 2/3
61 | dastbx: .word 0:8 // One word per channel: indirect bank in byte 1, byte counter high/low in bytes 2/3
62 | a2abx: .word 0:8 // One word per channel: HDMA current address, bank byte mirrored from a1tbx
63 | dmapx: .byte 0:8 // One DMAPx control byte per channel
64 | bbadx: .byte 0:8 // One I/O address low byte per channel
65 | ntrlx: .byte 0:8 // One HDMA line counter per channel
66 | unusedx: .byte 0:8 // One unused-register byte per channel
67 |
68 | .align 4
69 | transfer_modes: // I/O address offsets, repeated every 4 bytes; one 4-byte row per mode (DMAPx bits 0-2)
70 | .byte 0, 0, 0, 0 // Mode 0
71 | .byte 0, 1, 0, 1 // Mode 1
72 | .byte 0, 0, 0, 0 // Mode 2
73 | .byte 0, 0, 1, 1 // Mode 3
74 | .byte 0, 1, 2, 3 // Mode 4
75 | .byte 0, 1, 0, 1 // Mode 5
76 | .byte 0, 0, 0, 0 // Mode 6
77 | .byte 0, 0, 1, 1 // Mode 7
78 |
79 | .align 4
80 | unit_lengths: .byte 1, 2, 2, 4, 4, 4, 2, 4 // Transfer mode unit lengths (bytes per HDMA unit), indexed by mode
81 | address_steps: .byte 1, 0, -1, 0 // CPU address steps, indexed by DMAPx bits 3-4 and loaded sign-extended
82 |
83 | .text
84 | .set noreorder
85 |
86 | .align 5
87 | reload_hdma:
88 | // Reload the HDMA channel masks: hdma_mask takes the HDMAEN value and end_mask is cleared
89 | lbu t0, hdmaen // Remaining enable bits; shifted right once per channel
90 | li t1, 0 // Channel index
91 | sb t0, hdma_mask
92 | sb zero, end_mask
93 |
94 | reload_start:
95 | // Check if the current HDMA channel is enabled
96 | andi t2, t0, 0x1
97 | beqz t2, reload_end
98 |
99 | // Reload the channel's table address and line counter
100 | sll t2, t1, 2 // Word offset; this is the delay slot of the branch above, so it also runs for skipped channels
101 | lw t3, a1tbx(t2)
102 | li t4, 1 // Counter of 1: trigger_hdma decrements before testing, so the first entry is fetched on the first line
103 | sw t3, a2abx(t2)
104 | sb t4, ntrlx(t1)
105 |
106 | reload_end:
107 | // Move to the next channel until no more are enabled
108 | srl t0, t0, 1
109 | bnez t0, reload_start
110 | addi t1, t1, 1 // Delay slot: the channel index advances on every pass
111 | jr ra
112 | nop
113 |
114 | .align 5
115 | trigger_hdma:
116 | // Set initial HDMA values and check if any channels are enabled
117 | lbu s0, hdma_mask // Mask
118 | move s1, zero // Index
119 | bnez s0, hdma_start
120 | move gp, ra // Delay slot: return address is kept in gp because the jalr calls below overwrite ra
121 | jr gp // No channels enabled; the delay slot is the li at hdma_start, which only writes t0
122 |
123 | hdma_start:
124 | // Disable TLB-mapped I/O registers during HDMA
125 | li t0, 0x1
126 | sb t0, io_disable
127 |
128 | hdma_next:
129 | // Check if the current HDMA channel is enabled
130 | andi t0, s0, 0x1
131 | beqz t0, hdma_end
132 | nop
133 | lbu t0, dmapx(s1)
134 |
135 | // Get the channel's I/O transfer mode (t8) and unit length (t9)
136 | andi t1, t0, 0x7 // Transfer mode: low 3 bits of DMAPx
137 | sll t2, t1, 2 // Each transfer_modes row is 4 bytes
138 | la t8, transfer_modes
139 | add t8, t8, t2
140 | lbu t9, unit_lengths(t1)
141 |
142 | // Decrement the line counter and check if finished
143 | lbu v1, ntrlx(s1)
144 | sll t1, s1, 2
145 | lw s2, a2abx(t1) // s2: current table address of the channel
146 | addi v1, v1, -1
147 | sb v1, ntrlx(s1)
148 | andi t1, v1, 0x7F // Low 7 bits: lines left in this entry (bit 7 is tested at same_entry)
149 | bnez t1, same_entry
150 | nop
151 |
152 | // Reload the line counter for the next table entry (the byte read comes back in v0)
153 | MEM_READ8 0(s2)
154 | sb v0, ntrlx(s1)
155 | addi s2, s2, 1
156 | sll t1, s1, 2
157 | sw s2, a2abx(t1)
158 | beqz v0, hdma_disable // A zero line count ends the channel
159 | nop
160 | ori v1, v1, 0x80 // Do transfer
161 |
162 | same_entry:
163 | // Check if a transfer should occur (repeat or first)
164 | andi t1, v1, 0x80
165 | beqz t1, hdma_end
166 | nop
167 |
168 | // Check for indirect mode
169 | lbu t0, dmapx(s1)
170 | andi t0, t0, 0x40
171 | bnez t0, hdma_indir
172 |
173 | // Increment the direct address and start a transfer
174 | add t0, s2, t9 // Delay slot of the branch above; the indirect path recomputes t0 before using it
175 | sll t1, s1, 2
176 | sh t0, a2abx + 2(t1) // Halfword store: only the 16-bit address is updated, not the bank byte
177 | b hdma_ready // Delay slot is the andi at hdma_indir, which only writes t1
178 |
179 | hdma_indir:
180 | // Check if the indirect address should be reloaded
181 | andi t1, v1, 0x7F // Zero only when a new table entry was just loaded above
182 | bnez t1, indir_ready
183 | nop
184 |
185 | // Read the value and increment the direct address
186 | MEM_READ16 0(s2)
187 | addi t0, s2, 2
188 | sll t1, s1, 2
189 | sh t0, a2abx + 2(t1)
190 |
191 | // Reload the indirect address
192 | lbu t2, dastbx + 1(t1) // Existing indirect bank byte
193 | sll t2, t2, 16
194 | or s2, t2, v0 // Bank in bits 16-23, the 16-bit value just read in the low half
195 | sw s2, dastbx(t1)
196 |
197 | indir_ready:
198 | // Increment the indirect address and start a transfer
199 | sll t1, s1, 2
200 | lw s2, dastbx(t1) // s2: address the unit is transferred from or to
201 | add t0, s2, t9
202 | sw t0, dastbx(t1) // Word store, unlike the halfword store used for the direct address
203 |
204 | hdma_ready:
205 | // Check the channel's transfer direction
206 | lbu t0, dmapx(s1)
207 | andi t0, t0, 0x80
208 | bnez t0, hio_cpu
209 | nop
210 |
211 | hcpu_io:
212 | // Copy a value from memory to an I/O register
213 | MEM_READ8 0(s2)
214 | lbu a0, bbadx(s1)
215 | lbu t0, (t8) // I/O address offset for this byte of the unit
216 | add a0, a0, t0
217 | sll t0, a0, 2 // Handler table entries are words
218 | lw t0, write_iomap(t0)
219 | jalr t0
220 | move a1, v0 // Delay slot: the byte read above becomes the handler argument a1
221 |
222 | // Move to the next byte until a unit has been transferred
223 | addi s2, s2, 1
224 | addi t9, t9, -1
225 | bnez t9, hcpu_io
226 | addi t8, t8, 1 // Delay slot: next offset in the transfer mode row
227 | b hdma_end
228 | nop
229 |
230 | hio_cpu:
231 | // Copy a value from an I/O register to memory
232 | lbu a0, bbadx(s1)
233 | lbu t0, (t8) // I/O address offset for this byte of the unit
234 | add a0, a0, t0
235 | sll t0, a0, 2 // Handler table entries are words
236 | lw t0, read_iomap(t0)
237 | jalr t0
238 | nop
239 | move a1, v0 // Value returned by the read handler
240 | MEM_WRITE8 0(s2)
241 |
242 | // Move to the next byte until a unit has been transferred
243 | addi s2, s2, 1
244 | addi t9, t9, -1
245 | bnez t9, hio_cpu
246 | addi t8, t8, 1 // Delay slot: next offset in the transfer mode row
247 | b hdma_end
248 | nop
249 |
250 | hdma_disable:
251 | // Disable an HDMA channel for the rest of the frame
252 | lbu t0, hdma_mask
253 | lbu t1, end_mask
254 | li t2, 1
255 | sll t2, t2, s1 // Bit for the current channel
256 | or t1, t1, t2
257 | sb t1, end_mask
258 | xori t1, t1, 0xFF // Invert: keep only channels that have not ended
259 | and t0, t0, t1
260 | sb t0, hdma_mask
261 |
262 | hdma_end:
263 | // Move to the next channel until no more are enabled
264 | srl s0, s0, 1
265 | bnez s0, hdma_next
266 | addi s1, s1, 1 // Delay slot: the channel index advances on every pass
267 | sb zero, io_disable // Re-enable the TLB-mapped I/O registers
268 | jr gp
269 | nop
270 |
271 | .align 5
272 | write_mdmaen: // a1: value
273 | // Backup registers and set initial DMA values (ra goes to gp because the jalr calls below overwrite ra)
274 | move gp, ra
275 | sh a1, dma_stack
276 | andi t8, a1, 0xFF // Mask
277 | move t9, zero // Index
278 |
279 | // Disable TLB-mapped I/O registers during DMA
280 | li t0, 0x1
281 | sb t0, io_disable
282 |
283 | dma_next:
284 | // Check if the current DMA channel is enabled
285 | andi t0, t8, 0x1
286 | beqz t0, dma_end
287 | nop
288 | lbu t0, dmapx(t9)
289 |
290 | // Get the channel's CPU address step (s2)
291 | srl t1, t0, 3
292 | andi t1, t1, 0x3 // DMAPx bits 3-4 select the step
293 | lb s2, address_steps(t1) // Signed load so the -1 entry stays negative
294 |
295 | // Get the channel's I/O transfer mode (sp)
296 | andi t1, t0, 0x7
297 | sll t1, t1, 2 // Each transfer_modes row is 4 bytes
298 | la sp, transfer_modes
299 | add sp, sp, t1
300 |
301 | // Check the channel's transfer direction
302 | andi t0, t0, 0x80
303 | bnez t0, io_cpu
304 |
305 | cpu_io:
306 | // Read a value from the CPU address and adjust
307 | sll t0, t9, 2 // Also the delay slot of the branch above; io_cpu reloads t0 before using it
308 | lw a0, a1tbx(t0)
309 | MEM_READ8 0(a0)
310 | add a0, a0, s2
311 | sll t0, t9, 2
312 | sh a0, a1tbx + 2(t0) // Halfword store: only the 16-bit address is updated, not the bank byte
313 |
314 | // Write the value to the I/O address
315 | lbu a0, bbadx(t9)
316 | lbu t0, (sp) // I/O address offset for this byte
317 | add a0, a0, t0
318 | sll t0, a0, 2 // Handler table entries are words
319 | lw t0, write_iomap(t0)
320 | jalr t0
321 | move a1, v0 // Delay slot: the byte read above becomes the handler argument a1
322 |
323 | // Move to the next I/O offset in the transfer mode
324 | andi t0, sp, 0x3 // Position within the 4-byte row (relies on the table alignment)
325 | sub sp, sp, t0
326 | addi t0, t0, 1
327 | andi t0, t0, 0x3 // Wrap back to the start of the row after the fourth offset
328 | add sp, sp, t0
329 |
330 | // Decrement byte counter and continue until it hits zero
331 | sll t0, t9, 2
332 | lhu t1, dastbx + 2(t0)
333 | addi t1, t1, -1 // Decrement happens before the test, so a starting count of 0 wraps to 0xFFFF and keeps going
334 | sh t1, dastbx + 2(t0)
335 | bnez t1, cpu_io
336 | nop
337 | b dma_end
338 | nop
339 |
340 | io_cpu:
341 | // Read a value from the I/O address
342 | lbu a0, bbadx(t9)
343 | lbu t0, (sp) // I/O address offset for this byte
344 | add a0, a0, t0
345 | sll t0, a0, 2 // Handler table entries are words
346 | lw t0, read_iomap(t0)
347 | jalr t0
348 | nop
349 |
350 | // Move to the next I/O offset in the transfer mode
351 | andi t0, sp, 0x3 // Position within the 4-byte row (relies on the table alignment)
352 | sub sp, sp, t0
353 | addi t0, t0, 1
354 | andi t0, t0, 0x3 // Wrap back to the start of the row after the fourth offset
355 | add sp, sp, t0
356 |
357 | // Write the value to the CPU address and adjust
358 | sll t0, t9, 2
359 | lw a0, a1tbx(t0)
360 | move a1, v0 // Value returned by the read handler
361 | MEM_WRITE8 0(a0)
362 | add a0, a0, s2
363 | sll t0, t9, 2
364 | sh a0, a1tbx + 2(t0) // Halfword store: only the 16-bit address is updated, not the bank byte
365 |
366 | // Decrement byte counter and continue until it hits zero
367 | sll t0, t9, 2
368 | lhu t1, dastbx + 2(t0)
369 | addi t1, t1, -1 // Decrement happens before the test, so a starting count of 0 wraps to 0xFFFF and keeps going
370 | sh t1, dastbx + 2(t0)
371 | bnez t1, io_cpu
372 | nop
373 |
374 | dma_end:
375 | // Move to the next channel until no more are enabled
376 | srl t8, t8, 1
377 | bnez t8, dma_next
378 | addi t9, t9, 1 // Delay slot: the channel index advances on every pass
379 | lhu a1, dma_stack // Restore the value saved on entry
380 | sb zero, io_disable // Re-enable the TLB-mapped I/O registers
381 | jr gp
382 | nop
383 |
384 | .align 5
385 | write_hdmaen: // a1: value
386 | // Write to the HDMAEN register and update the HDMA mask: newly set bits are added, except for channels already recorded in end_mask
387 | sb a1, hdmaen
388 | lbu t0, hdma_mask
389 | lbu t1, end_mask
390 | or t0, t0, a1 // Only adds channels; bits cleared in the new value stay set in hdma_mask
391 | xori t1, t1, 0xFF // Invert: channels that have not ended
392 | and t0, t0, t1
393 | sb t0, hdma_mask
394 | jr ra
395 | nop
396 |
397 | .align 5
398 | read_dmapx: // a0: address - v0: value
399 | // Read from one of the DMAPx registers; the channel is taken from address bits 4-6
400 | srl t0, a0, 4
401 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
402 | lbu v0, dmapx(t0)
403 | jr ra
404 | nop
405 |
406 | .align 5
407 | read_bbadx: // a0: address - v0: value
408 | // Get the low byte of one of the I/O addresses; the channel is taken from address bits 4-6
409 | srl t0, a0, 4
410 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
411 | lbu v0, bbadx(t0)
412 | jr ra
413 | nop
414 |
415 | .align 5
416 | read_a1txl: // a0: address - v0: value
417 | // Get the low byte of one of the CPU addresses (byte 3 of the channel word in a1tbx)
418 | srl t0, a0, 2
419 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
420 | lbu v0, a1tbx + 3(t0)
421 | jr ra
422 | nop
423 |
424 | .align 5
425 | read_a1txh: // a0: address - v0: value
426 | // Get the high byte of one of the CPU addresses (byte 2 of the channel word in a1tbx)
427 | srl t0, a0, 2
428 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
429 | lbu v0, a1tbx + 2(t0)
430 | jr ra
431 | nop
432 |
433 | .align 5
434 | read_a1bx: // a0: address - v0: value
435 | // Get the bank of one of the CPU addresses (byte 1 of the channel word in a1tbx)
436 | srl t0, a0, 2
437 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
438 | lbu v0, a1tbx + 1(t0)
439 | jr ra
440 | nop
441 |
442 | .align 5
443 | read_dasxl: // a0: address - v0: value
444 | // Get the low byte of one of the byte counters (byte 3 of the channel word in dastbx)
445 | srl t0, a0, 2
446 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
447 | lbu v0, dastbx + 3(t0)
448 | jr ra
449 | nop
450 |
451 | .align 5
452 | read_dasxh: // a0: address - v0: value
453 | // Get the high byte of one of the byte counters (byte 2 of the channel word in dastbx)
454 | srl t0, a0, 2
455 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
456 | lbu v0, dastbx + 2(t0)
457 | jr ra
458 | nop
459 |
460 | .align 5
461 | read_dasbx: // a0: address - v0: value
462 | // Get the bank of one of the indirect HDMA addresses (byte 1 of the channel word in dastbx)
463 | srl t0, a0, 2
464 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
465 | lbu v0, dastbx + 1(t0)
466 | jr ra
467 | nop
468 |
469 | .align 5
470 | read_a2axl: // a0: address - v0: value
471 | // Get the low byte of one of the HDMA current addresses (byte 3 of the channel word in a2abx)
472 | srl t0, a0, 2
473 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
474 | lbu v0, a2abx + 3(t0)
475 | jr ra
476 | nop
477 |
478 | .align 5
479 | read_a2axh: // a0: address - v0: value
480 | // Get the high byte of one of the HDMA current addresses (byte 2 of the channel word in a2abx)
481 | srl t0, a0, 2
482 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
483 | lbu v0, a2abx + 2(t0)
484 | jr ra
485 | nop
486 |
487 | .align 5
488 | read_ntrlx: // a0: address - v0: value
489 | // Get one of the HDMA line counters; the channel is taken from address bits 4-6
490 | srl t0, a0, 4
491 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
492 | lbu v0, ntrlx(t0)
493 | jr ra
494 | nop
495 |
496 | .align 5
497 | read_unusedx: // a0: address - v0: value
498 | // Get one of the unused DMA values; the channel is taken from address bits 4-6
499 | srl t0, a0, 4
500 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
501 | lbu v0, unusedx(t0)
502 | jr ra
503 | nop
504 |
505 | .align 5
506 | write_dmapx: // a0: address, a1: value
507 | // Write to one of the DMAPx registers; the channel is taken from address bits 4-6
508 | srl t0, a0, 4
509 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
510 | sb a1, dmapx(t0)
511 | jr ra
512 | nop
513 |
514 | .align 5
515 | write_bbadx: // a0: address, a1: value
516 | // Set the low byte of one of the I/O addresses; the channel is taken from address bits 4-6
517 | srl t0, a0, 4
518 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
519 | sb a1, bbadx(t0)
520 | jr ra
521 | nop
522 |
523 | .align 5
524 | write_a1txl: // a0: address, a1: value
525 | // Set the low byte of one of the CPU addresses (byte 3 of the channel word in a1tbx)
526 | srl t0, a0, 2
527 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
528 | sb a1, a1tbx + 3(t0)
529 | jr ra
530 | nop
531 |
532 | .align 5
533 | write_a1txh: // a0: address, a1: value
534 | // Set the high byte of one of the CPU addresses (byte 2 of the channel word in a1tbx)
535 | srl t0, a0, 2
536 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
537 | sb a1, a1tbx + 2(t0)
538 | jr ra
539 | nop
540 |
541 | .align 5
542 | write_a1bx: // a0: address, a1: value
543 | // Set the bank of one of the CPU addresses (byte 1 of the channel word in a1tbx)
544 | srl t0, a0, 2
545 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
546 | sb a1, a1tbx + 1(t0)
547 | sb a1, a2abx + 1(t0) // The same bank byte is mirrored into the HDMA current address
548 | jr ra
549 | nop
550 |
551 | .align 5
552 | write_dasxl: // a0: address, a1: value
553 | // Set the low byte of one of the byte counters (byte 3 of the channel word in dastbx)
554 | srl t0, a0, 2
555 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
556 | sb a1, dastbx + 3(t0)
557 | jr ra
558 | nop
559 |
560 | .align 5
561 | write_dasxh: // a0: address, a1: value
562 | // Set the high byte of one of the byte counters (byte 2 of the channel word in dastbx)
563 | srl t0, a0, 2
564 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
565 | sb a1, dastbx + 2(t0)
566 | jr ra
567 | nop
568 |
569 | .align 5
570 | write_dasbx: // a0: address, a1: value
571 | // Set the bank of one of the indirect HDMA addresses (byte 1 of the channel word in dastbx)
572 | srl t0, a0, 2
573 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
574 | sb a1, dastbx + 1(t0)
575 | jr ra
576 | nop
577 |
578 | .align 5
579 | write_a2axl: // a0: address, a1: value
580 | // Set the low byte of one of the HDMA current addresses (byte 3 of the channel word in a2abx)
581 | srl t0, a0, 2
582 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
583 | sb a1, a2abx + 3(t0)
584 | jr ra
585 | nop
586 |
587 | .align 5
588 | write_a2axh: // a0: address, a1: value
589 | // Set the high byte of one of the HDMA current addresses (byte 2 of the channel word in a2abx)
590 | srl t0, a0, 2
591 | andi t0, t0, 0x1C // t0: channel index times 4, from address bits 4-6 (t0 is clobbered)
592 | sb a1, a2abx + 2(t0)
593 | jr ra
594 | nop
595 |
596 | .align 5
597 | write_ntrlx: // a0: address, a1: value
598 | // Set one of the HDMA line counters; the channel is taken from address bits 4-6
599 | srl t0, a0, 4
600 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
601 | sb a1, ntrlx(t0)
602 | jr ra
603 | nop
604 |
605 | .align 5
606 | write_unusedx: // a0: address, a1: value
607 | // Set one of the unused DMA values; the channel is taken from address bits 4-6
608 | srl t0, a0, 4
609 | andi t0, t0, 0x7 // t0: channel index, used as the byte offset (t0 is clobbered)
610 | sb a1, unusedx(t0)
611 | jr ra
612 | nop
613 |
--------------------------------------------------------------------------------
/src/dsp.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "defines.h"
21 |
22 | .globl dsp_sample
23 | .globl read_dspaddr
24 | .globl read_dspdata
25 | .globl write_dspaddr
26 | .globl write_dspdata
27 |
28 | .data
29 |
30 | .align 4
31 | dsp_buffer: .word 0:0x800 // 0x2000 bytes of output samples; write_sample submits it to the AI in 0x1000-byte halves
32 | dsp_regs: .byte 0:0x80 // DSP register file, one byte per register 0x00-0x7F
33 |
34 | .align 4
35 | noise_sample: .hword 0x4000 // Current noise generator value
36 | noise_step: .hword 0x8000 // Timer value of the next noise step; the initial 0x8000 is above the 0-0x77FF timer range
37 | dsp_timer: .hword 0 // Global timer, decremented once per sample and wrapped from 0 to 0x77FF
38 | dsp_pointer: .hword 0 // Byte offset of the next sample in dsp_buffer
39 | dsp_addr: .byte 0
40 | enabled: .byte 0 // One bit per voice; a clear bit makes dsp_sample skip that voice
41 |
42 | .align 4
43 | mode_funcs: .word 0 // Start of a 16-byte per-voice record (accessed with offset voice * 16): envelope step handler
44 | cached_vols: .word 0 // Two signed halfword volumes: left at +0, right at +2
45 | counters: .hword 0 // Sample position counter, advanced by the pitch every sample
46 | envelopes: .hword 0
47 | next_steps: .hword 0 // Timer value at which the envelope step handler runs next
48 | cur_addrs: .hword 0 // Address of the current BRR block in apu_ram
49 |
50 | .align 4
51 | voice_dups: .dword 0:14 // 112 bytes: room for seven more 16-byte voice records after the first
52 | cur_samples: .dword 0:8 // Four decoded halfword samples per voice (accessed with offset voice * 8)
53 |
54 | .align 4
55 | filter_jumps: .word filter_end, filter_1, filter_2, filter_3 // Indexed by BRR header bits 2-3 (filter 0 goes straight to filter_end)
56 | gain_jumps: .word linear_dec, expon_dec, linear_inc, bent_inc
57 |
58 | .align 4
59 | write_iomap: // One write handler word per DSP register 0x00-0x7F; an entry written as handler:N is repeated N times
60 | .word write_vol, write_vol, write_reg, write_reg // 0x00-0x03
61 | .word write_reg, write_adsr, write_reg, write_reg // 0x04-0x07
62 | .word write_reg:(0x0F - 0x07) // 0x08-0x0F
63 | .word write_vol, write_vol, write_reg, write_reg // 0x10-0x13
64 | .word write_reg, write_adsr, write_reg, write_reg // 0x14-0x17
65 | .word write_reg:(0x1F - 0x17) // 0x18-0x1F
66 | .word write_vol, write_vol, write_reg, write_reg // 0x20-0x23
67 | .word write_reg, write_adsr, write_reg, write_reg // 0x24-0x27
68 | .word write_reg:(0x2F - 0x27) // 0x28-0x2F
69 | .word write_vol, write_vol, write_reg, write_reg // 0x30-0x33
70 | .word write_reg, write_adsr, write_reg, write_reg // 0x34-0x37
71 | .word write_reg:(0x3F - 0x37) // 0x38-0x3F
72 | .word write_vol, write_vol, write_reg, write_reg // 0x40-0x43
73 | .word write_reg, write_adsr, write_reg, write_reg // 0x44-0x47
74 | .word write_reg:(0x4B - 0x47) // 0x48-0x4B
75 | .word write_kon, write_reg, write_reg, write_reg // 0x4C-0x4F
76 | .word write_vol, write_vol, write_reg, write_reg // 0x50-0x53
77 | .word write_reg, write_adsr, write_reg, write_reg // 0x54-0x57
78 | .word write_reg:(0x5B - 0x57) // 0x58-0x5B
79 | .word write_koff, write_reg, write_reg, write_reg // 0x5C-0x5F
80 | .word write_vol, write_vol, write_reg, write_reg // 0x60-0x63
81 | .word write_reg, write_adsr, write_reg, write_reg // 0x64-0x67
82 | .word write_reg:(0x6B - 0x67) // 0x68-0x6B
83 | .word time_noise, write_reg, write_reg, write_reg // 0x6C-0x6F
84 | .word write_vol, write_vol, write_reg, write_reg // 0x70-0x73
85 | .word write_reg, write_adsr, write_reg, write_reg // 0x74-0x77
86 | .word write_reg:(0x7F - 0x77) // 0x78-0x7F
87 |
88 | .align 4
89 | timer_rates: // Lookup table for timer rates and offsets: 32 entries, each a (rate, offset) pair of halfwords
90 | .hword -1, 0x8000, 2048, 0, 1536, 1040
91 | .hword 1280, 536, 1024, 0, 768, 1040
92 | .hword 640, 536, 512, 0, 384, 1040
93 | .hword 320, 536, 256, 0, 192, 1040
94 | .hword 160, 536, 128, 0, 96, 1040
95 | .hword 80, 536, 64, 0, 48, 1040
96 | .hword 40, 536, 32, 0, 24, 1040
97 | .hword 20, 536, 16, 0, 12, 1040
98 | .hword 10, 536, 8, 0, 6, 1040
99 | .hword 5, 536, 4, 0, 3, 1040
100 | .hword 2, 0, 1, 0
101 |
102 | .text
103 | .set noreorder
104 |
105 | .align 5
106 | dsp_sample:
107 | // Check if audio is enabled and schedule the next sample
108 | lbu t0, audio_set
109 | beqz t0, apu_execute
110 | addi a3, a3, -DSP_SAMPLE // Delay slot: a3 is reduced by one sample period on both paths
111 |
112 | // Decrement the global timer once per sample
113 | lhu t0, dsp_timer
114 | bnez t0, not_zero
115 | addi gp, t0, -1 // Delay slot: gp holds the decremented timer on both paths
116 | addi gp, gp, 0x7800 // Timer was 0, so -1 is wrapped to 0x77FF
117 | not_zero:
118 | sh gp, dsp_timer
119 |
120 | // Handle a noise step if the timer matches
121 | lhu t0, noise_step
122 | bne t0, gp, skip_noise
123 | nop
124 | jal make_noise
125 | nop
126 |
127 | skip_noise:
128 | // Set initial values for generating a sample
129 | li t6, -1 // Voice count
130 | li t5, 0 // Left sample
131 | li t4, 0 // Right sample
132 | lbu sp, enabled // sp is used as a plain register for the enabled-voice bits
133 |
134 | next_voice:
135 | // Check if a voice is enabled
136 | addi t6, t6, 1
137 | srl t0, sp, t6 // Enabled bits of this voice and all higher ones
138 | beqz t0, write_sample // None left: the mixed sample is complete
139 | andi t0, t0, 0x1 // Delay slot: isolate the bit of this voice
140 | beqz t0, next_voice
141 |
142 | // Handle an envelope step if the timer matches
143 | sll t8, t6, 4 // Voice record offset; delay slot of the branch above, so it runs on both paths
144 | lhu t0, next_steps(t8)
145 | bne t0, gp, get_pitch
146 | nop
147 | lw t0, mode_funcs(t8)
148 | jalr t0
149 | nop
150 |
151 | get_pitch:
152 | // Get the pitch scaler for the current voice
153 | lhu t0, dsp_regs + 0x2(t8) // VxPITCH(L/H)
154 | sll t1, t0, 8
155 | srl t0, t0, 8
156 | or t0, t0, t1 // Bytes swapped: the low register byte is stored first
157 | andi t0, t0, 0x3FFF // Keep 14 bits, which also drops the bits shifted above bit 15
158 |
159 | // Increase the voice's position counter by the scaler
160 | lhu t9, counters(t8)
161 | addu t2, t9, t0 // t2 keeps the unwrapped sum; bit 16 is checked in decode_chunk
162 | sh t2, counters(t8)
163 |
164 | // Decode a new 4-sample BRR chunk if the counter overflows
165 | xor t9, t9, t2
166 | srl t9, t9, 14 // Nonzero when any bit from 14 upward changed
167 | bnez t9, decode_chunk
168 | nop
169 |
170 | mix_sample:
171 | // Load the noise sample if enabled for the voice
172 | lbu t0, dsp_regs + 0x3D // NON
173 | srl t0, t0, t6
174 | andi t0, t0, 0x1
175 | beqz t0, load_sample
176 | nop
177 | lh t0, noise_sample
178 | b skip_sample // Delay slot is the sll at load_sample, which only writes t1
179 |
180 | load_sample:
181 | // Load the current sample from the voice's buffer
182 | sll t1, t6, 3 // Each voice owns 8 bytes of cur_samples
183 | srl t3, t2, 11
184 | andi t3, t3, 0x6 // Counter bits 12-13 as a halfword offset
185 | xori t3, t3, 0x6 // Inverted because next_sample fills the slots from offset 6 down to 0
186 | add t1, t1, t3
187 | lh t0, cur_samples(t1)
188 | beqz t0, next_voice // A zero sample contributes nothing, so mixing is skipped
189 | nop
190 |
191 | skip_sample:
192 | // Get the cached volumes for each channel
193 | lh t2, cached_vols + 0(t8)
194 | lh t3, cached_vols + 2(t8)
195 |
196 | // Mix the sample on the left channel
197 | mult t0, t2
198 | mflo t1
199 | sra t1, t1, 14
200 | beq t2, t3, same_volume // Equal volumes: the left product in t1 is reused for the right channel
201 | addu t5, t5, t1 // Delay slot: the left total is updated on both paths
202 |
203 | // Mix the sample on the right channel
204 | mult t0, t3
205 | mflo t1
206 | sra t1, t1, 14
207 | same_volume:
208 | b next_voice
209 | addu t4, t4, t1 // Delay slot: add to the right total
210 |
211 | .align 5
212 | write_sample: // t4: right, t5: left (dsp_sample accumulates the left total in t5 and the right total in t4)
213 | // Write a sample to the buffer as one word: t5 in the upper halfword, t4 in the lower
214 | andi t4, t4, 0xFFFF
215 | sll t5, t5, 16
216 | or t0, t4, t5
217 | lhu t1, dsp_pointer
218 | sw t0, UNCACHED(dsp_buffer)(t1)
219 |
220 | // Increment the sample pointer
221 | addi a0, t1, 4 // a0 keeps the unwrapped offset for the checks below
222 | andi t0, a0, 0x1FFF // Wrap at the 0x2000-byte buffer size
223 | sh t0, dsp_pointer
224 |
225 | // Submit a buffer to the AI when one is ready
226 | andi t0, a0, 0xFFF // Zero when a 0x1000-byte half has just been filled
227 | bnez t0, apu_execute
228 | lui t0, 0xA450 // AI register upper address
229 | la t1, dsp_buffer - 0x1000 // Minus one half, so adding a0 (0x1000 or 0x2000) gives the start of the half just filled
230 | add t1, t1, a0 // Buffer address
231 | sw t1, 0x0000(t0) // AI_DRAM_ADDR
232 | li t1, 0x00001000 // Buffer length
233 | j apu_execute
234 | sw t1, 0x0004(t0) // AI_LENGTH
235 |
236 | .align 5
237 | decode_chunk: // t2: counter, t6: voice, t8: offset
238 | // Stay in the same BRR block until the counter overflows
239 | lhu t0, cur_addrs(t8)
240 | srl t3, t2, 16
241 | beqz t3, decode_samples
242 | nop
243 |
244 | // Move to the next 9-byte BRR block based on current flags
245 | lbu t9, apu_ram(t0) // Header
246 | andi t3, t9, 0x1 // End
247 | beqz t3, decode_samples
248 | addi t0, t0, 9
249 | andi t3, t9, 0x2 // Loop
250 | beqz t3, end_voice
251 | nop
252 |
253 | // Get the address of the voice's sample table entry
254 | lbu t0, dsp_regs + 0x5D // DIR
255 | lbu t3, dsp_regs + 0x4(t8) // VxSRCN
256 | sll t0, t0, 8
257 | sll t3, t3, 2
258 | add t0, t0, t3
259 |
260 | // Load the loop address and start decoding from there
261 | lhu t0, apu_ram + 0x2(t0)
262 | sll t3, t0, 8
263 | srl t0, t0, 8
264 | or t0, t0, t3
265 | andi t0, t0, 0xFFFF
266 |
267 | decode_samples:
268 | // Get the address of the voice's sample buffer
269 | la t3, cur_samples
270 | sll v1, t6, 3
271 | add v1, v1, t3
272 |
273 | // Configure things based on the BRR block header
274 | sh t0, cur_addrs(t8)
275 | lbu t9, apu_ram(t0) // Header
276 | srl t1, t9, 4 // Shift
277 | andi t9, t9, 0xC // Filter
278 | lw t9, filter_jumps(t9)
279 |
280 | // Load 4 4-bit BRR samples at once to be decoded
281 | srl t3, t2, 13
282 | andi t3, t3, 0x6
283 | add t0, t0, t3
284 | lbu t3, apu_ram + 1(t0)
285 | lbu s0, apu_ram + 2(t0)
286 | sll t3, t3, 8
287 | or s0, s0, t3
288 | li s1, 8
289 |
290 | next_sample:
291 | // Pass the next sample, sign-extended and shifted, to the filter
292 | addi s1, s1, -2
293 | sll t3, s1, 1
294 | srl t0, s0, t3
295 | sll t0, t0, 28
296 | sra t0, t0, 28
297 | sll t0, t0, t1
298 | jr t9
299 | sra t0, t0, 1
300 |
301 | filter_1:
302 | // Apply the old sample, multiplied by 0.9375
303 | addi t7, s1, 2
304 | andi t7, t7, 0x6
305 | add t3, v1, t7
306 | lh t7, (t3)
307 | add t0, t0, t7
308 | sra t7, t7, 4
309 | b filter_end
310 | sub t0, t0, t7
311 |
312 | filter_2: // t0: new sample, s1: its buffer offset, v1: sample buffer
313 | // Apply the old sample, multiplied by 1.90625
314 | addi t7, s1, 2
315 | andi t7, t7, 0x6 // Offset of the previous sample, wrapping inside the 8-byte buffer
316 | add t3, v1, t7
317 | lh t7, (t3)
318 | sll t3, t7, 1
319 | add t0, t0, t3 // + old * 2
320 | sll t3, t7, 1
321 | add t3, t3, t7 // old * 3
322 | sra t3, t3, 5
323 | sub t0, t0, t3 // - old * 3/32, so old * 61/32 overall
324 |
325 | // Apply the older sample, multiplied by -0.9375
326 | addi t7, s1, 4
327 | andi t7, t7, 0x6 // Offset of the sample before the previous one
328 | add t3, v1, t7
329 | lh t7, (t3)
330 | sub t0, t0, t7 // - older
331 | sra t3, t7, 4
332 | b filter_end
333 | add t0, t0, t3 // Delay slot: + older / 16, so older * -15/16 overall
334 |
335 | filter_3: // t0: new sample, s1: its buffer offset, v1: sample buffer
336 | // Apply the old sample, multiplied by 1.796875
337 | addi t7, s1, 2
338 | andi t7, t7, 0x6 // Offset of the previous sample, wrapping inside the 8-byte buffer
339 | add t3, v1, t7
340 | lh t7, (t3)
341 | sll t3, t7, 1
342 | add t0, t0, t3 // + old * 2
343 | sll t3, t7, 3
344 | add t3, t3, t7 // old * 9
345 | sll t7, t7, 2
346 | add t3, t3, t7 // old * 13
347 | sra t3, t3, 6
348 | sub t0, t0, t3 // - old * 13/64, so old * 115/64 overall
349 |
350 | // Apply the older sample, multiplied by -0.8125
351 | addi t7, s1, 4
352 | andi t7, t7, 0x6 // Offset of the sample before the previous one
353 | add t3, v1, t7
354 | lh t7, (t3)
355 | sub t0, t0, t7 // - older
356 | sll t3, t7, 1
357 | add t3, t3, t7 // older * 3
358 | sra t3, t3, 4
359 | add t0, t0, t3 // + older * 3/16, so older * -13/16 overall; execution continues at the next label
360 |
361 | filter_end: // t0: filtered sample, s1: its buffer offset, v1: sample buffer
362 | // Store the sample and loop until all 4 are done
363 | add t3, v1, s1
364 | bnez s1, next_sample // s1 reaches 0 on the fourth sample
365 | sh t0, (t3) // Delay slot: the store happens whether or not the loop repeats
366 | j mix_sample // NOTE(review): no explicit delay-slot instruction; under noreorder the next assembled instruction (li t0, 1) would run in the slot and overwrite t0 - confirm mix_sample does not need it
367 |
368 | end_voice: // t6: voice, sp: enabled-voice mask held in a register
369 | // Disable the voice if it ends without loop
370 | li t0, 1
371 | sll t0, t0, t6 // Bit for this voice
372 | xori t0, t0, 0xFF // Invert within 8 bits to form a clear mask
373 | and sp, sp, t0
374 | sb sp, enabled // Write the updated mask back to memory
375 | j next_voice
376 | nop
377 |
378 | .align 5
379 | make_noise: // Advances noise_sample by one shift-register step, then reschedules via time_noise
380 | // Generate a new noise sample based on the old one
381 | lh t0, noise_sample
382 | sll t1, t0, 14 // Bit 0 moved up to bit 14
383 | sll t2, t0, 13 // Bit 1 moved up to bit 14
384 | xor t1, t1, t2
385 | andi t1, t1, 0x4000 // Keep only bit 14: bit 0 XOR bit 1
386 | srl t0, t0, 1
387 | or t0, t0, t1 // Shift right by one and insert the feedback bit at bit 14
388 | sh t0, noise_sample
389 | j time_noise
390 | nop
391 |
392 | .align 5
393 | time_noise: // Sets a0 and t6 for update_step; returns through it
394 | // Schedule the next step using the noise rate
395 | lbu t0, dsp_regs + 0x6C // FLG
396 | andi a0, t0, 0x1F // Low 5 bits of FLG select the rate
397 | sll a0, a0, 2 // Scale to a word offset for the timer_rates lookup in update_step
398 | j update_step
399 | li t6, 8 // Noise (delay slot): index 8 makes set_step take the set_noise path
400 |
401 | .align 5
402 | attack_mode: // t6: voice, t8: offset
403 | // Increase the envelope by 32
404 | lhu t0, envelopes(t8)
405 | addi t0, t0, 32
406 | sh t0, envelopes(t8)
407 |
408 | // Switch to decay mode at level 0x7E0 and clip to 0x7FF
409 | blt t0, 0x7E0, attack_time // Below the threshold: stay in attack and reschedule
410 | nop
411 | la t3, decay_mode
412 | sw t3, mode_funcs(t8) // Install decay_mode as this voice's envelope handler
413 | li t1, 0x7FF
414 | blt t0, t1, decay_time // Under the cap: keep the stored value
415 | nop
416 | sh t1, envelopes(t8) // At or above 0x7FF: store the clipped value
417 | j decay_time
418 | nop
419 |
420 | .align 5
421 | attack_time: // t6: voice
422 | // Schedule the next step using the attack rate
423 | sll t1, t6, 4 // Voice index * 16 = register offset
424 | lbu t0, dsp_regs + 0x5(t1) // VxADSR1
425 | andi a0, t0, 0xF // Low 4 bits hold the attack rate
426 | sll a0, a0, 3
427 | j update_step
428 | addi a0, a0, 4 // Delay slot: a0 = rate * 8 + 4, the byte offset of timer_rates word (rate * 2 + 1)
429 |
430 | .align 5
431 | decay_mode: // t6: voice, t8: offset
432 | // Decrease the envelope by an exponential amount
433 | lhu t0, envelopes(t8)
434 | addi t2, t0, -1
435 | srl t2, t2, 8
436 | addi t2, t2, 1 // Step = ((envelope - 1) >> 8) + 1
437 | sub t0, t0, t2
438 | sh t0, envelopes(t8)
439 |
440 | // Calculate the sustain level boundary
441 | lbu t1, dsp_regs + 0x6(t8) // VxADSR2
442 | andi t1, t1, 0xE0 // Top 3 bits hold the sustain level
443 | sll t1, t1, 3
444 | addi t1, t1, 0x100 // Boundary = (level + 1) * 0x100
445 |
446 | // Switch to sustain mode at the boundary and clip to zero
447 | bgt t0, t1, decay_time // Still above the boundary: keep decaying
448 | nop
449 | la t3, sustain_mode
450 | sw t3, mode_funcs(t8) // Install sustain_mode as this voice's envelope handler
451 | bgtz t0, sustain_time // Positive: keep the stored value
452 | nop
453 | sh zero, envelopes(t8) // Zero or below: store the clipped value
454 | j sustain_time
455 | nop
456 |
457 | .align 5
458 | decay_time: // t6: voice
459 | // Schedule the next step using the decay rate
460 | sll t1, t6, 4 // Voice index * 16 = register offset
461 | lbu t0, dsp_regs + 0x5(t1) // VxADSR1
462 | andi a0, t0, 0x70 // Bits 4-6 hold the decay rate
463 | srl a0, a0, 1 // Equivalent to rate * 8
464 | j update_step
465 | addi a0, a0, 64 // Delay slot: a0 = rate * 8 + 64, the byte offset of timer_rates word (rate * 2 + 16)
466 |
467 | .align 5
468 | sustain_mode: // t6: voice, t8: offset, sp: enabled-voice mask held in a register
469 | // Decrease the envelope by an exponential amount
470 | lhu t0, envelopes(t8)
471 | addi t2, t0, -1
472 | srl t2, t2, 8
473 | addi t2, t2, 1 // Step = ((envelope - 1) >> 8) + 1
474 | sub t0, t0, t2
475 | sh t0, envelopes(t8)
476 |
477 | // Disable the voice if its envelope decreases to zero
478 | bgtz t0, sustain_time // Still positive: schedule the next step
479 | li t0, 1 // Delay slot: runs on both paths; sustain_time reloads t0 before using it
480 | sll t0, t0, t6 // Bit for this voice
481 | xori t0, t0, 0xFF // Invert within 8 bits to form a clear mask
482 | and sp, sp, t0
483 | sb sp, enabled // Write the updated mask back to memory
484 | jr ra // Return without scheduling another step
485 | nop
486 |
487 | .align 5
488 | sustain_time: // t6: voice
489 | // Schedule the next step using the sustain rate
490 | sll t1, t6, 4 // Voice index * 16 = register offset
491 | lbu t0, dsp_regs + 0x6(t1) // VxADSR2
492 | andi a0, t0, 0x1F // Low 5 bits hold the sustain rate
493 | j update_step
494 | sll a0, a0, 2 // Delay slot: scale the rate to a word offset into timer_rates
495 |
496 | .align 5
497 | gain_mode: // t6: voice, t8: offset
498 | // Jump to the handler for the current gain mode
499 | lbu t0, dsp_regs + 0x7(t8) // VxGAIN
500 | andi t1, t0, 0x60 // Bits 5-6 select the handler
501 | srl t1, t1, 3 // Scale the 2-bit selector to a word offset
502 | lw t0, gain_jumps(t1)
503 | jr t0
504 | nop
505 |
506 | linear_dec: // t8: offset
507 | // Decrease the envelope by a fixed amount and clip to zero
508 | lhu t0, envelopes(t8)
509 | addi t0, t0, -32
510 | sh t0, envelopes(t8)
511 | bgtz t0, gain_time // Positive: keep the stored value
512 | nop
513 | sh zero, envelopes(t8) // Zero or below: store the clipped value
514 | j gain_time
515 | nop
516 |
517 | expon_dec: // t8: offset
518 | // Decrease the envelope by an exponential amount and clip to zero
519 | lhu t0, envelopes(t8)
520 | addi t2, t0, -1
521 | srl t2, t2, 8
522 | addi t2, t2, 1 // Step = ((envelope - 1) >> 8) + 1
523 | sub t0, t0, t2
524 | sh t0, envelopes(t8)
525 | bgtz t0, gain_time // Positive: keep the stored value
526 | nop
527 | sh zero, envelopes(t8) // Zero or below: store the clipped value
528 | j gain_time
529 | nop
530 |
531 | linear_inc: // t8: offset
532 | // Increase the envelope by a fixed amount and clip to 0x7FF
533 | lhu t0, envelopes(t8)
534 | addi t0, t0, 32
535 | sh t0, envelopes(t8)
536 | li t1, 0x7FF
537 | blt t0, t1, gain_time // Under the cap: keep the stored value
538 | nop
539 | sh t1, envelopes(t8) // At or above 0x7FF: store the clipped value
540 | j gain_time
541 | nop
542 |
543 | bent_inc: // t8: offset
544 | // Increase the envelope by an amount bent at 0x600 and clip to 0x7FF
545 | lhu t0, envelopes(t8)
546 | slti t1, t0, 0x600 // 1 while below the bend point, else 0
547 | sll t1, t1, 1 // Shift count: 2 below the bend, 0 at or above it
548 | li t2, 8
549 | sll t1, t2, t1 // Increment: 32 below the bend, 8 at or above it
550 | add t0, t0, t1
551 | sh t0, envelopes(t8)
552 | li t1, 0x7FF
553 | blt t0, t1, gain_time // Under the cap: keep the stored value
554 | nop
555 | sh t1, envelopes(t8) // At or above 0x7FF: store the clipped value
556 | j gain_time
557 | nop
558 |
559 | .align 5
560 | gain_time: // t6: voice
561 | // Check whether direct or custom gain is selected
562 | sll t4, t6, 4 // Voice index * 16 = register offset
563 | lbu t0, dsp_regs + 0x7(t4) // VxGAIN
564 | andi t1, t0, 0x80 // Bit 7 set means custom gain
565 | beqz t1, direct_gain // NOTE(review): no explicit delay slot; under noreorder the andi below also runs on the taken path (only a0 is written) - confirm intended
566 |
567 | // Schedule the next step using the custom gain rate
568 | andi a0, t0, 0x1F // Low 5 bits hold the rate
569 | sll a0, a0, 2 // Scale to a word offset into timer_rates
570 | j update_step // NOTE(review): no explicit delay slot; under noreorder the sll at direct_gain would run in it, and update_step overwrites t0 before reading it - confirm intended
571 |
572 | direct_gain: // t0: VxGAIN value with bit 7 clear, t4: offset
573 | // Set a fixed volume for direct gain
574 | sll t0, t0, 4 // Envelope = gain value * 16
575 | sh t0, envelopes(t4)
576 | li t0, 0xFFFF
577 | sh t0, next_steps(t4) // Store 0xFFFF as the next step; presumably a value the timer never reaches - confirm against the timer's range
578 | j update_vols
579 | nop
580 |
581 | .align 5
582 | update_step: // a0: rate (as a byte offset into timer_rates), t6: voice, or 8 for noise
583 | // Get values for updating a step timer
584 | lhu t2, dsp_timer
585 | lw t0, timer_rates(a0) // Packed entry: rate in the upper halfword, offset in the lower
586 | addi t2, t2, -1 // Base
587 | srl t1, t0, 16 // Rate
588 | bgez t2, calc_step
589 | andi t0, t0, 0xFFFF // Offset
590 | addi t2, t2, 0x7800 // Base went negative: wrap by adding 0x7800
591 |
592 | calc_step:
593 | // Calculate the timer value of the next step
594 | add t0, t0, t2
595 | div t0, t1
596 | mfhi t3 // Remainder of (offset + base) / rate
597 | sub t2, t2, t3 // Step the base back by the remainder
598 | bgez t2, set_step
599 | nop
600 | addi t2, t2, 0x7800 // Wrap a negative result by adding 0x7800
601 |
602 | set_step:
603 | // Update a voice's step timer and cached volume
604 | bge t6, 8, set_noise // Index 8 and above is the noise generator, not a voice
605 | sll t1, t6, 4 // Delay slot: voice index * 16 = table offset
606 | sh t2, next_steps(t1)
607 | j update_vols
608 | nop
609 |
610 | set_noise:
611 | // Update the noise step timer
612 | sh t2, noise_step
613 | jr ra
614 | nop
615 |
616 | .align 5
617 | update_vols: // t6: voice
618 | // Get a voice's envelope and channel volumes
619 | sll t1, t6, 4 // Voice index * 16 = table offset
620 | lhu t0, envelopes(t1)
621 | lb t2, dsp_regs + 0x0(t1) // VxVOLL (loaded as signed)
622 | lb t3, dsp_regs + 0x1(t1) // VxVOLR (loaded as signed)
623 | srl t0, t0, 4 // Drop the low 4 envelope bits before multiplying
624 |
625 | // Cache the multiplied volumes to save a step during mixing
626 | mult t2, t0
627 | mflo t2
628 | sh t2, cached_vols + 0(t1) // Left volume * envelope
629 | mult t3, t0
630 | mflo t3
631 | sh t3, cached_vols + 2(t1) // Right volume * envelope
632 | jr ra
633 | nop
634 |
635 | .align 5
636 | read_dspaddr: // v0: value
637 | // Read the DSP register address (returns the byte stored in dsp_addr)
638 | lbu v0, dsp_addr
639 | jr ra
640 | nop
641 |
642 | .align 5
643 | read_dspdata: // v0: value
644 | // Read a value from a DSP register (the one selected by dsp_addr)
645 | lbu t0, dsp_addr
646 | andi t0, t0, 0x7F // Mirror: addresses 0x80-0xFF read the same registers as 0x00-0x7F
647 | lbu v0, dsp_regs(t0)
648 | jr ra
649 | nop
650 |
651 | .align 5
652 | write_dspaddr: // a1: value
653 | // Write the DSP register address (stores the low byte of a1 in dsp_addr)
654 | sb a1, dsp_addr
655 | jr ra
656 | nop
657 |
658 | .align 5
659 | write_dspdata: // a1: value
660 | // Write a value to a DSP register if within bounds
661 | lbu t0, dsp_addr
662 | andi t1, t0, 0x80
663 | bnez t1, write_reg // Addresses 0x80 and above are not stored
664 | sll t1, t0, 2 // Delay slot: address * 4 = word offset into write_iomap
665 | sb a1, dsp_regs(t0)
666 |
667 | // Jump to the register's handler function
668 | lw t1, write_iomap(t1)
669 | jr t1 // The handler is entered with the register address still in t0 and the value in a1
670 | nop
671 |
672 | write_reg:
673 | // Do nothing extra for regular register writes
674 | jr ra
675 | nop
676 |
677 | .align 5
678 | write_vol: // t0: address
679 | // Update a voice's cached volumes when changed
680 | j update_vols
681 | srl t6, t0, 4 // Delay slot: voice index = register address / 16
682 |
683 | .align 5
684 | write_adsr: // t0: address
685 | // Switch a voice's envelope to gain if selected
686 | lbu t1, dsp_regs(t0) // VxADSR1 (assumes t0 addresses that register - confirm against write_iomap)
687 | andi t4, t0, 0xF0 // t4: voice register offset, as use_gain expects
688 | srl t6, t0, 4 // t6: voice index
689 | andi t0, t1, 0x80 // Bit 7 set keeps the voice on ADSR
690 | beqz t0, use_gain // Bit 7 clear: hand the envelope over to gain mode
691 | nop
692 | jr ra
693 | nop
694 |
695 | .align 5
696 | write_kon: // a1: value
697 | // Set voices to enabled
698 | lbu t0, enabled
699 | move t5, ra // Save the return address; ra is repointed at next_on further down
700 | or t0, t0, a1
701 | sb t0, enabled
702 | li t6, -1 // Voice index, incremented at the top of next_on
703 |
704 | next_on:
705 | // Check if a voice was just enabled
706 | addi t6, t6, 1
707 | srl t0, a1, t6 // Key-on bits from this voice upward
708 | beqz t0, kon_end // No bits left: finished
709 | andi t0, t0, 0x1 // Delay slot: isolate this voice's bit
710 | beqz t0, next_on
711 | nop
712 |
713 | // Get the address of the voice's sample table entry
714 | lbu t0, dsp_regs + 0x5D // DIR
715 | sll t4, t6, 4 // t4: voice register offset
716 | lbu t1, dsp_regs + 0x04(t4) // VxSRCN
717 | sll t0, t0, 8 // Table base = DIR * 0x100
718 | sll t1, t1, 2 // 4 bytes per table entry
719 | add t0, t0, t1
720 |
721 | // Load the start address and reset the voice
722 | lhu t0, apu_ram(t0)
723 | sll t1, t0, 8
724 | srl t0, t0, 8
725 | or t0, t0, t1 // Swap the two bytes of the loaded halfword
726 | sh t0, cur_addrs(t4) // Only the low 16 bits are stored
727 | sh zero, counters(t4)
728 | sh zero, envelopes(t4)
729 | sll t1, t6, 3 // Voice index * 8 = sample buffer offset
730 | sd zero, cur_samples(t1) // Clear all four buffered samples at once
731 |
732 | // Check whether the voice uses ADSR or gain
733 | lbu t1, dsp_regs + 0x5(t4) // VxADSR1
734 | la ra, next_on // The scheduling chain ends in jr ra, so it resumes this loop
735 | andi t1, t1, 0x80 // Bit 7 set selects ADSR
736 | beqz t1, use_gain
737 | nop
738 |
739 | // Set the voice to attack mode and schedule the first step
740 | la t0, attack_mode
741 | sw t0, mode_funcs(t4)
742 | j attack_time
743 | nop
744 |
745 | use_gain: // t4: offset, t6: voice
746 | // Set the voice to gain mode and schedule the first step
747 | la t0, gain_mode
748 | sw t0, mode_funcs(t4)
749 | j gain_time
750 | nop
751 |
752 | kon_end:
753 | // Return from the function (through the address saved in t5)
754 | jr t5
755 | nop
756 |
757 | .align 5
758 | write_koff: // a1: value
759 | // Set voices to disabled
760 | lbu t0, enabled
761 | xori t1, a1, 0xFF // Invert the low 8 bits to form a clear mask
762 | and t0, t0, t1 // Clear every voice whose bit is set in a1
763 | sb t0, enabled
764 | jr ra
765 | nop
766 |
--------------------------------------------------------------------------------
/src/font.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | .globl char_lookup
23 | .globl char_sp
24 |
25 | .data
26 |
27 | .align 4
28 | char_lookup: // Glyph pointers for character codes 0x20-0x5F; codes without a glyph point to char_sp
29 | .word char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp // 0x20-0x27
30 | .word char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp // 0x28-0x2F
31 | .word char_0, char_1, char_2, char_3, char_4, char_5, char_6, char_7 // 0x30-0x37
32 | .word char_8, char_9, char_sp, char_sp, char_lt, char_sp, char_gt, char_sp // 0x38-0x3F
33 | .word char_sp, char_A, char_B, char_C, char_D, char_E, char_F, char_G // 0x40-0x47
34 | .word char_H, char_I, char_J, char_K, char_L, char_M, char_N, char_O // 0x48-0x4F
35 | .word char_P, char_Q, char_R, char_S, char_T, char_U, char_V, char_W // 0x50-0x57
36 | .word char_X, char_Y, char_Z, char_sp, char_sp, char_sp, char_sp, char_sp // 0x58-0x5F
37 |
38 | .align 4
39 | char_sp:
40 | .hword 00, 00, 00, 00, 00, 00, 00, 00
41 | .hword 00, 00, 00, 00, 00, 00, 00, 00
42 | .hword 00, 00, 00, 00, 00, 00, 00, 00
43 | .hword 00, 00, 00, 00, 00, 00, 00, 00
44 | .hword 00, 00, 00, 00, 00, 00, 00, 00
45 | .hword 00, 00, 00, 00, 00, 00, 00, 00
46 | .hword 00, 00, 00, 00, 00, 00, 00, 00
47 | .hword 00, 00, 00, 00, 00, 00, 00, 00
48 |
49 | .align 4
50 | char_0:
51 | .hword 00, 00, -1, -1, -1, -1, 00, 00
52 | .hword 00, -1, -1, -1, -1, -1, -1, 00
53 | .hword 00, -1, -1, 00, 00, -1, -1, 00
54 | .hword 00, -1, -1, 00, 00, -1, -1, 00
55 | .hword 00, -1, -1, 00, 00, -1, -1, 00
56 | .hword 00, -1, -1, 00, 00, -1, -1, 00
57 | .hword 00, -1, -1, -1, -1, -1, -1, 00
58 | .hword 00, 00, -1, -1, -1, -1, 00, 00
59 |
60 | .align 4
61 | char_1:
62 | .hword 00, 00, 00, -1, -1, 00, 00, 00
63 | .hword 00, 00, -1, -1, -1, 00, 00, 00
64 | .hword 00, 00, -1, -1, -1, 00, 00, 00
65 | .hword 00, 00, 00, -1, -1, 00, 00, 00
66 | .hword 00, 00, 00, -1, -1, 00, 00, 00
67 | .hword 00, 00, 00, -1, -1, 00, 00, 00
68 | .hword 00, 00, 00, -1, -1, 00, 00, 00
69 | .hword 00, 00, 00, -1, -1, 00, 00, 00
70 |
71 | .align 4
72 | char_2:
73 | .hword 00, 00, -1, -1, -1, -1, 00, 00
74 | .hword 00, -1, -1, -1, -1, -1, -1, 00
75 | .hword 00, -1, -1, 00, 00, -1, -1, 00
76 | .hword 00, 00, 00, 00, 00, -1, -1, 00
77 | .hword 00, 00, 00, 00, -1, -1, 00, 00
78 | .hword 00, 00, 00, -1, -1, 00, 00, 00
79 | .hword 00, 00, -1, -1, -1, -1, -1, 00
80 | .hword 00, -1, -1, -1, -1, -1, -1, 00
81 |
82 | .align 4
83 | char_3:
84 | .hword 00, 00, -1, -1, -1, -1, 00, 00
85 | .hword 00, -1, -1, -1, -1, -1, -1, 00
86 | .hword 00, -1, -1, 00, 00, -1, -1, 00
87 | .hword 00, 00, 00, 00, -1, -1, 00, 00
88 | .hword 00, 00, 00, 00, -1, -1, 00, 00
89 | .hword 00, -1, -1, 00, 00, -1, -1, 00
90 | .hword 00, -1, -1, -1, -1, -1, -1, 00
91 | .hword 00, 00, -1, -1, -1, -1, 00, 00
92 |
93 | .align 4
94 | char_4:
95 | .hword 00, 00, 00, -1, -1, -1, 00, 00
96 | .hword 00, 00, -1, -1, -1, -1, 00, 00
97 | .hword 00, -1, -1, 00, -1, -1, 00, 00
98 | .hword 00, -1, -1, 00, -1, -1, 00, 00
99 | .hword 00, -1, -1, -1, -1, -1, -1, 00
100 | .hword 00, -1, -1, -1, -1, -1, -1, 00
101 | .hword 00, 00, 00, 00, -1, -1, 00, 00
102 | .hword 00, 00, 00, 00, -1, -1, 00, 00
103 |
104 | .align 4
105 | char_5:
106 | .hword 00, -1, -1, -1, -1, -1, -1, 00
107 | .hword 00, -1, -1, -1, -1, -1, -1, 00
108 | .hword 00, -1, -1, 00, 00, 00, 00, 00
109 | .hword 00, -1, -1, -1, -1, -1, 00, 00
110 | .hword 00, -1, -1, -1, -1, -1, -1, 00
111 | .hword 00, 00, 00, 00, 00, -1, -1, 00
112 | .hword 00, -1, -1, -1, -1, -1, -1, 00
113 | .hword 00, 00, -1, -1, -1, -1, 00, 00
114 |
115 | .align 4
116 | char_6:
117 | .hword 00, 00, -1, -1, -1, -1, 00, 00
118 | .hword 00, -1, -1, -1, -1, -1, -1, 00
119 | .hword 00, -1, -1, 00, 00, 00, 00, 00
120 | .hword 00, -1, -1, -1, -1, -1, 00, 00
121 | .hword 00, -1, -1, -1, -1, -1, -1, 00
122 | .hword 00, -1, -1, 00, 00, -1, -1, 00
123 | .hword 00, -1, -1, -1, -1, -1, -1, 00
124 | .hword 00, 00, -1, -1, -1, -1, 00, 00
125 |
126 | .align 4
127 | char_7:
128 | .hword 00, -1, -1, -1, -1, -1, -1, 00
129 | .hword 00, -1, -1, -1, -1, -1, -1, 00
130 | .hword 00, 00, 00, 00, 00, -1, -1, 00
131 | .hword 00, 00, 00, 00, -1, -1, 00, 00
132 | .hword 00, 00, 00, 00, -1, -1, 00, 00
133 | .hword 00, 00, 00, -1, -1, 00, 00, 00
134 | .hword 00, 00, -1, -1, 00, 00, 00, 00
135 | .hword 00, 00, -1, -1, 00, 00, 00, 00
136 |
137 | .align 4
138 | char_8:
139 | .hword 00, 00, -1, -1, -1, -1, 00, 00
140 | .hword 00, -1, -1, -1, -1, -1, -1, 00
141 | .hword 00, -1, -1, 00, 00, -1, -1, 00
142 | .hword 00, 00, -1, -1, -1, -1, 00, 00
143 | .hword 00, 00, -1, -1, -1, -1, 00, 00
144 | .hword 00, -1, -1, 00, 00, -1, -1, 00
145 | .hword 00, -1, -1, -1, -1, -1, -1, 00
146 | .hword 00, 00, -1, -1, -1, -1, 00, 00
147 |
148 | .align 4
149 | char_9:
150 | .hword 00, 00, -1, -1, -1, -1, 00, 00
151 | .hword 00, -1, -1, -1, -1, -1, -1, 00
152 | .hword 00, -1, -1, 00, 00, -1, -1, 00
153 | .hword 00, -1, -1, -1, -1, -1, -1, 00
154 | .hword 00, 00, -1, -1, -1, -1, -1, 00
155 | .hword 00, 00, 00, 00, 00, -1, -1, 00
156 | .hword 00, -1, -1, -1, -1, -1, -1, 00
157 | .hword 00, 00, -1, -1, -1, -1, 00, 00
158 |
159 | .align 4
160 | char_lt:
161 | .hword 00, 00, 00, 00, -1, -1, 00, 00
162 | .hword 00, 00, 00, -1, -1, 00, 00, 00
163 | .hword 00, 00, -1, -1, 00, 00, 00, 00
164 | .hword 00, -1, -1, 00, 00, 00, 00, 00
165 | .hword 00, -1, -1, 00, 00, 00, 00, 00
166 | .hword 00, 00, -1, -1, 00, 00, 00, 00
167 | .hword 00, 00, 00, -1, -1, 00, 00, 00
168 | .hword 00, 00, 00, 00, -1, -1, 00, 00
169 |
170 | .align 4
171 | char_gt:
172 | .hword 00, 00, -1, -1, 00, 00, 00, 00
173 | .hword 00, 00, 00, -1, -1, 00, 00, 00
174 | .hword 00, 00, 00, 00, -1, -1, 00, 00
175 | .hword 00, 00, 00, 00, 00, -1, -1, 00
176 | .hword 00, 00, 00, 00, 00, -1, -1, 00
177 | .hword 00, 00, 00, 00, -1, -1, 00, 00
178 | .hword 00, 00, 00, -1, -1, 00, 00, 00
179 | .hword 00, 00, -1, -1, 00, 00, 00, 00
180 |
181 | .align 4
182 | char_A:
183 | .hword 00, 00, 00, -1, -1, 00, 00, 00
184 | .hword 00, 00, -1, -1, -1, -1, 00, 00
185 | .hword 00, 00, -1, -1, -1, -1, 00, 00
186 | .hword 00, -1, -1, 00, 00, -1, -1, 00
187 | .hword 00, -1, -1, -1, -1, -1, -1, 00
188 | .hword 00, -1, -1, -1, -1, -1, -1, 00
189 | .hword 00, -1, -1, 00, 00, -1, -1, 00
190 | .hword 00, -1, -1, 00, 00, -1, -1, 00
191 |
192 | .align 4
193 | char_B:
194 | .hword 00, -1, -1, -1, -1, -1, 00, 00
195 | .hword 00, -1, -1, -1, -1, -1, -1, 00
196 | .hword 00, -1, -1, 00, 00, -1, -1, 00
197 | .hword 00, -1, -1, -1, -1, -1, 00, 00
198 | .hword 00, -1, -1, -1, -1, -1, 00, 00
199 | .hword 00, -1, -1, 00, 00, -1, -1, 00
200 | .hword 00, -1, -1, -1, -1, -1, -1, 00
201 | .hword 00, -1, -1, -1, -1, -1, 00, 00
202 |
203 | .align 4
204 | char_C:
205 | .hword 00, 00, -1, -1, -1, -1, 00, 00
206 | .hword 00, -1, -1, -1, -1, -1, -1, 00
207 | .hword 00, -1, -1, 00, 00, -1, -1, 00
208 | .hword 00, -1, -1, 00, 00, 00, 00, 00
209 | .hword 00, -1, -1, 00, 00, 00, 00, 00
210 | .hword 00, -1, -1, 00, 00, -1, -1, 00
211 | .hword 00, -1, -1, -1, -1, -1, -1, 00
212 | .hword 00, 00, -1, -1, -1, -1, 00, 00
213 |
214 | .align 4
215 | char_D:
216 | .hword 00, -1, -1, -1, -1, -1, 00, 00
217 | .hword 00, -1, -1, -1, -1, -1, -1, 00
218 | .hword 00, -1, -1, 00, 00, -1, -1, 00
219 | .hword 00, -1, -1, 00, 00, -1, -1, 00
220 | .hword 00, -1, -1, 00, 00, -1, -1, 00
221 | .hword 00, -1, -1, 00, 00, -1, -1, 00
222 | .hword 00, -1, -1, -1, -1, -1, -1, 00
223 | .hword 00, -1, -1, -1, -1, -1, 00, 00
224 |
225 | .align 4
226 | char_E:
227 | .hword 00, -1, -1, -1, -1, -1, -1, 00
228 | .hword 00, -1, -1, -1, -1, -1, -1, 00
229 | .hword 00, -1, -1, 00, 00, 00, 00, 00
230 | .hword 00, -1, -1, -1, -1, -1, 00, 00
231 | .hword 00, -1, -1, -1, -1, -1, 00, 00
232 | .hword 00, -1, -1, 00, 00, 00, 00, 00
233 | .hword 00, -1, -1, -1, -1, -1, -1, 00
234 | .hword 00, -1, -1, -1, -1, -1, -1, 00
235 |
236 | .align 4
237 | char_F:
238 | .hword 00, -1, -1, -1, -1, -1, -1, 00
239 | .hword 00, -1, -1, -1, -1, -1, -1, 00
240 | .hword 00, -1, -1, 00, 00, 00, 00, 00
241 | .hword 00, -1, -1, -1, -1, -1, 00, 00
242 | .hword 00, -1, -1, -1, -1, -1, 00, 00
243 | .hword 00, -1, -1, 00, 00, 00, 00, 00
244 | .hword 00, -1, -1, 00, 00, 00, 00, 00
245 | .hword 00, -1, -1, 00, 00, 00, 00, 00
246 |
247 | .align 4
248 | char_G:
249 | .hword 00, 00, -1, -1, -1, -1, 00, 00
250 | .hword 00, -1, -1, -1, -1, -1, -1, 00
251 | .hword 00, -1, -1, 00, 00, 00, 00, 00
252 | .hword 00, -1, -1, 00, -1, -1, -1, 00
253 | .hword 00, -1, -1, 00, -1, -1, -1, 00
254 | .hword 00, -1, -1, 00, 00, -1, -1, 00
255 | .hword 00, -1, -1, -1, -1, -1, -1, 00
256 | .hword 00, 00, -1, -1, -1, -1, 00, 00
257 |
258 | .align 4
259 | char_H:
260 | .hword 00, -1, -1, 00, 00, -1, -1, 00
261 | .hword 00, -1, -1, 00, 00, -1, -1, 00
262 | .hword 00, -1, -1, 00, 00, -1, -1, 00
263 | .hword 00, -1, -1, -1, -1, -1, -1, 00
264 | .hword 00, -1, -1, -1, -1, -1, -1, 00
265 | .hword 00, -1, -1, 00, 00, -1, -1, 00
266 | .hword 00, -1, -1, 00, 00, -1, -1, 00
267 | .hword 00, -1, -1, 00, 00, -1, -1, 00
268 |
269 | .align 4
270 | char_I:
271 | .hword 00, 00, -1, -1, -1, -1, 00, 00
272 | .hword 00, 00, -1, -1, -1, -1, 00, 00
273 | .hword 00, 00, 00, -1, -1, 00, 00, 00
274 | .hword 00, 00, 00, -1, -1, 00, 00, 00
275 | .hword 00, 00, 00, -1, -1, 00, 00, 00
276 | .hword 00, 00, 00, -1, -1, 00, 00, 00
277 | .hword 00, 00, -1, -1, -1, -1, 00, 00
278 | .hword 00, 00, -1, -1, -1, -1, 00, 00
279 |
280 | .align 4
281 | char_J:
282 | .hword 00, 00, 00, -1, -1, -1, -1, 00
283 | .hword 00, 00, 00, -1, -1, -1, -1, 00
284 | .hword 00, 00, 00, 00, -1, -1, 00, 00
285 | .hword 00, 00, 00, 00, -1, -1, 00, 00
286 | .hword 00, 00, 00, 00, -1, -1, 00, 00
287 | .hword 00, -1, -1, 00, -1, -1, 00, 00
288 | .hword 00, -1, -1, -1, -1, -1, 00, 00
289 | .hword 00, 00, -1, -1, -1, 00, 00, 00
290 |
291 | .align 4
292 | char_K:
293 | .hword 00, -1, -1, 00, 00, -1, -1, 00
294 | .hword 00, -1, -1, 00, -1, -1, 00, 00
295 | .hword 00, -1, -1, -1, -1, 00, 00, 00
296 | .hword 00, -1, -1, -1, 00, 00, 00, 00
297 | .hword 00, -1, -1, -1, 00, 00, 00, 00
298 | .hword 00, -1, -1, -1, -1, 00, 00, 00
299 | .hword 00, -1, -1, 00, -1, -1, 00, 00
300 | .hword 00, -1, -1, 00, 00, -1, -1, 00
301 |
302 | .align 4
303 | char_L:
304 | .hword 00, -1, -1, 00, 00, 00, 00, 00
305 | .hword 00, -1, -1, 00, 00, 00, 00, 00
306 | .hword 00, -1, -1, 00, 00, 00, 00, 00
307 | .hword 00, -1, -1, 00, 00, 00, 00, 00
308 | .hword 00, -1, -1, 00, 00, 00, 00, 00
309 | .hword 00, -1, -1, 00, 00, 00, 00, 00
310 | .hword 00, -1, -1, -1, -1, -1, -1, 00
311 | .hword 00, -1, -1, -1, -1, -1, -1, 00
312 |
313 | .align 4
314 | char_M:
315 | .hword -1, -1, 00, 00, 00, 00, -1, -1
316 | .hword -1, -1, -1, 00, 00, -1, -1, -1
317 | .hword -1, -1, -1, 00, 00, -1, -1, -1
318 | .hword -1, -1, -1, -1, -1, -1, -1, -1
319 | .hword -1, -1, -1, -1, -1, -1, -1, -1
320 | .hword -1, -1, 00, -1, -1, 00, -1, -1
321 | .hword -1, -1, 00, -1, -1, 00, -1, -1
322 | .hword -1, -1, 00, 00, 00, 00, -1, -1
323 |
324 | .align 4
325 | char_N:
326 | .hword 00, -1, -1, 00, 00, -1, -1, 00
327 | .hword 00, -1, -1, -1, 00, -1, -1, 00
328 | .hword 00, -1, -1, -1, 00, -1, -1, 00
329 | .hword 00, -1, -1, -1, -1, -1, -1, 00
330 | .hword 00, -1, -1, -1, -1, -1, -1, 00
331 | .hword 00, -1, -1, 00, -1, -1, -1, 00
332 | .hword 00, -1, -1, 00, -1, -1, -1, 00
333 | .hword 00, -1, -1, 00, 00, -1, -1, 00
334 |
335 | .align 4
336 | char_O:
337 | .hword 00, 00, -1, -1, -1, -1, 00, 00
338 | .hword 00, -1, -1, -1, -1, -1, -1, 00
339 | .hword 00, -1, -1, 00, 00, -1, -1, 00
340 | .hword 00, -1, -1, 00, 00, -1, -1, 00
341 | .hword 00, -1, -1, 00, 00, -1, -1, 00
342 | .hword 00, -1, -1, 00, 00, -1, -1, 00
343 | .hword 00, -1, -1, -1, -1, -1, -1, 00
344 | .hword 00, 00, -1, -1, -1, -1, 00, 00
345 |
346 | .align 4
347 | char_P:
348 | .hword 00, -1, -1, -1, -1, -1, 00, 00
349 | .hword 00, -1, -1, -1, -1, -1, -1, 00
350 | .hword 00, -1, -1, 00, 00, -1, -1, 00
351 | .hword 00, -1, -1, -1, -1, -1, -1, 00
352 | .hword 00, -1, -1, -1, -1, -1, 00, 00
353 | .hword 00, -1, -1, 00, 00, 00, 00, 00
354 | .hword 00, -1, -1, 00, 00, 00, 00, 00
355 | .hword 00, -1, -1, 00, 00, 00, 00, 00
356 |
357 | .align 4
358 | char_Q:
359 | .hword 00, 00, -1, -1, -1, -1, 00, 00
360 | .hword 00, -1, -1, -1, -1, -1, -1, 00
361 | .hword 00, -1, -1, 00, 00, -1, -1, 00
362 | .hword 00, -1, -1, 00, 00, -1, -1, 00
363 | .hword 00, -1, -1, 00, -1, -1, -1, 00
364 | .hword 00, -1, -1, -1, -1, -1, 00, 00
365 | .hword 00, 00, -1, -1, -1, -1, -1, 00
366 | .hword 00, 00, 00, 00, 00, -1, -1, 00
367 |
368 | .align 4
369 | char_R:
370 | .hword 00, -1, -1, -1, -1, -1, 00, 00
371 | .hword 00, -1, -1, -1, -1, -1, -1, 00
372 | .hword 00, -1, -1, 00, 00, -1, -1, 00
373 | .hword 00, -1, -1, 00, 00, -1, -1, 00
374 | .hword 00, -1, -1, -1, -1, -1, 00, 00
375 | .hword 00, -1, -1, -1, -1, 00, 00, 00
376 | .hword 00, -1, -1, 00, -1, -1, 00, 00
377 | .hword 00, -1, -1, 00, 00, -1, -1, 00
378 |
379 | .align 4
380 | char_S:
381 | .hword 00, 00, -1, -1, -1, -1, 00, 00
382 | .hword 00, -1, -1, -1, -1, -1, -1, 00
383 | .hword 00, -1, -1, 00, 00, 00, 00, 00
384 | .hword 00, -1, -1, -1, -1, -1, 00, 00
385 | .hword 00, 00, -1, -1, -1, -1, -1, 00
386 | .hword 00, 00, 00, 00, 00, -1, -1, 00
387 | .hword 00, -1, -1, -1, -1, -1, -1, 00
388 | .hword 00, 00, -1, -1, -1, -1, 00, 00
389 |
390 | .align 4
391 | char_T:
392 | .hword 00, -1, -1, -1, -1, -1, -1, 00
393 | .hword 00, -1, -1, -1, -1, -1, -1, 00
394 | .hword 00, 00, 00, -1, -1, 00, 00, 00
395 | .hword 00, 00, 00, -1, -1, 00, 00, 00
396 | .hword 00, 00, 00, -1, -1, 00, 00, 00
397 | .hword 00, 00, 00, -1, -1, 00, 00, 00
398 | .hword 00, 00, 00, -1, -1, 00, 00, 00
399 | .hword 00, 00, 00, -1, -1, 00, 00, 00
400 |
401 | .align 4
402 | char_U:
403 | .hword 00, -1, -1, 00, 00, -1, -1, 00
404 | .hword 00, -1, -1, 00, 00, -1, -1, 00
405 | .hword 00, -1, -1, 00, 00, -1, -1, 00
406 | .hword 00, -1, -1, 00, 00, -1, -1, 00
407 | .hword 00, -1, -1, 00, 00, -1, -1, 00
408 | .hword 00, -1, -1, 00, 00, -1, -1, 00
409 | .hword 00, -1, -1, -1, -1, -1, -1, 00
410 | .hword 00, 00, -1, -1, -1, -1, 00, 00
411 |
412 | .align 4
413 | char_V:
414 | .hword 00, -1, -1, 00, 00, -1, -1, 00
415 | .hword 00, -1, -1, 00, 00, -1, -1, 00
416 | .hword 00, -1, -1, 00, 00, -1, -1, 00
417 | .hword 00, -1, -1, 00, 00, -1, -1, 00
418 | .hword 00, -1, -1, 00, 00, -1, -1, 00
419 | .hword 00, 00, -1, -1, -1, -1, 00, 00
420 | .hword 00, 00, -1, -1, -1, -1, 00, 00
421 | .hword 00, 00, 00, -1, -1, 00, 00, 00
422 |
423 | .align 4
424 | char_W: // 8x8 glyph for 'W', one halfword per pixel: -1 draws the letter, 00 is background
425 | .hword -1, -1, 00, 00, 00, 00, -1, -1
426 | .hword -1, -1, 00, -1, -1, 00, -1, -1
427 | .hword -1, -1, 00, -1, -1, 00, -1, -1
428 | .hword -1, -1, -1, -1, -1, -1, -1, -1
429 | .hword -1, -1, -1, -1, -1, -1, -1, -1
430 | .hword -1, -1, -1, 00, 00, -1, -1, -1
431 | .hword -1, -1, -1, 00, 00, -1, -1, -1
432 | .hword -1, -1, 00, 00, 00, 00, -1, -1
433 |
434 | .align 4
435 | char_X:
436 | .hword 00, -1, -1, 00, 00, -1, -1, 00
437 | .hword 00, -1, -1, 00, 00, -1, -1, 00
438 | .hword 00, -1, -1, 00, 00, -1, -1, 00
439 | .hword 00, 00, -1, -1, -1, -1, 00, 00
440 | .hword 00, 00, -1, -1, -1, -1, 00, 00
441 | .hword 00, -1, -1, 00, 00, -1, -1, 00
442 | .hword 00, -1, -1, 00, 00, -1, -1, 00
443 | .hword 00, -1, -1, 00, 00, -1, -1, 00
444 |
445 | .align 4
446 | char_Y:
447 | .hword 00, -1, -1, 00, 00, -1, -1, 00
448 | .hword 00, -1, -1, 00, 00, -1, -1, 00
449 | .hword 00, -1, -1, 00, 00, -1, -1, 00
450 | .hword 00, 00, -1, -1, -1, -1, 00, 00
451 | .hword 00, 00, -1, -1, -1, -1, 00, 00
452 | .hword 00, 00, 00, -1, -1, 00, 00, 00
453 | .hword 00, 00, 00, -1, -1, 00, 00, 00
454 | .hword 00, 00, 00, -1, -1, 00, 00, 00
455 |
456 | .align 4
457 | char_Z:
458 | .hword 00, -1, -1, -1, -1, -1, -1, 00
459 | .hword 00, -1, -1, -1, -1, -1, -1, 00
460 | .hword 00, 00, 00, 00, 00, -1, -1, 00
461 | .hword 00, 00, 00, 00, -1, -1, 00, 00
462 | .hword 00, 00, 00, -1, -1, 00, 00, 00
463 | .hword 00, 00, -1, -1, 00, 00, 00, 00
464 | .hword 00, -1, -1, -1, -1, -1, -1, 00
465 | .hword 00, -1, -1, -1, -1, -1, -1, 00
466 |
--------------------------------------------------------------------------------
/src/input.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "defines.h"
21 |
22 | .globl input_init
23 | .globl input_update
24 | .globl get_pressed
25 | .globl read_joyx
26 | .globl read_joyxl
27 | .globl read_joyxh
28 |
29 | .data
30 |
31 | .align 4
32 | btn_state: .byte 0xFF // Controller 1 button byte saved by get_pressed; starts with every bit set
33 |
34 | .align 4
35 | joybus_cmd: // Reads controller status when copied to PIF RAM; also the buffer the results are read back into
36 | .dword 0xFF010401FFFFFFFF // Read controller 1 (code in this file reads error bits from byte 2 and buttons from bytes 4-5)
37 | .dword 0xFF010401FFFFFFFF // Read controller 2
38 | .dword 0xFF010401FFFFFFFF // Read controller 3
39 | .dword 0xFF010401FFFFFFFF // Read controller 4
40 | .dword 0xFE00000000000000 // Finish execution
41 | .dword 0x0000000000000000 // Nothing
42 | .dword 0x0000000000000000 // Nothing
43 | .dword 0x0000000000000001 // Run command
44 |
45 | .text
46 | .set noreorder
47 |
48 | .align 5
49 | input_init: // Clobbers t0 and t1
50 | // Send the initial joybus command to PIF RAM via DMA
51 | lui t0, 0xA480 // SI register upper address
52 | la t1, joybus_cmd // Input buffer
53 | sw t1, 0x0000(t0) // SI_DRAM_ADDR
54 | li t1, 0x000007C0 // PIF RAM
55 | jr ra
56 | sw t1, 0x0010(t0) // SI_PIF_AD_WR64B (delay slot: the write is issued as the function returns)
57 |
58 | .align 5
59 | input_update: // Uses only k0/k1 and ends with eret, so it is entered from an exception rather than called with jal
60 | // Read the output of the joybus command via DMA
61 | lui k0, 0xA480 // SI register upper address
62 | la k1, joybus_cmd // Output buffer
63 | sw k1, 0x0000(k0) // SI_DRAM_ADDR
64 | li k1, 0x000007C0 // PIF RAM
65 | sw k1, 0x0004(k0) // SI_PIF_AD_RD64B
66 | eret
67 |
68 | .align 5
69 | get_pressed: // v0: button mask
70 | // Check if controller 1 is connected
71 | lbu t1, UNCACHED(joybus_cmd + 2)
72 | andi t1, t1, 0xC0 // Error bits
73 | bnez t1, end_pressed // Any error bit set: report nothing pressed
74 | li v0, 0 // Delay slot: default result is an empty mask
75 |
76 | // Get the newly-pressed state of certain buttons
77 | lbu t0, btn_state // Button byte saved by the previous call
78 | lbu t1, UNCACHED(joybus_cmd + 4) // Current button byte
79 | xori t0, t0, 0xFF
80 | and v0, t0, t1 // Bits set now that were clear before
81 | sb t1, btn_state // Save the current byte for the next call (skipped on the error path)
82 | end_pressed:
83 | jr ra
84 | nop
85 |
86 | .align 5
87 | read_joyx: // a0: address (unused), v0: value
88 | // Stub to fix input in games like Donkey Kong Country
89 | // TODO: actually implement manual controller ports
90 | jr ra
91 | li v0, 0x1 // Delay slot: always return 1
92 |
93 | .align 5
94 | read_joyxl: // a0: address, v0: value
95 | // Check if the controller is connected, and return 0 if not
96 | andi t2, a0, 0x6 // Bits 1-2 of the address select the controller
97 | sll t2, t2, 2 // Scale to the 8-byte slot offset inside joybus_cmd
98 | lbu t0, UNCACHED(joybus_cmd + 2)(t2)
99 | andi t0, t0, 0xC0 // Error bits
100 | beqz t0, joyxl_conn
101 | li v0, 0 // Delay slot: default result when not connected
102 | jr ra
103 | nop
104 |
105 | joyxl_conn:
106 | // Read button data for the controller
107 | lhu t0, UNCACHED(joybus_cmd + 4)(t2)
108 |
109 | // Map N64 LR to SNES LR
110 | andi v0, t0, 0x30 // Bits 4-5 pass through unchanged
111 |
112 | // Map N64 C-up to SNES X
113 | andi t1, t0, 0x8
114 | sll t1, t1, 3 // Bit 3 -> bit 6
115 | or v0, v0, t1
116 |
117 | // Map N64 C-right to SNES A
118 | andi t1, t0, 0x1
119 | sll t1, t1, 7 // Bit 0 -> bit 7
120 | jr ra
121 | or v0, v0, t1 // Delay slot: merge the last mapped bit into the result
122 |
123 | .align 5
124 | read_joyxh: // a0: address, v0: value
125 | // Check if the controller is connected, and return 0 if not
126 | andi t2, a0, 0x6 // Bits 1-2 of the address select the controller
127 | sll t2, t2, 2 // Scale to the 8-byte slot offset inside joybus_cmd
128 | lbu t0, UNCACHED(joybus_cmd + 2)(t2)
129 | andi t0, t0, 0xC0 // Error bits
130 | beqz t0, joyxh_conn
131 | li v0, 0 // Delay slot: default result when not connected
132 | jr ra
133 | nop
134 |
135 | joyxh_conn:
136 | // Read button data for the controller
137 | lhu t0, UNCACHED(joybus_cmd + 4)(t2)
138 |
139 | // Map N64 D-pad to SNES D-pad
140 | srl t1, t0, 8
141 | andi v0, t1, 0xF // Bits 8-11 -> bits 0-3
142 |
143 | // Map N64 A to SNES Start
144 | andi t1, t0, 0x8000
145 | srl t1, t1, 11 // Bit 15 -> bit 4
146 | or v0, v0, t1
147 |
148 | // Map N64 B to SNES Select
149 | andi t1, t0, 0x4000
150 | srl t1, t1, 9 // Bit 14 -> bit 5
151 | or v0, v0, t1
152 |
153 | // Map N64 C-left and C-down to SNES Y and B
154 | andi t1, t0, 0x6
155 | sll t1, t1, 5 // Bits 1-2 -> bits 6-7
156 | jr ra
157 | or v0, v0, t1 // Delay slot: merge the last mapped bits into the result
158 |
--------------------------------------------------------------------------------
/src/macros.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "defines.h"
21 |
22 | .macro MEM_READ8 addr=0(s0), lb=lbu // v0: value
23 | // Read a byte from memory and decrease the cycle count
24 | \lb v0, \addr // Load opcode is selectable through lb; defaults to unsigned lbu
25 | addi s5, s5, -RAM_CYCLE // s5: cycle count, charged for one byte access
26 | .endm
27 |
28 | .macro MEM_READ16 addr=0(s0) // v0: value
29 | // Read 2 bytes from memory and form a 16-bit value
30 | lbu v0, 0 + \addr // Low byte
31 | addi s5, s5, -RAM_CYCLE * 2 // Charge both byte accesses at once
32 | move a2, v0 // a2 is clobbered as scratch for the low byte
33 | lbu v0, 1 + \addr // High byte
34 | sll v0, v0, 8
35 | or v0, v0, a2 // v0 = (high << 8) | low
36 | .endm
37 |
38 | .macro MEM_WRITE8 addr=0(s0) // a1: value
39 | // Write a byte to memory and decrease the cycle count
40 | sb a1, \addr // Only the low 8 bits of a1 are stored
41 | addi s5, s5, -RAM_CYCLE // s5: cycle count, charged for one byte access
42 | .endm
43 |
44 | .macro MEM_WRITE16 addr=0(s0) // a1: value
45 | // Write a 16-bit value to memory as 2 bytes
46 | move a2, a1 // Save the full value; a2 is clobbered as scratch
47 | sb a1, 0 + \addr // Low byte
48 | addi s5, s5, -RAM_CYCLE * 2 // Charge both byte accesses at once
49 | srl a1, a2, 8
50 | sb a1, 1 + \addr // High byte
51 | move a1, a2 // Restore a1 so it still holds the original value afterwards
52 | .endm
53 |
54 | .macro EMIT_OP opcode
55 | // Emit a static JIT opcode, optimized for delay slots
56 | lui t0, (\opcode) >> 16 // Upper half of the 32-bit opcode word
57 | jal emit_op // t0: opcode passed to emit_op; ra is overwritten
58 | ori t0, t0, (\opcode) & 0xFFFF // Lower half, placed after the jal to fill its delay slot
59 | .endm
60 |
--------------------------------------------------------------------------------
/src/main.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "defines.h"
21 |
22 | .globl framebuffer
23 | .globl fps_emulate
24 | .globl fps_display
25 | .globl skip_native
26 | .globl skip_emulate
27 | .globl frame_count
28 |
29 | .globl _start
30 |
31 | .data
32 |
33 | .align 4
34 | framebuffer: .word FRAMEBUFFER1 // Address last written to VI_ORIGIN by the VI interrupt
35 | fps_native: .byte 0 // VI interrupts counted toward the current second
36 | fps_emulate: .byte 0 // Copied to fps_display and cleared once per second
37 | fps_display: .byte 0 // Value drawn by the FPS counter
38 | skip_native: .byte 0 // V-blank skip counter maintained by the VI interrupt
39 | skip_emulate: .byte 0 // Not referenced in this file
40 | frame_count: .byte 0 // Frames queued for display; decremented by the VI interrupt
41 |
42 | .section .boot
43 | .set noreorder
44 |
45 | _start:
46 | // Jump to the main function (entry point)
47 | j main
48 | nop
49 |
50 | exception_vector:
51 | // Jump to the exception handler (copied to exception vectors)
52 | j exception_handler
53 | nop // With the j above, forms the 8 bytes that main loads and stores at each vector
54 |
55 | .text
56 |
57 | .align 5
58 | main:
59 | // Disable interrupts and do the magic hardware init
60 | mtc0 zero, $12 // Status
61 | li t0, 8
62 | sw t0, 0xBFC007FC // NOTE(review): looks like the last word of PIF RAM; confirm
63 |
64 | // Clear framebuffers and other hardcoded data
65 | li t0, FRAMEBUFFER1
66 | li t1, JIT_BUFFER - 8 // Address of the last doubleword to clear
67 | clear_vram:
68 | sd zero, (t0)
69 | bne t0, t1, clear_vram // Compares t0 before the delay-slot increment
70 | addi t0, t0, 8
71 |
72 | // Initialize the VI to display a 16-bit 256x224 framebuffer
73 | lui t0, 0xA440 // VI register upper address
74 | li t1, 0x00000202 // 16-bit pixel format, no anti-aliasing
75 | sw t1, 0x0000(t0) // VI_CONTROL
76 | li t1, FRAMEBUFFER1 // Framebuffer address
77 | sw t1, 0x0004(t0) // VI_ORIGIN
78 | li t1, 0x00000118 // Framebuffer width (280 pixels)
79 | sw t1, 0x0008(t0) // VI_WIDTH
80 | li t1, 0x00000200 // Line to trigger interrupt
81 | sw t1, 0x000C(t0) // VI_V_INTR
82 | li t1, 0x00000000 // Clear interrupt
83 | sw t1, 0x0010(t0) // VI_V_CURRENT
84 | li t1, 0x03E52239 // Various timing parameters
85 | sw t1, 0x0014(t0) // VI_BURST
86 | li t1, 0x0000020D // Lines per frame (NTSC standard)
87 | sw t1, 0x0018(t0) // VI_V_SYNC
88 | li t1, 0x00000C15 // Quarter-pixels per line
89 | sw t1, 0x001C(t0) // VI_H_SYNC
90 | li t1, 0x0C150C15 // Same as above
91 | sw t1, 0x0020(t0) // VI_H_SYNC_LEAP
92 | li t1, 0x006C02EC // Active horizontal pixel range
93 | sw t1, 0x0024(t0) // VI_H_VIDEO
94 | li t1, 0x002501FF // Active vertical pixel range
95 | sw t1, 0x0028(t0) // VI_V_VIDEO
96 | li t1, 0x000E0204 // Vertical color burst range
97 | sw t1, 0x002C(t0) // VI_V_BURST
98 | li t1, 0x000001C0 // Horizontal scale (10-bit fraction, 280/640)
99 | sw t1, 0x0030(t0) // VI_X_SCALE
100 | li t1, 0x00000400 // Vertical scale (10-bit fraction, 240/240)
101 | sw t1, 0x0034(t0) // VI_Y_SCALE
102 |
103 | // Initialize the AI to output 16-bit samples at the DSP rate
104 | lui t0, 0xA450 // AI register upper address
105 | li t1, 0x00000001 // DMA enable
106 | sw t1, 0x0008(t0) // AI_CONTROL
107 | li t1, 48681812 / (60 * 262 * 341 * 4 / DSP_SAMPLE) + 1 // Constant expression, evaluated at assembly time
108 | sw t1, 0x0010(t0) // AI_DAC_RATE
109 | li t1, 0x0000000F // 16-bit - 1
110 | sw t1, 0x0014(t0) // AI_BIT_RATE
111 |
112 | // Update the exception vectors
113 | lui t0, 0xA000
114 | ld t1, exception_vector // Loads the 8-byte j/nop pair as a single value
115 | sd t1, 0x0000(t0)
116 | sd t1, 0x0080(t0)
117 | sd t1, 0x0100(t0)
118 | sd t1, 0x0180(t0)
119 | cache 0x10, 0x0000(t0) // NOTE(review): op 0x10 is presumably an instruction cache invalidate; confirm
120 | cache 0x10, 0x0080(t0)
121 | cache 0x10, 0x0100(t0)
122 | cache 0x10, 0x0180(t0)
123 |
124 | // Use DMA to copy SRAM from the cart to memory
125 | lui t0, 0xA460 // PI register upper address
126 | la t1, sram // Local SRAM
127 | sw t1, 0x0000(t0) // PI_DRAM_ADDR
128 | li t1, 0x08000000 // Cart SRAM
129 | sw t1, 0x0004(t0) // PI_CART_ADDR
130 | li t1, 0x00007FFF // 32KB size
131 | sw t1, 0x000C(t0) // PI_WR_LEN
132 |
133 | pi_wait1:
134 | // Wait for the DMA to complete
135 | lw t1, 0x0010(t0) // PI_STATUS
136 | andi t1, t1, 0x1 // DMA busy
137 | bnez t1, pi_wait1 // Spin until the busy bit clears
138 | nop
139 |
140 | // Initialize the emulator
141 | jal reset_tlb
142 | nop
143 | jal input_init
144 | nop
145 | jal memory_init
146 | nop
147 | jal section_init
148 | nop
149 | jal cpu_init
150 | nop
151 |
152 | // Upload the RSP code and data
153 | la a1, rsp_main_text_start
154 | jal rsp_upload
155 | li a0, 0x1000 // IMEM address
156 | la a1, rsp_main_data_start
157 | jal rsp_upload
158 | li a0, 0x0000 // DMEM address
159 | sw zero, 0xA4080000 // SP_PC
160 |
161 | // Enable VI interrupts and start execution
162 | li t0, 0x00000595 // Set VI mask
163 | sw t0, 0xA430000C // MI_MASK
164 | li t0, 0x00000401 // Enable interrupts
165 | j cpu_execute // main never returns; control passes to cpu_execute
166 | mtc0 t0, $12 // Status
167 |
168 | .align 5
169 | rsp_upload: // a0: RSP address, a1: DRAM address
170 | // Transfer data to the RSP via DMA
171 | lui t0, 0xA404 // SP register upper address
172 | sw a0, 0x0000(t0) // SP_MEM_ADDR
173 | sw a1, 0x0004(t0) // SP_DRAM_ADDR
174 | li t1, 0x00000FFF // Data size (fixed for every upload; t0 and t1 are clobbered)
175 | sw t1, 0x0008(t0) // SP_RD_LEN
176 |
177 | dma_wait:
178 | // Wait for the DMA to complete
179 | lw t1, 0x0018(t0) // SP_DMA_BUSY
180 | bnez t1, dma_wait // Spin while the register reads non-zero
181 | nop
182 | jr ra
183 | nop
184 |
185 | .align 5
186 | reset_tlb:
187 | // Map all TLB entries to inaccessible locations
188 | mtc0 zero, $5 // PageMask
189 | li t0, 0x80000000
190 | mtc0 t0, $10 // EntryHi
191 | li t0, 32 // Entry count; the loop writes indices 31 down to 0
192 | next_tlb:
193 | addi t0, t0, -1
194 | mtc0 t0, $0 // Index
195 | bnez t0, next_tlb
196 | tlbwi // Delay slot: runs for every index, including the final 0
197 | jr ra
198 | nop
199 |
200 | .align 5
201 | exception_handler:
202 | // Jump to the appropriate handler for supported exceptions (each test sits in the previous branch delay slot)
203 | mfc0 k0, $13 // Cause
204 | andi k0, k0, 0x7C // Keep bits 2-6 of Cause for the comparisons below
205 | addi k1, k0, -0x8 // TLB load miss
206 | beqz k1, tlbl_exception
207 | addi k1, k0, -0xC // TLB store miss
208 | beqz k1, tlbs_exception
209 | addi k1, k0, -0x4 // TLB modification
210 | beqz k1, tlbm_exception
211 |
212 | // Jump to the appropriate handler for supported interrupts
213 | mfc0 k0, $13 // Cause (also the delay slot of the branch above, so k0 is reloaded either way)
214 | andi k1, k0, 0x1000
215 | bnez k1, reset_interrupt
216 | andi k1, k0, 0x400 // Delay slot: prepare the VI interrupt test
217 | bnez k1, vi_interrupt
218 | nop
219 | eret // Nothing handled here matched; return from the exception
220 | nop
221 |
222 | .align 5
223 | vi_interrupt:
224 | // Acknowledge the VI interrupt
225 | lui k0, 0xA440
226 | sw zero, 0x0010(k0) // VI_V_CURRENT
227 |
228 | // Wait one second by counting 60 frames at 60Hz
229 | la k0, fps_native
230 | lbu k1, (k0)
231 | addi k1, k1, -59 // Zero when 59 interrupts were already counted
232 | beqz k1, update_fps
233 | addi k1, k1, 60 // Delay slot: undo the -59 and add one
234 | b check_frame
235 | sb k1, (k0) // Delay slot: store the incremented count
236 |
237 | update_fps:
238 | // Update the display FPS and reset the counters
239 | la k0, fps_emulate
240 | lbu k1, (k0)
241 | sb zero, (k0)
242 | la k0, fps_native
243 | sb zero, (k0)
244 | la k0, fps_display
245 | sb k1, (k0) // k1 still holds the fps_emulate value read above
246 |
247 | // Check if SRAM is dirty
248 | la k0, sram_dirty
249 | lbu k1, (k0)
250 | beqz k1, check_frame
251 | sb zero, (k0) // Delay slot: the dirty flag is cleared on both paths
252 |
253 | // Use DMA to copy SRAM from memory to the cart
254 | lui k0, 0xA460 // PI register upper address
255 | la k1, sram // Local SRAM
256 | sw k1, 0x0000(k0) // PI_DRAM_ADDR
257 | li k1, 0x08000000 // Cart SRAM
258 | sw k1, 0x0004(k0) // PI_CART_ADDR
259 | li k1, 0x00007FFF // 32KB size
260 | sw k1, 0x0008(k0) // PI_RD_LEN
261 |
262 | pi_wait2:
263 | // Wait for the DMA to complete
264 | lw k1, 0x0010(k0) // PI_STATUS
265 | andi k1, k1, 0x1 // DMA busy
266 | bnez k1, pi_wait2 // Spins inside the interrupt handler until the busy bit clears
267 | nop
268 |
269 | check_frame:
270 | // Ignore V-blanks to match the frames being skipped
271 | la k1, skipped_set
272 | lbu k0, (k1)
273 | srl k0, k0, 2 // Settings are stored shifted left by 2
274 | la k1, skip_native
275 | lbu k1, (k1)
276 | sub k0, k0, k1
277 | blez k0, skip_reset
278 | li k0, 0 // Delay slot: counter resets to 0 once it reaches the setting
279 | addi k0, k1, 1 // Otherwise count one more skipped V-blank
280 | skip_reset:
281 | la k1, skip_native
282 | bnez k0, input_update
283 | sb k0, (k1) // Delay slot: store the updated counter on both paths
284 |
285 | // Continue if the next frame is ready; otherwise just update input
286 | la k0, frame_count
287 | lbu k1, (k0)
288 | beqz k1, input_update
289 | nop
290 |
291 | // Get the address of the next of three framebuffers
292 | la k0, framebuffer
293 | lw k0, (k0)
294 | li k1, FRAMEBUFFER2 - FRAMEBUFFER1 // Distance between consecutive framebuffers
295 | add k0, k0, k1
296 | li k1, MODE7_TEXTURE // Used as the address just past the last framebuffer
297 | bne k0, k1, set_buffer
298 | nop
299 | li k0, FRAMEBUFFER1 // Wrap back to the first framebuffer
300 |
301 | set_buffer:
302 | // Set the framebuffer address
303 | lui k1, 0xA440
304 | sw k0, 0x0004(k1) // VI_ORIGIN
305 | la k1, framebuffer
306 | sw k0, (k1) // Keep the framebuffer variable in step with VI_ORIGIN
307 |
308 | // Decrement the frame counter and update input
309 | la k0, frame_count
310 | lbu k1, (k0)
311 | addi k1, k1, -1
312 | j input_update
313 | sb k1, (k0) // Delay slot: store the decremented count
314 |
315 | .align 5
316 | reset_interrupt:
317 | // Stop the RSP and prepare the RDP for reset
318 | li t0, 0x00000002 // Set halt
319 | sw t0, 0xA4040010 // SP_STATUS
320 | li t0, 0x00000001 // Use RDRAM
321 | sw t0, 0xA410000C // DP_STATUS
322 |
323 | loop:
324 | // Wait until the system resets
325 | b loop // Never returns, so t0 is not preserved for the interrupted code
326 | nop
327 |
--------------------------------------------------------------------------------
/src/menu.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include "defines.h"
21 |
22 | .globl layer_set
23 | .globl precision_set
24 | .globl skipped_set
25 | .globl audio_set
26 |
27 | .globl update_menu
28 |
29 | .data
30 |
31 | .align 4
32 | counter_set: .byte 0 << 2 // Each setting holds (option index << 2), a byte offset into its option table
33 | layer_set: .byte 0 << 2
34 | precision_set: .byte 2 << 2
35 | skipped_set: .byte 0 << 2
36 | audio_set: .byte 1 << 2
37 | underclk_set: .byte 1 << 2
38 | left_str: .string "<"
39 | right_str: .string ">"
40 |
41 | .align 4
42 | title_str: .string "SODIUM64 SETTINGS"
43 | counter_str: .string "FPS COUNTER"
44 | layer_str: .string "SUB LAYER ORDER"
45 | precision_str: .string "FRAME PRECISION"
46 | skipped_str: .string "FRAMES SKIPPED"
47 | audio_str: .string "AUDIO OUTPUT"
48 | underclk_str: .string "APU UNDERCLOCK"
49 |
50 | .align 4
51 | on_str: .string " ON "
52 | off_str: .string " OFF "
53 | back_str: .string " BACK "
54 | front_str: .string " FRONT "
55 | lower_str: .string " LOWER "
56 | low_str: .string " LOW "
57 | medium_str: .string " MEDIUM "
58 | high_str: .string " HIGH "
59 | higher_str: .string " HIGHER "
60 | max_str: .string " MAX "
61 | zero_str: .string " 0 "
62 | one_str: .string " 1 "
63 | two_str: .string " 2 "
64 | three_str: .string " 3 "
65 |
66 | .align 4
67 | menu_items: .word counter_str, layer_str, precision_str, skipped_str, audio_str, underclk_str, 0 // Zero-terminated; the three tables are indexed in parallel
68 | menu_opts: .word toggle_opts, layer_opts, precision_opts, skipped_opts, toggle_opts, toggle_opts
69 | menu_sets: .word counter_set, layer_set, precision_set, skipped_set, audio_set, underclk_set
70 |
71 | .align 4
72 | toggle_opts: .word off_str, on_str, 0 // Option lists are zero-terminated as well
73 | layer_opts: .word back_str, front_str, 0
74 | precision_opts: .word lower_str, low_str, medium_str, high_str, higher_str, max_str, 0
75 | skipped_opts: .word zero_str, one_str, two_str, three_str, 0
76 |
77 | .text
78 | .set noreorder
79 |
80 | .macro DRAW line
81 | // Copy a line of character pixels to the framebuffer
82 | ld t0, \line * 16 + 0(t2) // t2: sprite address; each sprite row is 16 bytes
83 | ld t1, \line * 16 + 8(t2)
84 | sd t0, \line * 560 + 0(a1) // a1: destination; each framebuffer row is 560 bytes
85 | sd t1, \line * 560 + 8(a1)
86 | .endm
87 |
88 | .align 5
89 | draw_char: // a0: char, a1: top-left offset
90 | // Look up a character sprite if it's within bounds
91 | la t2, char_sp // Default sprite, used when the character is out of range
92 | addi t0, a0, -0x20 // Table index 0 corresponds to character 0x20
93 | bgtu t0, 0x40, unk_char // Unsigned compare, so characters below 0x20 are rejected too
94 | sll t0, t0, 2 // Delay slot: word offset into char_lookup
95 | lw t2, char_lookup(t0)
96 |
97 | unk_char:
98 | // Copy the sprite to the framebuffer
99 | DRAW 0
100 | DRAW 1
101 | DRAW 2
102 | DRAW 3
103 | DRAW 4
104 | DRAW 5
105 | DRAW 6
106 | DRAW 7
107 | jr ra // Clobbers t0-t2; a0 and a1 are left unchanged
108 | nop
109 |
110 | .align 5
111 | draw_value: // a0: value, a1: top-right offset
112 | // Isolate the lowest base-10 digit and draw its character
113 | li t1, 10
114 | div a0, t1
115 | addi a1, a1, -16 // Step one character (16 bytes) to the left
116 | move t9, ra // Keep the return address across the jal below
117 | mfhi t2 // Remainder: the digit to draw
118 | jal draw_char
119 | addi a0, t2, 0x30 // Delay slot: convert the digit to its character code
120 |
121 | // Move to the next digit until they're all drawn
122 | mflo a0 // Quotient: the remaining digits
123 | bnez a0, draw_value
124 | move ra, t9 // Delay slot: restore ra whether looping or returning
125 | jr ra
126 | nop
127 |
128 | .align 5
129 | draw_string: // a0: string, a1: top-left offset
130 | // Initialize values for drawing a string
131 | move t9, ra // Real return address, used by end_string
132 | move t8, a0 // t8: pointer to the next character
133 | addi a1, a1, -16 // Compensates for the +16 applied before the first character
134 | la ra, string_loop // draw_char will return straight into the loop
135 |
136 | string_loop:
137 | // Draw each character in the string until terminated
138 | lbu a0, (t8)
139 | beqz a0, end_string
140 | addi t8, t8, 1 // Delay slot: advance the string pointer
141 | j draw_char
142 | addi a1, a1, 16 // Delay slot: advance one character (16 bytes)
143 | end_string:
144 | jr t9
145 | nop
146 |
147 | .align 5
148 | update_menu: // a0: framebuffer
149 | // Open the settings menu if start is pressed
150 | jal get_pressed
151 | li v1, 0 // Index
152 | andi t0, v0, 0x10 // Start
153 | bnez t0, start_menu
154 | move t7, a0 // Delay slot: keep the framebuffer argument in t7
155 |
156 | // Draw the FPS counter if running and enabled
157 | lbu t0, counter_set
158 | beqz t0, menu_return
159 | nop
160 | lbu a0, fps_display
161 | jal draw_value
162 | addi a1, t7, 23 * 560 + 44 * 2 // Delay slot: byte offset of 23 rows (560 each) plus 88
163 | b menu_return // NOTE(review): ra was overwritten by the jal calls; exit is via menu_return, defined outside this file
164 | nop
165 |
166 | start_menu:
167 | // Wait until there are no extra frames queued
168 | lbu t0, frame_count // Decremented by the VI interrupt as queued frames are shown
169 | bnez t0, start_menu
170 | nop
171 |
172 | // Clear the current framebuffer
173 | lw t0, framebuffer
174 | add t1, t0, FRAMEBUFFER2 - FRAMEBUFFER1 - 8 // Address of the last doubleword in this buffer
175 | clear_buffer:
176 | sd zero, (t0)
177 | bne t0, t1, clear_buffer // Compares before the delay-slot increment, so t1 itself is cleared last
178 | addi t0, t0, 8
179 |
180 | draw_menu:
181 | // Draw the title and initialize values for menu items
182 | lw t7, framebuffer
183 | la a0, title_str
184 | jal draw_string
185 | addi a1, t7, 39 * 560 + 72 * 2 // Delay slot: title position
186 | addi t7, t7, 39 * 560 + 48 * 2 // Item column; menu_loop adds 24 rows before each item
187 | li t3, 0 // Byte offset of the item being drawn
188 |
189 | menu_loop:
190 | // Draw a menu item on the left
191 | lw a0, menu_items(t3)
192 | beqz a0, menu_idle // A zero entry terminates the item list
193 | addi t7, t7, 24 * 560 // Delay slot: move down 24 rows
194 | jal draw_string
195 | move a1, t7
196 |
197 | // Draw the item's setting on the right
198 | lw t0, menu_sets(t3)
199 | lw t1, menu_opts(t3)
200 | lbu t0, (t0) // Setting value, already a byte offset (index << 2)
201 | add t1, t1, t0
202 | lw a0, (t1)
203 | jal draw_string
204 | addi a1, t7, 128 * 2
205 |
206 | // Move to the next item if not selected
207 | bne t3, v1, menu_loop // v1: byte offset of the selected item
208 | addi t3, t3, 4 // Delay slot: advance on both paths
209 |
210 | // Draw selectors around the current item
211 | la a0, left_str
212 | jal draw_string
213 | addi a1, t7, 128 * 2 // Same position as the first character of the setting text
214 | la a0, right_str
215 | jal draw_string
216 | addi a1, t7, 184 * 2
217 | b menu_loop
218 | nop
219 |
220 | menu_idle:
221 | // Close the menu if start is pressed
222 | jal get_pressed
223 | nop
224 | andi t0, v0, 0x10 // Start
225 | bnez t0, menu_close
226 |
227 | // Move to an upper item if up is pressed
228 | andi t0, v0, 0x8 // Up (also the delay slot of the branch above; menu_close reloads t0)
229 | beqz t0, check_down
230 | sltu t0, zero, v1 // Delay slot: 1 if an item above exists, else 0
231 | sll t0, t0, 2
232 | b draw_menu
233 | sub v1, v1, t0 // Delay slot: step up one item (4 bytes) unless already at the top
234 |
235 | check_down:
236 | // Move to a lower item if down is pressed
237 | andi t0, v0, 0x4 // Down
238 | beqz t0, check_left
239 | nop
240 | lw t0, menu_items + 4(v1) // Entry after the selected one; zero marks the end of the list
241 | beqz t0, menu_idle
242 | nop
243 | b draw_menu
244 | addi v1, v1, 4 // Delay slot: select the next item
245 |
246 | check_left:
247 | // Decrease a setting value if left is pressed
248 | andi t0, v0, 0x2 // Left
249 | beqz t0, check_right
250 | nop
251 | lw t0, menu_sets(v1)
252 | lbu t1, (t0)
253 | sltu t2, zero, t1 // 1 if the setting is above its first option, else 0
254 | sll t2, t2, 2
255 | sub t1, t1, t2 // Step down one option (4) unless already at zero
256 | b draw_menu
257 | sb t1, (t0) // Delay slot: store the setting back
258 |
259 | check_right:
260 | // Increase a setting value if right is pressed
261 | andi t0, v0, 0x1 // Right
262 | beqz t0, menu_idle
263 | nop
264 | lw t0, menu_sets(v1)
265 | lbu t1, (t0)
266 | lw t2, menu_opts(v1)
267 | add t2, t2, t1
268 | lw t2, 4(t2) // Option after the current one; zero marks the end of the list
269 | beqz t2, menu_idle
270 | nop
271 | addi t1, t1, 4 // Step up one option
272 | b draw_menu
273 | sb t1, (t0) // Delay slot: store the setting back
274 |
275 | menu_close:
276 | // Check if the APU underclock setting changed
277 | lbu t0, underclk_set
278 | lbu t1, apu_clock
279 | li t2, APU_CYCLE
280 | srl t0, t0, 2 // Undo the << 2 setting encoding (0 for off, 1 for on)
281 | sll t2, t2, t0 // APU_CYCLE, doubled when underclock is on
282 | beq t1, t2, menu_return
283 | nop
284 |
285 | // Update the clock and invalidate the JIT buffer if changed
286 | sb t2, apu_clock
287 | li t0, ROM_BUFFER
288 | sw t0, jit_pointer // jit_pointer is reset to ROM_BUFFER
289 | b menu_return
290 | nop
291 |
--------------------------------------------------------------------------------
/src/mul_div.S:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2021-2025 Hydr8gon
3 |
4 | This file is part of sodium64.
5 |
6 | sodium64 is free software: you can redistribute it and/or modify
7 | it under the terms of the GNU General Public License as published
8 | by the Free Software Foundation, either version 3 of the License,
9 | or (at your option) any later version.
10 |
11 | sodium64 is distributed in the hope that it will be useful, but
12 | WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with sodium64. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #include
21 |
22 | .globl read_rddivl
23 | .globl read_rddivh
24 | .globl read_rdmpyl
25 | .globl read_rdmpyh
26 | .globl write_wrmpya
27 | .globl write_wrmpyb
28 | .globl write_wrdivl
29 | .globl write_wrdivh
30 | .globl write_wrdivb
31 |
32 | .data
33 |
34 | .align 4
35 | wrdiva: .hword 0 // 16-bit dividend; high byte at offset 0, low byte at offset 1
36 | rddiv: .hword 0 // Division quotient
37 | rdmpy: .hword 0 // Multiply product or division remainder
38 | wrmpya: .byte 0 // 8-bit multiplicand
39 |
40 | .text
41 | .set noreorder
42 |
43 | .align 5
44 | read_rddivl: // v0: value
45 | // Read the low byte of the division quotient
46 | lbu v0, rddiv + 1 // Low byte is the second byte of the halfword
47 | jr ra
48 | nop
49 |
50 | .align 5
51 | read_rddivh: // v0: value
52 | // Read the high byte of the division quotient
53 | lbu v0, rddiv // High byte is the first byte of the halfword
54 | jr ra
55 | nop
56 |
57 | .align 5
58 | read_rdmpyl: // v0: value
59 | // Read the low byte of the multiply product/division remainder
60 | lbu v0, rdmpy + 1 // Low byte is the second byte of the halfword
61 | jr ra
62 | nop
63 |
64 | .align 5
65 | read_rdmpyh: // v0: value
66 | // Read the high byte of the multiply product/division remainder
67 | lbu v0, rdmpy // High byte is the first byte of the halfword
68 | jr ra
69 | nop
70 |
71 | .align 5
72 | write_wrmpya: // a1: value
73 | // Set the 8-bit multiplicand
74 | sb a1, wrmpya // Only stored here; the multiply happens in write_wrmpyb
75 | jr ra
76 | nop
77 |
78 | .align 5
79 | write_wrmpyb: // a1: value
80 | // Multiply the multiplicand by the value and set the product
81 | lbu t0, wrmpya
82 | mult t0, a1
83 | mflo t0
84 | sh t0, rdmpy // Low 16 bits of the product
85 | sh a1, rddiv // The quotient register is also overwritten with the written value
86 | jr ra
87 | nop
88 |
89 | .align 5
90 | write_wrdivl: // a1: value
91 | // Set the low byte of the 16-bit dividend
92 | sb a1, wrdiva + 1 // Low byte is the second byte of the halfword
93 | jr ra
94 | nop
95 |
96 | .align 5
97 | write_wrdivh: // a1: value
98 | // Set the high byte of the 16-bit dividend
99 | sb a1, wrdiva // High byte is the first byte of the halfword
100 | jr ra
101 | nop
102 |
103 | .align 5
104 | write_wrdivb: // a1: value
105 | // Divide the dividend by the value and set the quotient and remainder
106 | beqz a1, div_zero
107 | li t0, 0xFFFF // Delay slot: quotient used by the division-by-zero path
108 | lhu t0, wrdiva
109 | div t0, a1
110 | mflo t0
111 | sh t0, rddiv // Quotient
112 | mfhi t1
113 | sh t1, rdmpy // Remainder
114 | jr ra
115 | nop
116 |
117 | div_zero:
118 | // Handle division by zero cases
119 | sh t0, rddiv // Quotient becomes 0xFFFF (t0 was set in the delay slot)
120 | lhu t1, wrdiva
121 | sh t1, rdmpy // Remainder becomes the dividend
122 | jr ra
123 | nop
124 |
--------------------------------------------------------------------------------