├── .github ├── FUNDING.yml ├── pull_request_template.md └── workflows │ └── autobuild.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── rom-converter.py └── src ├── apu.S ├── apu_address.S ├── apu_alu.S ├── apu_control.S ├── apu_emitter.S ├── apu_transfer.S ├── cpu.S ├── cpu_address.S ├── cpu_alu.S ├── cpu_control.S ├── cpu_transfer.S ├── defines.h ├── dma.S ├── dsp.S ├── font.S ├── input.S ├── macros.h ├── main.S ├── memory.S ├── menu.S ├── mul_div.S ├── ppu.S ├── rsp_main.S └── xcop_dsp1.S /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | patreon: Hydr8gon 2 | custom: paypal.me/Hydr8gon 3 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Pull requests are not accepted for this project; see the contributing section of the readme for more details. 2 | -------------------------------------------------------------------------------- /.github/workflows/autobuild.yml: -------------------------------------------------------------------------------- 1 | name: Automatic Builds 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Setup Node.js 14 | uses: actions/setup-node@v4 15 | with: 16 | node-version: latest 17 | - name: Install libdragon 18 | run: npm install -g libdragon 19 | - name: Checkout 20 | uses: actions/checkout@v4 21 | - name: Compile 22 | run: | 23 | libdragon init 24 | libdragon make -j$(nproc) 25 | - name: Upload 26 | uses: actions/upload-artifact@v4 27 | with: 28 | name: sodium64 29 | path: | 30 | sodium64.z64 31 | rom-converter.py 32 | 33 | update-release: 34 | runs-on: ubuntu-latest 35 | needs: build 36 | 37 | steps: 38 | - name: Delete old release 39 | uses: dev-drprasad/delete-tag-and-release@v0.2.1 40 | with: 41 | delete_release: true 42 | tag_name: release 43 | env: 44 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 45 | - name: Get artifacts 46 | uses: actions/download-artifact@v4 47 | - name: Package artifacts 48 | run: | 49 | mv sodium64/sodium64.z64 . 50 | mv sodium64/rom-converter.py . 51 | zip -r sodium64.zip sodium64.z64 rom-converter.py 52 | - name: Create new release 53 | uses: ncipollo/release-action@v1 54 | with: 55 | name: Rolling Release 56 | body: These are automatically updated builds of the latest commit. 57 | artifacts: sodium64.zip 58 | tag: release 59 | token: ${{ secrets.GITHUB_TOKEN }} 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /out 3 | /sodium64.z64 4 | /*.sfc 5 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Based on n64.mk from libdragon 2 | 3 | PROJ_NAME := sodium64 4 | BUILD_DIR := build 5 | SRC_DIRS := src 6 | 7 | SFILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.S)) 8 | HFILES := $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.h)) 9 | OFILES := $(patsubst %.S,$(BUILD_DIR)/%.o,$(SFILES)) 10 | 11 | N64_ROM_TITLE = "$(PROJ_NAME)" 12 | N64_ROM_SAVETYPE = sram256k 13 | 14 | # Override this to use a toolchain installed separately from libdragon 15 | N64_GCCPREFIX ?= $(N64_INST) 16 | N64_ROOTDIR = $(N64_INST) 17 | N64_BINDIR = $(N64_ROOTDIR)/bin 18 | N64_INCLUDEDIR = $(N64_ROOTDIR)/mips64-elf/include 19 | N64_LIBDIR = $(N64_ROOTDIR)/mips64-elf/lib 20 | N64_GCCPREFIX_TRIPLET = $(N64_GCCPREFIX)/bin/mips64-elf- 21 | 22 | COMMA := , 23 | 24 | N64_CC = $(N64_GCCPREFIX_TRIPLET)gcc 25 | N64_CXX = $(N64_GCCPREFIX_TRIPLET)g++ 26 | N64_AS = $(N64_GCCPREFIX_TRIPLET)as 27 | N64_AR = $(N64_GCCPREFIX_TRIPLET)ar 28 | N64_LD = $(N64_GCCPREFIX_TRIPLET)ld 29 | N64_OBJCOPY = $(N64_GCCPREFIX_TRIPLET)objcopy 30 | N64_OBJDUMP = $(N64_GCCPREFIX_TRIPLET)objdump 31 | N64_SIZE = $(N64_GCCPREFIX_TRIPLET)size 32 | N64_NM = $(N64_GCCPREFIX_TRIPLET)nm 33 | N64_STRIP = $(N64_GCCPREFIX_TRIPLET)strip 34 | 35 | N64_CHKSUM = $(N64_BINDIR)/chksum64 36 | N64_ED64ROMCONFIG = $(N64_BINDIR)/ed64romconfig 37 | N64_MKDFS = $(N64_BINDIR)/mkdfs 38 | N64_TOOL = $(N64_BINDIR)/n64tool 39 | N64_SYM = $(N64_BINDIR)/n64sym 40 | N64_ELFCOMPRESS = $(N64_BINDIR)/n64elfcompress 41 | N64_AUDIOCONV = $(N64_BINDIR)/audioconv64 42 | N64_MKSPRITE = $(N64_BINDIR)/mksprite 43 | 44 | N64_ASFLAGS = -mtune=vr4300 -march=vr4300 -Wa,--fatal-warnings 45 | N64_RSPASFLAGS = -march=mips1 -mabi=32 -Wa,--fatal-warnings 46 | N64_LDFLAGS = -L$(N64_LIBDIR) -Tn64.ld --gc-sections 47 | N64_TOOLFLAGS = --title $(N64_ROM_TITLE) 48 | N64_ED64ROMCONFIGFLAGS = --savetype $(N64_ROM_SAVETYPE) --regionfree 49 | 50 | COMMA := , 51 | 52 | # Change the dependency chain of .z64 ROMs to use the N64 toolchain 53 | %.z64: CC=$(N64_CC) 54 | %.z64: CXX=$(N64_CXX) 55 | %.z64: AS=$(N64_AS) 56 | %.z64: LD=$(N64_LD) 57 | %.z64: ASFLAGS+=$(N64_ASFLAGS) 58 | %.z64: RSPASFLAGS+=$(N64_RSPASFLAGS) 59 | %.z64: LDFLAGS+=$(N64_LDFLAGS) 60 | %.z64: $(BUILD_DIR)/%.elf 61 | @echo " [Z64] $@" 62 | $(N64_SYM) $< $<.sym 63 | cp $< $<.stripped 64 | $(N64_STRIP) -s $<.stripped 65 | $(N64_ELFCOMPRESS) -o $(dir $<) -c 1 $<.stripped 66 | @rm -f $@ 67 | DFS_FILE="$(filter %.dfs, $^)"; \ 68 | if [ -z "$$DFS_FILE" ]; then \ 69 | $(N64_TOOL) $(N64_TOOLFLAGS) --toc --output $@ --align 256 $<.stripped --align 8 $<.sym; \ 70 | else \ 71 | $(N64_TOOL) $(N64_TOOLFLAGS) --toc --output $@ --align 256 $<.stripped --align 8 $<.sym --align 16 "$$DFS_FILE"; \ 72 | fi 73 | if [ ! -z "$(strip $(N64_ED64ROMCONFIGFLAGS))" ]; then \ 74 | $(N64_ED64ROMCONFIG) $(N64_ED64ROMCONFIGFLAGS) $@; \ 75 | fi 76 | 77 | # Assembly rule. We use .S for both RSP and MIPS assembly code, and we differentiate 78 | # using the prefix of the filename: if it starts with "rsp", it is RSP ucode, otherwise 79 | # it's a standard MIPS assembly file. 80 | $(BUILD_DIR)/%.o: %.S 81 | @mkdir -p $(dir $@) 82 | set -e; \ 83 | FILENAME="$(notdir $(basename $@))"; \ 84 | if case "$$FILENAME" in "rsp"*) true;; *) false;; esac; then \ 85 | SYMPREFIX="$(subst .,_,$(subst /,_,$(basename $@)))"; \ 86 | TEXTSECTION="$(basename $@).text"; \ 87 | DATASECTION="$(basename $@).data"; \ 88 | BINARY="$(basename $@).elf"; \ 89 | echo " [RSP] $<"; \ 90 | $(N64_CC) $(RSPASFLAGS) -L$(N64_LIBDIR) -nostartfiles -Wl,-Trsp.ld -Wl,--gc-sections -Wl,-Map=$(BUILD_DIR)/$(notdir $(basename $@)).map -o $@ $<; \ 91 | mv "$@" $$BINARY; \ 92 | $(N64_OBJCOPY) -O binary -j .text $$BINARY $$TEXTSECTION.bin; \ 93 | $(N64_OBJCOPY) -O binary -j .data $$BINARY $$DATASECTION.bin; \ 94 | $(N64_OBJCOPY) -I binary -O elf32-bigmips -B mips4300 \ 95 | --redefine-sym _binary_$${SYMPREFIX}_text_bin_start=$${FILENAME}_text_start \ 96 | --redefine-sym _binary_$${SYMPREFIX}_text_bin_end=$${FILENAME}_text_end \ 97 | --redefine-sym _binary_$${SYMPREFIX}_text_bin_size=$${FILENAME}_text_size \ 98 | --set-section-alignment .data=8 \ 99 | --rename-section .text=.data $$TEXTSECTION.bin $$TEXTSECTION.o; \ 100 | $(N64_OBJCOPY) -I binary -O elf32-bigmips -B mips4300 \ 101 | --redefine-sym _binary_$${SYMPREFIX}_data_bin_start=$${FILENAME}_data_start \ 102 | --redefine-sym _binary_$${SYMPREFIX}_data_bin_end=$${FILENAME}_data_end \ 103 | --redefine-sym _binary_$${SYMPREFIX}_data_bin_size=$${FILENAME}_data_size \ 104 | --set-section-alignment .data=8 \ 105 | --rename-section .text=.data $$DATASECTION.bin $$DATASECTION.o; \ 106 | $(N64_SIZE) -G $$BINARY; \ 107 | $(N64_LD) -relocatable $$TEXTSECTION.o $$DATASECTION.o -o $@; \ 108 | rm $$TEXTSECTION.bin $$DATASECTION.bin $$TEXTSECTION.o $$DATASECTION.o; \ 109 | else \ 110 | echo " [AS] $<"; \ 111 | $(CC) -c $(ASFLAGS) -o $@ $<; \ 112 | fi 113 | 114 | %.elf: $(N64_LIBDIR)/n64.ld 115 | @mkdir -p $(dir $@) 116 | @echo " [LD] $@" 117 | $(CXX) -o $@ $(filter-out $(N64_LIBDIR)/n64.ld,$^) -lc $(patsubst %,-Wl$(COMMA)%,$(LDFLAGS)) -Wl,-Map=$(BUILD_DIR)/$(notdir $(basename $@)).map 118 | $(N64_SIZE) -G $@ 119 | 120 | .SILENT: 121 | 122 | all: $(PROJ_NAME).z64 123 | 124 | $(BUILD_DIR)/$(PROJ_NAME).elf: $(OFILES) 125 | 126 | $(OFILES): $(HFILES) 127 | 128 | clean: 129 | rm -rf $(BUILD_DIR) $(PROJ_NAME).z64 130 | 131 | .PHONY: all clean 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sodium64 2 | A SNES emulator for the N64, written in assembly. 3 | 4 | ### Overview 5 | The goal of sodium64 is to be fast and accurate enough to at least make some SNES games playable on the N64. It handles 6 | rendering entirely on the RSP in order to reduce load on the main CPU. I thought it would be fun to write something 7 | specifically for older hardware in assembly, so sodium64 was born! 8 | 9 | ### Downloads 10 | The latest build of sodium64 is automatically provided via GitHub Actions, and can be downloaded from the 11 | [releases page](https://github.com/Hydr8gon/sodium64/releases). 12 | 13 | ### Usage 14 | Place SNES ROMs with extension `.sfc`/`.smc` in the same folder as `sodium64.z64` and `rom-converter.py`. Run 15 | `rom-converter.py` using [Python](https://www.python.org) to convert the SNES ROMs to N64 ROMs. The output ROMs will be 16 | in a new folder called `out`. 17 | 18 | Alternatively, some flashcarts support loading ROMs directly with a supplied emulator. If you have an EverDrive, copy 19 | `sodium64.z64` to the `ED64/emu` folder on your SD card and rename it to `smc.v64`. SNES ROMs must be in headerless 20 | `.smc` format to work this way; `rom-converter.py` can optionally convert input ROMs for this. 21 | 22 | ### Controls 23 | | **N64** | **SNES** | 24 | |:---------:|:------------:| 25 | | C-Buttons | ABXY | 26 | | D-Pad | D-Pad | 27 | | L/R | L/R | 28 | | A/B | Start/Select | 29 | | Start | Settings | 30 | 31 | ### Contributing 32 | This is a personal project, and I've decided to not review or accept pull requests for it. If you want to help, you can 33 | test things and report issues or provide feedback. If you can afford it, you can also donate to motivate me and allow me 34 | to spend more time on things like this. Nothing is mandatory, and I appreciate any interest in my projects, even if 35 | you're just a user! 36 | 37 | ### Building 38 | Although sodium64 is written in assembly, it relies on [libdragon](https://github.com/DragonMinded/libdragon.git) for 39 | its build system. With that set up, run `make` in the project root directory to start building. 40 | 41 | ### Hardware References 42 | * [Fullsnes](https://problemkaputt.de/fullsnes.htm) - The main source of information on SNES hardware 43 | * [Anomie Docs](https://www.romhacking.net/community/548) - More detailed documentation for certain components 44 | * [6502 Tutorials](http://6502.org/tutorials/) - Has articles thoroughly covering the CPU and its quirks 45 | * [bsnes](https://github.com/bsnes-emu/bsnes) - Reference for the HLE DSP-1 coprocessor commands 46 | 47 | ### Other Links 48 | * [Hydra's Lair](https://hydr8gon.github.io) - Blog where I may or may not write about things 49 | * [Discord Server](https://discord.gg/JbNz7y4) - A place to chat about my projects and stuff 50 | -------------------------------------------------------------------------------- /rom-converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | 5 | convert = 0 6 | 7 | print("sodium64 ROM converter") 8 | print("Place sodium64.z64 and SNES ROMs with extension .sfc/.smc in the same folder as this script.") 9 | print("The out folder will contain the ROMs converted to .z64 format, ready to play.\n") 10 | 11 | if not os.path.isfile("sodium64.z64"): 12 | print("Error: sodium64.z64 not found!") 13 | exit() 14 | 15 | if not os.path.isdir("out"): 16 | os.mkdir("out") 17 | 18 | for filename in os.listdir("."): 19 | ext = filename[-4:].lower() 20 | if (ext == ".sfc" or ext == ".smc") and os.path.isfile(filename): 21 | print("Converting " + filename + "...") 22 | baseFile = open("sodium64.z64", "rb") 23 | inFile = open(filename, "rb") 24 | if ext == ".sfc": 25 | convert = 1 26 | if os.stat(filename).st_size & 0x3FF == 0x200: 27 | inFile.seek(0x200) # Skip header 28 | convert = 1 29 | outFile = open("out/" + filename[:-4] + ".z64", "wb") 30 | outFile.write(baseFile.read()) 31 | outFile.seek(0x104000, 0) 32 | outFile.write(inFile.read()) 33 | inFile.close() 34 | 35 | if convert == 1: 36 | print("\nA header or .sfc extension was detected in one or more ROMs.") 37 | print("Some flashcarts support loading ROMs directly with a supplied emulator.") 38 | print("To be compatible with this, ROMs must be converted to headerless .smc files.") 39 | print("Would you like to convert the input ROMs to this format? (y/N)") 40 | 41 | if input() == "y": 42 | for filename in os.listdir("."): 43 | ext = filename[-4:].lower() 44 | if (ext == ".sfc" or ext == ".smc") and os.path.isfile(filename): 45 | print("Converting " + filename + "...") 46 | file = open(filename, "rb") 47 | if os.stat(filename).st_size & 0x3FF == 0x200: 48 | file.seek(0x200) # Skip header 49 | data = file.read() 50 | file.close() 51 | os.remove(filename) 52 | file = open(filename[:-4] + ".smc", "wb") 53 | file.write(data) 54 | 55 | print("Done!") 56 | -------------------------------------------------------------------------------- /src/apu.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include 21 | #include "defines.h" 22 | 23 | .globl apu_map 24 | .globl apu_ram 25 | .globl apu_count 26 | .globl apu_reg_x 27 | .globl apu_reg_y 28 | .globl apu_accum 29 | .globl apu_stack 30 | .globl apu_flags 31 | .globl apu_clock 32 | 33 | .globl apu_execute 34 | .globl apu_read8 35 | .globl apu_write8 36 | .globl offset_timers 37 | .globl read_apuio0 38 | .globl read_apuio1 39 | .globl read_apuio2 40 | .globl read_apuio3 41 | .globl write_apuio0 42 | .globl write_apuio1 43 | .globl write_apuio2 44 | .globl write_apuio3 45 | 46 | .data 47 | 48 | .align 4 49 | apu_map: .byte 0:0x3FF, 0x40 50 | apu_ram: .byte 0:0x10000 51 | apu_rom: // Allows uploading code from the CPU 52 | .byte 0xCD, 0xEF, 0xBD, 0xE8, 0x00, 0xC6, 0x1D, 0xD0 // 0xFFC0-0xFFC7 53 | .byte 0xFC, 0x8F, 0xAA, 0xF4, 0x8F, 0xBB, 0xF5, 0x78 // 0xFFC8-0xFFCF 54 | .byte 0xCC, 0xF4, 0xD0, 0xFB, 0x2F, 0x19, 0xEB, 0xF4 // 0xFFD0-0xFFD7 55 | .byte 0xD0, 0xFC, 0x7E, 0xF4, 0xD0, 0x0B, 0xE4, 0xF5 // 0xFFD8-0xFFDF 56 | .byte 0xCB, 0xF4, 0xD7, 0x00, 0xFC, 0xD0, 0xF3, 0xAB // 0xFFE0-0xFFE7 57 | .byte 0x01, 0x10, 0xEF, 0x7E, 0xF4, 0x10, 0xEB, 0xBA // 0xFFE8-0xFFEF 58 | .byte 0xF6, 0xDA, 0x00, 0xBA, 0xF4, 0xC4, 0xF4, 0xDD // 0xFFF0-0xFFF7 59 | .byte 0x5D, 0xD0, 0xDB, 0x1F, 0x00, 0x00, 0xC0, 0xFF // 0xFFF8-0xFFFF 60 | 61 | .align 4 62 | apu_cycle1: .word 341 * 4 63 | apu_cycle2: .word 341 * 4 64 | apu_ocycles: .word INT_MIN:3 65 | apu_inputs: .hword 0:2 66 | apu_outputs: .hword 0:2 67 | apu_timers: .byte 0:3 68 | apu_tmdivs: .byte 0:3 69 | apu_tmouts: .byte 0:3 70 | apu_control: .byte 0xB0 71 | 72 | .align 4 73 | apu_count: .hword 0xFFC0 74 | apu_reg_x: .byte 0 75 | apu_reg_y: .byte 0 76 | apu_accum: .byte 0 77 | apu_stack: .byte 0xFF 78 | apu_flags: .byte 0 79 | apu_clock: .byte APU_CYCLE * 2 80 | 81 | .align 4 82 | read_iomap: 83 | .word read_unk, read_unk, read_dspaddr, read_dspdata // 0xF0-0xF3 84 | .word read_cpuio0, read_cpuio1, read_cpuio2, read_cpuio3 // 0xF4-0xF7 85 | .word read_unk, read_unk, read_unk, read_unk // 0xF8-0xFB 86 | .word read_unk, read_t0out, read_t1out, read_t2out // 0xFC-0xFF 87 | 88 | .align 4 89 | write_iomap: 90 | .word write_unk, write_control, write_dspaddr, write_dspdata // 0xF0-0xF3 91 | .word write_cpuio0, write_cpuio1, write_cpuio2, write_cpuio3 // 0xF4-0xF7 92 | .word write_unk, write_unk, write_t0div, write_t1div // 0xF8-0xFB 93 | .word write_t2div, write_unk, write_unk, write_unk // 0xFC-0xFF 94 | 95 | .text 96 | .set noreorder 97 | 98 | .align 5 99 | apu_execute: 100 | // Compile a new JIT block if one doesn't exist for the current PC 101 | lhu s0, apu_count 102 | ble s3, a3, dsp_sample // Check if the DSP should run 103 | sll t0, s0, 2 104 | lw t0, jit_lookup(t0) 105 | beqz t0, compile_block 106 | 107 | // Compile a new JIT block if memory changed at the start 108 | lhu t1, 0(t0) 109 | lw t1, jit_tags(t1) 110 | lw t2, 4(t0) 111 | bne t1, t2, compile_block 112 | 113 | // Compile a new JIT block if memory changed at the end 114 | lhu t2, 2(t0) 115 | lw t2, jit_tags(t2) 116 | lw t1, 8(t0) 117 | bne t1, t2, compile_block 118 | 119 | // Jump to a cached JIT block's code 120 | addi t0, t0, 12 121 | jr t0 122 | nop 123 | 124 | .align 5 125 | apu_read8: // a0: address - v0: value 126 | // Read a byte from APU memory 127 | srl t0, a0, 6 128 | lbu t1, apu_map(t0) 129 | andi t2, a0, 0xFFF0 130 | add t0, a0, t1 131 | lbu v0, apu_ram(t0) 132 | 133 | // Check if the address is an I/O port and decrease the cycle count 134 | lbu t0, apu_clock 135 | beq t2, 0xF0, io_read8 136 | sub s3, s3, t0 137 | read_unk: // Unknown I/O register read; do nothing 138 | jr ra 139 | 140 | io_read8: 141 | // Read from an I/O register in the lookup table 142 | andi t0, a0, 0xF 143 | sll t0, t0, 2 // Word offset 144 | lw t0, read_iomap(t0) 145 | jr t0 146 | nop 147 | 148 | .align 5 149 | apu_write8: // a0: address, a1: value 150 | // Write a byte to APU memory 151 | sb a1, apu_ram(a0) 152 | 153 | // Increment the memory block's tag to indicate change 154 | srl t0, a0, 6 155 | sll t0, t0, 2 156 | lw t1, jit_tags(t0) 157 | addi t1, t1, 1 158 | sw t1, jit_tags(t0) 159 | 160 | // Check if the address is an I/O port and decrease the cycle count 161 | lbu t0, apu_clock 162 | andi t2, a0, 0xFFF0 163 | beq t2, 0xF0, io_write8 164 | sub s3, s3, t0 165 | write_unk:// Unknown I/O register write; do nothing 166 | jr ra 167 | 168 | io_write8: 169 | // Write to an I/O register in the lookup table 170 | andi t0, a0, 0xF 171 | sll t0, t0, 2 // Word offset 172 | lw t0, write_iomap(t0) 173 | jr t0 174 | nop 175 | 176 | .align 5 177 | update_overflows: 178 | // Never overflow timer 0 if it's disabled 179 | lbu t0, apu_control 180 | andi t1, t0, 0x1 181 | bnez t1, stamp_timer0 182 | li t1, INT_MIN 183 | sw t1, apu_ocycles + 0 184 | b overflow_timer1 185 | nop 186 | 187 | stamp_timer0: 188 | // Update the overflow timestamp for timer 0 189 | lbu t1, apu_timers + 0 190 | lbu t2, apu_tmdivs + 0 191 | addi t1, t1, 1 192 | sub t1, t2, t1 193 | andi t1, t1, 0xFF 194 | addi t1, t1, 1 // Ticks until overflow 195 | li t2, APU_TIMER1 // Cycles per timer tick 196 | mult t1, t2 // Cycles until overflow 197 | lw t1, apu_cycle1 198 | mflo t2 199 | sub t1, t1, t2 200 | sw t1, apu_ocycles + 0 201 | 202 | overflow_timer1: 203 | // Never overflow timer 1 if it's disabled 204 | andi t1, t0, 0x2 205 | bnez t1, stamp_timer1 206 | li t1, INT_MIN 207 | sw t1, apu_ocycles + 4 208 | b overflow_timer2 209 | nop 210 | 211 | stamp_timer1: 212 | // Update the overflow timestamp for timer 1 213 | lbu t1, apu_timers + 1 214 | lbu t2, apu_tmdivs + 1 215 | addi t1, t1, 1 216 | sub t1, t2, t1 217 | andi t1, t1, 0xFF 218 | addi t1, t1, 1 // Ticks until overflow 219 | li t2, APU_TIMER1 // Cycles per timer tick 220 | mult t1, t2 // Cycles until overflow 221 | lw t1, apu_cycle1 222 | mflo t2 223 | sub t1, t1, t2 224 | sw t1, apu_ocycles + 4 225 | 226 | overflow_timer2: 227 | // Never overflow timer 2 if it's disabled 228 | andi t1, t0, 0x4 229 | bnez t1, stamp_timer2 230 | li t1, INT_MIN 231 | sw t1, apu_ocycles + 8 232 | jr ra 233 | nop 234 | 235 | stamp_timer2: 236 | // Update the overflow timestamp for timer 2 237 | lbu t1, apu_timers + 2 238 | lbu t2, apu_tmdivs + 2 239 | addi t1, t1, 1 240 | sub t1, t2, t1 241 | andi t1, t1, 0xFF 242 | addi t1, t1, 1 // Ticks until overflow 243 | li t2, APU_TIMER2 // Cycles per timer tick 244 | mult t1, t2 // Cycles until overflow 245 | lw t1, apu_cycle2 246 | mflo t2 247 | sub t1, t1, t2 248 | sw t1, apu_ocycles + 8 249 | jr ra 250 | nop 251 | 252 | .align 5 253 | update_timers: 254 | // Get how many timer 2 ticks passed since the last update 255 | lw t1, apu_cycle2 256 | sub t0, t1, s3 // Cycles since last update 257 | li t1, APU_TIMER2 // Cycles per timer tick 258 | blt t0, t1, early_exit 259 | nop 260 | div t0, t1 261 | mflo t6 // Ticks passed 262 | mfhi t1 // Cycles since latest tick 263 | add t1, t1, s3 264 | sw t1, apu_cycle2 265 | 266 | // Check if timer 2 is enabled 267 | lbu t5, apu_control 268 | andi t0, t5, 0x4 269 | beqz t0, update_timer0 270 | move t4, t6 // Ticks left 271 | 272 | loop_timer2: 273 | // Check if timer 2 will overflow 274 | lw t0, apu_ocycles + 8 275 | bgt s3, t0, inc_timer2 276 | nop 277 | 278 | // Adjust the ticks left for after overflow 279 | lbu t1, apu_timers + 2 280 | lbu t2, apu_tmdivs + 2 281 | addi t1, t1, 1 282 | sub t1, t2, t1 283 | andi t1, t1, 0xFF 284 | addi t1, t1, 1 // Ticks until overflow 285 | sub t4, t4, t1 286 | 287 | // Overflow timer 2 288 | lbu t1, apu_tmouts + 2 289 | addi t1, t1, 1 290 | andi t1, t1, 0xF 291 | sb t1, apu_tmouts + 2 292 | sb zero, apu_timers + 2 293 | 294 | // Update the overflow timestamp for next overflow 295 | addi t1, t2, -1 296 | andi t1, t1, 0xFF 297 | addi t1, t1, 1 // Ticks until overflow 298 | li t2, APU_TIMER2 // Cycles per timer tick 299 | mult t1, t2 // Cycles until overflow 300 | mflo t2 301 | sub t0, t0, t2 302 | sw t0, apu_ocycles + 8 303 | b loop_timer2 304 | nop 305 | 306 | inc_timer2: 307 | // Increment timer 2 without overflow 308 | lbu t0, apu_timers + 2 309 | add t0, t0, t4 310 | sb t0, apu_timers + 2 311 | 312 | update_timer0: 313 | // Get how many timer 0/1 ticks passed since the last update 314 | lw t1, apu_cycle1 315 | sub t0, t1, s3 // Cycles since last update 316 | li t1, APU_TIMER1 // Cycles per timer tick 317 | blt t0, t1, early_exit 318 | nop 319 | div t0, t1 320 | mflo t6 // Ticks passed 321 | mfhi t1 // Cycles since latest tick 322 | add t1, t1, s3 323 | sw t1, apu_cycle1 324 | 325 | // Check if timer 0 is enabled 326 | andi t0, t5, 0x1 327 | beqz t0, update_timer1 328 | move t4, t6 // Ticks left 329 | 330 | loop_timer0: 331 | // Check if timer 0 will overflow 332 | lw t0, apu_ocycles + 0 333 | bgt s3, t0, inc_timer0 334 | nop 335 | 336 | // Adjust the ticks left for after overflow 337 | lbu t1, apu_timers + 0 338 | lbu t2, apu_tmdivs + 0 339 | addi t1, t1, 1 340 | sub t1, t2, t1 341 | andi t1, t1, 0xFF 342 | addi t1, t1, 1 // Ticks until overflow 343 | sub t4, t4, t1 344 | 345 | // Overflow timer 0 346 | lbu t1, apu_tmouts + 0 347 | addi t1, t1, 1 348 | andi t1, t1, 0xF 349 | sb t1, apu_tmouts + 0 350 | sb zero, apu_timers + 0 351 | 352 | // Update the overflow timestamp for next overflow 353 | addi t1, t2, -1 354 | andi t1, t1, 0xFF 355 | addi t1, t1, 1 // Ticks until overflow 356 | li t2, APU_TIMER1 // Cycles per timer tick 357 | mult t1, t2 // Cycles until overflow 358 | mflo t2 359 | sub t0, t0, t2 360 | sw t0, apu_ocycles + 0 361 | b loop_timer0 362 | nop 363 | 364 | inc_timer0: 365 | // Increment timer 0 without overflow 366 | lbu t0, apu_timers + 0 367 | add t0, t0, t4 368 | sb t0, apu_timers + 0 369 | 370 | update_timer1: 371 | // Check if timer 1 is enabled 372 | andi t0, t5, 0x2 373 | beqz t0, early_exit 374 | move t4, t6 // Ticks left 375 | 376 | loop_timer1: 377 | // Check if timer 1 will overflow 378 | lw t0, apu_ocycles + 4 379 | bgt s3, t0, inc_timer1 380 | nop 381 | 382 | // Adjust the ticks left for after overflow 383 | lbu t1, apu_timers + 1 384 | lbu t2, apu_tmdivs + 1 385 | addi t1, t1, 1 386 | sub t1, t2, t1 387 | andi t1, t1, 0xFF 388 | addi t1, t1, 1 // Ticks until overflow 389 | sub t4, t4, t1 390 | 391 | // Overflow timer 1 392 | lbu t1, apu_tmouts + 1 393 | addi t1, t1, 1 394 | andi t1, t1, 0xF 395 | sb t1, apu_tmouts + 1 396 | sb zero, apu_timers + 1 397 | 398 | // Update the overflow timestamp for next overflow 399 | addi t1, t2, -1 400 | andi t1, t1, 0xFF 401 | addi t1, t1, 1 // Ticks until overflow 402 | li t2, APU_TIMER1 // Cycles per timer tick 403 | mult t1, t2 // Cycles until overflow 404 | mflo t2 405 | sub t0, t0, t2 406 | sw t0, apu_ocycles + 4 407 | b loop_timer1 408 | nop 409 | 410 | inc_timer1: 411 | // Increment timer 1 without overflow 412 | lbu t0, apu_timers + 1 413 | add t0, t0, t4 414 | sb t0, apu_timers + 1 415 | early_exit: 416 | jr ra 417 | nop 418 | 419 | .align 5 420 | offset_timers: // a0: cycles 421 | // Offset timer update timestamps for the next event 422 | lw t0, apu_cycle1 423 | lw t1, apu_cycle2 424 | add t0, t0, a0 425 | add t1, t1, a0 426 | sw t0, apu_cycle1 427 | sw t1, apu_cycle2 428 | 429 | // Offset timer 0's overflow timestamp if enabled 430 | la t0, apu_ocycles 431 | li t1, INT_MIN 432 | lw t2, 0(t0) 433 | beq t2, t1, offset1 434 | add t2, t2, a0 435 | sw t2, 0(t0) 436 | 437 | offset1: 438 | // Offset timer 1's overflow timestamp if enabled 439 | lw t3, 4(t0) 440 | beq t3, t1, offset2 441 | add t3, t3, a0 442 | sw t3, 4(t0) 443 | 444 | offset2: 445 | // Offset timer 2's overflow timestamp if enabled 446 | lw t4, 8(t0) 447 | beq t4, t1, offset3 448 | add t4, t4, a0 449 | sw t4, 8(t0) 450 | offset3: 451 | jr ra 452 | nop 453 | 454 | .align 5 455 | read_cpuio0: // v0: value 456 | // Read APU input communication value 0 457 | lbu v0, apu_inputs + 0 458 | jr ra 459 | nop 460 | 461 | .align 5 462 | read_cpuio1: // v0: value 463 | // Read APU input communication value 1 464 | lbu v0, apu_inputs + 1 465 | jr ra 466 | nop 467 | 468 | .align 5 469 | read_cpuio2: // v0: value 470 | // Read APU input communication value 2 471 | lbu v0, apu_inputs + 2 472 | jr ra 473 | nop 474 | 475 | .align 5 476 | read_cpuio3: // v0: value 477 | // Read APU input communication value 3 478 | lbu v0, apu_inputs + 3 479 | jr ra 480 | nop 481 | 482 | .align 5 483 | read_t0out: // v0: value 484 | // Update timers if timer 0 should overflow 485 | lw t0, apu_ocycles + 0 486 | bgt s3, t0, end_t0out 487 | move gp, ra 488 | jal update_timers 489 | nop 490 | 491 | end_t0out: 492 | // Read timer 0's overflow count and reset it 493 | lbu v0, apu_tmouts + 0 494 | sb zero, apu_tmouts + 0 495 | jr gp 496 | nop 497 | 498 | .align 5 499 | read_t1out: // v0: value 500 | // Update timers if timer 1 should overflow 501 | lw t0, apu_ocycles + 4 502 | bgt s3, t0, end_t1out 503 | move gp, ra 504 | jal update_timers 505 | nop 506 | 507 | end_t1out: 508 | // Read timer 1's overflow count and reset it 509 | lbu v0, apu_tmouts + 1 510 | sb zero, apu_tmouts + 1 511 | jr gp 512 | nop 513 | 514 | .align 5 515 | read_t2out: // v0: value 516 | // Update timers if timer 2 should overflow 517 | lw t0, apu_ocycles + 8 518 | bgt s3, t0, end_t2out 519 | move gp, ra 520 | jal update_timers 521 | nop 522 | 523 | end_t2out: 524 | // Read timer 2's overflow count and reset it 525 | lbu v0, apu_tmouts + 2 526 | sb zero, apu_tmouts + 2 527 | jr gp 528 | nop 529 | 530 | .align 5 531 | write_control: // a1: value 532 | // Update timers and write to the APU control register 533 | move gp, ra 534 | jal update_timers 535 | nop 536 | lbu t0, apu_control 537 | move ra, gp 538 | sb a1, apu_control 539 | 540 | // Update the APU ROM mapping 541 | andi t1, a1, 0x80 // ROM enabled 542 | srl t1, t1, 1 543 | sb t1, apu_map + 0x3FF 544 | lw t1, jit_tags + 0xFFC 545 | addi t1, t1, 1 546 | sw t1, jit_tags + 0xFFC 547 | 548 | // Reset timers 0-2 if bit 0-2 turns from 0 to 1 549 | xor t1, t0, a1 550 | and t1, t1, a1 551 | andi t0, t1, 0x1 552 | beqz t0, timer1 553 | nop 554 | sb zero, apu_timers + 0 555 | sb zero, apu_tmouts + 0 556 | timer1: 557 | andi t0, t1, 0x2 558 | beqz t0, timer2 559 | nop 560 | sb zero, apu_timers + 1 561 | sb zero, apu_tmouts + 1 562 | timer2: 563 | andi t0, t1, 0x4 564 | beqz t0, reset0 565 | nop 566 | sb zero, apu_timers + 2 567 | sb zero, apu_tmouts + 2 568 | 569 | reset0: 570 | // Clear the first/last 2 input latches if bit 4/5 is set 571 | andi t0, a1, 0x10 572 | beqz t0, reset1 573 | nop 574 | sh zero, apu_inputs + 0 575 | reset1: 576 | andi t0, a1, 0x20 577 | beqz t0, reset2 578 | nop 579 | sh zero, apu_inputs + 2 580 | reset2: 581 | j update_overflows 582 | 583 | .align 5 584 | write_cpuio0: // a1: value 585 | // Write APU output communication value 0 586 | sb a1, apu_outputs + 0 587 | jr ra 588 | nop 589 | 590 | .align 5 591 | write_cpuio1: // a1: value 592 | // Write APU output communication value 1 593 | sb a1, apu_outputs + 1 594 | jr ra 595 | nop 596 | 597 | .align 5 598 | write_cpuio2: // a1: value 599 | // Write APU output communication value 2 600 | sb a1, apu_outputs + 2 601 | jr ra 602 | nop 603 | 604 | .align 5 605 | write_cpuio3: // a1: value 606 | // Write APU output communication value 3 607 | sb a1, apu_outputs + 3 608 | jr ra 609 | nop 610 | 611 | .align 5 612 | write_t0div: // a1: value 613 | // Update timers and write timer 0's divider value 614 | move gp, ra 615 | jal update_timers 616 | nop 617 | sb a1, apu_tmdivs + 0 618 | j update_overflows 619 | move ra, gp 620 | 621 | .align 5 622 | write_t1div: // a1: value 623 | // Update timers and write timer 1's divider value 624 | move gp, ra 625 | jal update_timers 626 | nop 627 | sb a1, apu_tmdivs + 1 628 | j update_overflows 629 | move ra, gp 630 | 631 | .align 5 632 | write_t2div: // a1: value 633 | // Update timers and write timer 2's divider value 634 | move gp, ra 635 | jal update_timers 636 | nop 637 | sb a1, apu_tmdivs + 2 638 | j update_overflows 639 | move ra, gp 640 | 641 | .align 5 642 | read_apuio0: // v0: value 643 | // Read APU output communication value 0 644 | lbu v0, apu_outputs + 0 645 | jr ra 646 | nop 647 | 648 | .align 5 649 | read_apuio1: // v0: value 650 | // Read APU output communication value 0 651 | lbu v0, apu_outputs + 1 652 | jr ra 653 | nop 654 | 655 | .align 5 656 | read_apuio2: // v0: value 657 | // Read APU output communication value 0 658 | lbu v0, apu_outputs + 2 659 | jr ra 660 | nop 661 | 662 | .align 5 663 | read_apuio3: // v0: value 664 | // Read APU output communication value 0 665 | lbu v0, apu_outputs + 3 666 | jr ra 667 | nop 668 | 669 | .align 5 670 | write_apuio0: // a1: value 671 | // Write APU input communication value 0 672 | sb a1, apu_inputs + 0 673 | jr ra 674 | nop 675 | 676 | .align 5 677 | write_apuio1: // a1: value 678 | // Write APU input communication value 1 679 | sb a1, apu_inputs + 1 680 | jr ra 681 | nop 682 | 683 | .align 5 684 | write_apuio2: // a1: value 685 | // Write APU input communication value 2 686 | sb a1, apu_inputs + 2 687 | jr ra 688 | nop 689 | 690 | .align 5 691 | write_apuio3: // a1: value 692 | // Write APU input communication value 3 693 | sb a1, apu_inputs + 3 694 | jr ra 695 | nop 696 | -------------------------------------------------------------------------------- /src/apu_address.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl apu_imm 23 | .globl apu_dira 24 | .globl apu_dirr 25 | .globl apu_drxa 26 | .globl apu_drxr 27 | .globl apu_drya 28 | .globl apu_dryr 29 | .globl apu_brxa 30 | .globl apu_brxr 31 | .globl apu_bxpa 32 | .globl apu_bxpr 33 | .globl apu_absa 34 | .globl apu_absr 35 | .globl apu_abxa 36 | .globl apu_abxr 37 | .globl apu_abya 38 | .globl apu_abyr 39 | .globl apu_idxa 40 | .globl apu_idxr 41 | .globl apu_idya 42 | .globl apu_idyr 43 | .globl apu_drb 44 | .globl apu_dri 45 | .globl apu_dr2 46 | .globl apu_bxy 47 | 48 | .text 49 | .set noreorder 50 | 51 | .align 5 52 | apu_imm: // #nn 53 | // Get the 8-bit immediate value 54 | jal jit_read8 55 | nop 56 | 57 | // Emit code to load a value 58 | li t0, ORI(V0, ZERO, 0) 59 | or t0, t0, v0 60 | j emit_op 61 | move ra, gp 62 | 63 | .align 5 64 | apu_dira: // aa 65 | // Choose whether to emit a memory read or just provide an address 66 | b apu_dir 67 | li t8, 0 68 | apu_dirr: 69 | li t8, 1 70 | 71 | apu_dir: 72 | // Get the 8-bit immediate address 73 | jal jit_read8 74 | nop 75 | 76 | // Update the JIT register state 77 | jal load_flags 78 | nop 79 | 80 | // Emit code to load a value from a zero page address 81 | EMIT_OP SLL(A0, S1, 3) 82 | EMIT_OP ANDI(A0, A0, 0x100) 83 | beqz t8, dir_skip 84 | nop 85 | la t0, apu_read8 86 | jal emit_jal 87 | nop 88 | dir_skip: 89 | li t0, ORI(A0, A0, 0) 90 | or t0, t0, v0 91 | j emit_op 92 | move ra, gp 93 | 94 | .align 5 95 | apu_drxa: // aa+X 96 | // Choose whether to emit a memory read or just provide an address 97 | b apu_drx 98 | li t8, 0 99 | apu_drxr: 100 | li t8, 1 101 | 102 | apu_drx: 103 | // Get the 8-bit immediate address 104 | jal jit_read8 105 | nop 106 | 107 | // Update the JIT register state 108 | jal load_reg_x 109 | nop 110 | jal load_flags 111 | nop 112 | 113 | // Emit code to load a value from a zero page address plus register X 114 | EMIT_OP SLL(A0, S1, 3) 115 | EMIT_OP ANDI(A0, A0, 0x100) 116 | li t0, ADDI(T0, T9, 0) 117 | jal emit_op 118 | or t0, t0, v0 119 | EMIT_OP ANDI(T0, T0, 0xFF) 120 | beqz t8, drx_skip 121 | nop 122 | la t0, apu_read8 123 | jal emit_jal 124 | nop 125 | drx_skip: 126 | li t0, OR(A0, A0, T0) 127 | j emit_op 128 | move ra, gp 129 | 130 | .align 5 131 | apu_drya: // aa+Y 132 | // Choose whether to emit a memory read or just provide an address 133 | b apu_dry 134 | li t8, 0 135 | apu_dryr: 136 | li t8, 1 137 | 138 | apu_dry: 139 | // Get the 8-bit immediate address 140 | jal jit_read8 141 | nop 142 | 143 | // Update the JIT register state 144 | jal load_reg_y 145 | nop 146 | jal load_flags 147 | nop 148 | 149 | // Emit code to load a value from a zero page address plus register Y 150 | EMIT_OP SLL(A0, S1, 3) 151 | EMIT_OP ANDI(A0, A0, 0x100) 152 | li t0, ADDI(T0, T8, 0) 153 | jal emit_op 154 | or t0, t0, v0 155 | EMIT_OP ANDI(T0, T0, 0xFF) 156 | beqz t8, dry_skip 157 | nop 158 | la t0, apu_read8 159 | jal emit_jal 160 | nop 161 | dry_skip: 162 | li t0, OR(A0, A0, T0) 163 | j emit_op 164 | move ra, gp 165 | 166 | .align 5 167 | apu_brxa: // (X) 168 | // Choose whether to emit a memory read or just provide an address 169 | b apu_brx 170 | li t8, 0 171 | apu_brxr: 172 | li t8, 1 173 | 174 | apu_brx: 175 | // Update the JIT register state 176 | jal load_reg_x 177 | nop 178 | jal load_flags 179 | nop 180 | 181 | // Emit code to load a value from register X as a zero page address 182 | EMIT_OP SLL(A0, S1, 3) 183 | EMIT_OP ANDI(A0, A0, 0x100) 184 | EMIT_OP ANDI(T9, T9, 0xFF) 185 | beqz t8, brx_skip 186 | nop 187 | la t0, apu_read8 188 | jal emit_jal 189 | nop 190 | brx_skip: 191 | li t0, OR(A0, A0, T9) 192 | j emit_op 193 | move ra, gp 194 | 195 | .align 5 196 | apu_bxpa: // (X)+ 197 | // Choose whether to emit a memory read or just provide an address 198 | b apu_bxp 199 | li t8, 0 200 | apu_bxpr: 201 | li t8, 1 202 | 203 | apu_bxp: 204 | // Update the JIT register state 205 | jal load_reg_x 206 | ori s1, s1, FLAG_SX 207 | jal load_flags 208 | nop 209 | 210 | // Emit code to load a value from register X as a zero page address with post-increment 211 | EMIT_OP SLL(A0, S1, 3) 212 | EMIT_OP ANDI(A0, A0, 0x100) 213 | EMIT_OP ANDI(T9, T9, 0xFF) 214 | EMIT_OP OR(A0, A0, T9) 215 | beqz t8, bxp_skip 216 | nop 217 | la t0, apu_read8 218 | jal emit_jal 219 | nop 220 | bxp_skip: 221 | li t0, ADDI(T9, T9, 1) 222 | j emit_op 223 | move ra, gp 224 | 225 | .align 5 226 | apu_absa: // aaaa 227 | // Get the 16-bit immediate address 228 | jal jit_read8 229 | nop 230 | jal jit_read8 231 | move t7, v0 232 | sll t0, v0, 8 233 | or t7, t7, t0 234 | 235 | // End the JIT block early if a self-modifying write is detected 236 | sgt t0, t7, a0 237 | addi t2, t7, -2 238 | slt t1, t2, t9 239 | and t0, t0, t1 240 | beqz t0, abs_skip 241 | nop 242 | move t9, t2 243 | 244 | abs_skip: 245 | // Emit code to load an absolute address 246 | li t0, ORI(A0, ZERO, 0) 247 | or t0, t0, t7 248 | j emit_op 249 | move ra, gp 250 | 251 | .align 5 252 | apu_absr: // aaaa 253 | // Get the 16-bit immediate address 254 | jal jit_read8 255 | nop 256 | jal jit_read8 257 | move t7, v0 258 | sll t0, v0, 8 259 | or t7, t7, t0 260 | 261 | // Emit code to load a value from an absolute address 262 | la t0, apu_read8 263 | jal emit_jal 264 | nop 265 | li t0, ORI(A0, ZERO, 0) 266 | or t0, t0, t7 267 | j emit_op 268 | move ra, gp 269 | 270 | .align 5 271 | apu_abxa: // aaaa+X 272 | // Choose whether to emit a memory read or just provide an address 273 | b apu_abx 274 | li t8, 0 275 | apu_abxr: 276 | li t8, 1 277 | 278 | apu_abx: 279 | // Get the 16-bit immediate address 280 | jal jit_read8 281 | nop 282 | jal jit_read8 283 | move t7, v0 284 | sll t0, v0, 8 285 | 286 | // Update the JIT register state 287 | jal load_reg_x 288 | or t7, t7, t0 289 | 290 | // Emit code to load a value from an absolute address plus register X 291 | EMIT_OP ANDI(T9, T9, 0xFF) 292 | li t0, ADDI(A0, T9, 0) 293 | jal emit_op 294 | or t0, t0, t7 295 | beqz t8, abx_skip 296 | nop 297 | la t0, apu_read8 298 | jal emit_jal 299 | nop 300 | abx_skip: 301 | li t0, ANDI(A0, A0, 0xFFFF) 302 | j emit_op 303 | move ra, gp 304 | 305 | .align 5 306 | apu_abya: // aaaa+Y 307 | // Choose whether to emit a memory read or just provide an address 308 | b apu_aby 309 | li t8, 0 310 | apu_abyr: 311 | li t8, 1 312 | 313 | apu_aby: 314 | // Get the 16-bit immediate address 315 | jal jit_read8 316 | nop 317 | jal jit_read8 318 | move t7, v0 319 | sll t0, v0, 8 320 | 321 | // Update the JIT register state 322 | jal load_reg_y 323 | or t7, t7, t0 324 | 325 | // Emit code to load a value from an absolute address plus register Y 326 | EMIT_OP ANDI(T8, T8, 0xFF) 327 | li t0, ADDI(A0, T8, 0) 328 | jal emit_op 329 | or t0, t0, t7 330 | beqz t8, aby_skip 331 | nop 332 | la t0, apu_read8 333 | jal emit_jal 334 | nop 335 | aby_skip: 336 | li t0, ANDI(A0, A0, 0xFFFF) 337 | j emit_op 338 | move ra, gp 339 | 340 | .align 5 341 | apu_idxa: // [aa+X] 342 | // Choose whether to emit a memory read or just provide an address 343 | b apu_idx 344 | li t8, 0 345 | apu_idxr: 346 | li t8, 1 347 | 348 | apu_idx: 349 | // Get the 8-bit immediate address 350 | jal jit_read8 351 | nop 352 | 353 | // Update the JIT register state 354 | jal load_reg_x 355 | nop 356 | jal load_flags 357 | nop 358 | 359 | // Emit code to form a zero page address plus register X 360 | EMIT_OP SLL(A0, S1, 3) 361 | EMIT_OP ANDI(A0, A0, 0x100) 362 | li t0, ADDI(T0, T9, 0) 363 | jal emit_op 364 | or t0, t0, v0 365 | EMIT_OP ANDI(T0, T0, 0xFF) 366 | EMIT_OP OR(A0, A0, T0) 367 | 368 | // Emit code to load a value from a 16-bit address in the zero page 369 | la t0, apu_read8 370 | jal emit_jal 371 | nop 372 | EMIT_OP ADDI(A0, A0, 1) 373 | EMIT_OP SLL(S0, V0, 8) 374 | la t0, apu_read8 375 | jal emit_jal 376 | nop 377 | EMIT_OP ADDI(A0, A0, -1) 378 | beqz t8, idx_skip 379 | nop 380 | la t0, apu_read8 381 | jal emit_jal 382 | nop 383 | idx_skip: 384 | li t0, OR(A0, S0, V0) 385 | j emit_op 386 | move ra, gp 387 | 388 | .align 5 389 | apu_idya: // [aa]+Y 390 | // Choose whether to emit a memory read or just provide an address 391 | b apu_idy 392 | li t8, 0 393 | apu_idyr: 394 | li t8, 1 395 | 396 | apu_idy: 397 | // Get the 8-bit immediate address 398 | jal jit_read8 399 | nop 400 | 401 | // Update the JIT register state 402 | jal load_reg_y 403 | nop 404 | jal load_flags 405 | nop 406 | 407 | // Emit code to form a zero page address 408 | EMIT_OP SLL(A0, S1, 3) 409 | EMIT_OP ANDI(A0, A0, 0x100) 410 | li t0, ORI(A0, A0, 0) 411 | jal emit_op 412 | or t0, t0, v0 413 | 414 | // Emit code to load a 16-bit address in the zero page 415 | la t0, apu_read8 416 | jal emit_jal 417 | nop 418 | EMIT_OP ADDI(A0, A0, 1) 419 | EMIT_OP SLL(S0, V0, 8) 420 | la t0, apu_read8 421 | jal emit_jal 422 | nop 423 | EMIT_OP ADDI(A0, A0, -1) 424 | EMIT_OP OR(A0, S0, V0) 425 | 426 | // Emit code to load a value from an absolute address plus register Y 427 | EMIT_OP ANDI(T8, T8, 0xFF) 428 | EMIT_OP ADD(A0, A0, T8) 429 | beqz t8, idy_skip 430 | nop 431 | la t0, apu_read8 432 | jal emit_jal 433 | nop 434 | idy_skip: 435 | li t0, ANDI(A0, A0, 0xFFFF) 436 | j emit_op 437 | move ra, gp 438 | 439 | .align 5 440 | apu_drb: // aaa.b 441 | // Get the 16-bit immediate value 442 | jal jit_read8 443 | nop 444 | jal jit_read8 445 | move t7, v0 446 | sll t0, v0, 8 447 | or t7, t7, t0 448 | 449 | // Get aaa as an address and b as a bitmask 450 | andi t2, t7, 0x1FFF 451 | li t0, 1 452 | srl t7, t7, 13 453 | sll t7, t0, t7 454 | 455 | // Emit code to load a bitmask and a value from an address 456 | la t0, apu_read8 457 | jal emit_jal 458 | nop 459 | li t0, ORI(A0, ZERO, 0) 460 | jal emit_op 461 | or t0, t0, t2 462 | li t0, ORI(A1, ZERO, 0) 463 | or t0, t0, t7 464 | j emit_op 465 | move ra, gp 466 | 467 | .align 5 468 | apu_dri: // aa,#nn 469 | // Get the 8-bit immediate value and address 470 | jal jit_read8 471 | nop 472 | jal jit_read8 473 | move t7, v0 474 | 475 | // Update the JIT register state 476 | jal load_flags 477 | nop 478 | 479 | // Emit code to load a value 480 | li t0, ORI(V0, ZERO, 0) 481 | jal emit_op 482 | or t0, t0, t7 483 | 484 | // Emit code to form a zero page address 485 | EMIT_OP SLL(A0, S1, 3) 486 | EMIT_OP ANDI(A0, A0, 0x100) 487 | li t0, ORI(A0, A0, 0) 488 | or t0, t0, v0 489 | j emit_op 490 | move ra, gp 491 | 492 | .align 5 493 | apu_dr2: // aa,bb 494 | // Get the two 8-bit immediate addresses 495 | jal jit_read8 496 | nop 497 | jal jit_read8 498 | move t7, v0 499 | 500 | // Update the JIT register state 501 | jal load_flags 502 | nop 503 | 504 | // Emit code to load a value from a zero page address 505 | EMIT_OP SLL(S0, S1, 3) 506 | EMIT_OP ANDI(S0, S0, 0x100) 507 | la t0, apu_read8 508 | jal emit_jal 509 | nop 510 | li t0, ORI(A0, S0, 0) 511 | jal emit_op 512 | or t0, t0, t7 513 | 514 | // Emit code to form another zero page address 515 | li t0, ORI(A0, S0, 0) 516 | or t0, t0, v0 517 | j emit_op 518 | move ra, gp 519 | 520 | .align 5 521 | apu_bxy: // (X),(Y) 522 | // Update the JIT register state 523 | jal load_reg_x 524 | nop 525 | jal load_reg_y 526 | nop 527 | jal load_flags 528 | nop 529 | 530 | // Emit code to load a value from register Y as a zero page address 531 | EMIT_OP SLL(S0, S1, 3) 532 | EMIT_OP ANDI(S0, S0, 0x100) 533 | EMIT_OP ANDI(T8, T8, 0xFF) 534 | la t0, apu_read8 535 | jal emit_jal 536 | nop 537 | EMIT_OP OR(A0, S0, T8) 538 | 539 | // Emit code to form a zero page address with register X 540 | li t0, OR(A0, S0, T9) 541 | j emit_op 542 | move ra, gp 543 | -------------------------------------------------------------------------------- /src/apu_control.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl apu_bpl 23 | .globl apu_bmi 24 | .globl apu_bvc 25 | .globl apu_bvs 26 | .globl apu_bcc 27 | .globl apu_bcs 28 | .globl apu_bne 29 | .globl apu_beq 30 | .globl apu_bbc1 31 | .globl apu_bbs1 32 | .globl apu_cbne 33 | .globl apu_dbnzy 34 | .globl apu_dbnzm 35 | .globl apu_bra 36 | .globl apu_jmp 37 | .globl apu_jmpm 38 | .globl apu_call 39 | .globl apu_tcall1 40 | .globl apu_pcall 41 | .globl apu_ret 42 | .globl apu_ret1 43 | .globl apu_brk 44 | .globl apu_clrp 45 | .globl apu_setp 46 | .globl apu_ei 47 | .globl apu_di 48 | 49 | .text 50 | .set noreorder 51 | 52 | .align 5 53 | apu_bpl: // BPL dest 54 | // Sign-extend the offset value and add it to the PC 55 | jal jit_read8 56 | nop 57 | sll v0, v0, 24 58 | sra v0, v0, 24 59 | 60 | // Update the JIT register state 61 | jal load_flags 62 | add v0, v0, a0 63 | jal update_nz 64 | ori s1, s1, FLAG_PC 65 | 66 | // Emit code to branch if the N flag is clear 67 | EMIT_OP ANDI(T0, S1, 0x80) // N 68 | EMIT_OP BEQ(T0, ZERO, 2) 69 | li t0, ORI(S0, ZERO, 0) 70 | jal emit_op 71 | or t0, t0, v0 72 | li t0, ORI(S0, ZERO, 0) 73 | la ra, finish_block 74 | j emit_op 75 | or t0, t0, a0 76 | 77 | .align 5 78 | apu_bmi: // BMI dest 79 | // Sign-extend the offset value and add it to the PC 80 | jal jit_read8 81 | nop 82 | sll v0, v0, 24 83 | sra v0, v0, 24 84 | 85 | // Update the JIT register state 86 | jal load_flags 87 | add v0, v0, a0 88 | jal update_nz 89 | ori s1, s1, FLAG_PC 90 | 91 | // Emit code to branch if the N flag is set 92 | EMIT_OP ANDI(T0, S1, 0x80) // N 93 | EMIT_OP BEQ(T0, ZERO, 2) 94 | li t0, ORI(S0, ZERO, 0) 95 | jal emit_op 96 | or t0, t0, a0 97 | li t0, ORI(S0, ZERO, 0) 98 | la ra, finish_block 99 | j emit_op 100 | or t0, t0, v0 101 | 102 | .align 5 103 | apu_bvc: // BVC dest 104 | // Sign-extend the offset value and add it to the PC 105 | jal jit_read8 106 | nop 107 | sll v0, v0, 24 108 | sra v0, v0, 24 109 | add v0, v0, a0 110 | 111 | // Update the JIT register state 112 | jal load_flags 113 | ori s1, s1, FLAG_PC 114 | 115 | // Emit code to branch if the V flag is clear 116 | EMIT_OP ANDI(T0, S1, 0x40) // V 117 | EMIT_OP BEQ(T0, ZERO, 2) 118 | li t0, ORI(S0, ZERO, 0) 119 | jal emit_op 120 | or t0, t0, v0 121 | li t0, ORI(S0, ZERO, 0) 122 | la ra, finish_block 123 | j emit_op 124 | or t0, t0, a0 125 | 126 | .align 5 127 | apu_bvs: // BVS dest 128 | // Sign-extend the offset value and add it to the PC 129 | jal jit_read8 130 | nop 131 | sll v0, v0, 24 132 | sra v0, v0, 24 133 | add v0, v0, a0 134 | 135 | // Update the JIT register state 136 | jal load_flags 137 | ori s1, s1, FLAG_PC 138 | 139 | // Emit code to branch if the V flag is set 140 | EMIT_OP ANDI(T0, S1, 0x40) // V 141 | EMIT_OP BEQ(T0, ZERO, 2) 142 | li t0, ORI(S0, ZERO, 0) 143 | jal emit_op 144 | or t0, t0, a0 145 | li t0, ORI(S0, ZERO, 0) 146 | la ra, finish_block 147 | j emit_op 148 | or t0, t0, v0 149 | 150 | .align 5 151 | apu_bcc: // BCC dest 152 | // Sign-extend the offset value and add it to the PC 153 | jal jit_read8 154 | nop 155 | sll v0, v0, 24 156 | sra v0, v0, 24 157 | add v0, v0, a0 158 | 159 | // Update the JIT register state 160 | jal load_flags 161 | ori s1, s1, FLAG_PC 162 | 163 | // Emit code to branch if the C flag is clear 164 | EMIT_OP ANDI(T0, S1, 0x1) // C 165 | EMIT_OP BEQ(T0, ZERO, 2) 166 | li t0, ORI(S0, ZERO, 0) 167 | jal emit_op 168 | or t0, t0, v0 169 | li t0, ORI(S0, ZERO, 0) 170 | la ra, finish_block 171 | j emit_op 172 | or t0, t0, a0 173 | 174 | .align 5 175 | apu_bcs: // BCS dest 176 | // Sign-extend the offset value and add it to the PC 177 | jal jit_read8 178 | nop 179 | sll v0, v0, 24 180 | sra v0, v0, 24 181 | add v0, v0, a0 182 | 183 | // Update the JIT register state 184 | jal load_flags 185 | ori s1, s1, FLAG_PC 186 | 187 | // Emit code to branch if the C flag is set 188 | EMIT_OP ANDI(T0, S1, 0x1) // C 189 | EMIT_OP BEQ(T0, ZERO, 2) 190 | li t0, ORI(S0, ZERO, 0) 191 | jal emit_op 192 | or t0, t0, a0 193 | li t0, ORI(S0, ZERO, 0) 194 | la ra, finish_block 195 | j emit_op 196 | or t0, t0, v0 197 | 198 | .align 5 199 | apu_bne: // BNE dest 200 | // Sign-extend the offset value and add it to the PC 201 | jal jit_read8 202 | nop 203 | sll v0, v0, 24 204 | sra v0, v0, 24 205 | 206 | // Update the JIT register state 207 | jal load_flags 208 | add v0, v0, a0 209 | jal update_nz 210 | ori s1, s1, FLAG_PC 211 | 212 | // Emit code to branch if the Z flag is clear 213 | EMIT_OP ANDI(T0, S1, 0x2) // Z 214 | EMIT_OP BEQ(T0, ZERO, 2) 215 | li t0, ORI(S0, ZERO, 0) 216 | jal emit_op 217 | or t0, t0, v0 218 | li t0, ORI(S0, ZERO, 0) 219 | la ra, finish_block 220 | j emit_op 221 | or t0, t0, a0 222 | 223 | .align 5 224 | apu_beq: // BEQ dest 225 | // Sign-extend the offset value and add it to the PC 226 | jal jit_read8 227 | nop 228 | sll v0, v0, 24 229 | sra v0, v0, 24 230 | 231 | // Update the JIT register state 232 | jal load_flags 233 | add v0, v0, a0 234 | jal update_nz 235 | ori s1, s1, FLAG_PC 236 | 237 | // Emit code to branch if the Z flag is set 238 | EMIT_OP ANDI(T0, S1, 0x2) // Z 239 | EMIT_OP BEQ(T0, ZERO, 2) 240 | li t0, ORI(S0, ZERO, 0) 241 | jal emit_op 242 | or t0, t0, a0 243 | li t0, ORI(S0, ZERO, 0) 244 | la ra, finish_block 245 | j emit_op 246 | or t0, t0, v0 247 | 248 | .align 5 249 | apu_bbc1: // BBC op.b,dest 250 | // Create a mask based on the opcode number 251 | srl t0, v1, 8 252 | li t1, 1 253 | sll t2, t1, t0 254 | 255 | // Sign-extend the offset value and add it to the PC 256 | jal jit_read8 257 | ori s1, s1, FLAG_PC // Update the JIT register state 258 | sll v0, v0, 24 259 | sra v0, v0, 24 260 | add v0, v0, a0 261 | 262 | // Emit code to branch if a bit in a memory value is clear 263 | li t0, ANDI(T0, V0, 0) 264 | jal emit_op 265 | or t0, t0, t2 266 | EMIT_OP BEQ(T0, ZERO, 2) 267 | li t0, ORI(S0, ZERO, 0) 268 | jal emit_op 269 | or t0, t0, v0 270 | li t0, ORI(S0, ZERO, 0) 271 | la ra, finish_block 272 | j emit_op 273 | or t0, t0, a0 274 | 275 | .align 5 276 | apu_bbs1: // BBS op.b,dest 277 | // Create a mask based on the opcode number 278 | srl t0, v1, 8 279 | li t1, 1 280 | sll t2, t1, t0 281 | 282 | // Sign-extend the offset value and add it to the PC 283 | jal jit_read8 284 | ori s1, s1, FLAG_PC // Update the JIT register state 285 | sll v0, v0, 24 286 | sra v0, v0, 24 287 | add v0, v0, a0 288 | 289 | // Emit code to branch if a bit in a memory value is set 290 | li t0, ANDI(T0, V0, 0) 291 | jal emit_op 292 | or t0, t0, t2 293 | EMIT_OP BEQ(T0, ZERO, 2) 294 | li t0, ORI(S0, ZERO, 0) 295 | jal emit_op 296 | or t0, t0, a0 297 | li t0, ORI(S0, ZERO, 0) 298 | la ra, finish_block 299 | j emit_op 300 | or t0, t0, v0 301 | 302 | .align 5 303 | apu_cbne: // CBNE op,dest 304 | // Sign-extend the offset value and add it to the PC 305 | jal jit_read8 306 | nop 307 | sll v0, v0, 24 308 | sra v0, v0, 24 309 | add v0, v0, a0 310 | 311 | // Update the JIT register state 312 | jal load_accum 313 | ori s1, s1, FLAG_PC 314 | 315 | // Emit code to branch if the accumulator doesn't equal a memory value 316 | EMIT_OP ANDI(T7, T7, 0xFF) 317 | EMIT_OP BEQ(T7, V0, 2) 318 | li t0, ORI(S0, ZERO, 0) 319 | jal emit_op 320 | or t0, t0, a0 321 | li t0, ORI(S0, ZERO, 0) 322 | la ra, finish_block 323 | j emit_op 324 | or t0, t0, v0 325 | 326 | .align 5 327 | apu_dbnzy: // DBNZ Y,dest 328 | // Sign-extend the offset value and add it to the PC 329 | jal jit_read8 330 | nop 331 | sll v0, v0, 24 332 | sra v0, v0, 24 333 | add v0, v0, a0 334 | 335 | // Update the JIT register state 336 | jal load_reg_y 337 | ori s1, s1, FLAG_SY | FLAG_PC 338 | 339 | // Emit code to decrement register Y and branch if it's not zero 340 | EMIT_OP ANDI(T8, T8, 0xFF) 341 | EMIT_OP ADDI(T8, T8, -1) 342 | EMIT_OP BEQ(T8, ZERO, 2) 343 | li t0, ORI(S0, ZERO, 0) 344 | jal emit_op 345 | or t0, t0, a0 346 | li t0, ORI(S0, ZERO, 0) 347 | la ra, finish_block 348 | j emit_op 349 | or t0, t0, v0 350 | 351 | .align 5 352 | apu_dbnzm: // DBNZ op,dest 353 | // Sign-extend the offset value and add it to the PC 354 | jal jit_read8 355 | ori s1, s1, FLAG_PC // Update the JIT register state 356 | sll v0, v0, 24 357 | sra v0, v0, 24 358 | add v0, v0, a0 359 | 360 | // Emit code to decrement a memory value and branch if it's not zero 361 | la t0, apu_write8 362 | jal emit_jal 363 | nop 364 | EMIT_OP ADDI(A1, V0, -1) 365 | EMIT_OP BEQ(A1, ZERO, 2) 366 | li t0, ORI(S0, ZERO, 0) 367 | jal emit_op 368 | or t0, t0, a0 369 | li t0, ORI(S0, ZERO, 0) 370 | la ra, finish_block 371 | j emit_op 372 | or t0, t0, v0 373 | 374 | .align 5 375 | apu_bra: // BRA dest 376 | // Sign-extend the offset value and add it to the PC 377 | jal jit_read8 378 | ori s1, s1, FLAG_PC // Update the JIT register state 379 | sll t2, v0, 24 380 | sra t2, t2, 24 381 | add t2, t2, a0 382 | 383 | // Emit code to branch unconditionally 384 | li t0, ORI(S0, ZERO, 0) 385 | la ra, finish_block 386 | j emit_op 387 | or t0, t0, t2 388 | 389 | .align 5 390 | apu_jmp: // JMP op 391 | // Emit code to jump to an address 392 | EMIT_OP OR(S0, ZERO, A0) 393 | j finish_block 394 | ori s1, s1, FLAG_PC // Update the JIT register state 395 | 396 | .align 5 397 | apu_jmpm: // JMP [op] 398 | // Emit code to jump to an address from memory 399 | EMIT_OP OR(S0, ZERO, V0) 400 | la t0, apu_read8 401 | jal emit_jal 402 | ori s1, s1, FLAG_PC // Update the JIT register state 403 | EMIT_OP ADDI(A0, A0, 1) 404 | EMIT_OP SLL(T0, V0, 8) 405 | EMIT_OP OR(S0, S0, T0) 406 | j finish_block 407 | nop 408 | 409 | .align 5 410 | apu_call: // CALL op 411 | // Update the JIT register state 412 | jal load_stack 413 | ori s1, s1, FLAG_SS | FLAG_PC 414 | 415 | // Emit code to jump and push the old program counter to the stack 416 | EMIT_OP OR(S0, ZERO, A0) 417 | EMIT_OP ANDI(A0, S2, 0xFF) 418 | EMIT_OP ORI(A0, A0, 0x100) 419 | la t0, apu_write8 420 | jal emit_jal 421 | nop 422 | li t0, ORI(A1, ZERO, 0) 423 | srl t2, a0, 8 424 | jal emit_op 425 | or t0, t0, t2 426 | EMIT_OP ADDI(A0, A0, -1) 427 | la t0, apu_write8 428 | jal emit_jal 429 | nop 430 | li t0, ORI(A1, ZERO, 0) 431 | andi t2, a0, 0xFF 432 | jal emit_op 433 | or t0, t0, t2 434 | EMIT_OP ADDI(S2, S2, -2) 435 | j finish_block 436 | nop 437 | 438 | .align 5 439 | apu_tcall1: // TCALL n 440 | // Create an address based on the opcode number 441 | srl t0, v1, 7 442 | sll t0, t0, 1 443 | li t1, 0xFFDE 444 | sub t7, t1, t0 445 | 446 | // Update the JIT register state 447 | jal load_stack 448 | ori s1, s1, FLAG_SS | FLAG_PC 449 | 450 | // Emit code to jump to an address from memory 451 | la t0, apu_read8 452 | jal emit_jal 453 | nop 454 | li t0, ORI(A0, ZERO, 0) 455 | jal emit_op 456 | or t0, t0, t7 457 | EMIT_OP OR(S0, ZERO, V0) 458 | la t0, apu_read8 459 | jal emit_jal 460 | nop 461 | EMIT_OP ADDI(A0, A0, 1) 462 | EMIT_OP SLL(T0, V0, 8) 463 | EMIT_OP OR(S0, S0, T0) 464 | 465 | // Emit code to push the old program counter to the stack 466 | EMIT_OP ANDI(A0, S2, 0xFF) 467 | EMIT_OP ORI(A0, A0, 0x100) 468 | la t0, apu_write8 469 | jal emit_jal 470 | nop 471 | li t0, ORI(A1, ZERO, 0) 472 | srl t2, a0, 8 473 | jal emit_op 474 | or t0, t0, t2 475 | EMIT_OP ADDI(A0, A0, -1) 476 | la t0, apu_write8 477 | jal emit_jal 478 | nop 479 | li t0, ORI(A1, ZERO, 0) 480 | andi t2, a0, 0xFF 481 | jal emit_op 482 | or t0, t0, t2 483 | EMIT_OP ADDI(S2, S2, -2) 484 | j finish_block 485 | nop 486 | 487 | .align 5 488 | apu_pcall: // PCALL uu 489 | // Create an address based on the parameter 490 | jal jit_read8 491 | nop 492 | ori t2, v0, 0xFF00 493 | 494 | // Update the JIT register state 495 | jal load_stack 496 | ori s1, s1, FLAG_SS | FLAG_PC 497 | 498 | // Emit code to jump to an address in the highest page of RAM 499 | li t0, ORI(S0, ZERO, 0) 500 | jal emit_op 501 | or t0, t0, t2 502 | 503 | // Emit code to push the old program counter to the stack 504 | EMIT_OP ANDI(A0, S2, 0xFF) 505 | EMIT_OP ORI(A0, A0, 0x100) 506 | la t0, apu_write8 507 | jal emit_jal 508 | nop 509 | li t0, ORI(A1, ZERO, 0) 510 | srl t2, a0, 8 511 | jal emit_op 512 | or t0, t0, t2 513 | EMIT_OP ADDI(A0, A0, -1) 514 | la t0, apu_write8 515 | jal emit_jal 516 | nop 517 | li t0, ORI(A1, ZERO, 0) 518 | andi t2, a0, 0xFF 519 | jal emit_op 520 | or t0, t0, t2 521 | EMIT_OP ADDI(S2, S2, -2) 522 | j finish_block 523 | nop 524 | 525 | .align 5 526 | apu_ret: // RET 527 | // Update the JIT register state 528 | jal load_stack 529 | ori s1, s1, FLAG_SS | FLAG_PC 530 | 531 | // Emit code to pop the program counter from the stack 532 | EMIT_OP ADDI(A0, S2, 1) 533 | EMIT_OP ANDI(A0, A0, 0xFF) 534 | li t0, apu_read8 535 | jal emit_jal 536 | nop 537 | EMIT_OP ORI(A0, A0, 0x100) 538 | EMIT_OP OR(S0, ZERO, V0) 539 | la t0, apu_read8 540 | jal emit_jal 541 | nop 542 | EMIT_OP ADDI(A0, A0, 1) 543 | EMIT_OP SLL(T0, V0, 8) 544 | EMIT_OP OR(S0, S0, T0) 545 | EMIT_OP ADDI(S2, S2, 2) 546 | j finish_block 547 | nop 548 | 549 | .align 5 550 | apu_ret1: // RET1 551 | // Update the JIT register state 552 | jal load_stack 553 | ori s1, s1, FLAG_SS | FLAG_SF | FLAG_LF | FLAG_PC 554 | li t0, ~FLAG_NZ 555 | and s1, s1, t0 556 | 557 | // Emit code to pop the flags from the stack 558 | EMIT_OP ADDI(A0, S2, 1) 559 | EMIT_OP ANDI(A0, A0, 0xFF) 560 | la t0, apu_read8 561 | jal emit_jal 562 | nop 563 | EMIT_OP ORI(A0, A0, 0x100) 564 | EMIT_OP OR(S1, ZERO, V0) 565 | 566 | // Emit code to pop the program counter from the stack 567 | la t0, apu_read8 568 | jal emit_jal 569 | nop 570 | EMIT_OP ADDI(A0, A0, 1) 571 | EMIT_OP OR(S0, ZERO, V0) 572 | la t0, apu_read8 573 | jal emit_jal 574 | nop 575 | EMIT_OP ADDI(A0, A0, 1) 576 | EMIT_OP SLL(T0, V0, 8) 577 | EMIT_OP OR(S0, S0, T0) 578 | EMIT_OP ADDI(S2, S2, 3) 579 | j finish_block 580 | nop 581 | 582 | .align 5 583 | apu_brk: // BRK 584 | // Update the JIT register state 585 | jal load_stack 586 | ori s1, s1, FLAG_SS | FLAG_SF | FLAG_PC 587 | jal load_flags 588 | nop 589 | jal update_nz 590 | nop 591 | 592 | // Emit code to jump to an address stored at 0xFFDE 593 | la t0, apu_read8 594 | jal emit_jal 595 | nop 596 | EMIT_OP ORI(A0, ZERO, 0xFFDE) 597 | EMIT_OP OR(S0, ZERO, V0) 598 | la t0, apu_read8 599 | jal emit_jal 600 | nop 601 | EMIT_OP ADDI(A0, A0, 1) 602 | EMIT_OP SLL(T0, V0, 8) 603 | EMIT_OP OR(S0, S0, T0) 604 | 605 | // Emit code to push the old program counter to the stack 606 | EMIT_OP ANDI(A0, S2, 0xFF) 607 | EMIT_OP ORI(A0, A0, 0x100) 608 | la t0, apu_write8 609 | jal emit_jal 610 | nop 611 | li t0, ORI(A1, ZERO, 0) 612 | srl t2, a0, 8 613 | jal emit_op 614 | or t0, t0, t2 615 | EMIT_OP ADDI(A0, A0, -1) 616 | la t0, apu_write8 617 | jal emit_jal 618 | nop 619 | li t0, ORI(A1, ZERO, 0) 620 | andi t2, a0, 0xFF 621 | jal emit_op 622 | or t0, t0, t2 623 | 624 | // Emit code to push the flags to the stack and modify current ones 625 | EMIT_OP ADDI(A0, A0, -1) 626 | la t0, apu_write8 627 | jal emit_jal 628 | nop 629 | EMIT_OP OR(A1, ZERO, S1) 630 | EMIT_OP ANDI(S1, S1, 0xFB) // Clear I 631 | EMIT_OP ORI(S1, S1, 0x10) // Set B 632 | EMIT_OP ADDI(S2, S2, -3) 633 | j finish_block 634 | nop 635 | 636 | .align 5 637 | apu_clrp: // CLRP 638 | // Update the JIT register state 639 | jal load_flags 640 | ori s1, s1, FLAG_SF 641 | 642 | // Emit code to clear the P flag 643 | EMIT_OP ANDI(S1, S1, 0xDF) 644 | j finish_opcode 645 | nop 646 | 647 | 648 | .align 5 649 | apu_setp: // SETP 650 | // Update the JIT register state 651 | jal load_flags 652 | ori s1, s1, FLAG_SF 653 | 654 | // Emit code to set the P flag 655 | EMIT_OP ORI(S1, S1, 0x20) 656 | j finish_opcode 657 | nop 658 | 659 | 660 | .align 5 661 | apu_di: // DI 662 | // Update the JIT register state 663 | jal load_flags 664 | ori s1, s1, FLAG_SF 665 | 666 | // Emit code to clear the I flag 667 | EMIT_OP ANDI(S1, S1, 0xFB) 668 | j finish_opcode 669 | nop 670 | 671 | 672 | .align 5 673 | apu_ei: // EI 674 | // Update the JIT register state 675 | jal load_flags 676 | ori s1, s1, FLAG_SF 677 | 678 | // Emit code to set the I flag 679 | EMIT_OP ORI(S1, S1, 0x4) 680 | j finish_opcode 681 | nop 682 | -------------------------------------------------------------------------------- /src/apu_emitter.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl jit_tags 23 | .globl jit_lookup 24 | .globl jit_pointer 25 | 26 | .globl jit_read8 27 | .globl compile_block 28 | .globl finish_opcode 29 | .globl finish_block 30 | .globl load_reg_x 31 | .globl load_reg_y 32 | .globl load_accum 33 | .globl load_stack 34 | .globl load_flags 35 | .globl queue_nz 36 | .globl update_nz 37 | .globl emit_op 38 | .globl emit_j 39 | .globl emit_jal 40 | 41 | .data 42 | 43 | .align 4 44 | jit_tags: .word 0:0x400 45 | jit_lookup: .word 0:0x10000 46 | jit_pointer: .word JIT_BUFFER 47 | 48 | .align 4 49 | jit_opcodes: // Lookup table for addressing and operation functions 50 | .word next_opcode, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x00-0x03 51 | .word apu_dirr, apu_ora, apu_absr, apu_ora, apu_brxr, apu_ora, apu_idxr, apu_ora // 0x04-0x07 52 | .word apu_imm, apu_ora, apu_dr2, apu_orm, apu_drb, apu_or1a, apu_dirr, apu_aslm // 0x08-0x0B 53 | .word apu_absr, apu_aslm, apu_php, 0, apu_absr, apu_tset1, apu_brk, 0 // 0x0C-0x0F 54 | .word apu_bpl, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x10-0x13 55 | .word apu_drxr, apu_ora, apu_abxr, apu_ora, apu_abyr, apu_ora, apu_idyr, apu_ora // 0x14-0x17 56 | .word apu_dri, apu_orm, apu_bxy, apu_orm, apu_dirr, apu_decw, apu_drxr, apu_aslm // 0x18-0x1B 57 | .word apu_asla, 0, apu_decx, 0, apu_absr, apu_cmpx, apu_abxr, apu_jmpm // 0x1C-0x1F 58 | .word apu_clrp, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x20-0x23 59 | .word apu_dirr, apu_anda, apu_absr, apu_anda, apu_brxr, apu_anda, apu_idxr, apu_anda // 0x24-0x27 60 | .word apu_imm, apu_anda, apu_dr2, apu_andm, apu_drb, apu_or1b, apu_dirr, apu_rolm // 0x28-0x2B 61 | .word apu_absr, apu_rolm, apu_pha, 0, apu_dirr, apu_cbne, apu_bra, 0 // 0x2C-0x2F 62 | .word apu_bmi, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x30-0x33 63 | .word apu_drxr, apu_anda, apu_abxr, apu_anda, apu_abyr, apu_anda, apu_idyr, apu_anda // 0x34-0x37 64 | .word apu_dri, apu_andm, apu_bxy, apu_andm, apu_dirr, apu_incw, apu_drxr, apu_rolm // 0x38-0x3B 65 | .word apu_rola, 0, apu_incx, 0, apu_dirr, apu_cmpx, apu_absa, apu_call // 0x3C-0x3F 66 | .word apu_setp, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x40-0x43 67 | .word apu_dirr, apu_eora, apu_absr, apu_eora, apu_brxr, apu_eora, apu_idxr, apu_eora // 0x44-0x47 68 | .word apu_imm, apu_eora, apu_dr2, apu_eorm, apu_drb, apu_and1a, apu_dirr, apu_lsrm // 0x48-0x4B 69 | .word apu_absr, apu_lsrm, apu_phx, 0, apu_absr, apu_tclr1, apu_pcall, 0 // 0x4C-0x4F 70 | .word apu_bvc, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x50-0x53 71 | .word apu_drxr, apu_eora, apu_abxr, apu_eora, apu_abyr, apu_eora, apu_idyr, apu_eora // 0x54-0x57 72 | .word apu_dri, apu_eorm, apu_bxy, apu_eorm, apu_dirr, apu_cmpw, apu_drxr, apu_lsrm // 0x58-0x5B 73 | .word apu_lsra, 0, apu_movxa, 0, apu_absr, apu_cmpy, apu_absa, apu_jmp // 0x5C-0x5F 74 | .word apu_clrc, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x60-0x63 75 | .word apu_dirr, apu_cmpa, apu_absr, apu_cmpa, apu_brxr, apu_cmpa, apu_idxr, apu_cmpa // 0x64-0x67 76 | .word apu_imm, apu_cmpa, apu_dr2, apu_cmpm, apu_drb, apu_and1b, apu_dirr, apu_rorm // 0x68-0x6B 77 | .word apu_absr, apu_rorm, apu_phy, 0, apu_dirr, apu_dbnzm, apu_ret, 0 // 0x6C-0x6F 78 | .word apu_bvs, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x70-0x73 79 | .word apu_drxr, apu_cmpa, apu_abxr, apu_cmpa, apu_abyr, apu_cmpa, apu_idyr, apu_cmpa // 0x74-0x77 80 | .word apu_dri, apu_cmpm, apu_bxy, apu_cmpm, apu_dirr, apu_addw, apu_drxr, apu_rorm // 0x78-0x7B 81 | .word apu_rora, 0, apu_movax, 0, apu_dirr, apu_cmpy, apu_ret1, 0 // 0x7C-0x7F 82 | .word apu_setc, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0x80-0x83 83 | .word apu_dirr, apu_adca, apu_absr, apu_adca, apu_brxr, apu_adca, apu_idxr, apu_adca // 0x84-0x87 84 | .word apu_imm, apu_adca, apu_dr2, apu_adcm, apu_drb, apu_eor1, apu_dirr, apu_decm // 0x88-0x8B 85 | .word apu_absr, apu_decm, apu_imm, apu_movy, apu_ppp, 0, apu_dri, apu_movm // 0x8C-0x8F 86 | .word apu_bcc, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0x90-0x93 87 | .word apu_drxr, apu_adca, apu_abxr, apu_adca, apu_abyr, apu_adca, apu_idyr, apu_adca // 0x94-0x97 88 | .word apu_dri, apu_adcm, apu_bxy, apu_adcm, apu_dirr, apu_subw, apu_drxr, apu_decm // 0x98-0x9B 89 | .word apu_deca, 0, apu_movxs, 0, apu_div, 0, apu_xcn, 0 // 0x9C-0x9F 90 | .word apu_ei, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0xA0-0xA3 91 | .word apu_dirr, apu_sbca, apu_absr, apu_sbca, apu_brxr, apu_sbca, apu_idxr, apu_sbca // 0xA4-0xA7 92 | .word apu_imm, apu_sbca, apu_dr2, apu_sbcm, apu_drb, apu_mov1b, apu_dirr, apu_incm // 0xA8-0xAB 93 | .word apu_absr, apu_incm, apu_imm, apu_cmpy, apu_ppa, 0, apu_bxpa, apu_amov // 0xAC-0xAF 94 | .word apu_bcs, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0xB0-0xB3 95 | .word apu_drxr, apu_sbca, apu_abxr, apu_sbca, apu_abyr, apu_sbca, apu_idyr, apu_sbca // 0xB4-0xB7 96 | .word apu_dri, apu_sbcm, apu_bxy, apu_sbcm, apu_dirr, apu_movwya, apu_drxr, apu_incm // 0xB8-0xBB 97 | .word apu_inca, 0, apu_movsx, 0, apu_unk, 0, apu_bxpr, apu_mova // 0xBC-0xBF 98 | .word apu_di, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0xC0-0xC3 99 | .word apu_dira, apu_amov, apu_absa, apu_amov, apu_brxa, apu_amov, apu_idxa, apu_amov // 0xC4-0xC7 100 | .word apu_imm, apu_cmpx, apu_absa, apu_xmov, apu_drb, apu_mov1a, apu_dira, apu_ymov // 0xC8-0xCB 101 | .word apu_absa, apu_ymov, apu_imm, apu_movx, apu_ppx, 0, apu_mul, 0 // 0xCC-0xCF 102 | .word apu_bne, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0xD0-0xD3 103 | .word apu_drxa, apu_amov, apu_abxa, apu_amov, apu_abya, apu_amov, apu_idya, apu_amov // 0xD4-0xD7 104 | .word apu_dira, apu_xmov, apu_drya, apu_xmov, apu_dira, apu_movway, apu_drxa, apu_ymov // 0xD8-0xDB 105 | .word apu_decy, 0, apu_movay, 0, apu_drxr, apu_cbne, apu_unk, 0 // 0xDC-0xDF 106 | .word apu_clrv, 0, apu_tcall1, 0, apu_dirr, apu_set1, apu_dirr, apu_bbs1 // 0xE0-0xE3 107 | .word apu_dirr, apu_mova, apu_absr, apu_mova, apu_brxr, apu_mova, apu_idxr, apu_mova // 0xE4-0xE7 108 | .word apu_imm, apu_mova, apu_absr, apu_movx, apu_drb, apu_not1, apu_dirr, apu_movy // 0xE8-0xEB 109 | .word apu_absr, apu_movy, apu_notc, 0, apu_ppy, 0, apu_unk, 0 // 0xEC-0xEF 110 | .word apu_beq, 0, apu_tcall1, 0, apu_dirr, apu_clr1, apu_dirr, apu_bbc1 // 0xF0-0xF3 111 | .word apu_drxr, apu_mova, apu_abxr, apu_mova, apu_abyr, apu_mova, apu_idyr, apu_mova // 0xF4-0xF7 112 | .word apu_dirr, apu_movx, apu_dryr, apu_movx, apu_dr2, apu_movm, apu_drxr, apu_movy // 0xF8-0xFB 113 | .word apu_incy, 0, apu_movya, 0, apu_dbnzy, 0, apu_unk, 0 // 0xFC-0xFF 114 | 115 | .text 116 | .set noreorder 117 | 118 | .align 5 119 | jit_read8: // a0: address - v0: value 120 | // Read a byte from memory, count cycles, and increment the address 121 | lbu v0, apu_clock 122 | srl t0, a0, 6 123 | lbu t1, apu_map(t0) 124 | sub s2, s2, v0 125 | add t0, a0, t1 126 | lbu v0, apu_ram(t0) 127 | jr ra 128 | addi a0, a0, 1 129 | 130 | .align 5 131 | compile_block: 132 | // Round the JIT pointer to nearest cache line, offset by header size 133 | lw t0, jit_pointer 134 | addi t0, t0, 0x1F + 12 135 | andi t1, t0, 0x1F 136 | sub a1, t0, t1 // Pointer 137 | addi t0, a1, -12 138 | sw t0, jit_pointer 139 | 140 | // Initialize values for tracking JIT block state 141 | move s1, zero // Status 142 | move s2, zero // Cycles 143 | move a0, s0 // PC 144 | addi t9, a0, BLOCK_SIZE // Limit 145 | 146 | // Check JIT bounds and invalidate data cache for the header 147 | li a2, ROM_BUFFER // Bounds 148 | bgeu a1, a2, reset_buffer 149 | srl t1, a0, 6 150 | cache 0x11, CACHED(0)(t0) 151 | 152 | // Set the start memory block and tag in the JIT header 153 | sll t1, t1, 2 154 | sh t1, 0(t0) 155 | lw t1, jit_tags(t1) 156 | sw t1, 4(t0) 157 | 158 | next_opcode: 159 | // Read an opcode from memory and jump to its functions 160 | jal jit_read8 161 | nop 162 | sll v1, v0, 3 163 | lw t1, jit_opcodes + 0(v1) 164 | lw gp, jit_opcodes + 4(v1) 165 | jr t1 166 | nop 167 | 168 | .align 5 169 | finish_opcode: 170 | // Compile another opcode if the limit hasn't been reached 171 | blt a0, t9, next_opcode 172 | nop 173 | 174 | finish_block: 175 | // Update NZ flags at the end of a block 176 | jal update_nz 177 | nop 178 | 179 | // Emit code to store register X if enabled 180 | andi t0, s1, FLAG_SX 181 | beqz t0, skip_sx 182 | li t3, SB(T9, 0, 0) 183 | la t4, apu_reg_x 184 | jal full_address 185 | nop 186 | 187 | skip_sx: 188 | // Emit code to store register Y if enabled 189 | andi t0, s1, FLAG_SY 190 | beqz t0, skip_sy 191 | li t3, SB(T8, 0, 0) 192 | la t4, apu_reg_y 193 | jal full_address 194 | nop 195 | 196 | skip_sy: 197 | // Emit code to store the accumulator if enabled 198 | andi t0, s1, FLAG_SA 199 | beqz t0, skip_sa 200 | li t3, SB(T7, 0, 0) 201 | la t4, apu_accum 202 | jal full_address 203 | nop 204 | 205 | skip_sa: 206 | // Emit code to store the stack pointer if enabled 207 | andi t0, s1, FLAG_SS 208 | beqz t0, skip_ss 209 | li t3, SB(S2, 0, 0) 210 | la t4, apu_stack 211 | jal full_address 212 | nop 213 | 214 | skip_ss: 215 | // Emit code to store the flags if enabled 216 | andi t0, s1, FLAG_SF 217 | beqz t0, skip_sf 218 | li t3, SB(S1, 0, 0) 219 | la t4, apu_flags 220 | jal full_address 221 | nop 222 | 223 | skip_sf: 224 | // Emit code to load the program counter value unless disabled 225 | andi t0, s1, FLAG_PC 226 | bnez t0, skip_pc 227 | li t0, ORI(S0, ZERO, 0) 228 | jal emit_op 229 | or t0, t0, a0 230 | 231 | skip_pc: 232 | // Emit code to store the program counter 233 | la t4, apu_count 234 | jal full_address 235 | li t3, SH(S0, 0, 0) 236 | 237 | // Emit code to adjust APU cycle count and return to the main loop 238 | la t0, cpu_execute 239 | jal emit_j 240 | andi s2, s2, 0xFFFF 241 | li t0, ADDI(S3, S3, 0) 242 | jal emit_op 243 | or t0, t0, s2 244 | 245 | // Update the JIT pointer and use its old value for block lookup 246 | lw t0, jit_pointer 247 | sw a1, jit_pointer 248 | sll t1, s0, 2 249 | lui t2, 0x2000 250 | sub t2, t0, t2 // Cached 251 | sw t2, jit_lookup(t1) 252 | 253 | // Set the end memory block and tag in the JIT header 254 | srl t1, a0, 6 255 | sll t1, t1, 2 256 | sh t1, 2(t0) 257 | lw t1, jit_tags(t1) 258 | 259 | // Jump to the finished JIT block's code 260 | addi t2, t2, 12 261 | jr t2 262 | sw t1, 8(t0) 263 | 264 | .align 5 265 | reset_buffer: 266 | // Clear all block lookup pointers 267 | la t0, jit_lookup 268 | la t1, jit_pointer 269 | reset_loop: 270 | addi t0, t0, 4 271 | bne t0, t1, reset_loop 272 | sw zero, -4(t0) 273 | 274 | // Reset the JIT pointer and restart compilation 275 | li t0, JIT_BUFFER 276 | sw t0, jit_pointer 277 | j compile_block 278 | nop 279 | 280 | .align 5 281 | apu_unk: 282 | // Emit code to loop infinitely for unimplemented opcodes 283 | ori s1, s1, FLAG_PC 284 | li t0, ORI(S0, ZERO, 0) 285 | addi t2, a0, -1 286 | andi t2, t2, 0xFFFF 287 | la ra, finish_block 288 | j emit_op 289 | or t0, t0, t2 290 | 291 | .align 5 292 | full_address: // t3: opcode, t4: address 293 | // Adjust the address for signed offsets 294 | andi t0, t4, 0x8000 295 | sll t0, t0, 1 296 | add t4, t4, t0 297 | 298 | // Emit code for accessing a 32-bit memory address 299 | move t6, ra 300 | li t0, LUI(AT_, 0) 301 | srl t1, t4, 16 302 | jal emit_op 303 | or t0, t0, t1 304 | move t0, t3 305 | li t1, AT_ << 21 306 | andi t2, t4, 0xFFFF 307 | or t0, t1, t2 308 | move ra, t6 309 | j emit_op 310 | or t0, t0, t3 311 | 312 | .align 5 313 | load_reg_x: 314 | // Check if register X has already been loaded 315 | andi t0, s1, FLAG_LX 316 | beqz t0, do_lx 317 | ori s1, s1, FLAG_LX 318 | jr ra 319 | 320 | do_lx: 321 | // Emit code to load register X if needed 322 | li t3, LBU(T9, 0, 0) 323 | la t4, apu_reg_x 324 | j full_address 325 | nop 326 | 327 | .align 5 328 | load_reg_y: 329 | // Check if register Y has already been loaded 330 | andi t0, s1, FLAG_LY 331 | beqz t0, do_ly 332 | ori s1, s1, FLAG_LY 333 | jr ra 334 | 335 | do_ly: 336 | // Emit code to load register Y if needed 337 | li t3, LBU(T8, 0, 0) 338 | la t4, apu_reg_y 339 | j full_address 340 | nop 341 | 342 | .align 5 343 | load_accum: 344 | // Check if the accumulator has already been loaded 345 | andi t0, s1, FLAG_LA 346 | beqz t0, do_la 347 | ori s1, s1, FLAG_LA 348 | jr ra 349 | 350 | do_la: 351 | // Emit code to load the accumulator if needed 352 | li t3, LBU(T7, 0, 0) 353 | la t4, apu_accum 354 | j full_address 355 | nop 356 | 357 | .align 5 358 | load_stack: 359 | // Check if the stack pointer has already been loaded 360 | andi t0, s1, FLAG_LS 361 | beqz t0, do_ls 362 | ori s1, s1, FLAG_LS 363 | jr ra 364 | 365 | do_ls: 366 | // Emit code to load the stack pointer if needed 367 | li t3, LBU(S2, 0, 0) 368 | la t4, apu_stack 369 | j full_address 370 | nop 371 | 372 | .align 5 373 | load_flags: 374 | // Check if the flags have already been loaded 375 | andi t0, s1, FLAG_LF 376 | beqz t0, do_lf 377 | ori s1, s1, FLAG_LF 378 | jr ra 379 | 380 | do_lf: 381 | // Emit code to load the flags if needed 382 | li t3, LBU(S1, 0, 0) 383 | la t4, apu_flags 384 | j full_address 385 | nop 386 | 387 | .align 5 388 | queue_nz: // t1: value 389 | // Emit code to save a value for setting NZ flags later 390 | ori s1, s1, FLAG_NZ 391 | li t0, ANDI(A2, 0, 0xFF) 392 | sll t1, t1, 21 393 | j emit_op 394 | or t0, t0, t1 395 | 396 | .align 5 397 | update_nz: 398 | // Check if a value is queued for setting NZ flags 399 | andi t0, s1, FLAG_NZ 400 | bnez t0, do_nz 401 | move t7, ra 402 | jr ra 403 | nop 404 | 405 | do_nz: 406 | // Emit code to update NZ flags if needed 407 | xori s1, s1, FLAG_NZ 408 | jal load_flags 409 | ori s1, s1, FLAG_SF 410 | EMIT_OP ANDI(S1, S1, 0x7D) 411 | EMIT_OP SLT(T6, ZERO, A2) 412 | EMIT_OP XORI(T6, T6, 0x1) 413 | EMIT_OP SLL(T6, T6, 1) 414 | EMIT_OP OR(S1, S1, T6) 415 | EMIT_OP ANDI(T6, A2, 0x80) 416 | li t0, OR(S1, S1, T6) 417 | j emit_op 418 | move ra, t7 419 | 420 | .align 5 421 | emit_op: // t0: opcode 422 | // Write to the JIT buffer, invalidate caches, and reset on overflow 423 | sw t0, (a1) 424 | cache 0x10, CACHED(0)(a1) 425 | cache 0x11, CACHED(0)(a1) 426 | addi a1, a1, 4 427 | beq a1, a2, reset_buffer 428 | nop 429 | jr ra 430 | nop 431 | 432 | .align 5 433 | emit_j: // t0: target 434 | // Emit a "j" opcode for the JIT 435 | srl t0, t0, 2 436 | li t1, 0x3FFFFFF 437 | and t0, t0, t1 438 | lui t1, 0x0800 439 | j emit_op 440 | or t0, t0, t1 441 | 442 | .align 5 443 | emit_jal: // t0: target 444 | // Emit a "jal" opcode for the JIT 445 | srl t0, t0, 2 446 | li t1, 0x3FFFFFF 447 | and t0, t0, t1 448 | lui t1, 0x0C00 449 | j emit_op 450 | or t0, t0, t1 451 | -------------------------------------------------------------------------------- /src/apu_transfer.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl apu_movax 23 | .globl apu_movxa 24 | .globl apu_movay 25 | .globl apu_movya 26 | .globl apu_movxs 27 | .globl apu_movsx 28 | .globl apu_mova 29 | .globl apu_movx 30 | .globl apu_movy 31 | .globl apu_movwya 32 | .globl apu_movm 33 | .globl apu_amov 34 | .globl apu_xmov 35 | .globl apu_ymov 36 | .globl apu_movway 37 | .globl apu_pha 38 | .globl apu_phx 39 | .globl apu_phy 40 | .globl apu_php 41 | .globl apu_ppa 42 | .globl apu_ppx 43 | .globl apu_ppy 44 | .globl apu_ppp 45 | 46 | .text 47 | .set noreorder 48 | 49 | .align 5 50 | apu_movax: // MOV A,X 51 | // Update the JIT register state 52 | jal load_reg_x 53 | ori s1, s1, FLAG_SA | FLAG_LA 54 | 55 | // Emit code to move a value to the accumulator from register X and set flags 56 | EMIT_OP OR(T7, ZERO, T9) 57 | la ra, finish_opcode 58 | j queue_nz 59 | li t1, T7 60 | 61 | .align 5 62 | apu_movxa: // MOV X,A 63 | // Update the JIT register state 64 | jal load_accum 65 | ori s1, s1, FLAG_SX | FLAG_LX 66 | 67 | // Emit code to move a value to register X from the accumulator and set flags 68 | EMIT_OP OR(T9, ZERO, T7) 69 | la ra, finish_opcode 70 | j queue_nz 71 | li t1, T9 72 | 73 | .align 5 74 | apu_movay: // MOV A,Y 75 | // Update the JIT register state 76 | jal load_reg_y 77 | ori s1, s1, FLAG_SA | FLAG_LA 78 | 79 | // Emit code to move a value to the accumulator from register Y and set flags 80 | EMIT_OP OR(T7, ZERO, T8) 81 | la ra, finish_opcode 82 | j queue_nz 83 | li t1, T7 84 | 85 | .align 5 86 | apu_movya: // MOV Y,A 87 | // Update the JIT register state 88 | jal load_accum 89 | ori s1, s1, FLAG_SY | FLAG_LY 90 | 91 | // Emit code to move a value to register Y from the accumulator and set flags 92 | EMIT_OP OR(T8, ZERO, T7) 93 | la ra, finish_opcode 94 | j queue_nz 95 | li t1, T8 96 | 97 | .align 5 98 | apu_movxs: // MOV X,SP 99 | // Update the JIT register state 100 | jal load_stack 101 | ori s1, s1, FLAG_SX | FLAG_LX 102 | 103 | // Emit code to move a value to register X from the stack pointer and set flags 104 | EMIT_OP OR(T9, ZERO, S2) 105 | la ra, finish_opcode 106 | j queue_nz 107 | li t1, T9 108 | 109 | .align 5 110 | apu_movsx: // MOV SP,X 111 | // Update the JIT register state 112 | jal load_reg_x 113 | ori s1, s1, FLAG_SS | FLAG_LS 114 | 115 | // Emit code to move a value to the stack pointer from register X 116 | EMIT_OP OR(S2, ZERO, T9) 117 | j finish_opcode 118 | nop 119 | 120 | .align 5 121 | apu_mova: // MOV A,op 122 | // Update the JIT register state 123 | ori s1, s1, FLAG_SA | FLAG_LA 124 | 125 | // Emit code to move a value to the accumulator and set flags 126 | EMIT_OP OR(T7, ZERO, V0) 127 | la ra, finish_opcode 128 | j queue_nz 129 | li t1, T7 130 | 131 | .align 5 132 | apu_movx: // MOV X,op 133 | // Update the JIT register state 134 | ori s1, s1, FLAG_SX | FLAG_LX 135 | 136 | // Emit code to move a value to register X and set flags 137 | EMIT_OP OR(T9, ZERO, V0) 138 | la ra, finish_opcode 139 | j queue_nz 140 | li t1, T9 141 | 142 | .align 5 143 | apu_movy: // MOV Y,op 144 | // Update the JIT register state 145 | ori s1, s1, FLAG_SY | FLAG_LY 146 | 147 | // Emit code to move a value to register Y and set flags 148 | EMIT_OP OR(T8, ZERO, V0) 149 | la ra, finish_opcode 150 | j queue_nz 151 | li t1, T8 152 | 153 | .align 5 154 | apu_movwya: // MOVW YA,op 155 | // Update the JIT register state 156 | ori s1, s1, FLAG_SY | FLAG_SA | FLAG_LY | FLAG_LA 157 | 158 | // Emit code to move a 16-bit value to register YA 159 | EMIT_OP OR(T7, ZERO, V0) 160 | la t0, apu_read8 161 | jal emit_jal 162 | nop 163 | EMIT_OP ADDI(A0, A0, 1) 164 | EMIT_OP OR(T8, ZERO, V0) 165 | 166 | // Emit code to set flags for the 16-bit value 167 | EMIT_OP SLT(T0, ZERO, T7) 168 | EMIT_OP OR(T0, T0, T8) 169 | la ra, finish_opcode 170 | j queue_nz 171 | li t1, T0 172 | 173 | .align 5 174 | apu_movm: // MOV op 175 | // Emit code to move a value to a memory address 176 | la t0, apu_write8 177 | jal emit_jal 178 | nop 179 | EMIT_OP OR(A1, ZERO, V0) 180 | j finish_opcode 181 | nop 182 | 183 | .align 5 184 | apu_amov: // MOV op,A 185 | // Update the JIT register state 186 | jal load_accum 187 | nop 188 | 189 | // Emit code to move a value to memory from the accumulator 190 | la t0, apu_write8 191 | jal emit_jal 192 | nop 193 | EMIT_OP OR(A1, ZERO, T7) 194 | j finish_opcode 195 | nop 196 | 197 | .align 5 198 | apu_xmov: // MOV op,X 199 | // Update the JIT register state 200 | jal load_reg_x 201 | nop 202 | 203 | // Emit code to move a value to memory from register X 204 | la t0, apu_write8 205 | jal emit_jal 206 | nop 207 | EMIT_OP OR(A1, ZERO, T9) 208 | j finish_opcode 209 | nop 210 | 211 | .align 5 212 | apu_ymov: // MOV op,Y 213 | // Update the JIT register state 214 | jal load_reg_y 215 | nop 216 | 217 | // Emit code to move a value to memory from register Y 218 | la t0, apu_write8 219 | jal emit_jal 220 | nop 221 | EMIT_OP OR(A1, ZERO, T8) 222 | j finish_opcode 223 | nop 224 | 225 | .align 5 226 | apu_movway: // MOVW op,YA 227 | // Update the JIT register state 228 | jal load_reg_y 229 | nop 230 | jal load_accum 231 | nop 232 | 233 | // Emit code to move a 16-bit value to memory from register YA 234 | la t0, apu_write8 235 | jal emit_jal 236 | nop 237 | EMIT_OP OR(A1, ZERO, T7) 238 | EMIT_OP ADDI(A0, A0, 1) 239 | la t0, apu_write8 240 | jal emit_jal 241 | nop 242 | EMIT_OP OR(A1, ZERO, T8) 243 | j finish_opcode 244 | nop 245 | 246 | .align 5 247 | apu_pha: // PUSH A 248 | // Update the JIT register state 249 | jal load_stack 250 | ori s1, s1, FLAG_SS 251 | jal load_accum 252 | nop 253 | 254 | // Emit code to push the accumulator to the stack 255 | EMIT_OP ANDI(A0, S2, 0xFF) 256 | EMIT_OP ORI(A0, A0, 0x100) 257 | EMIT_OP OR(A1, ZERO, T7) 258 | la t0, apu_write8 259 | jal emit_jal 260 | nop 261 | EMIT_OP ADDI(S2, S2, -1) 262 | j finish_opcode 263 | nop 264 | 265 | .align 5 266 | apu_phx: // PUSH X 267 | // Update the JIT register state 268 | jal load_stack 269 | ori s1, s1, FLAG_SS 270 | jal load_reg_x 271 | nop 272 | 273 | // Emit code to push register X to the stack 274 | EMIT_OP ANDI(A0, S2, 0xFF) 275 | EMIT_OP ORI(A0, A0, 0x100) 276 | EMIT_OP OR(A1, ZERO, T9) 277 | la t0, apu_write8 278 | jal emit_jal 279 | nop 280 | EMIT_OP ADDI(S2, S2, -1) 281 | j finish_opcode 282 | nop 283 | 284 | .align 5 285 | apu_phy: // PUSH Y 286 | // Update the JIT register state 287 | jal load_stack 288 | ori s1, s1, FLAG_SS 289 | jal load_reg_y 290 | nop 291 | 292 | // Emit code to push register Y to the stack 293 | EMIT_OP ANDI(A0, S2, 0xFF) 294 | EMIT_OP ORI(A0, A0, 0x100) 295 | EMIT_OP OR(A1, ZERO, T8) 296 | la t0, apu_write8 297 | jal emit_jal 298 | nop 299 | EMIT_OP ADDI(S2, S2, -1) 300 | j finish_opcode 301 | nop 302 | 303 | .align 5 304 | apu_php: // PUSH PSW 305 | // Update the JIT register state 306 | jal load_stack 307 | ori s1, s1, FLAG_SS 308 | jal load_flags 309 | nop 310 | jal update_nz 311 | nop 312 | 313 | // Emit code to push the flags to the stack 314 | EMIT_OP ANDI(A0, S2, 0xFF) 315 | EMIT_OP ORI(A0, A0, 0x100) 316 | EMIT_OP OR(A1, ZERO, S1) 317 | la t0, apu_write8 318 | jal emit_jal 319 | nop 320 | EMIT_OP ADDI(S2, S2, -1) 321 | j finish_opcode 322 | nop 323 | 324 | .align 5 325 | apu_ppa: // POP A 326 | // Update the JIT register state 327 | jal load_stack 328 | ori s1, s1, FLAG_SS | FLAG_SA | FLAG_LA 329 | 330 | // Emit code to pop the accumulator from the stack 331 | EMIT_OP ADDI(S2, S2, 1) 332 | EMIT_OP ANDI(A0, S2, 0xFF) 333 | la t0, apu_read8 334 | jal emit_jal 335 | nop 336 | EMIT_OP ORI(A0, A0, 0x100) 337 | EMIT_OP OR(T7, ZERO, V0) 338 | j finish_opcode 339 | nop 340 | 341 | .align 5 342 | apu_ppx: // POP X 343 | // Update the JIT register state 344 | jal load_stack 345 | ori s1, s1, FLAG_SS | FLAG_SX | FLAG_LX 346 | 347 | // Emit code to pop register X from the stack 348 | EMIT_OP ADDI(S2, S2, 1) 349 | EMIT_OP ANDI(A0, S2, 0xFF) 350 | la t0, apu_read8 351 | jal emit_jal 352 | nop 353 | EMIT_OP ORI(A0, A0, 0x100) 354 | EMIT_OP OR(T9, ZERO, V0) 355 | j finish_opcode 356 | nop 357 | 358 | .align 5 359 | apu_ppy: // POP Y 360 | // Update the JIT register state 361 | jal load_stack 362 | ori s1, s1, FLAG_SS | FLAG_SY | FLAG_LY 363 | 364 | // Emit code to pop the accumulator from the stack 365 | EMIT_OP ADDI(S2, S2, 1) 366 | EMIT_OP ANDI(A0, S2, 0xFF) 367 | la t0, apu_read8 368 | jal emit_jal 369 | nop 370 | EMIT_OP ORI(A0, A0, 0x100) 371 | EMIT_OP OR(T8, ZERO, V0) 372 | j finish_opcode 373 | nop 374 | 375 | .align 5 376 | apu_ppp: // POP PSW 377 | // Update the JIT register state 378 | jal load_stack 379 | ori s1, s1, FLAG_SS | FLAG_SF | FLAG_LF 380 | li t0, ~FLAG_NZ 381 | and s1, s1, t0 382 | 383 | // Emit code to pop the flags from the stack 384 | EMIT_OP ADDI(S2, S2, 1) 385 | EMIT_OP ANDI(A0, S2, 0xFF) 386 | la t0, apu_read8 387 | jal emit_jal 388 | nop 389 | EMIT_OP ORI(A0, A0, 0x100) 390 | EMIT_OP OR(S1, ZERO, V0) 391 | j finish_opcode 392 | nop 393 | -------------------------------------------------------------------------------- /src/cpu_address.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl cpu_im8 23 | .globl cpu_i16 24 | .globl cpu_dir 25 | .globl cpu_drx 26 | .globl cpu_dry 27 | .globl cpu_drs 28 | .globl cpu_abs 29 | .globl cpu_abx 30 | .globl cpu_aby 31 | .globl cpu_lng 32 | .globl cpu_lnx 33 | .globl cpu_ind 34 | .globl cpu_idx 35 | .globl cpu_idy 36 | .globl cpu_isy 37 | .globl cpu_idl 38 | .globl cpu_ily 39 | 40 | .text 41 | .set noreorder 42 | 43 | .align 5 44 | cpu_im8: // #nn 45 | // Get the address of the 8-bit immediate value 46 | addi s0, s7, 1 47 | jr gp 48 | addi s7, s7, 2 49 | 50 | cpu_i16: // #nnnn 51 | // Get the address of the 16-bit immediate value 52 | addi s0, s7, 1 53 | jr gp 54 | addi s7, s7, 3 55 | 56 | .align 5 57 | cpu_dir: // nn 58 | // Get the 8-bit immediate value added to the direct offset as an address 59 | MEM_READ8 1(s7) 60 | add s0, s8, v0 61 | andi s0, s0, 0xFFFF 62 | jr gp 63 | addi s7, s7, 2 64 | 65 | .align 5 66 | cpu_drx: // nn,X 67 | // Get the 8-bit immediate value plus register X added to the direct offset as an address 68 | MEM_READ8 1(s7) 69 | lhu t0, register_x 70 | add t0, t0, v0 71 | add s0, s8, t0 72 | andi s0, s0, 0xFFFF 73 | jr gp 74 | addi s7, s7, 2 75 | 76 | .align 5 77 | cpu_dry: // nn,Y 78 | // Get the 8-bit immediate value plus register Y added to the direct offset as an address 79 | MEM_READ8 1(s7) 80 | lhu t0, register_y 81 | add t0, t0, v0 82 | add s0, s8, t0 83 | andi s0, s0, 0xFFFF 84 | jr gp 85 | addi s7, s7, 2 86 | 87 | .align 5 88 | cpu_drs: // nn,S 89 | // Get the 8-bit immediate value added to the stack pointer as an address 90 | MEM_READ8 1(s7) 91 | lhu s0, stack_ptr 92 | add s0, s0, v0 93 | jr gp 94 | addi s7, s7, 2 95 | 96 | .align 5 97 | cpu_abs: // nnnn 98 | // Get the 16-bit immediate value added to the data bank as an address 99 | MEM_READ16 1(s7) 100 | lw s0, data_bank 101 | add s0, s0, v0 102 | jr gp 103 | addi s7, s7, 3 104 | 105 | .align 5 106 | cpu_abx: // nnnn,X 107 | // Get the 16-bit immediate value plus register X added to the data bank as an address 108 | MEM_READ16 1(s7) 109 | lhu t0, register_x 110 | add t0, t0, v0 111 | lw s0, data_bank 112 | add s0, s0, t0 113 | jr gp 114 | addi s7, s7, 3 115 | 116 | .align 5 117 | cpu_aby: // nnnn,Y 118 | // Get the 16-bit immediate value plus register Y added to the data bank as an address 119 | MEM_READ16 1(s7) 120 | lhu t0, register_y 121 | add t0, t0, v0 122 | lw s0, data_bank 123 | add s0, s0, t0 124 | jr gp 125 | addi s7, s7, 3 126 | 127 | .align 5 128 | cpu_lng: // nnnnnn 129 | // Get the 24-bit immediate value as an address 130 | MEM_READ8 3(s7) 131 | sll s0, v0, 16 132 | MEM_READ16 1(s7) 133 | or s0, s0, v0 134 | jr gp 135 | addi s7, s7, 4 136 | 137 | .align 5 138 | cpu_lnx: // nnnnnn,X 139 | // Get the 24-bit immediate value plus register X as an address 140 | MEM_READ8 3(s7) 141 | sll s0, v0, 16 142 | MEM_READ16 1(s7) 143 | lhu t0, register_x 144 | add t0, t0, v0 145 | add s0, s0, t0 146 | jr gp 147 | addi s7, s7, 4 148 | 149 | .align 5 150 | cpu_ind: // (nn) 151 | // Get the 8-bit immediate value added to the direct offset as an address 152 | MEM_READ8 1(s7) 153 | add s0, s8, v0 154 | andi s0, s0, 0xFFFF 155 | 156 | // Get a 16-bit value from memory added to the data bank as an address 157 | MEM_READ16 158 | lw s0, data_bank 159 | add s0, s0, v0 160 | jr gp 161 | addi s7, s7, 2 162 | 163 | .align 5 164 | cpu_idx: // (nn,X) 165 | // Get the 8-bit immediate value plus register X added to the direct offset as an address 166 | MEM_READ8 1(s7) 167 | lhu t0, register_x 168 | add t0, t0, v0 169 | add s0, s8, t0 170 | andi s0, s0, 0xFFFF 171 | 172 | // Get a 16-bit value from memory added to the data bank as an address 173 | MEM_READ16 174 | lw s0, data_bank 175 | add s0, s0, v0 176 | jr gp 177 | addi s7, s7, 2 178 | 179 | .align 5 180 | cpu_idy: // (nn),Y 181 | // Get the 8-bit immediate value added to the direct offset as an address 182 | MEM_READ8 1(s7) 183 | add s0, s8, v0 184 | andi s0, s0, 0xFFFF 185 | 186 | // Get a 16-bit value from memory plus register Y added to the data bank as an address 187 | MEM_READ16 188 | lw s0, data_bank 189 | lhu t0, register_y 190 | add s0, s0, v0 191 | add s0, s0, t0 192 | jr gp 193 | addi s7, s7, 2 194 | 195 | .align 5 196 | cpu_isy: // (nn,S),Y 197 | // Get the 8-bit immediate value added to the stack pointer as an address 198 | MEM_READ8 1(s7) 199 | lhu s0, stack_ptr 200 | add s0, s0, v0 201 | 202 | // Get a 16-bit value from memory plus register Y added to the data bank as an address 203 | MEM_READ16 204 | lw s0, data_bank 205 | lhu t0, register_y 206 | add s0, s0, v0 207 | add s0, s0, t0 208 | jr gp 209 | addi s7, s7, 2 210 | 211 | .align 5 212 | cpu_idl: // [nn] 213 | // Get the 8-bit immediate value added to the direct offset as an address 214 | MEM_READ8 1(s7) 215 | add s0, s8, v0 216 | andi s0, s0, 0xFFFF 217 | 218 | // Read a 24-bit value from memory as an address 219 | MEM_READ8 2(s0) 220 | sll s1, v0, 16 221 | MEM_READ16 222 | or s0, s1, v0 223 | jr gp 224 | addi s7, s7, 2 225 | 226 | .align 5 227 | cpu_ily: // [nn],Y 228 | // Get the 8-bit immediate value added to the direct offset as an address 229 | MEM_READ8 1(s7) 230 | add s0, s8, v0 231 | andi s0, s0, 0xFFFF 232 | 233 | // Read a 24-bit value from memory plus register Y as an address 234 | MEM_READ8 2(s0) 235 | sll s1, v0, 16 236 | MEM_READ16 237 | lhu t0, register_y 238 | add t0, t0, v0 239 | add s0, s1, t0 240 | jr gp 241 | addi s7, s7, 2 242 | -------------------------------------------------------------------------------- /src/cpu_control.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl cpu_bra 23 | .globl cpu_brl 24 | .globl cpu_jmp 25 | .globl cpu_jml 26 | .globl cpu_jmpi 27 | .globl cpu_jmpx 28 | .globl cpu_jmli 29 | .globl cpu_jsr 30 | .globl cpu_jsle 31 | .globl cpu_jsl 32 | .globl cpu_jsrx 33 | .globl cpu_rtie 34 | .globl cpu_rti 35 | .globl cpu_rtle 36 | .globl cpu_rtl 37 | .globl cpu_rts 38 | .globl cpu_bpl 39 | .globl cpu_bmi 40 | .globl cpu_bvc 41 | .globl cpu_bvs 42 | .globl cpu_bcc 43 | .globl cpu_bcs 44 | .globl cpu_bne 45 | .globl cpu_beq 46 | .globl cpu_brke 47 | .globl cpu_brk 48 | .globl cpu_cope 49 | .globl cpu_cop 50 | .globl cpu_clc 51 | .globl cpu_cli 52 | .globl cpu_cld 53 | .globl cpu_clv 54 | .globl cpu_sec 55 | .globl cpu_sei 56 | .globl cpu_sed 57 | .globl cpu_rep 58 | .globl cpu_sep 59 | .globl cpu_xce 60 | .globl cpu_stp 61 | .globl cpu_xba 62 | .globl cpu_wai 63 | .globl cpu_wdm 64 | .globl cpu_nop 65 | 66 | .text 67 | .set noreorder 68 | 69 | .align 5 70 | cpu_bra: // BRA disp8 71 | // Add the signed 8-bit immediate value to the PC 72 | MEM_READ8 1(s7), lb 73 | addi s7, s7, 2 74 | add t0, s7, v0 75 | andi t0, t0, 0xFFFF 76 | andi t1, s7, 0xFFFF 77 | sub s7, s7, t1 78 | j cpu_execute 79 | add s7, s7, t0 80 | 81 | .align 5 82 | cpu_brl: // BRL disp16 83 | // Add the signed 16-bit immediate value to the PC 84 | MEM_READ16 1(s7) 85 | addi s7, s7, 3 86 | add t0, s7, v0 87 | andi t0, t0, 0xFFFF 88 | andi t1, s7, 0xFFFF 89 | sub s7, s7, t1 90 | j cpu_execute 91 | add s7, s7, t0 92 | 93 | .align 5 94 | cpu_jmp: // JMP nnnn 95 | // Set the PC to the 16-bit immediate value 96 | MEM_READ16 1(s7) 97 | lui t0, 0xFFFF 98 | and s7, s7, t0 99 | j cpu_execute 100 | add s7, s7, v0 101 | 102 | .align 5 103 | cpu_jml: // JML nnnnnn 104 | // Set the PC and bank to the 24-bit immediate value 105 | MEM_READ16 1(s7) 106 | move s0, v0 107 | MEM_READ8 3(s7) 108 | sll t0, v0, 16 109 | j cpu_execute 110 | or s7, s0, t0 111 | 112 | .align 5 113 | cpu_jmpi: // JMP (nnnn) 114 | // Set the PC to the value at the 16-bit immediate address 115 | MEM_READ16 1(s7) 116 | move s0, v0 117 | MEM_READ16 118 | lui t0, 0xFFFF 119 | and s7, s7, t0 120 | j cpu_execute 121 | add s7, s7, v0 122 | 123 | .align 5 124 | cpu_jmpx: // JMP (nnnn,X) 125 | // Get the 16-bit immediate value plus register X added to the program bank as an address 126 | MEM_READ16 1(s7) 127 | lhu t0, register_x 128 | add t0, t0, v0 129 | lui s1, 0xFFFF 130 | and t1, s7, s1 131 | add s0, t0, t1 132 | 133 | // Set the PC to the memory value at the address 134 | MEM_READ16 135 | and s7, s7, s1 136 | j cpu_execute 137 | add s7, s7, v0 138 | 139 | .align 5 140 | cpu_jmli: // JML [nnnn] 141 | // Set the PC and bank to the value at the 16-bit immediate address 142 | MEM_READ16 1(s7) 143 | move s0, v0 144 | MEM_READ16 145 | move s7, v0 146 | MEM_READ8 2(s0) 147 | sll t0, v0, 16 148 | j cpu_execute 149 | or s7, s7, t0 150 | 151 | .align 5 152 | cpu_jsr: // JSR nnnn 153 | // Update the stack pointer 154 | lhu s0, stack_ptr 155 | addi s0, s0, -2 156 | sh s0, stack_ptr 157 | 158 | // Push the PC+2 to the stack 159 | addi a1, s7, 2 160 | MEM_WRITE16 1(s0) 161 | 162 | // Set the PC to the 16-bit immediate value 163 | MEM_READ16 1(s7) 164 | lui t0, 0xFFFF 165 | and s7, s7, t0 166 | j cpu_execute 167 | add s7, s7, v0 168 | 169 | .align 5 170 | cpu_jsle: // JSL nnnnnn (emulation mode) 171 | // Update the stack pointer 172 | lhu s0, stack_ptr 173 | addi s0, s0, -2 174 | sh s0, stack_ptr 175 | 176 | // Push the PC+3 to the stack 177 | addi a1, s7, 3 178 | MEM_WRITE16 1(s0) 179 | 180 | // Set the PC and bank to the 24-bit immediate value 181 | MEM_READ16 1(s7) 182 | move s0, v0 183 | MEM_READ8 3(s7) 184 | sll t0, v0, 16 185 | j cpu_execute 186 | or s7, s0, t0 187 | 188 | .align 5 189 | cpu_jsl: // JSL nnnnnn 190 | // Update the stack pointer 191 | lhu s0, stack_ptr 192 | addi s0, s0, -3 193 | sh s0, stack_ptr 194 | 195 | // Push the PC+3 and bank to the stack 196 | srl a1, s7, 16 197 | MEM_WRITE8 3(s0) 198 | addi a1, s7, 3 199 | MEM_WRITE16 1(s0) 200 | 201 | // Set the PC and bank to the 24-bit immediate value 202 | MEM_READ16 1(s7) 203 | move s0, v0 204 | MEM_READ8 3(s7) 205 | sll t0, v0, 16 206 | j cpu_execute 207 | or s7, s0, t0 208 | 209 | .align 5 210 | cpu_jsrx: // JSR (nnnn,X) 211 | // Update the stack pointer 212 | lhu s0, stack_ptr 213 | addi s0, s0, -2 214 | sh s0, stack_ptr 215 | 216 | // Push the PC+2 to the stack 217 | addi a1, s7, 2 218 | MEM_WRITE16 1(s0) 219 | 220 | // Get the 16-bit immediate value plus register X added to the program bank as an address 221 | MEM_READ16 1(s7) 222 | lhu t0, register_x 223 | add t0, t0, v0 224 | lui s1, 0xFFFF 225 | and t1, s7, s1 226 | add s0, t0, t1 227 | 228 | // Set the PC to the memory value at the address 229 | MEM_READ16 230 | and s7, s7, s1 231 | j cpu_execute 232 | add s7, s7, v0 233 | 234 | .align 5 235 | cpu_rtie: // RTI (emulation mode) 236 | // Pop the flags from the stack 237 | la s1, stack_ptr 238 | lhu s0, (s1) 239 | MEM_READ8 1(s0) 240 | andi t0, v0, 0xFFEF // Clear B 241 | andi s4, s4, 0xFF00 242 | or s4, s4, t0 243 | 244 | // Pop the PC from the stack 245 | MEM_READ16 2(s0) 246 | move s7, v0 247 | 248 | // Update the stack pointer 249 | addi t0, s0, 3 250 | j cpu_execute 251 | sh t0, (s1) 252 | 253 | .align 5 254 | cpu_rti: // RTI 255 | // Pop the flags from the stack 256 | la s1, stack_ptr 257 | lhu s0, (s1) 258 | MEM_READ8 1(s0) 259 | andi s4, s4, 0xFF00 260 | or s4, s4, v0 261 | 262 | // Pop the PC and bank from the stack 263 | MEM_READ16 2(s0) 264 | move s7, v0 265 | MEM_READ8 4(s0) 266 | sll t0, v0, 16 267 | add s7, s7, t0 268 | 269 | // Update the stack pointer 270 | addi t0, s0, 4 271 | j update_mode 272 | sh t0, (s1) 273 | 274 | .align 5 275 | cpu_rtle: // RTL (emulation mode) 276 | // Pop the PC+1 from the stack 277 | la s1, stack_ptr 278 | lhu s0, (s1) 279 | MEM_READ16 1(s0) 280 | addi s7, v0, 1 281 | 282 | // Update the stack pointer 283 | addi t0, s0, 2 284 | j cpu_execute 285 | sh t0, (s1) 286 | 287 | .align 5 288 | cpu_rtl: // RTL 289 | // Pop the PC+1 and bank from the stack 290 | la s1, stack_ptr 291 | lhu s0, (s1) 292 | MEM_READ16 1(s0) 293 | addi s7, v0, 1 294 | MEM_READ8 3(s0) 295 | sll t0, v0, 16 296 | add s7, s7, t0 297 | 298 | // Update the stack pointer 299 | addi t0, s0, 3 300 | j cpu_execute 301 | sh t0, (s1) 302 | 303 | .align 5 304 | cpu_rts: // RTS 305 | // Pop the PC+1 from the stack 306 | la s1, stack_ptr 307 | lhu s0, (s1) 308 | MEM_READ16 1(s0) 309 | addi t0, v0, 1 310 | lui t1, 0xFFFF 311 | and s7, s7, t1 312 | add s7, s7, t0 313 | 314 | // Update the stack pointer 315 | addi t0, s0, 2 316 | j cpu_execute 317 | sh t0, (s1) 318 | 319 | .align 5 320 | cpu_bpl: // BPL disp8 321 | // Check the condition 322 | andi t0, s4, 0x80 // N 323 | MEM_READ8 1(s7), lb 324 | bnez t0, cpu_execute 325 | addi s7, s7, 2 326 | 327 | // Add the signed 8-bit immediate value to the PC 328 | add t0, s7, v0 329 | andi t0, t0, 0xFFFF 330 | andi t1, s7, 0xFFFF 331 | sub s7, s7, t1 332 | j cpu_execute 333 | add s7, s7, t0 334 | 335 | .align 5 336 | cpu_bmi: // BMI disp8 337 | // Check the condition 338 | andi t0, s4, 0x80 // N 339 | MEM_READ8 1(s7), lb 340 | beqz t0, cpu_execute 341 | addi s7, s7, 2 342 | 343 | // Add the signed 8-bit immediate value to the PC 344 | add t0, s7, v0 345 | andi t0, t0, 0xFFFF 346 | andi t1, s7, 0xFFFF 347 | sub s7, s7, t1 348 | j cpu_execute 349 | add s7, s7, t0 350 | 351 | .align 5 352 | cpu_bvc: // BVC disp8 353 | // Check the condition 354 | andi t0, s4, 0x40 // V 355 | MEM_READ8 1(s7), lb 356 | bnez t0, cpu_execute 357 | addi s7, s7, 2 358 | 359 | // Add the signed 8-bit immediate value to the PC 360 | add t0, s7, v0 361 | andi t0, t0, 0xFFFF 362 | andi t1, s7, 0xFFFF 363 | sub s7, s7, t1 364 | j cpu_execute 365 | add s7, s7, t0 366 | 367 | .align 5 368 | cpu_bvs: // BVS disp8 369 | // Check the condition 370 | andi t0, s4, 0x40 // V 371 | MEM_READ8 1(s7), lb 372 | beqz t0, cpu_execute 373 | addi s7, s7, 2 374 | 375 | // Add the signed 8-bit immediate value to the PC 376 | add t0, s7, v0 377 | andi t0, t0, 0xFFFF 378 | andi t1, s7, 0xFFFF 379 | sub s7, s7, t1 380 | j cpu_execute 381 | add s7, s7, t0 382 | 383 | .align 5 384 | cpu_bcc: // BCC disp8 385 | // Check the condition 386 | andi t0, s4, 0x1 // C 387 | MEM_READ8 1(s7), lb 388 | bnez t0, cpu_execute 389 | addi s7, s7, 2 390 | 391 | // Add the signed 8-bit immediate value to the PC 392 | add t0, s7, v0 393 | andi t0, t0, 0xFFFF 394 | andi t1, s7, 0xFFFF 395 | sub s7, s7, t1 396 | j cpu_execute 397 | add s7, s7, t0 398 | 399 | .align 5 400 | cpu_bcs: // BCS disp8 401 | // Check the condition 402 | andi t0, s4, 0x1 // C 403 | MEM_READ8 1(s7), lb 404 | beqz t0, cpu_execute 405 | addi s7, s7, 2 406 | 407 | // Add the signed 8-bit immediate value to the PC 408 | add t0, s7, v0 409 | andi t0, t0, 0xFFFF 410 | andi t1, s7, 0xFFFF 411 | sub s7, s7, t1 412 | j cpu_execute 413 | add s7, s7, t0 414 | 415 | .align 5 416 | cpu_bne: // BNE disp8 417 | // Check the condition 418 | andi t0, s4, 0x2 // Z 419 | MEM_READ8 1(s7), lb 420 | bnez t0, cpu_execute 421 | addi s7, s7, 2 422 | 423 | // Add the signed 8-bit immediate value to the PC 424 | add t0, s7, v0 425 | andi t0, t0, 0xFFFF 426 | andi t1, s7, 0xFFFF 427 | sub s7, s7, t1 428 | j cpu_execute 429 | add s7, s7, t0 430 | 431 | .align 5 432 | cpu_beq: // BEQ disp8 433 | // Check the condition 434 | andi t0, s4, 0x2 // Z 435 | MEM_READ8 1(s7), lb 436 | beqz t0, cpu_execute 437 | addi s7, s7, 2 438 | 439 | // Add the signed 8-bit immediate value to the PC 440 | add t0, s7, v0 441 | andi t0, t0, 0xFFFF 442 | andi t1, s7, 0xFFFF 443 | sub s7, s7, t1 444 | add s7, s7, t0 445 | 446 | // Check for a basic type of idle loop 447 | addi t0, v0, 4 // Offset 448 | bnez t0, cpu_execute 449 | li t0, 0x58 // CLI 450 | lbu v0, 4(s7) 451 | bne v0, t0, cpu_execute 452 | li t0, 0xA5 // LDA 453 | lbu v0, 0(s7) 454 | bne v0, t0, cpu_execute 455 | nop 456 | 457 | // Halt the CPU until the next interrupt 458 | ori s4, s4, 0x200 // Halted 459 | j cpu_execute 460 | li s5, 0 461 | 462 | .align 5 463 | cpu_brke: // BRK (emulation mode) 464 | // Update the stack pointer 465 | lhu s0, stack_ptr 466 | addi s0, s0, -3 467 | sh s0, stack_ptr 468 | 469 | // Push the PC+2 to the stack 470 | addi a1, s7, 2 471 | MEM_WRITE16 2(s0) 472 | 473 | // Push the flags to the stack and modify them 474 | ori a1, s4, 0x10 // Set B 475 | MEM_WRITE8 1(s0) 476 | ori s4, s4, 0x4 // Set I 477 | andi s4, s4, 0xFFF7 // Clear D 478 | 479 | // Jump to the BRK vector (emulation mode) 480 | li s0, 0xFFFE 481 | MEM_READ16 482 | j cpu_execute 483 | move s7, v0 484 | 485 | .align 5 486 | cpu_brk: // BRK 487 | // Update the stack pointer 488 | lhu s0, stack_ptr 489 | addi s0, s0, -4 490 | sh s0, stack_ptr 491 | 492 | // Push the PC+2 and bank to the stack 493 | srl a1, s7, 16 494 | MEM_WRITE8 4(s0) 495 | addi a1, s7, 2 496 | MEM_WRITE16 2(s0) 497 | 498 | // Push the flags to the stack and modify them 499 | move a1, s4 500 | MEM_WRITE8 1(s0) 501 | ori s4, s4, 0x4 // Set I 502 | andi s4, s4, 0xFFF7 // Clear D 503 | 504 | // Jump to the BRK vector 505 | li s0, 0xFFE6 506 | MEM_READ16 507 | j cpu_execute 508 | move s7, v0 509 | 510 | .align 5 511 | cpu_cope: // COP (emulation mode) 512 | // Update the stack pointer 513 | lhu s0, stack_ptr 514 | addi s0, s0, -3 515 | sh s0, stack_ptr 516 | 517 | // Push the PC+2 to the stack 518 | addi a1, s7, 2 519 | MEM_WRITE16 2(s0) 520 | 521 | // Push the flags to the stack and modify them 522 | ori a1, s4, 0x10 // Set B 523 | MEM_WRITE8 1(s0) 524 | ori s4, s4, 0x4 // Set I 525 | andi s4, s4, 0xFFF7 // Clear D 526 | 527 | // Jump to the COP vector (emulation mode) 528 | li s0, 0xFFF4 529 | MEM_READ16 530 | j cpu_execute 531 | move s7, v0 532 | 533 | .align 5 534 | cpu_cop: // COP 535 | // Update the stack pointer 536 | lhu s0, stack_ptr 537 | addi s0, s0, -4 538 | sh s0, stack_ptr 539 | 540 | // Push the PC+2 and bank to the stack 541 | srl a1, s7, 16 542 | MEM_WRITE8 4(s0) 543 | addi a1, s7, 2 544 | MEM_WRITE16 2(s0) 545 | 546 | // Push the flags to the stack and modify them 547 | move a1, s4 548 | MEM_WRITE8 1(s0) 549 | ori s4, s4, 0x4 // Set I 550 | andi s4, s4, 0xFFF7 // Clear D 551 | 552 | // Jump to the COP vector 553 | li s0, 0xFFE4 554 | MEM_READ16 555 | j cpu_execute 556 | move s7, v0 557 | 558 | .align 5 559 | cpu_clc: // CLC 560 | // Clear the carry flag 561 | andi s4, s4, 0xFFFE 562 | j cpu_execute 563 | addi s7, s7, 1 564 | 565 | .align 5 566 | cpu_cli: // CLI 567 | // Clear the interrupt flag 568 | andi s4, s4, 0xFFFB 569 | j cpu_execute 570 | addi s7, s7, 1 571 | 572 | .align 5 573 | cpu_cld: // CLD 574 | // Clear the decimal flag 575 | andi s4, s4, 0xFFF7 576 | j update_mode 577 | addi s7, s7, 1 578 | 579 | .align 5 580 | cpu_clv: // CLV 581 | // Clear the overflow flag 582 | andi s4, s4, 0xFFBF 583 | j cpu_execute 584 | addi s7, s7, 1 585 | 586 | .align 5 587 | cpu_sec: // SEC 588 | // Set the carry flag 589 | ori s4, s4, 0x1 590 | j cpu_execute 591 | addi s7, s7, 1 592 | 593 | .align 5 594 | cpu_sei: // SEI 595 | // Set the interrupt flag 596 | ori s4, s4, 0x4 597 | j cpu_execute 598 | addi s7, s7, 1 599 | 600 | .align 5 601 | cpu_sed: // SED 602 | // Set the decimal flag 603 | ori s4, s4, 0x8 604 | j update_mode 605 | addi s7, s7, 1 606 | 607 | .align 5 608 | cpu_rep: // REP #nn 609 | // Clear flags from the 8-bit immediate value 610 | MEM_READ8 1(s7) 611 | not v0, v0 612 | and s4, s4, v0 613 | j update_mode 614 | addi s7, s7, 2 615 | 616 | .align 5 617 | cpu_sep: // SEP #nn 618 | // Set flags from the 8-bit immediate value 619 | MEM_READ8 1(s7) 620 | or s4, s4, v0 621 | j update_mode 622 | addi s7, s7, 2 623 | 624 | .align 5 625 | cpu_xce: // XCE 626 | // Swap the carry and emulation flags 627 | sll t0, s4, 8 628 | srl t1, s4, 8 629 | or t0, t0, t1 630 | andi t0, t0, 0x101 631 | andi s4, s4, 0xFEFE 632 | or s4, s4, t0 633 | j update_mode 634 | addi s7, s7, 1 635 | 636 | .align 5 637 | cpu_stp: // STP 638 | // Stop execution 639 | j cpu_stp 640 | nop 641 | 642 | .align 5 643 | cpu_xba: // XBA 644 | // Swap the accumulator bytes and set flags for the low byte 645 | lhu t0, accumulator 646 | sll t1, t0, 8 647 | srl t0, t0, 8 648 | or t0, t0, t1 649 | sh t0, accumulator 650 | andi a0, t0, 0xFF 651 | j set_nz8 652 | addi s7, s7, 1 653 | 654 | .align 5 655 | cpu_wai: // WAI 656 | // Halt the CPU until the next interrupt 657 | ori s4, s4, 0x200 // Halted 658 | li s5, 0 659 | j cpu_execute 660 | addi s7, s7, 1 661 | 662 | .align 5 663 | cpu_wdm: // WDM #nn 664 | // Do nothing 665 | j cpu_execute 666 | addi s7, s7, 2 667 | 668 | .align 5 669 | cpu_nop: // NOP 670 | // Do nothing 671 | j cpu_execute 672 | addi s7, s7, 1 673 | -------------------------------------------------------------------------------- /src/cpu_transfer.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl cpu_tay8 23 | .globl cpu_tay16 24 | .globl cpu_tax8 25 | .globl cpu_tax16 26 | .globl cpu_tsx8 27 | .globl cpu_tsx16 28 | .globl cpu_tya8 29 | .globl cpu_tya16 30 | .globl cpu_txa8 31 | .globl cpu_txa16 32 | .globl cpu_txse 33 | .globl cpu_txs 34 | .globl cpu_txy8 35 | .globl cpu_txy16 36 | .globl cpu_tyx8 37 | .globl cpu_tyx16 38 | .globl cpu_tdc 39 | .globl cpu_tcd 40 | .globl cpu_tsc 41 | .globl cpu_tcse 42 | .globl cpu_tcs 43 | .globl cpu_lda8 44 | .globl cpu_lda16 45 | .globl cpu_ldx8 46 | .globl cpu_ldx16 47 | .globl cpu_ldy8 48 | .globl cpu_ldy16 49 | .globl cpu_stz8 50 | .globl cpu_stz16 51 | .globl cpu_sta8 52 | .globl cpu_sta16 53 | .globl cpu_stx8 54 | .globl cpu_stx16 55 | .globl cpu_sty8 56 | .globl cpu_sty16 57 | .globl cpu_pha8 58 | .globl cpu_pha16 59 | .globl cpu_phx8 60 | .globl cpu_phx16 61 | .globl cpu_phy8 62 | .globl cpu_phy16 63 | .globl cpu_php 64 | .globl cpu_phb 65 | .globl cpu_phk 66 | .globl cpu_phd 67 | .globl cpu_pei 68 | .globl cpu_pea 69 | .globl cpu_per 70 | .globl cpu_pla8 71 | .globl cpu_pla16 72 | .globl cpu_plx8 73 | .globl cpu_plx16 74 | .globl cpu_ply8 75 | .globl cpu_ply16 76 | .globl cpu_pld 77 | .globl cpu_plb 78 | .globl cpu_plp 79 | .globl cpu_mvp8 80 | .globl cpu_mvp16 81 | .globl cpu_mvn8 82 | .globl cpu_mvn16 83 | 84 | .text 85 | .set noreorder 86 | 87 | .align 5 88 | cpu_tay8: // TAY (8-bit) 89 | // Transfer the accumulator to register Y and set flags (8-bit) 90 | lbu a0, accumulator + 1 91 | sb a0, register_y + 1 92 | j set_nz8 93 | addi s7, s7, 1 94 | 95 | .align 5 96 | cpu_tay16: // TAY (16-bit) 97 | // Transfer the accumulator to register Y and set flags (16-bit) 98 | lhu a0, accumulator 99 | sh a0, register_y 100 | j set_nz16 101 | addi s7, s7, 1 102 | 103 | .align 5 104 | cpu_tax8: // TAX (8-bit) 105 | // Transfer the accumulator to register X and set flags (8-bit) 106 | lbu a0, accumulator + 1 107 | sb a0, register_x + 1 108 | j set_nz8 109 | addi s7, s7, 1 110 | 111 | .align 5 112 | cpu_tax16: // TAX (16-bit) 113 | // Transfer the accumulator to register X and set flags (16-bit) 114 | lhu a0, accumulator 115 | sh a0, register_x 116 | j set_nz16 117 | addi s7, s7, 1 118 | 119 | .align 5 120 | cpu_tsx8: // TSX (8-bit) 121 | // Transfer the stack pointer to register X and set flags (8-bit) 122 | lbu a0, stack_ptr + 1 123 | sb a0, register_x + 1 124 | j set_nz8 125 | addi s7, s7, 1 126 | 127 | .align 5 128 | cpu_tsx16: // TSX (16-bit) 129 | // Transfer the stack pointer to register X and set flags (16-bit) 130 | lhu a0, stack_ptr 131 | sh a0, register_x 132 | j set_nz16 133 | addi s7, s7, 1 134 | 135 | .align 5 136 | cpu_tya8: // TYA (8-bit) 137 | // Transfer register Y to the accumulator and set flags (8-bit) 138 | lbu a0, register_y + 1 139 | sb a0, accumulator + 1 140 | j set_nz8 141 | addi s7, s7, 1 142 | 143 | .align 5 144 | cpu_tya16: // TYA (16-bit) 145 | // Transfer register Y to the accumulator and set flags (16-bit) 146 | lhu a0, register_y 147 | sh a0, accumulator 148 | j set_nz16 149 | addi s7, s7, 1 150 | 151 | .align 5 152 | cpu_txa8: // TXA (8-bit) 153 | // Transfer register X to the accumulator and set flags (8-bit) 154 | lbu a0, register_x + 1 155 | sb a0, accumulator + 1 156 | j set_nz8 157 | addi s7, s7, 1 158 | 159 | .align 5 160 | cpu_txa16: // TXA (16-bit) 161 | // Transfer register X to the accumulator and set flags (16-bit) 162 | lhu a0, register_x 163 | sh a0, accumulator 164 | j set_nz16 165 | addi s7, s7, 1 166 | 167 | .align 5 168 | cpu_txse: // TXS (emulation mode) 169 | // Transfer register X to the stack pointer (emulation mode) 170 | lbu a0, register_x + 1 171 | sb a0, stack_ptr + 1 172 | j cpu_execute 173 | addi s7, s7, 1 174 | 175 | .align 5 176 | cpu_txs: // TXS 177 | // Transfer register X to the stack pointer 178 | lhu a0, register_x 179 | sh a0, stack_ptr 180 | j cpu_execute 181 | addi s7, s7, 1 182 | 183 | .align 5 184 | cpu_txy8: // TXY (8-bit) 185 | // Transfer register X to register Y and set flags (8-bit) 186 | lbu a0, register_x + 1 187 | sb a0, register_y + 1 188 | j set_nz8 189 | addi s7, s7, 1 190 | 191 | .align 5 192 | cpu_txy16: // TXY (16-bit) 193 | // Transfer register X to register Y and set flags (16-bit) 194 | lhu a0, register_x 195 | sh a0, register_y 196 | j set_nz16 197 | addi s7, s7, 1 198 | 199 | .align 5 200 | cpu_tyx8: // TYX (8-bit) 201 | // Transfer register Y to register X and set flags (8-bit) 202 | lbu a0, register_y + 1 203 | sb a0, register_x + 1 204 | j set_nz8 205 | addi s7, s7, 1 206 | 207 | .align 5 208 | cpu_tyx16: // TYX (16-bit) 209 | // Transfer register Y to register X and set flags (16-bit) 210 | lhu a0, register_y 211 | sh a0, register_x 212 | j set_nz16 213 | addi s7, s7, 1 214 | 215 | .align 5 216 | cpu_tdc: // TDC 217 | // Transfer the direct offset to the accumulator and set flags 218 | move a0, s8 219 | sh a0, accumulator 220 | j set_nz16 221 | addi s7, s7, 1 222 | 223 | .align 5 224 | cpu_tcd: // TCD 225 | // Transfer the accumulator to the direct offset and set flags 226 | lhu a0, accumulator 227 | move s8, a0 228 | j set_nz16 229 | addi s7, s7, 1 230 | 231 | .align 5 232 | cpu_tsc: // TSC 233 | // Transfer the stack pointer to the accumulator and set flags 234 | lhu a0, stack_ptr 235 | sh a0, accumulator 236 | j set_nz16 237 | addi s7, s7, 1 238 | 239 | .align 5 240 | cpu_tcse: // TCS (emulation mode) 241 | // Transfer the accumulator to the stack pointer (emulation mode) 242 | lbu a0, accumulator + 1 243 | sb a0, stack_ptr + 1 244 | j cpu_execute 245 | addi s7, s7, 1 246 | 247 | .align 5 248 | cpu_tcs: // TCS 249 | // Transfer the accumulator to the stack pointer 250 | lhu a0, accumulator 251 | sh a0, stack_ptr 252 | j cpu_execute 253 | addi s7, s7, 1 254 | 255 | .align 5 256 | cpu_lda8: // LDA op (8-bit) 257 | // Load a value to the accumulator and set flags (8-bit) 258 | MEM_READ8 259 | sb v0, accumulator + 1 260 | j set_nz8 261 | move a0, v0 262 | 263 | .align 5 264 | cpu_lda16: // LDA op (16-bit) 265 | // Load a value to the accumulator and set flags (16-bit) 266 | MEM_READ16 267 | sh v0, accumulator 268 | j set_nz16 269 | move a0, v0 270 | 271 | .align 5 272 | cpu_ldx8: // LDX op (8-bit) 273 | // Load a value to register X and set flags (8-bit) 274 | MEM_READ8 275 | sb v0, register_x + 1 276 | j set_nz8 277 | move a0, v0 278 | 279 | .align 5 280 | cpu_ldx16: // LDX op (16-bit) 281 | // Load a value to register X and set flags (16-bit) 282 | MEM_READ16 283 | sh v0, register_x 284 | j set_nz16 285 | move a0, v0 286 | 287 | .align 5 288 | cpu_ldy8: // LDY op (8-bit) 289 | // Load a value to register Y and set flags (8-bit) 290 | MEM_READ8 291 | sb v0, register_y + 1 292 | j set_nz8 293 | move a0, v0 294 | 295 | .align 5 296 | cpu_ldy16: // LDY op (16-bit) 297 | // Load a value to register Y and set flags (16-bit) 298 | MEM_READ16 299 | sh v0, register_y 300 | j set_nz16 301 | move a0, v0 302 | 303 | .align 5 304 | cpu_stz8: // STZ op (8-bit) 305 | // Store zero to memory (8-bit) 306 | li a1, 0 307 | MEM_WRITE8 308 | j cpu_execute 309 | nop 310 | 311 | .align 5 312 | cpu_stz16: // STZ op (16-bit) 313 | // Store zero to memory (16-bit) 314 | li a1, 0 315 | MEM_WRITE16 316 | j cpu_execute 317 | nop 318 | 319 | .align 5 320 | cpu_sta8: // STA op (8-bit) 321 | // Store the accumulator to memory (8-bit) 322 | lbu a1, accumulator + 1 323 | MEM_WRITE8 324 | j cpu_execute 325 | nop 326 | 327 | .align 5 328 | cpu_sta16: // STA op (16-bit) 329 | // Store the accumulator to memory (16-bit) 330 | lhu a1, accumulator 331 | MEM_WRITE16 332 | j cpu_execute 333 | nop 334 | 335 | .align 5 336 | cpu_stx8: // STX op (8-bit) 337 | // Store register X to memory (8-bit) 338 | lbu a1, register_x + 1 339 | MEM_WRITE8 340 | j cpu_execute 341 | nop 342 | 343 | .align 5 344 | cpu_stx16: // STX op (16-bit) 345 | // Store register X to memory (16-bit) 346 | lhu a1, register_x 347 | MEM_WRITE16 348 | j cpu_execute 349 | nop 350 | 351 | .align 5 352 | cpu_sty8: // STY op (8-bit) 353 | // Store register Y to memory (8-bit) 354 | lbu a1, register_y + 1 355 | MEM_WRITE8 356 | j cpu_execute 357 | nop 358 | 359 | .align 5 360 | cpu_sty16: // STY op (16-bit) 361 | // Store register Y to memory (16-bit) 362 | lhu a1, register_y 363 | MEM_WRITE16 364 | j cpu_execute 365 | nop 366 | 367 | .align 5 368 | cpu_pha8: // PHA (8-bit) 369 | // Push the accumulator to the stack (8-bit) 370 | lhu s0, stack_ptr 371 | lbu a1, accumulator + 1 372 | addi s0, s0, -1 373 | MEM_WRITE8 1(s0) 374 | sh s0, stack_ptr 375 | j cpu_execute 376 | addi s7, s7, 1 377 | 378 | .align 5 379 | cpu_pha16: // PHA (16-bit) 380 | // Push the accumulator to the stack (16-bit) 381 | lhu s0, stack_ptr 382 | lhu a1, accumulator 383 | addi s0, s0, -2 384 | MEM_WRITE16 1(s0) 385 | sh s0, stack_ptr 386 | j cpu_execute 387 | addi s7, s7, 1 388 | 389 | .align 5 390 | cpu_phx8: // PHX (8-bit) 391 | // Push register X to the stack (8-bit) 392 | lhu s0, stack_ptr 393 | lbu a1, register_x + 1 394 | addi s0, s0, -1 395 | MEM_WRITE8 1(s0) 396 | sh s0, stack_ptr 397 | j cpu_execute 398 | addi s7, s7, 1 399 | 400 | .align 5 401 | cpu_phx16: // PHX (16-bit) 402 | // Push register X to the stack (16-bit) 403 | lhu s0, stack_ptr 404 | lhu a1, register_x 405 | addi s0, s0, -2 406 | MEM_WRITE16 1(s0) 407 | sh s0, stack_ptr 408 | j cpu_execute 409 | addi s7, s7, 1 410 | 411 | .align 5 412 | cpu_phy8: // PHY (8-bit) 413 | // Push register Y to the stack (8-bit) 414 | lhu s0, stack_ptr 415 | lbu a1, register_y + 1 416 | addi s0, s0, -1 417 | MEM_WRITE8 1(s0) 418 | sh s0, stack_ptr 419 | j cpu_execute 420 | addi s7, s7, 1 421 | 422 | .align 5 423 | cpu_phy16: // PHY (16-bit) 424 | // Push register Y to the stack (16-bit) 425 | lhu s0, stack_ptr 426 | lhu a1, register_y 427 | addi s0, s0, -2 428 | MEM_WRITE16 1(s0) 429 | sh s0, stack_ptr 430 | j cpu_execute 431 | addi s7, s7, 1 432 | 433 | .align 5 434 | cpu_php: // PHP 435 | // Push the flags to the stack 436 | lhu s0, stack_ptr 437 | andi a1, s4, 0xFF 438 | addi s0, s0, -1 439 | MEM_WRITE8 1(s0) 440 | sh s0, stack_ptr 441 | j cpu_execute 442 | addi s7, s7, 1 443 | 444 | .align 5 445 | cpu_phb: // PHB 446 | // Push the data bank to the stack 447 | lhu s0, stack_ptr 448 | lbu a1, data_bank + 1 449 | addi s0, s0, -1 450 | MEM_WRITE8 1(s0) 451 | sh s0, stack_ptr 452 | j cpu_execute 453 | addi s7, s7, 1 454 | 455 | .align 5 456 | cpu_phk: // PHK 457 | // Push the PC bank to the stack 458 | lhu s0, stack_ptr 459 | srl a1, s7, 16 460 | addi s0, s0, -1 461 | MEM_WRITE8 1(s0) 462 | sh s0, stack_ptr 463 | j cpu_execute 464 | addi s7, s7, 1 465 | 466 | .align 5 467 | cpu_phd: // PHD 468 | // Push the direct offset to the stack 469 | lhu s0, stack_ptr 470 | move a1, s8 471 | addi s0, s0, -2 472 | MEM_WRITE16 1(s0) 473 | sh s0, stack_ptr 474 | j cpu_execute 475 | addi s7, s7, 1 476 | 477 | .align 5 478 | cpu_pei: // PEI nn 479 | // Get the 8-bit immediate value added to the direct offset as an address 480 | MEM_READ8 1(s7) 481 | add s0, s8, v0 482 | 483 | // Load a 16-bit value from memory and push it to the stack 484 | MEM_READ16 485 | lhu s0, stack_ptr 486 | move a1, v0 487 | addi s0, s0, -2 488 | MEM_WRITE16 1(s0) 489 | sh s0, stack_ptr 490 | j cpu_execute 491 | addi s7, s7, 2 492 | 493 | .align 5 494 | cpu_pea: // PEA #nnnn 495 | // Push a 16-bit immediate value to the stack 496 | MEM_READ16 1(s7) 497 | lhu s0, stack_ptr 498 | move a1, v0 499 | addi s0, s0, -2 500 | MEM_WRITE16 1(s0) 501 | sh s0, stack_ptr 502 | j cpu_execute 503 | addi s7, s7, 3 504 | 505 | .align 5 506 | cpu_per: // PER disp16 507 | // Push the PC plus a 16-bit immediate value to the stack 508 | MEM_READ16 1(s7) 509 | addi s7, s7, 3 510 | add a1, s7, v0 511 | la s1, stack_ptr 512 | lhu s0, (s1) 513 | addi s0, s0, -2 514 | MEM_WRITE16 1(s0) 515 | j cpu_execute 516 | sh s0, (s1) 517 | 518 | .align 5 519 | cpu_pla8: // PLA (8-bit) 520 | // Pop the accumulator from the stack and set flags (8-bit) 521 | lhu s0, stack_ptr 522 | MEM_READ8 1(s0) 523 | addi t0, s0, 1 524 | sh t0, stack_ptr 525 | move a0, v0 526 | sb a0, accumulator + 1 527 | j set_nz8 528 | addi s7, s7, 1 529 | 530 | .align 5 531 | cpu_pla16: // PLA (16-bit) 532 | // Pop the accumulator from the stack and set flags (16-bit) 533 | lhu s0, stack_ptr 534 | MEM_READ16 1(s0) 535 | addi t0, s0, 2 536 | sh t0, stack_ptr 537 | move a0, v0 538 | sh a0, accumulator 539 | j set_nz16 540 | addi s7, s7, 1 541 | 542 | .align 5 543 | cpu_plx8: // PLX (8-bit) 544 | // Pop register X from the stack and set flags (8-bit) 545 | lhu s0, stack_ptr 546 | MEM_READ8 1(s0) 547 | addi t0, s0, 1 548 | sh t0, stack_ptr 549 | move a0, v0 550 | sb a0, register_x + 1 551 | j set_nz8 552 | addi s7, s7, 1 553 | 554 | .align 5 555 | cpu_plx16: // PLX (16-bit) 556 | // Pop register X from the stack and set flags (16-bit) 557 | lhu s0, stack_ptr 558 | MEM_READ16 1(s0) 559 | addi t0, s0, 2 560 | sh t0, stack_ptr 561 | move a0, v0 562 | sh a0, register_x 563 | j set_nz16 564 | addi s7, s7, 1 565 | 566 | .align 5 567 | cpu_ply8: // PLY (8-bit) 568 | // Pop register Y from the stack and set flags (8-bit) 569 | lhu s0, stack_ptr 570 | MEM_READ8 1(s0) 571 | addi t0, s0, 1 572 | sh t0, stack_ptr 573 | move a0, v0 574 | sb a0, register_y + 1 575 | j set_nz8 576 | addi s7, s7, 1 577 | 578 | .align 5 579 | cpu_ply16: // PLY (16-bit) 580 | // Pop register Y from the stack and set flags (16-bit) 581 | lhu s0, stack_ptr 582 | MEM_READ16 1(s0) 583 | addi t0, s0, 2 584 | sh t0, stack_ptr 585 | move a0, v0 586 | sh a0, register_y 587 | j set_nz16 588 | addi s7, s7, 1 589 | 590 | .align 5 591 | cpu_pld: // PLD 592 | // Pop the direct offset from the stack and set flags 593 | lhu s0, stack_ptr 594 | MEM_READ16 1(s0) 595 | addi t0, s0, 2 596 | sh t0, stack_ptr 597 | move a0, v0 598 | move s8, a0 599 | j set_nz16 600 | addi s7, s7, 1 601 | 602 | .align 5 603 | cpu_plb: // PLB 604 | // Pop the data bank from the stack and set flags 605 | lhu s0, stack_ptr 606 | MEM_READ8 1(s0) 607 | addi t0, s0, 1 608 | sh t0, stack_ptr 609 | move a0, v0 610 | sb a0, data_bank + 1 611 | j set_nz8 612 | addi s7, s7, 1 613 | 614 | .align 5 615 | cpu_plp: // PLP 616 | // Pop the flags from the stack 617 | lhu s0, stack_ptr 618 | MEM_READ8 1(s0) 619 | addi t0, s0, 1 620 | sh t0, stack_ptr 621 | andi s4, s4, 0xFF00 622 | or s4, s4, v0 623 | j update_mode 624 | addi s7, s7, 1 625 | 626 | .align 5 627 | cpu_mvp8: // MVP ss,dd (8-bit) 628 | // Set the data bank and add register Y to get the destination address (8-bit) 629 | MEM_READ8 1(s7) 630 | sb v0, data_bank + 1 631 | sll s0, v0, 16 632 | lbu t0, register_y + 1 633 | add s0, s0, t0 634 | 635 | // Add register Y to the source bank to get the source address (8-bit) 636 | MEM_READ8 2(s7) 637 | sll s1, v0, 16 638 | lbu t0, register_x + 1 639 | add s1, s1, t0 640 | 641 | // Transfer an 8-bit value from the source to the destination 642 | MEM_READ8 0(s1) 643 | move a1, v0 644 | MEM_WRITE8 645 | 646 | // Decrement the registers (8-bit) 647 | addi t0, s1, -1 648 | sb t0, register_x + 1 649 | addi t0, s0, -1 650 | sb t0, register_y + 1 651 | 652 | // Decrement the accumulator and don't increment the PC until it underflows 653 | la t2, accumulator 654 | lhu t0, (t2) 655 | addi t1, t0, -1 656 | bnez t0, cpu_execute 657 | sh t1, (t2) 658 | j cpu_execute 659 | addi s7, s7, 3 660 | 661 | .align 5 662 | cpu_mvp16: // MVP ss,dd (16-bit) 663 | // Set the data bank and add register Y to get the destination address (16-bit) 664 | MEM_READ8 1(s7) 665 | sb v0, data_bank + 1 666 | sll s0, v0, 16 667 | lhu t0, register_y 668 | add s0, s0, t0 669 | 670 | // Add register Y to the source bank to get the source address (16-bit) 671 | MEM_READ8 2(s7) 672 | sll s1, v0, 16 673 | lhu t0, register_x 674 | add s1, s1, t0 675 | 676 | // Transfer an 8-bit value from the source to the destination 677 | MEM_READ8 0(s1) 678 | move a1, v0 679 | MEM_WRITE8 680 | 681 | // Decrement the registers (16-bit) 682 | addi t0, s1, -1 683 | sh t0, register_x 684 | addi t0, s0, -1 685 | sh t0, register_y 686 | 687 | // Decrement the accumulator and don't increment the PC until it underflows 688 | la t2, accumulator 689 | lhu t0, (t2) 690 | addi t1, t0, -1 691 | bnez t0, cpu_execute 692 | sh t1, (t2) 693 | j cpu_execute 694 | addi s7, s7, 3 695 | 696 | .align 5 697 | cpu_mvn8: // MVN ss,dd (8-bit) 698 | // Set the data bank and add register Y to get the destination address (8-bit) 699 | MEM_READ8 1(s7) 700 | sb v0, data_bank + 1 701 | sll s0, v0, 16 702 | lbu t0, register_y + 1 703 | add s0, s0, t0 704 | 705 | // Add register Y to the source bank to get the source address (8-bit) 706 | MEM_READ8 2(s7) 707 | sll s1, v0, 16 708 | lbu t0, register_x + 1 709 | add s1, s1, t0 710 | 711 | // Transfer an 8-bit value from the source to the destination 712 | MEM_READ8 0(s1) 713 | move a1, v0 714 | MEM_WRITE8 715 | 716 | // Increment the registers (8-bit) 717 | addi t0, s1, 1 718 | sb t0, register_x + 1 719 | addi t0, s0, 1 720 | sb t0, register_y + 1 721 | 722 | // Decrement the accumulator and don't increment the PC until it underflows 723 | la t2, accumulator 724 | lhu t0, (t2) 725 | addi t1, t0, -1 726 | bnez t0, cpu_execute 727 | sh t1, (t2) 728 | j cpu_execute 729 | addi s7, s7, 3 730 | 731 | .align 5 732 | cpu_mvn16: // MVN ss,dd (16-bit) 733 | // Set the data bank and add register Y to get the destination address (16-bit) 734 | MEM_READ8 1(s7) 735 | sb v0, data_bank + 1 736 | sll s0, v0, 16 737 | lhu t0, register_y 738 | add s0, s0, t0 739 | 740 | // Add register Y to the source bank to get the source address (16-bit) 741 | MEM_READ8 2(s7) 742 | sll s1, v0, 16 743 | lhu t0, register_x 744 | add s1, s1, t0 745 | 746 | // Transfer an 8-bit value from the source to the destination 747 | MEM_READ8 0(s1) 748 | move a1, v0 749 | MEM_WRITE8 750 | 751 | // Increment the registers (16-bit) 752 | addi t0, s1, 1 753 | sh t0, register_x 754 | addi t0, s0, 1 755 | sh t0, register_y 756 | 757 | // Decrement the accumulator and don't increment the PC until it underflows 758 | la t2, accumulator 759 | lhu t0, (t2) 760 | addi t1, t0, -1 761 | bnez t0, cpu_execute 762 | sh t1, (t2) 763 | j cpu_execute 764 | addi s7, s7, 3 765 | -------------------------------------------------------------------------------- /src/defines.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | // Master cycle counts for CPU memory accesses 23 | #define RAM_CYCLE 8 24 | #define IO_CYCLE 6 25 | 26 | // Approximate master cycle counts for APU components 27 | #define APU_CYCLE 21 // 21477000Hz / 1024000Hz = 20.973632812 28 | #define APU_TIMER1 2688 // 21477000Hz / 8000Hz = 2684.625 29 | #define APU_TIMER2 336 // 21477000Hz / 64000Hz = 335.578125 30 | 31 | // Approximate master cycle count for DSP samples 32 | #define DSP_SAMPLE 672 // 21477000Hz / 32000Hz = 671.15625 33 | 34 | // Maximum opcode bytes that can be compiled in an APU JIT block 35 | #define BLOCK_SIZE 16 36 | 37 | // The frame section data structure's size in bytes 38 | #define SECTION_SIZE 0x40 39 | 40 | // Flags for tracking JIT block state 41 | #define FLAG_SX (1 << 0) 42 | #define FLAG_SY (1 << 1) 43 | #define FLAG_SA (1 << 2) 44 | #define FLAG_SS (1 << 3) 45 | #define FLAG_SF (1 << 4) 46 | #define FLAG_LX (1 << 5) 47 | #define FLAG_LY (1 << 6) 48 | #define FLAG_LA (1 << 7) 49 | #define FLAG_LS (1 << 8) 50 | #define FLAG_LF (1 << 9) 51 | #define FLAG_PC (1 << 10) 52 | #define FLAG_NZ (1 << 11) 53 | 54 | // Register values for emitting JIT code 55 | #define ZERO 0 56 | #define AT_ 1 57 | #define V0 2 58 | #define V1 3 59 | #define A0 4 60 | #define A1 5 61 | #define A2 6 62 | #define A3 7 63 | #define T0 8 64 | #define T1 9 65 | #define T2 10 66 | #define T3 11 67 | #define T4 12 68 | #define T5 13 69 | #define T6 14 70 | #define T7 15 71 | #define S0 16 72 | #define S1 17 73 | #define S2 18 74 | #define S3 19 75 | #define S4 20 76 | #define S5 21 77 | #define S6 22 78 | #define S7 23 79 | #define T8 24 80 | #define T9 25 81 | #define K0 26 82 | #define K1 27 83 | #define GP 28 84 | #define SP 29 85 | #define S8 30 86 | #define RA 31 87 | 88 | // Opcode macros for emitting JIT code 89 | #define ADD(rd, rs, rt) (0x00000020 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16)) 90 | #define ADDI(rt, rs, imm) (0x20000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF)) 91 | #define AND(rd, rs, rt) (0x00000024 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16)) 92 | #define ANDI(rt, rs, imm) (0x30000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF)) 93 | #define BEQ(rt, rs, ofs) (0x10000000 | ((rt) << 16) | ((rs) << 21) | ((ofs) & 0xFFFF)) 94 | #define DIV(rs, rt) (0x0000001A | ((rs) << 21) | ((rt) << 16)) 95 | #define LBU(rt, rb, ofs) (0x90000000 | ((rt) << 16) | ((rb) << 21) | ((ofs) & 0xFFFF)) 96 | #define LUI(rt, imm) (0x3C000000 | ((rt) << 16) | ((imm) & 0xFFFF)) 97 | #define MFHI(rd) (0x00000010 | ((rd) << 11)) 98 | #define MFLO(rd) (0x00000012 | ((rd) << 11)) 99 | #define MULT(rs, rt) (0x00000018 | ((rs) << 21) | ((rt) << 16)) 100 | #define OR(rd, rs, rt) (0x00000025 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16)) 101 | #define ORI(rt, rs, imm) (0x34000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF)) 102 | #define SB(rt, rb, ofs) (0xA0000000 | ((rt) << 16) | ((rb) << 21) | ((ofs) & 0xFFFF)) 103 | #define SH(rt, rb, ofs) (0xA4000000 | ((rt) << 16) | ((rb) << 21) | ((ofs) & 0xFFFF)) 104 | #define SLL(rd, rt, sa) (0x00000000 | ((rd) << 11) | ((rt) << 16) | ((sa) << 6)) 105 | #define SLT(rd, rs, rt) (0x0000002A | ((rd) << 11) | ((rs) << 21) | ((rt) << 16)) 106 | #define SLTU(rd, rs, rt) (0x0000002B | ((rd) << 11) | ((rs) << 21) | ((rt) << 16)) 107 | #define SRL(rd, rt, sa) (0x00000002 | ((rd) << 11) | ((rt) << 16) | ((sa) << 6)) 108 | #define SUB(rd, rs, rt) (0x00000022 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16)) 109 | #define XOR(rd, rs, rt) (0x00000026 | ((rd) << 11) | ((rs) << 21) | ((rt) << 16)) 110 | #define XORI(rt, rs, imm) (0x38000000 | ((rt) << 16) | ((rs) << 21) | ((imm) & 0xFFFF)) 111 | 112 | // Addresses of data in RDRAM that are shared between CPU and RSP 113 | #define ROM_BUFFER 0xA0200000 114 | #define JIT_BUFFER (ROM_BUFFER - 0x40000) 115 | #define TILE_CACHE_BG (JIT_BUFFER - 0x40000) 116 | #define TILE_CACHE_OBJ (TILE_CACHE_BG - 0x8000) 117 | #define TILE_STATS_BG (TILE_CACHE_OBJ - 0x1000) 118 | #define TILE_STATS_OBJ (TILE_STATS_BG - 0x200) 119 | #define OBJECT_CACHE (TILE_STATS_OBJ - 0x800) 120 | #define SECTION_QUEUE2 (OBJECT_CACHE - 0x5000) 121 | #define SECTION_QUEUE1 (SECTION_QUEUE2 - 0x5000) 122 | #define DIRTY_QUEUE2 (SECTION_QUEUE1 - 0x400) 123 | #define DIRTY_QUEUE1 (DIRTY_QUEUE2 - 0x400) 124 | #define OAM_QUEUE2 (DIRTY_QUEUE1 - 0x2200) 125 | #define OAM_QUEUE1 (OAM_QUEUE2 - 0x2200) 126 | #define PALETTE_QUEUE2 (OAM_QUEUE1 - 0x800) 127 | #define PALETTE_QUEUE1 (PALETTE_QUEUE2 - 0x800) 128 | #define VRAM_BUFFER (PALETTE_QUEUE1 - 0x10000) 129 | #define MODE7_TEXTURE (VRAM_BUFFER - 0x2000) 130 | #define FRAMEBUFFER3 (MODE7_TEXTURE - 0x20D00) 131 | #define FRAMEBUFFER2 (FRAMEBUFFER3 - 0x20D00) 132 | #define FRAMEBUFFER1 (FRAMEBUFFER2 - 0x20D00) 133 | 134 | // RSP addresses of data in DMEM; used to avoid setting the upper address 135 | #define TEXTURE 0x000 136 | #define TILE_TABLE (TEXTURE + 0x40) 137 | #define VRAM_TABLE (TILE_TABLE + 0x400) 138 | #define OAM (VRAM_TABLE + 0x400) 139 | #define SCRN_DATA (OAM + 0x220) 140 | #define CHAR_DATA (SCRN_DATA + 0x80) 141 | #define MODE7_BOUNDS (CHAR_DATA + 0x80) 142 | #define CACHE_BASES (MODE7_BOUNDS + 0x10) 143 | // Start of section values 144 | #define BGHOFS (CACHE_BASES + 0x10) 145 | #define BGVOFS (BGHOFS + 0x8) 146 | #define BGNBA (BGVOFS + 0x8) 147 | #define OAMADD (BGNBA + 0x2) 148 | #define M7HOFS (OAMADD + 0x2) 149 | #define M7VOFS (M7HOFS + 0x2) 150 | #define M7A (M7VOFS + 0x2) 151 | #define M7B (M7A + 0x2) 152 | #define M7C (M7B + 0x2) 153 | #define M7D (M7C + 0x2) 154 | #define M7X (M7D + 0x2) 155 | #define M7Y (M7X + 0x2) 156 | #define WBGSEL (M7Y + 0x2) 157 | #define SUB_COLOR (WBGSEL + 0x2) 158 | #define MAIN_COLOR (SUB_COLOR + 0x2) 159 | #define BGXSC (MAIN_COLOR + 0x2) 160 | #define WHX (BGXSC + 0x4) 161 | #define OBSEL (WHX + 0x4) 162 | #define M7SEL (OBSEL + 0x1) 163 | #define WOBJSEL (M7SEL + 0x1) 164 | #define WBGLOG (WOBJSEL + 0x1) 165 | #define WOBJLOG (WBGLOG + 0x1) 166 | #define CGWSEL (WOBJLOG + 0x1) 167 | #define CGADSUB (CGWSEL + 0x1) 168 | #define TS (CGADSUB + 0x1) 169 | #define TM (TS + 0x1) 170 | #define TSW (TM + 0x1) 171 | #define TMW (TSW + 0x1) 172 | #define BG_MODE (TMW + 0x1) 173 | #define STAT_FLAGS (BG_MODE + 0x1) 174 | #define SPLIT_LINE (STAT_FLAGS + 0x1) 175 | // Start of frame values 176 | #define MASK_SEL (SPLIT_LINE + 0x1) 177 | #define FB_OFFSET (MASK_SEL + 0x8) 178 | #define FRAME_END (FB_OFFSET + 0x8) 179 | #define VRAM_ADDRS (FRAME_END + 0x8) 180 | #define PALETTE_PTR (VRAM_ADDRS + 0x8) 181 | #define OAM_PTR (PALETTE_PTR + 0x8) 182 | #define DIRTY_PTR (OAM_PTR + 0x8) 183 | #define SECTION_PTR (DIRTY_PTR + 0x8) 184 | #define FRAMEBUFFER (SECTION_PTR + 0x8) 185 | // End of frame values 186 | #define RDP_INIT (FRAMEBUFFER + 0x8) 187 | #define RDP_FRAME (RDP_INIT + 0x40) 188 | #define RDP_FILL (RDP_FRAME + 0x18) 189 | #define RDP_WINDOW (RDP_FILL + 0x38) 190 | #define RDP_TILE (RDP_WINDOW + 0x10) 191 | #define RDP_TILE7 (RDP_TILE + 0x28) 192 | #define TILE_PARAMS (RDP_TILE7 + 0x78) 193 | #define LAYER_CHART (TILE_PARAMS + 0x20) 194 | #define OBJ_SIZES (LAYER_CHART + 0x90) 195 | #define FILL_JUMPS (OBJ_SIZES + 0x20) 196 | #define TILE_JUMPS (FILL_JUMPS + 0x10) 197 | #define SHARED_JUMPS (TILE_JUMPS + 0xC) 198 | #define CACHE_RETS (SHARED_JUMPS + 0xC) 199 | #define CACHE_PTRS (CACHE_RETS + 0x8) 200 | #define SCRN_OFSV (CACHE_PTRS + 0x14) 201 | #define SHIFT_TABLE (SCRN_OFSV + 0x8) 202 | #define TEXREC_OFS (SHIFT_TABLE + 0x8) 203 | #define FILLREC_MASK (TEXREC_OFS + 0x4) 204 | #define LDBLK_BITS (FILLREC_MASK + 0x4) 205 | #define MODE7_MASK (LDBLK_BITS + 0x4) 206 | #define PRIO_CHECKS (MODE7_MASK + 0x4) 207 | #define WIN_BOUNDS (PRIO_CHECKS + 0x8) 208 | #define VEC_DATA 0xF70 209 | 210 | // Macros that convert addresses between cached and uncached 211 | #define CACHED(addr) ((addr) - 0x20000000) 212 | #define UNCACHED(addr) ((addr) + 0x20000000) 213 | 214 | // Macro that converts an RSP DMEM address to a CPU address 215 | #define DMEM(addr) (0xA4000000 + (addr)) 216 | -------------------------------------------------------------------------------- /src/dma.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "macros.h" 21 | 22 | .globl reload_hdma 23 | .globl trigger_hdma 24 | .globl write_mdmaen 25 | .globl read_dmapx 26 | .globl read_bbadx 27 | .globl read_a1txl 28 | .globl read_a1txh 29 | .globl read_a1bx 30 | .globl read_dasxl 31 | .globl read_dasxh 32 | .globl read_dasbx 33 | .globl read_a2axl 34 | .globl read_a2axh 35 | .globl read_ntrlx 36 | .globl read_unusedx 37 | .globl write_hdmaen 38 | .globl write_dmapx 39 | .globl write_bbadx 40 | .globl write_a1txl 41 | .globl write_a1txh 42 | .globl write_a1bx 43 | .globl write_dasxl 44 | .globl write_dasxh 45 | .globl write_dasbx 46 | .globl write_a2axl 47 | .globl write_a2axh 48 | .globl write_ntrlx 49 | .globl write_unusedx 50 | 51 | .data 52 | 53 | .align 4 54 | dma_stack: .hword 0 55 | hdma_mask: .byte 0 56 | end_mask: .byte 0 57 | hdmaen: .byte 0 58 | 59 | .align 4 60 | a1tbx: .word 0:8 61 | dastbx: .word 0:8 62 | a2abx: .word 0:8 63 | dmapx: .byte 0:8 64 | bbadx: .byte 0:8 65 | ntrlx: .byte 0:8 66 | unusedx: .byte 0:8 67 | 68 | .align 4 69 | transfer_modes: // I/O address offsets, repeated every 4 bytes 70 | .byte 0, 0, 0, 0 71 | .byte 0, 1, 0, 1 72 | .byte 0, 0, 0, 0 73 | .byte 0, 0, 1, 1 74 | .byte 0, 1, 2, 3 75 | .byte 0, 1, 0, 1 76 | .byte 0, 0, 0, 0 77 | .byte 0, 0, 1, 1 78 | 79 | .align 4 80 | unit_lengths: .byte 1, 2, 2, 4, 4, 4, 2, 4 // Transfer mode unit lengths 81 | address_steps: .byte 1, 0, -1, 0 // CPU address steps 82 | 83 | .text 84 | .set noreorder 85 | 86 | .align 5 87 | reload_hdma: 88 | // Reload the HDMA channel masks 89 | lbu t0, hdmaen 90 | li t1, 0 91 | sb t0, hdma_mask 92 | sb zero, end_mask 93 | 94 | reload_start: 95 | // Check if the current HDMA channel is enabled 96 | andi t2, t0, 0x1 97 | beqz t2, reload_end 98 | 99 | // Reload the channel's table address and line counter 100 | sll t2, t1, 2 101 | lw t3, a1tbx(t2) 102 | li t4, 1 103 | sw t3, a2abx(t2) 104 | sb t4, ntrlx(t1) 105 | 106 | reload_end: 107 | // Move to the next channel until no more are enabled 108 | srl t0, t0, 1 109 | bnez t0, reload_start 110 | addi t1, t1, 1 111 | jr ra 112 | nop 113 | 114 | .align 5 115 | trigger_hdma: 116 | // Set initial HDMA values and check if any channels are enabled 117 | lbu s0, hdma_mask // Mask 118 | move s1, zero // Index 119 | bnez s0, hdma_start 120 | move gp, ra 121 | jr gp 122 | 123 | hdma_start: 124 | // Disable TLB-mapped I/O registers during HDMA 125 | li t0, 0x1 126 | sb t0, io_disable 127 | 128 | hdma_next: 129 | // Check if the current HDMA channel is enabled 130 | andi t0, s0, 0x1 131 | beqz t0, hdma_end 132 | nop 133 | lbu t0, dmapx(s1) 134 | 135 | // Get the channel's I/O transfer mode (t8) and unit length (t9) 136 | andi t1, t0, 0x7 137 | sll t2, t1, 2 138 | la t8, transfer_modes 139 | add t8, t8, t2 140 | lbu t9, unit_lengths(t1) 141 | 142 | // Decrement the line counter and check if finished 143 | lbu v1, ntrlx(s1) 144 | sll t1, s1, 2 145 | lw s2, a2abx(t1) 146 | addi v1, v1, -1 147 | sb v1, ntrlx(s1) 148 | andi t1, v1, 0x7F 149 | bnez t1, same_entry 150 | nop 151 | 152 | // Reload the line counter for the next table entry 153 | MEM_READ8 0(s2) 154 | sb v0, ntrlx(s1) 155 | addi s2, s2, 1 156 | sll t1, s1, 2 157 | sw s2, a2abx(t1) 158 | beqz v0, hdma_disable 159 | nop 160 | ori v1, v1, 0x80 // Do transfer 161 | 162 | same_entry: 163 | // Check if a transfer should occur (repeat or first) 164 | andi t1, v1, 0x80 165 | beqz t1, hdma_end 166 | nop 167 | 168 | // Check for indirect mode 169 | lbu t0, dmapx(s1) 170 | andi t0, t0, 0x40 171 | bnez t0, hdma_indir 172 | 173 | // Increment the direct address and start a transfer 174 | add t0, s2, t9 175 | sll t1, s1, 2 176 | sh t0, a2abx + 2(t1) 177 | b hdma_ready 178 | 179 | hdma_indir: 180 | // Check if the indirect address should be reloaded 181 | andi t1, v1, 0x7F 182 | bnez t1, indir_ready 183 | nop 184 | 185 | // Read the value and increment the direct address 186 | MEM_READ16 0(s2) 187 | addi t0, s2, 2 188 | sll t1, s1, 2 189 | sh t0, a2abx + 2(t1) 190 | 191 | // Reload the indirect address 192 | lbu t2, dastbx + 1(t1) 193 | sll t2, t2, 16 194 | or s2, t2, v0 195 | sw s2, dastbx(t1) 196 | 197 | indir_ready: 198 | // Increment the indirect address and start a transfer 199 | sll t1, s1, 2 200 | lw s2, dastbx(t1) 201 | add t0, s2, t9 202 | sw t0, dastbx(t1) 203 | 204 | hdma_ready: 205 | // Check the channel's transfer direction 206 | lbu t0, dmapx(s1) 207 | andi t0, t0, 0x80 208 | bnez t0, hio_cpu 209 | nop 210 | 211 | hcpu_io: 212 | // Copy a value from memory to an I/O register 213 | MEM_READ8 0(s2) 214 | lbu a0, bbadx(s1) 215 | lbu t0, (t8) 216 | add a0, a0, t0 217 | sll t0, a0, 2 218 | lw t0, write_iomap(t0) 219 | jalr t0 220 | move a1, v0 221 | 222 | // Move to the next byte until a unit has been transferred 223 | addi s2, s2, 1 224 | addi t9, t9, -1 225 | bnez t9, hcpu_io 226 | addi t8, t8, 1 227 | b hdma_end 228 | nop 229 | 230 | hio_cpu: 231 | // Copy a value from an I/O register to memory 232 | lbu a0, bbadx(s1) 233 | lbu t0, (t8) 234 | add a0, a0, t0 235 | sll t0, a0, 2 236 | lw t0, read_iomap(t0) 237 | jalr t0 238 | nop 239 | move a1, v0 240 | MEM_WRITE8 0(s2) 241 | 242 | // Move to the next byte until a unit has been transferred 243 | addi s2, s2, 1 244 | addi t9, t9, -1 245 | bnez t9, hio_cpu 246 | addi t8, t8, 1 247 | b hdma_end 248 | nop 249 | 250 | hdma_disable: 251 | // Disable an HDMA channel for the rest of the frame 252 | lbu t0, hdma_mask 253 | lbu t1, end_mask 254 | li t2, 1 255 | sll t2, t2, s1 256 | or t1, t1, t2 257 | sb t1, end_mask 258 | xori t1, t1, 0xFF 259 | and t0, t0, t1 260 | sb t0, hdma_mask 261 | 262 | hdma_end: 263 | // Move to the next channel until no more are enabled 264 | srl s0, s0, 1 265 | bnez s0, hdma_next 266 | addi s1, s1, 1 267 | sb zero, io_disable 268 | jr gp 269 | nop 270 | 271 | .align 5 272 | write_mdmaen: // a1: value 273 | // Backup registers and set initial DMA values 274 | move gp, ra 275 | sh a1, dma_stack 276 | andi t8, a1, 0xFF // Mask 277 | move t9, zero // Index 278 | 279 | // Disable TLB-mapped I/O registers during DMA 280 | li t0, 0x1 281 | sb t0, io_disable 282 | 283 | dma_next: 284 | // Check if the current DMA channel is enabled 285 | andi t0, t8, 0x1 286 | beqz t0, dma_end 287 | nop 288 | lbu t0, dmapx(t9) 289 | 290 | // Get the channel's CPU address step (s2) 291 | srl t1, t0, 3 292 | andi t1, t1, 0x3 293 | lb s2, address_steps(t1) 294 | 295 | // Get the channel's I/O transfer mode (sp) 296 | andi t1, t0, 0x7 297 | sll t1, t1, 2 298 | la sp, transfer_modes 299 | add sp, sp, t1 300 | 301 | // Check the channel's transfer direction 302 | andi t0, t0, 0x80 303 | bnez t0, io_cpu 304 | 305 | cpu_io: 306 | // Read a value from the CPU address and adjust 307 | sll t0, t9, 2 308 | lw a0, a1tbx(t0) 309 | MEM_READ8 0(a0) 310 | add a0, a0, s2 311 | sll t0, t9, 2 312 | sh a0, a1tbx + 2(t0) 313 | 314 | // Write the value to the I/O address 315 | lbu a0, bbadx(t9) 316 | lbu t0, (sp) 317 | add a0, a0, t0 318 | sll t0, a0, 2 319 | lw t0, write_iomap(t0) 320 | jalr t0 321 | move a1, v0 322 | 323 | // Move to the next I/O offset in the transfer mode 324 | andi t0, sp, 0x3 325 | sub sp, sp, t0 326 | addi t0, t0, 1 327 | andi t0, t0, 0x3 328 | add sp, sp, t0 329 | 330 | // Decrement byte counter and continue until it hits zero 331 | sll t0, t9, 2 332 | lhu t1, dastbx + 2(t0) 333 | addi t1, t1, -1 334 | sh t1, dastbx + 2(t0) 335 | bnez t1, cpu_io 336 | nop 337 | b dma_end 338 | nop 339 | 340 | io_cpu: 341 | // Read a value from the I/O address 342 | lbu a0, bbadx(t9) 343 | lbu t0, (sp) 344 | add a0, a0, t0 345 | sll t0, a0, 2 346 | lw t0, read_iomap(t0) 347 | jalr t0 348 | nop 349 | 350 | // Move to the next I/O offset in the transfer mode 351 | andi t0, sp, 0x3 352 | sub sp, sp, t0 353 | addi t0, t0, 1 354 | andi t0, t0, 0x3 355 | add sp, sp, t0 356 | 357 | // Write the value to the CPU address and adjust 358 | sll t0, t9, 2 359 | lw a0, a1tbx(t0) 360 | move a1, v0 361 | MEM_WRITE8 0(a0) 362 | add a0, a0, s2 363 | sll t0, t9, 2 364 | sh a0, a1tbx + 2(t0) 365 | 366 | // Decrement byte counter and continue until it hits zero 367 | sll t0, t9, 2 368 | lhu t1, dastbx + 2(t0) 369 | addi t1, t1, -1 370 | sh t1, dastbx + 2(t0) 371 | bnez t1, io_cpu 372 | nop 373 | 374 | dma_end: 375 | // Move to the next channel until no more are enabled 376 | srl t8, t8, 1 377 | bnez t8, dma_next 378 | addi t9, t9, 1 379 | lhu a1, dma_stack 380 | sb zero, io_disable 381 | jr gp 382 | nop 383 | 384 | .align 5 385 | write_hdmaen: // a1: value 386 | // Write to the HDMAEN register and update the HDMA mask 387 | sb a1, hdmaen 388 | lbu t0, hdma_mask 389 | lbu t1, end_mask 390 | or t0, t0, a1 391 | xori t1, t1, 0xFF 392 | and t0, t0, t1 393 | sb t0, hdma_mask 394 | jr ra 395 | nop 396 | 397 | .align 5 398 | read_dmapx: // a0: address - v0: value 399 | // Read from one of the DMAPx registers 400 | srl t0, a0, 4 401 | andi t0, t0, 0x7 402 | lbu v0, dmapx(t0) 403 | jr ra 404 | nop 405 | 406 | .align 5 407 | read_bbadx: // a0: address - v0: value 408 | // Get the low byte of one of the I/O addresses 409 | srl t0, a0, 4 410 | andi t0, t0, 0x7 411 | lbu v0, bbadx(t0) 412 | jr ra 413 | nop 414 | 415 | .align 5 416 | read_a1txl: // a0: address - v0: value 417 | // Get the low byte of one of the CPU addresses 418 | srl t0, a0, 2 419 | andi t0, t0, 0x1C 420 | lbu v0, a1tbx + 3(t0) 421 | jr ra 422 | nop 423 | 424 | .align 5 425 | read_a1txh: // a0: address - v0: value 426 | // Get the high byte of one of the CPU addresses 427 | srl t0, a0, 2 428 | andi t0, t0, 0x1C 429 | lbu v0, a1tbx + 2(t0) 430 | jr ra 431 | nop 432 | 433 | .align 5 434 | read_a1bx: // a0: address - v0: value 435 | // Get the bank of one of the CPU addresses 436 | srl t0, a0, 2 437 | andi t0, t0, 0x1C 438 | lbu v0, a1tbx + 1(t0) 439 | jr ra 440 | nop 441 | 442 | .align 5 443 | read_dasxl: // a0: address - v0: value 444 | // Get the low byte of one of the byte counters 445 | srl t0, a0, 2 446 | andi t0, t0, 0x1C 447 | lbu v0, dastbx + 3(t0) 448 | jr ra 449 | nop 450 | 451 | .align 5 452 | read_dasxh: // a0: address - v0: value 453 | // Get the high byte of one of the byte counters 454 | srl t0, a0, 2 455 | andi t0, t0, 0x1C 456 | lbu v0, dastbx + 2(t0) 457 | jr ra 458 | nop 459 | 460 | .align 5 461 | read_dasbx: // a0: address - v0: value 462 | // Get the bank of one of the indirect HDMA addresses 463 | srl t0, a0, 2 464 | andi t0, t0, 0x1C 465 | lbu v0, dastbx + 1(t0) 466 | jr ra 467 | nop 468 | 469 | .align 5 470 | read_a2axl: // a0: address - v0: value 471 | // Get the low byte of one of the HDMA current addresses 472 | srl t0, a0, 2 473 | andi t0, t0, 0x1C 474 | lbu v0, a2abx + 3(t0) 475 | jr ra 476 | nop 477 | 478 | .align 5 479 | read_a2axh: // a0: address - v0: value 480 | // Get the high byte of one of the HDMA current addresses 481 | srl t0, a0, 2 482 | andi t0, t0, 0x1C 483 | lbu v0, a2abx + 2(t0) 484 | jr ra 485 | nop 486 | 487 | .align 5 488 | read_ntrlx: // a0: address - v0: value 489 | // Get one of the HDMA line counters 490 | srl t0, a0, 4 491 | andi t0, t0, 0x7 492 | lbu v0, ntrlx(t0) 493 | jr ra 494 | nop 495 | 496 | .align 5 497 | read_unusedx: // a0: address - v0: value 498 | // Get one of the unused DMA values 499 | srl t0, a0, 4 500 | andi t0, t0, 0x7 501 | lbu v0, unusedx(t0) 502 | jr ra 503 | nop 504 | 505 | .align 5 506 | write_dmapx: // a0: address, a1: value 507 | // Write to one of the DMAPx registers 508 | srl t0, a0, 4 509 | andi t0, t0, 0x7 510 | sb a1, dmapx(t0) 511 | jr ra 512 | nop 513 | 514 | .align 5 515 | write_bbadx: // a0: address, a1: value 516 | // Set the low byte of one of the I/O addresses 517 | srl t0, a0, 4 518 | andi t0, t0, 0x7 519 | sb a1, bbadx(t0) 520 | jr ra 521 | nop 522 | 523 | .align 5 524 | write_a1txl: // a0: address, a1: value 525 | // Set the low byte of one of the CPU addresses 526 | srl t0, a0, 2 527 | andi t0, t0, 0x1C 528 | sb a1, a1tbx + 3(t0) 529 | jr ra 530 | nop 531 | 532 | .align 5 533 | write_a1txh: // a0: address, a1: value 534 | // Set the high byte of one of the CPU addresses 535 | srl t0, a0, 2 536 | andi t0, t0, 0x1C 537 | sb a1, a1tbx + 2(t0) 538 | jr ra 539 | nop 540 | 541 | .align 5 542 | write_a1bx: // a0: address, a1: value 543 | // Set the bank of one of the CPU addresses 544 | srl t0, a0, 2 545 | andi t0, t0, 0x1C 546 | sb a1, a1tbx + 1(t0) 547 | sb a1, a2abx + 1(t0) 548 | jr ra 549 | nop 550 | 551 | .align 5 552 | write_dasxl: // a0: address, a1: value 553 | // Set the low byte of one of the byte counters 554 | srl t0, a0, 2 555 | andi t0, t0, 0x1C 556 | sb a1, dastbx + 3(t0) 557 | jr ra 558 | nop 559 | 560 | .align 5 561 | write_dasxh: // a0: address, a1: value 562 | // Set the high byte of one of the byte counters 563 | srl t0, a0, 2 564 | andi t0, t0, 0x1C 565 | sb a1, dastbx + 2(t0) 566 | jr ra 567 | nop 568 | 569 | .align 5 570 | write_dasbx: // a0: address, a1: value 571 | // Set the bank of one of the indirect HDMA addresses 572 | srl t0, a0, 2 573 | andi t0, t0, 0x1C 574 | sb a1, dastbx + 1(t0) 575 | jr ra 576 | nop 577 | 578 | .align 5 579 | write_a2axl: // a0: address, a1: value 580 | // Set the low byte of one of the HDMA current addresses 581 | srl t0, a0, 2 582 | andi t0, t0, 0x1C 583 | sb a1, a2abx + 3(t0) 584 | jr ra 585 | nop 586 | 587 | .align 5 588 | write_a2axh: // a0: address, a1: value 589 | // Set the high byte of one of the HDMA current addresses 590 | srl t0, a0, 2 591 | andi t0, t0, 0x1C 592 | sb a1, a2abx + 2(t0) 593 | jr ra 594 | nop 595 | 596 | .align 5 597 | write_ntrlx: // a0: address, a1: value 598 | // Set one of the HDMA line counters 599 | srl t0, a0, 4 600 | andi t0, t0, 0x7 601 | sb a1, ntrlx(t0) 602 | jr ra 603 | nop 604 | 605 | .align 5 606 | write_unusedx: // a0: address, a1: value 607 | // Set one of the unused DMA values 608 | srl t0, a0, 4 609 | andi t0, t0, 0x7 610 | sb a1, unusedx(t0) 611 | jr ra 612 | nop 613 | -------------------------------------------------------------------------------- /src/dsp.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "defines.h" 21 | 22 | .globl dsp_sample 23 | .globl read_dspaddr 24 | .globl read_dspdata 25 | .globl write_dspaddr 26 | .globl write_dspdata 27 | 28 | .data 29 | 30 | .align 4 31 | dsp_buffer: .word 0:0x800 32 | dsp_regs: .byte 0:0x80 33 | 34 | .align 4 35 | noise_sample: .hword 0x4000 36 | noise_step: .hword 0x8000 37 | dsp_timer: .hword 0 38 | dsp_pointer: .hword 0 39 | dsp_addr: .byte 0 40 | enabled: .byte 0 41 | 42 | .align 4 43 | mode_funcs: .word 0 44 | cached_vols: .word 0 45 | counters: .hword 0 46 | envelopes: .hword 0 47 | next_steps: .hword 0 48 | cur_addrs: .hword 0 49 | 50 | .align 4 51 | voice_dups: .dword 0:14 52 | cur_samples: .dword 0:8 53 | 54 | .align 4 55 | filter_jumps: .word filter_end, filter_1, filter_2, filter_3 56 | gain_jumps: .word linear_dec, expon_dec, linear_inc, bent_inc 57 | 58 | .align 4 59 | write_iomap: 60 | .word write_vol, write_vol, write_reg, write_reg // 0x00-0x03 61 | .word write_reg, write_adsr, write_reg, write_reg // 0x04-0x07 62 | .word write_reg:(0x0F - 0x07) // 0x08-0x0F 63 | .word write_vol, write_vol, write_reg, write_reg // 0x10-0x13 64 | .word write_reg, write_adsr, write_reg, write_reg // 0x14-0x17 65 | .word write_reg:(0x1F - 0x17) // 0x18-0x1F 66 | .word write_vol, write_vol, write_reg, write_reg // 0x20-0x23 67 | .word write_reg, write_adsr, write_reg, write_reg // 0x24-0x27 68 | .word write_reg:(0x2F - 0x27) // 0x28-0x2F 69 | .word write_vol, write_vol, write_reg, write_reg // 0x30-0x33 70 | .word write_reg, write_adsr, write_reg, write_reg // 0x34-0x37 71 | .word write_reg:(0x3F - 0x37) // 0x38-0x3F 72 | .word write_vol, write_vol, write_reg, write_reg // 0x40-0x43 73 | .word write_reg, write_adsr, write_reg, write_reg // 0x44-0x47 74 | .word write_reg:(0x4B - 0x47) // 0x48-0x4B 75 | .word write_kon, write_reg, write_reg, write_reg // 0x4C-0x4F 76 | .word write_vol, write_vol, write_reg, write_reg // 0x50-0x53 77 | .word write_reg, write_adsr, write_reg, write_reg // 0x54-0x57 78 | .word write_reg:(0x5B - 0x57) // 0x58-0x5B 79 | .word write_koff, write_reg, write_reg, write_reg // 0x5C-0x5F 80 | .word write_vol, write_vol, write_reg, write_reg // 0x60-0x63 81 | .word write_reg, write_adsr, write_reg, write_reg // 0x64-0x67 82 | .word write_reg:(0x6B - 0x67) // 0x68-0x6B 83 | .word time_noise, write_reg, write_reg, write_reg // 0x6C-0x6F 84 | .word write_vol, write_vol, write_reg, write_reg // 0x70-0x73 85 | .word write_reg, write_adsr, write_reg, write_reg // 0x74-0x77 86 | .word write_reg:(0x7F - 0x77) // 0x78-0x7F 87 | 88 | .align 4 89 | timer_rates: // Lookup table for timer rates and offsets 90 | .hword -1, 0x8000, 2048, 0, 1536, 1040 91 | .hword 1280, 536, 1024, 0, 768, 1040 92 | .hword 640, 536, 512, 0, 384, 1040 93 | .hword 320, 536, 256, 0, 192, 1040 94 | .hword 160, 536, 128, 0, 96, 1040 95 | .hword 80, 536, 64, 0, 48, 1040 96 | .hword 40, 536, 32, 0, 24, 1040 97 | .hword 20, 536, 16, 0, 12, 1040 98 | .hword 10, 536, 8, 0, 6, 1040 99 | .hword 5, 536, 4, 0, 3, 1040 100 | .hword 2, 0, 1, 0 101 | 102 | .text 103 | .set noreorder 104 | 105 | .align 5 106 | dsp_sample: 107 | // Check if audio is enabled and schedule the next sample 108 | lbu t0, audio_set 109 | beqz t0, apu_execute 110 | addi a3, a3, -DSP_SAMPLE 111 | 112 | // Decrement the global timer once per sample 113 | lhu t0, dsp_timer 114 | bnez t0, not_zero 115 | addi gp, t0, -1 116 | addi gp, gp, 0x7800 117 | not_zero: 118 | sh gp, dsp_timer 119 | 120 | // Handle a noise step if the timer matches 121 | lhu t0, noise_step 122 | bne t0, gp, skip_noise 123 | nop 124 | jal make_noise 125 | nop 126 | 127 | skip_noise: 128 | // Set initial values for generating a sample 129 | li t6, -1 // Voice count 130 | li t5, 0 // Left sample 131 | li t4, 0 // Right sample 132 | lbu sp, enabled 133 | 134 | next_voice: 135 | // Check if a voice is enabled 136 | addi t6, t6, 1 137 | srl t0, sp, t6 138 | beqz t0, write_sample 139 | andi t0, t0, 0x1 140 | beqz t0, next_voice 141 | 142 | // Handle an envelope step if the timer matches 143 | sll t8, t6, 4 144 | lhu t0, next_steps(t8) 145 | bne t0, gp, get_pitch 146 | nop 147 | lw t0, mode_funcs(t8) 148 | jalr t0 149 | nop 150 | 151 | get_pitch: 152 | // Get the pitch scaler for the current voice 153 | lhu t0, dsp_regs + 0x2(t8) // VxPITCH(L/H) 154 | sll t1, t0, 8 155 | srl t0, t0, 8 156 | or t0, t0, t1 157 | andi t0, t0, 0x3FFF 158 | 159 | // Increase the voice's position counter by the scaler 160 | lhu t9, counters(t8) 161 | addu t2, t9, t0 162 | sh t2, counters(t8) 163 | 164 | // Decode a new 4-sample BRR chunk if the counter overflows 165 | xor t9, t9, t2 166 | srl t9, t9, 14 167 | bnez t9, decode_chunk 168 | nop 169 | 170 | mix_sample: 171 | // Load the noise sample if enabled for the voice 172 | lbu t0, dsp_regs + 0x3D // NON 173 | srl t0, t0, t6 174 | andi t0, t0, 0x1 175 | beqz t0, load_sample 176 | nop 177 | lh t0, noise_sample 178 | b skip_sample 179 | 180 | load_sample: 181 | // Load the current sample from the voice's buffer 182 | sll t1, t6, 3 183 | srl t3, t2, 11 184 | andi t3, t3, 0x6 185 | xori t3, t3, 0x6 186 | add t1, t1, t3 187 | lh t0, cur_samples(t1) 188 | beqz t0, next_voice 189 | nop 190 | 191 | skip_sample: 192 | // Get the cached volumes for each channel 193 | lh t2, cached_vols + 0(t8) 194 | lh t3, cached_vols + 2(t8) 195 | 196 | // Mix the sample on the left channel 197 | mult t0, t2 198 | mflo t1 199 | sra t1, t1, 14 200 | beq t2, t3, same_volume 201 | addu t5, t5, t1 202 | 203 | // Mix the sample on the right channel 204 | mult t0, t3 205 | mflo t1 206 | sra t1, t1, 14 207 | same_volume: 208 | b next_voice 209 | addu t4, t4, t1 210 | 211 | .align 5 212 | write_sample: // t4: left, t5: right 213 | // Write a sample to the buffer 214 | andi t4, t4, 0xFFFF 215 | sll t5, t5, 16 216 | or t0, t4, t5 217 | lhu t1, dsp_pointer 218 | sw t0, UNCACHED(dsp_buffer)(t1) 219 | 220 | // Increment the sample pointer 221 | addi a0, t1, 4 222 | andi t0, a0, 0x1FFF 223 | sh t0, dsp_pointer 224 | 225 | // Submit a buffer to the AI when one is ready 226 | andi t0, a0, 0xFFF 227 | bnez t0, apu_execute 228 | lui t0, 0xA450 // AI register upper address 229 | la t1, dsp_buffer - 0x1000 230 | add t1, t1, a0 // Buffer address 231 | sw t1, 0x0000(t0) // AI_DRAM_ADDR 232 | li t1, 0x00001000 // Buffer length 233 | j apu_execute 234 | sw t1, 0x0004(t0) // AI_LENGTH 235 | 236 | .align 5 237 | decode_chunk: // t2: counter, t6: voice, t8: offset 238 | // Stay in the same BRR block until the counter overflows 239 | lhu t0, cur_addrs(t8) 240 | srl t3, t2, 16 241 | beqz t3, decode_samples 242 | nop 243 | 244 | // Move to the next 9-byte BRR block based on current flags 245 | lbu t9, apu_ram(t0) // Header 246 | andi t3, t9, 0x1 // End 247 | beqz t3, decode_samples 248 | addi t0, t0, 9 249 | andi t3, t9, 0x2 // Loop 250 | beqz t3, end_voice 251 | nop 252 | 253 | // Get the address of the voice's sample table entry 254 | lbu t0, dsp_regs + 0x5D // DIR 255 | lbu t3, dsp_regs + 0x4(t8) // VxSRCN 256 | sll t0, t0, 8 257 | sll t3, t3, 2 258 | add t0, t0, t3 259 | 260 | // Load the loop address and start decoding from there 261 | lhu t0, apu_ram + 0x2(t0) 262 | sll t3, t0, 8 263 | srl t0, t0, 8 264 | or t0, t0, t3 265 | andi t0, t0, 0xFFFF 266 | 267 | decode_samples: 268 | // Get the address of the voice's sample buffer 269 | la t3, cur_samples 270 | sll v1, t6, 3 271 | add v1, v1, t3 272 | 273 | // Configure things based on the BRR block header 274 | sh t0, cur_addrs(t8) 275 | lbu t9, apu_ram(t0) // Header 276 | srl t1, t9, 4 // Shift 277 | andi t9, t9, 0xC // Filter 278 | lw t9, filter_jumps(t9) 279 | 280 | // Load 4 4-bit BRR samples at once to be decoded 281 | srl t3, t2, 13 282 | andi t3, t3, 0x6 283 | add t0, t0, t3 284 | lbu t3, apu_ram + 1(t0) 285 | lbu s0, apu_ram + 2(t0) 286 | sll t3, t3, 8 287 | or s0, s0, t3 288 | li s1, 8 289 | 290 | next_sample: 291 | // Pass the next sample, sign-extended and shifted, to the filter 292 | addi s1, s1, -2 293 | sll t3, s1, 1 294 | srl t0, s0, t3 295 | sll t0, t0, 28 296 | sra t0, t0, 28 297 | sll t0, t0, t1 298 | jr t9 299 | sra t0, t0, 1 300 | 301 | filter_1: 302 | // Apply the old sample, multiplied by 0.9375 303 | addi t7, s1, 2 304 | andi t7, t7, 0x6 305 | add t3, v1, t7 306 | lh t7, (t3) 307 | add t0, t0, t7 308 | sra t7, t7, 4 309 | b filter_end 310 | sub t0, t0, t7 311 | 312 | filter_2: 313 | // Apply the old sample, multiplied by 1.90625 314 | addi t7, s1, 2 315 | andi t7, t7, 0x6 316 | add t3, v1, t7 317 | lh t7, (t3) 318 | sll t3, t7, 1 319 | add t0, t0, t3 320 | sll t3, t7, 1 321 | add t3, t3, t7 322 | sra t3, t3, 5 323 | sub t0, t0, t3 324 | 325 | // Apply the older sample, multiplied by -0.9375 326 | addi t7, s1, 4 327 | andi t7, t7, 0x6 328 | add t3, v1, t7 329 | lh t7, (t3) 330 | sub t0, t0, t7 331 | sra t3, t7, 4 332 | b filter_end 333 | add t0, t0, t3 334 | 335 | filter_3: 336 | // Apply the old sample, multiplied by 1.796875 337 | addi t7, s1, 2 338 | andi t7, t7, 0x6 339 | add t3, v1, t7 340 | lh t7, (t3) 341 | sll t3, t7, 1 342 | add t0, t0, t3 343 | sll t3, t7, 3 344 | add t3, t3, t7 345 | sll t7, t7, 2 346 | add t3, t3, t7 347 | sra t3, t3, 6 348 | sub t0, t0, t3 349 | 350 | // Apply the older sample, multiplied by -0.8125 351 | addi t7, s1, 4 352 | andi t7, t7, 0x6 353 | add t3, v1, t7 354 | lh t7, (t3) 355 | sub t0, t0, t7 356 | sll t3, t7, 1 357 | add t3, t3, t7 358 | sra t3, t3, 4 359 | add t0, t0, t3 360 | 361 | filter_end: 362 | // Store the sample and loop until all 4 are done 363 | add t3, v1, s1 364 | bnez s1, next_sample 365 | sh t0, (t3) 366 | j mix_sample 367 | 368 | end_voice: 369 | // Disable the voice if it ends without loop 370 | li t0, 1 371 | sll t0, t0, t6 372 | xori t0, t0, 0xFF 373 | and sp, sp, t0 374 | sb sp, enabled 375 | j next_voice 376 | nop 377 | 378 | .align 5 379 | make_noise: 380 | // Generate a new noise sample based on the old one 381 | lh t0, noise_sample 382 | sll t1, t0, 14 383 | sll t2, t0, 13 384 | xor t1, t1, t2 385 | andi t1, t1, 0x4000 386 | srl t0, t0, 1 387 | or t0, t0, t1 388 | sh t0, noise_sample 389 | j time_noise 390 | nop 391 | 392 | .align 5 393 | time_noise: 394 | // Schedule the next step using the noise rate 395 | lbu t0, dsp_regs + 0x6C // FLG 396 | andi a0, t0, 0x1F 397 | sll a0, a0, 2 398 | j update_step 399 | li t6, 8 // Noise 400 | 401 | .align 5 402 | attack_mode: // t6: voice, t8: offset 403 | // Increase the envelope by 32 404 | lhu t0, envelopes(t8) 405 | addi t0, t0, 32 406 | sh t0, envelopes(t8) 407 | 408 | // Switch to decay mode at level 0x7E0 and clip to 0x7FF 409 | blt t0, 0x7E0, attack_time 410 | nop 411 | la t3, decay_mode 412 | sw t3, mode_funcs(t8) 413 | li t1, 0x7FF 414 | blt t0, t1, decay_time 415 | nop 416 | sh t1, envelopes(t8) 417 | j decay_time 418 | nop 419 | 420 | .align 5 421 | attack_time: // t6: voice 422 | // Schedule the next step using the attack rate 423 | sll t1, t6, 4 424 | lbu t0, dsp_regs + 0x5(t1) // VxADSR1 425 | andi a0, t0, 0xF 426 | sll a0, a0, 3 427 | j update_step 428 | addi a0, a0, 4 429 | 430 | .align 5 431 | decay_mode: // t6: voice, t8: offset 432 | // Decrease the envelope by an exponential amount 433 | lhu t0, envelopes(t8) 434 | addi t2, t0, -1 435 | srl t2, t2, 8 436 | addi t2, t2, 1 437 | sub t0, t0, t2 438 | sh t0, envelopes(t8) 439 | 440 | // Calculate the sustain level boundary 441 | lbu t1, dsp_regs + 0x6(t8) // VxADSR2 442 | andi t1, t1, 0xE0 443 | sll t1, t1, 3 444 | addi t1, t1, 0x100 445 | 446 | // Switch to sustain mode at the boundary and clip to zero 447 | bgt t0, t1, decay_time 448 | nop 449 | la t3, sustain_mode 450 | sw t3, mode_funcs(t8) 451 | bgtz t0, sustain_time 452 | nop 453 | sh zero, envelopes(t8) 454 | j sustain_time 455 | nop 456 | 457 | .align 5 458 | decay_time: // t6: voice 459 | // Schedule the next step using the decay rate 460 | sll t1, t6, 4 461 | lbu t0, dsp_regs + 0x5(t1) // VxADSR1 462 | andi a0, t0, 0x70 463 | srl a0, a0, 1 464 | j update_step 465 | addi a0, a0, 64 466 | 467 | .align 5 468 | sustain_mode: // t6: voice, t8: offset 469 | // Decrease the envelope by an exponential amount 470 | lhu t0, envelopes(t8) 471 | addi t2, t0, -1 472 | srl t2, t2, 8 473 | addi t2, t2, 1 474 | sub t0, t0, t2 475 | sh t0, envelopes(t8) 476 | 477 | // Disable the voice if its envelope decreases to zero 478 | bgtz t0, sustain_time 479 | li t0, 1 480 | sll t0, t0, t6 481 | xori t0, t0, 0xFF 482 | and sp, sp, t0 483 | sb sp, enabled 484 | jr ra 485 | nop 486 | 487 | .align 5 488 | sustain_time: // t6: voice 489 | // Schedule the next step using the sustain rate 490 | sll t1, t6, 4 491 | lbu t0, dsp_regs + 0x6(t1) // VxADSR2 492 | andi a0, t0, 0x1F 493 | j update_step 494 | sll a0, a0, 2 495 | 496 | .align 5 497 | gain_mode: // t6: voice, t8: offset 498 | // Jump to the handler for the current gain mode 499 | lbu t0, dsp_regs + 0x7(t8) // VxGAIN 500 | andi t1, t0, 0x60 501 | srl t1, t1, 3 502 | lw t0, gain_jumps(t1) 503 | jr t0 504 | nop 505 | 506 | linear_dec: 507 | // Decrease the envelope by a fixed amount and clip to zero 508 | lhu t0, envelopes(t8) 509 | addi t0, t0, -32 510 | sh t0, envelopes(t8) 511 | bgtz t0, gain_time 512 | nop 513 | sh zero, envelopes(t8) 514 | j gain_time 515 | nop 516 | 517 | expon_dec: 518 | // Decrease the envelope by an exponential amount and clip to zero 519 | lhu t0, envelopes(t8) 520 | addi t2, t0, -1 521 | srl t2, t2, 8 522 | addi t2, t2, 1 523 | sub t0, t0, t2 524 | sh t0, envelopes(t8) 525 | bgtz t0, gain_time 526 | nop 527 | sh zero, envelopes(t8) 528 | j gain_time 529 | nop 530 | 531 | linear_inc: 532 | // Increase the envelope by a fixed amount and clip to 0x7FF 533 | lhu t0, envelopes(t8) 534 | addi t0, t0, 32 535 | sh t0, envelopes(t8) 536 | li t1, 0x7FF 537 | blt t0, t1, gain_time 538 | nop 539 | sh t1, envelopes(t8) 540 | j gain_time 541 | nop 542 | 543 | bent_inc: 544 | // Increase the envelope by an amount bent at 0x600 and clip to 0x7FF 545 | lhu t0, envelopes(t8) 546 | slti t1, t0, 0x600 547 | sll t1, t1, 1 548 | li t2, 8 549 | sll t1, t2, t1 550 | add t0, t0, t1 551 | sh t0, envelopes(t8) 552 | li t1, 0x7FF 553 | blt t0, t1, gain_time 554 | nop 555 | sh t1, envelopes(t8) 556 | j gain_time 557 | nop 558 | 559 | .align 5 560 | gain_time: // t6: voice 561 | // Check whether direct or custom gain is selected 562 | sll t4, t6, 4 563 | lbu t0, dsp_regs + 0x7(t4) // VxGAIN 564 | andi t1, t0, 0x80 565 | beqz t1, direct_gain 566 | 567 | // Schedule the next step using the custom gain rate 568 | andi a0, t0, 0x1F 569 | sll a0, a0, 2 570 | j update_step 571 | 572 | direct_gain: 573 | // Set a fixed volume for direct gain 574 | sll t0, t0, 4 575 | sh t0, envelopes(t4) 576 | li t0, 0xFFFF 577 | sh t0, next_steps(t4) 578 | j update_vols 579 | nop 580 | 581 | .align 5 582 | update_step: // a0: rate, t6: voice 583 | // Get values for updating a step timer 584 | lhu t2, dsp_timer 585 | lw t0, timer_rates(a0) 586 | addi t2, t2, -1 // Base 587 | srl t1, t0, 16 // Rate 588 | bgez t2, calc_step 589 | andi t0, t0, 0xFFFF // Offset 590 | addi t2, t2, 0x7800 591 | 592 | calc_step: 593 | // Calculate the timer value of the next step 594 | add t0, t0, t2 595 | div t0, t1 596 | mfhi t3 597 | sub t2, t2, t3 598 | bgez t2, set_step 599 | nop 600 | addi t2, t2, 0x7800 601 | 602 | set_step: 603 | // Update a voice's step timer and cached volume 604 | bge t6, 8, set_noise 605 | sll t1, t6, 4 606 | sh t2, next_steps(t1) 607 | j update_vols 608 | nop 609 | 610 | set_noise: 611 | // Update the noise step timer 612 | sh t2, noise_step 613 | jr ra 614 | nop 615 | 616 | .align 5 617 | update_vols: // t6: voice 618 | // Get a voice's envelope and channel volumes 619 | sll t1, t6, 4 620 | lhu t0, envelopes(t1) 621 | lb t2, dsp_regs + 0x0(t1) // VxVOLL 622 | lb t3, dsp_regs + 0x1(t1) // VxVOLR 623 | srl t0, t0, 4 624 | 625 | // Cache the multiplied volumes to save a step during mixing 626 | mult t2, t0 627 | mflo t2 628 | sh t2, cached_vols + 0(t1) 629 | mult t3, t0 630 | mflo t3 631 | sh t3, cached_vols + 2(t1) 632 | jr ra 633 | nop 634 | 635 | .align 5 636 | read_dspaddr: // v0: value 637 | // Read the DSP register address 638 | lbu v0, dsp_addr 639 | jr ra 640 | nop 641 | 642 | .align 5 643 | read_dspdata: // v0: value 644 | // Read a value from a DSP register 645 | lbu t0, dsp_addr 646 | andi t0, t0, 0x7F // Mirror 647 | lbu v0, dsp_regs(t0) 648 | jr ra 649 | nop 650 | 651 | .align 5 652 | write_dspaddr: // a1: value 653 | // Write the DSP register address 654 | sb a1, dsp_addr 655 | jr ra 656 | nop 657 | 658 | .align 5 659 | write_dspdata: // a1: value 660 | // Write a value to a DSP register if within bounds 661 | lbu t0, dsp_addr 662 | andi t1, t0, 0x80 663 | bnez t1, write_reg 664 | sll t1, t0, 2 665 | sb a1, dsp_regs(t0) 666 | 667 | // Jump to the register's handler function 668 | lw t1, write_iomap(t1) 669 | jr t1 670 | nop 671 | 672 | write_reg: 673 | // Do nothing extra for regular register writes 674 | jr ra 675 | nop 676 | 677 | .align 5 678 | write_vol: // t0: address 679 | // Update a voice's cached volumes when changed 680 | j update_vols 681 | srl t6, t0, 4 682 | 683 | .align 5 684 | write_adsr: // t0: address 685 | // Switch a voice's envelope to gain if selected 686 | lbu t1, dsp_regs(t0) // VxADSR1 687 | andi t4, t0, 0xF0 688 | srl t6, t0, 4 689 | andi t0, t1, 0x80 690 | beqz t0, use_gain 691 | nop 692 | jr ra 693 | nop 694 | 695 | .align 5 696 | write_kon: // a1: value 697 | // Set voices to enabled 698 | lbu t0, enabled 699 | move t5, ra 700 | or t0, t0, a1 701 | sb t0, enabled 702 | li t6, -1 703 | 704 | next_on: 705 | // Check if a voice was just enabled 706 | addi t6, t6, 1 707 | srl t0, a1, t6 708 | beqz t0, kon_end 709 | andi t0, t0, 0x1 710 | beqz t0, next_on 711 | nop 712 | 713 | // Get the address of the voice's sample table entry 714 | lbu t0, dsp_regs + 0x5D // DIR 715 | sll t4, t6, 4 716 | lbu t1, dsp_regs + 0x04(t4) // VxSRCN 717 | sll t0, t0, 8 718 | sll t1, t1, 2 719 | add t0, t0, t1 720 | 721 | // Load the start address and reset the voice 722 | lhu t0, apu_ram(t0) 723 | sll t1, t0, 8 724 | srl t0, t0, 8 725 | or t0, t0, t1 726 | sh t0, cur_addrs(t4) 727 | sh zero, counters(t4) 728 | sh zero, envelopes(t4) 729 | sll t1, t6, 3 730 | sd zero, cur_samples(t1) 731 | 732 | // Check whether the voice uses ADSR or gain 733 | lbu t1, dsp_regs + 0x5(t4) // VxADSR1 734 | la ra, next_on 735 | andi t1, t1, 0x80 736 | beqz t1, use_gain 737 | nop 738 | 739 | // Set the voice to attack mode and schedule the first step 740 | la t0, attack_mode 741 | sw t0, mode_funcs(t4) 742 | j attack_time 743 | nop 744 | 745 | use_gain: 746 | // Set the voice to gain mode and schedule the first step 747 | la t0, gain_mode 748 | sw t0, mode_funcs(t4) 749 | j gain_time 750 | nop 751 | 752 | kon_end: 753 | // Return from the function 754 | jr t5 755 | nop 756 | 757 | .align 5 758 | write_koff: // a1: value 759 | // Set voices to disabled 760 | lbu t0, enabled 761 | xori t1, a1, 0xFF 762 | and t0, t0, t1 763 | sb t0, enabled 764 | jr ra 765 | nop 766 | -------------------------------------------------------------------------------- /src/font.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | .globl char_lookup 23 | .globl char_sp 24 | 25 | .data 26 | 27 | .align 4 28 | char_lookup: 29 | .word char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp // 0x20-0x27 30 | .word char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp, char_sp // 0x28-0x2F 31 | .word char_0, char_1, char_2, char_3, char_4, char_5, char_6, char_7 // 0x30-0x37 32 | .word char_8, char_9, char_sp, char_sp, char_lt, char_sp, char_gt, char_sp // 0x38-0x3F 33 | .word char_sp, char_A, char_B, char_C, char_D, char_E, char_F, char_G // 0x40-0x47 34 | .word char_H, char_I, char_J, char_K, char_L, char_M, char_N, char_O // 0x48-0x4F 35 | .word char_P, char_Q, char_R, char_S, char_T, char_U, char_V, char_W // 0x50-0x57 36 | .word char_X, char_Y, char_Z, char_sp, char_sp, char_sp, char_sp, char_sp // 0x58-0x5F 37 | 38 | .align 4 39 | char_sp: 40 | .hword 00, 00, 00, 00, 00, 00, 00, 00 41 | .hword 00, 00, 00, 00, 00, 00, 00, 00 42 | .hword 00, 00, 00, 00, 00, 00, 00, 00 43 | .hword 00, 00, 00, 00, 00, 00, 00, 00 44 | .hword 00, 00, 00, 00, 00, 00, 00, 00 45 | .hword 00, 00, 00, 00, 00, 00, 00, 00 46 | .hword 00, 00, 00, 00, 00, 00, 00, 00 47 | .hword 00, 00, 00, 00, 00, 00, 00, 00 48 | 49 | .align 4 50 | char_0: 51 | .hword 00, 00, -1, -1, -1, -1, 00, 00 52 | .hword 00, -1, -1, -1, -1, -1, -1, 00 53 | .hword 00, -1, -1, 00, 00, -1, -1, 00 54 | .hword 00, -1, -1, 00, 00, -1, -1, 00 55 | .hword 00, -1, -1, 00, 00, -1, -1, 00 56 | .hword 00, -1, -1, 00, 00, -1, -1, 00 57 | .hword 00, -1, -1, -1, -1, -1, -1, 00 58 | .hword 00, 00, -1, -1, -1, -1, 00, 00 59 | 60 | .align 4 61 | char_1: 62 | .hword 00, 00, 00, -1, -1, 00, 00, 00 63 | .hword 00, 00, -1, -1, -1, 00, 00, 00 64 | .hword 00, 00, -1, -1, -1, 00, 00, 00 65 | .hword 00, 00, 00, -1, -1, 00, 00, 00 66 | .hword 00, 00, 00, -1, -1, 00, 00, 00 67 | .hword 00, 00, 00, -1, -1, 00, 00, 00 68 | .hword 00, 00, 00, -1, -1, 00, 00, 00 69 | .hword 00, 00, 00, -1, -1, 00, 00, 00 70 | 71 | .align 4 72 | char_2: 73 | .hword 00, 00, -1, -1, -1, -1, 00, 00 74 | .hword 00, -1, -1, -1, -1, -1, -1, 00 75 | .hword 00, -1, -1, 00, 00, -1, -1, 00 76 | .hword 00, 00, 00, 00, 00, -1, -1, 00 77 | .hword 00, 00, 00, 00, -1, -1, 00, 00 78 | .hword 00, 00, 00, -1, -1, 00, 00, 00 79 | .hword 00, 00, -1, -1, -1, -1, -1, 00 80 | .hword 00, -1, -1, -1, -1, -1, -1, 00 81 | 82 | .align 4 83 | char_3: 84 | .hword 00, 00, -1, -1, -1, -1, 00, 00 85 | .hword 00, -1, -1, -1, -1, -1, -1, 00 86 | .hword 00, -1, -1, 00, 00, -1, -1, 00 87 | .hword 00, 00, 00, 00, -1, -1, 00, 00 88 | .hword 00, 00, 00, 00, -1, -1, 00, 00 89 | .hword 00, -1, -1, 00, 00, -1, -1, 00 90 | .hword 00, -1, -1, -1, -1, -1, -1, 00 91 | .hword 00, 00, -1, -1, -1, -1, 00, 00 92 | 93 | .align 4 94 | char_4: 95 | .hword 00, 00, 00, -1, -1, -1, 00, 00 96 | .hword 00, 00, -1, -1, -1, -1, 00, 00 97 | .hword 00, -1, -1, 00, -1, -1, 00, 00 98 | .hword 00, -1, -1, 00, -1, -1, 00, 00 99 | .hword 00, -1, -1, -1, -1, -1, -1, 00 100 | .hword 00, -1, -1, -1, -1, -1, -1, 00 101 | .hword 00, 00, 00, 00, -1, -1, 00, 00 102 | .hword 00, 00, 00, 00, -1, -1, 00, 00 103 | 104 | .align 4 105 | char_5: 106 | .hword 00, -1, -1, -1, -1, -1, -1, 00 107 | .hword 00, -1, -1, -1, -1, -1, -1, 00 108 | .hword 00, -1, -1, 00, 00, 00, 00, 00 109 | .hword 00, -1, -1, -1, -1, -1, 00, 00 110 | .hword 00, -1, -1, -1, -1, -1, -1, 00 111 | .hword 00, 00, 00, 00, 00, -1, -1, 00 112 | .hword 00, -1, -1, -1, -1, -1, -1, 00 113 | .hword 00, 00, -1, -1, -1, -1, 00, 00 114 | 115 | .align 4 116 | char_6: 117 | .hword 00, 00, -1, -1, -1, -1, 00, 00 118 | .hword 00, -1, -1, -1, -1, -1, -1, 00 119 | .hword 00, -1, -1, 00, 00, 00, 00, 00 120 | .hword 00, -1, -1, -1, -1, -1, 00, 00 121 | .hword 00, -1, -1, -1, -1, -1, -1, 00 122 | .hword 00, -1, -1, 00, 00, -1, -1, 00 123 | .hword 00, -1, -1, -1, -1, -1, -1, 00 124 | .hword 00, 00, -1, -1, -1, -1, 00, 00 125 | 126 | .align 4 127 | char_7: 128 | .hword 00, -1, -1, -1, -1, -1, -1, 00 129 | .hword 00, -1, -1, -1, -1, -1, -1, 00 130 | .hword 00, 00, 00, 00, 00, -1, -1, 00 131 | .hword 00, 00, 00, 00, -1, -1, 00, 00 132 | .hword 00, 00, 00, 00, -1, -1, 00, 00 133 | .hword 00, 00, 00, -1, -1, 00, 00, 00 134 | .hword 00, 00, -1, -1, 00, 00, 00, 00 135 | .hword 00, 00, -1, -1, 00, 00, 00, 00 136 | 137 | .align 4 138 | char_8: 139 | .hword 00, 00, -1, -1, -1, -1, 00, 00 140 | .hword 00, -1, -1, -1, -1, -1, -1, 00 141 | .hword 00, -1, -1, 00, 00, -1, -1, 00 142 | .hword 00, 00, -1, -1, -1, -1, 00, 00 143 | .hword 00, 00, -1, -1, -1, -1, 00, 00 144 | .hword 00, -1, -1, 00, 00, -1, -1, 00 145 | .hword 00, -1, -1, -1, -1, -1, -1, 00 146 | .hword 00, 00, -1, -1, -1, -1, 00, 00 147 | 148 | .align 4 149 | char_9: 150 | .hword 00, 00, -1, -1, -1, -1, 00, 00 151 | .hword 00, -1, -1, -1, -1, -1, -1, 00 152 | .hword 00, -1, -1, 00, 00, -1, -1, 00 153 | .hword 00, -1, -1, -1, -1, -1, -1, 00 154 | .hword 00, 00, -1, -1, -1, -1, -1, 00 155 | .hword 00, 00, 00, 00, 00, -1, -1, 00 156 | .hword 00, -1, -1, -1, -1, -1, -1, 00 157 | .hword 00, 00, -1, -1, -1, -1, 00, 00 158 | 159 | .align 4 160 | char_lt: 161 | .hword 00, 00, 00, 00, -1, -1, 00, 00 162 | .hword 00, 00, 00, -1, -1, 00, 00, 00 163 | .hword 00, 00, -1, -1, 00, 00, 00, 00 164 | .hword 00, -1, -1, 00, 00, 00, 00, 00 165 | .hword 00, -1, -1, 00, 00, 00, 00, 00 166 | .hword 00, 00, -1, -1, 00, 00, 00, 00 167 | .hword 00, 00, 00, -1, -1, 00, 00, 00 168 | .hword 00, 00, 00, 00, -1, -1, 00, 00 169 | 170 | .align 4 171 | char_gt: 172 | .hword 00, 00, -1, -1, 00, 00, 00, 00 173 | .hword 00, 00, 00, -1, -1, 00, 00, 00 174 | .hword 00, 00, 00, 00, -1, -1, 00, 00 175 | .hword 00, 00, 00, 00, 00, -1, -1, 00 176 | .hword 00, 00, 00, 00, 00, -1, -1, 00 177 | .hword 00, 00, 00, 00, -1, -1, 00, 00 178 | .hword 00, 00, 00, -1, -1, 00, 00, 00 179 | .hword 00, 00, -1, -1, 00, 00, 00, 00 180 | 181 | .align 4 182 | char_A: 183 | .hword 00, 00, 00, -1, -1, 00, 00, 00 184 | .hword 00, 00, -1, -1, -1, -1, 00, 00 185 | .hword 00, 00, -1, -1, -1, -1, 00, 00 186 | .hword 00, -1, -1, 00, 00, -1, -1, 00 187 | .hword 00, -1, -1, -1, -1, -1, -1, 00 188 | .hword 00, -1, -1, -1, -1, -1, -1, 00 189 | .hword 00, -1, -1, 00, 00, -1, -1, 00 190 | .hword 00, -1, -1, 00, 00, -1, -1, 00 191 | 192 | .align 4 193 | char_B: 194 | .hword 00, -1, -1, -1, -1, -1, 00, 00 195 | .hword 00, -1, -1, -1, -1, -1, -1, 00 196 | .hword 00, -1, -1, 00, 00, -1, -1, 00 197 | .hword 00, -1, -1, -1, -1, -1, 00, 00 198 | .hword 00, -1, -1, -1, -1, -1, 00, 00 199 | .hword 00, -1, -1, 00, 00, -1, -1, 00 200 | .hword 00, -1, -1, -1, -1, -1, -1, 00 201 | .hword 00, -1, -1, -1, -1, -1, 00, 00 202 | 203 | .align 4 204 | char_C: 205 | .hword 00, 00, -1, -1, -1, -1, 00, 00 206 | .hword 00, -1, -1, -1, -1, -1, -1, 00 207 | .hword 00, -1, -1, 00, 00, -1, -1, 00 208 | .hword 00, -1, -1, 00, 00, 00, 00, 00 209 | .hword 00, -1, -1, 00, 00, 00, 00, 00 210 | .hword 00, -1, -1, 00, 00, -1, -1, 00 211 | .hword 00, -1, -1, -1, -1, -1, -1, 00 212 | .hword 00, 00, -1, -1, -1, -1, 00, 00 213 | 214 | .align 4 215 | char_D: 216 | .hword 00, -1, -1, -1, -1, -1, 00, 00 217 | .hword 00, -1, -1, -1, -1, -1, -1, 00 218 | .hword 00, -1, -1, 00, 00, -1, -1, 00 219 | .hword 00, -1, -1, 00, 00, -1, -1, 00 220 | .hword 00, -1, -1, 00, 00, -1, -1, 00 221 | .hword 00, -1, -1, 00, 00, -1, -1, 00 222 | .hword 00, -1, -1, -1, -1, -1, -1, 00 223 | .hword 00, -1, -1, -1, -1, -1, 00, 00 224 | 225 | .align 4 226 | char_E: 227 | .hword 00, -1, -1, -1, -1, -1, -1, 00 228 | .hword 00, -1, -1, -1, -1, -1, -1, 00 229 | .hword 00, -1, -1, 00, 00, 00, 00, 00 230 | .hword 00, -1, -1, -1, -1, -1, 00, 00 231 | .hword 00, -1, -1, -1, -1, -1, 00, 00 232 | .hword 00, -1, -1, 00, 00, 00, 00, 00 233 | .hword 00, -1, -1, -1, -1, -1, -1, 00 234 | .hword 00, -1, -1, -1, -1, -1, -1, 00 235 | 236 | .align 4 237 | char_F: 238 | .hword 00, -1, -1, -1, -1, -1, -1, 00 239 | .hword 00, -1, -1, -1, -1, -1, -1, 00 240 | .hword 00, -1, -1, 00, 00, 00, 00, 00 241 | .hword 00, -1, -1, -1, -1, -1, 00, 00 242 | .hword 00, -1, -1, -1, -1, -1, 00, 00 243 | .hword 00, -1, -1, 00, 00, 00, 00, 00 244 | .hword 00, -1, -1, 00, 00, 00, 00, 00 245 | .hword 00, -1, -1, 00, 00, 00, 00, 00 246 | 247 | .align 4 248 | char_G: 249 | .hword 00, 00, -1, -1, -1, -1, 00, 00 250 | .hword 00, -1, -1, -1, -1, -1, -1, 00 251 | .hword 00, -1, -1, 00, 00, 00, 00, 00 252 | .hword 00, -1, -1, 00, -1, -1, -1, 00 253 | .hword 00, -1, -1, 00, -1, -1, -1, 00 254 | .hword 00, -1, -1, 00, 00, -1, -1, 00 255 | .hword 00, -1, -1, -1, -1, -1, -1, 00 256 | .hword 00, 00, -1, -1, -1, -1, 00, 00 257 | 258 | .align 4 259 | char_H: 260 | .hword 00, -1, -1, 00, 00, -1, -1, 00 261 | .hword 00, -1, -1, 00, 00, -1, -1, 00 262 | .hword 00, -1, -1, 00, 00, -1, -1, 00 263 | .hword 00, -1, -1, -1, -1, -1, -1, 00 264 | .hword 00, -1, -1, -1, -1, -1, -1, 00 265 | .hword 00, -1, -1, 00, 00, -1, -1, 00 266 | .hword 00, -1, -1, 00, 00, -1, -1, 00 267 | .hword 00, -1, -1, 00, 00, -1, -1, 00 268 | 269 | .align 4 270 | char_I: 271 | .hword 00, 00, -1, -1, -1, -1, 00, 00 272 | .hword 00, 00, -1, -1, -1, -1, 00, 00 273 | .hword 00, 00, 00, -1, -1, 00, 00, 00 274 | .hword 00, 00, 00, -1, -1, 00, 00, 00 275 | .hword 00, 00, 00, -1, -1, 00, 00, 00 276 | .hword 00, 00, 00, -1, -1, 00, 00, 00 277 | .hword 00, 00, -1, -1, -1, -1, 00, 00 278 | .hword 00, 00, -1, -1, -1, -1, 00, 00 279 | 280 | .align 4 281 | char_J: 282 | .hword 00, 00, 00, -1, -1, -1, -1, 00 283 | .hword 00, 00, 00, -1, -1, -1, -1, 00 284 | .hword 00, 00, 00, 00, -1, -1, 00, 00 285 | .hword 00, 00, 00, 00, -1, -1, 00, 00 286 | .hword 00, 00, 00, 00, -1, -1, 00, 00 287 | .hword 00, -1, -1, 00, -1, -1, 00, 00 288 | .hword 00, -1, -1, -1, -1, -1, 00, 00 289 | .hword 00, 00, -1, -1, -1, 00, 00, 00 290 | 291 | .align 4 292 | char_K: 293 | .hword 00, -1, -1, 00, 00, -1, -1, 00 294 | .hword 00, -1, -1, 00, -1, -1, 00, 00 295 | .hword 00, -1, -1, -1, -1, 00, 00, 00 296 | .hword 00, -1, -1, -1, 00, 00, 00, 00 297 | .hword 00, -1, -1, -1, 00, 00, 00, 00 298 | .hword 00, -1, -1, -1, -1, 00, 00, 00 299 | .hword 00, -1, -1, 00, -1, -1, 00, 00 300 | .hword 00, -1, -1, 00, 00, -1, -1, 00 301 | 302 | .align 4 303 | char_L: 304 | .hword 00, -1, -1, 00, 00, 00, 00, 00 305 | .hword 00, -1, -1, 00, 00, 00, 00, 00 306 | .hword 00, -1, -1, 00, 00, 00, 00, 00 307 | .hword 00, -1, -1, 00, 00, 00, 00, 00 308 | .hword 00, -1, -1, 00, 00, 00, 00, 00 309 | .hword 00, -1, -1, 00, 00, 00, 00, 00 310 | .hword 00, -1, -1, -1, -1, -1, -1, 00 311 | .hword 00, -1, -1, -1, -1, -1, -1, 00 312 | 313 | .align 4 314 | char_M: 315 | .hword -1, -1, 00, 00, 00, 00, -1, -1 316 | .hword -1, -1, -1, 00, 00, -1, -1, -1 317 | .hword -1, -1, -1, 00, 00, -1, -1, -1 318 | .hword -1, -1, -1, -1, -1, -1, -1, -1 319 | .hword -1, -1, -1, -1, -1, -1, -1, -1 320 | .hword -1, -1, 00, -1, -1, 00, -1, -1 321 | .hword -1, -1, 00, -1, -1, 00, -1, -1 322 | .hword -1, -1, 00, 00, 00, 00, -1, -1 323 | 324 | .align 4 325 | char_N: 326 | .hword 00, -1, -1, 00, 00, -1, -1, 00 327 | .hword 00, -1, -1, -1, 00, -1, -1, 00 328 | .hword 00, -1, -1, -1, 00, -1, -1, 00 329 | .hword 00, -1, -1, -1, -1, -1, -1, 00 330 | .hword 00, -1, -1, -1, -1, -1, -1, 00 331 | .hword 00, -1, -1, 00, -1, -1, -1, 00 332 | .hword 00, -1, -1, 00, -1, -1, -1, 00 333 | .hword 00, -1, -1, 00, 00, -1, -1, 00 334 | 335 | .align 4 336 | char_O: 337 | .hword 00, 00, -1, -1, -1, -1, 00, 00 338 | .hword 00, -1, -1, -1, -1, -1, -1, 00 339 | .hword 00, -1, -1, 00, 00, -1, -1, 00 340 | .hword 00, -1, -1, 00, 00, -1, -1, 00 341 | .hword 00, -1, -1, 00, 00, -1, -1, 00 342 | .hword 00, -1, -1, 00, 00, -1, -1, 00 343 | .hword 00, -1, -1, -1, -1, -1, -1, 00 344 | .hword 00, 00, -1, -1, -1, -1, 00, 00 345 | 346 | .align 4 347 | char_P: 348 | .hword 00, -1, -1, -1, -1, -1, 00, 00 349 | .hword 00, -1, -1, -1, -1, -1, -1, 00 350 | .hword 00, -1, -1, 00, 00, -1, -1, 00 351 | .hword 00, -1, -1, -1, -1, -1, -1, 00 352 | .hword 00, -1, -1, -1, -1, -1, 00, 00 353 | .hword 00, -1, -1, 00, 00, 00, 00, 00 354 | .hword 00, -1, -1, 00, 00, 00, 00, 00 355 | .hword 00, -1, -1, 00, 00, 00, 00, 00 356 | 357 | .align 4 358 | char_Q: 359 | .hword 00, 00, -1, -1, -1, -1, 00, 00 360 | .hword 00, -1, -1, -1, -1, -1, -1, 00 361 | .hword 00, -1, -1, 00, 00, -1, -1, 00 362 | .hword 00, -1, -1, 00, 00, -1, -1, 00 363 | .hword 00, -1, -1, 00, -1, -1, -1, 00 364 | .hword 00, -1, -1, -1, -1, -1, 00, 00 365 | .hword 00, 00, -1, -1, -1, -1, -1, 00 366 | .hword 00, 00, 00, 00, 00, -1, -1, 00 367 | 368 | .align 4 369 | char_R: 370 | .hword 00, -1, -1, -1, -1, -1, 00, 00 371 | .hword 00, -1, -1, -1, -1, -1, -1, 00 372 | .hword 00, -1, -1, 00, 00, -1, -1, 00 373 | .hword 00, -1, -1, 00, 00, -1, -1, 00 374 | .hword 00, -1, -1, -1, -1, -1, 00, 00 375 | .hword 00, -1, -1, -1, -1, 00, 00, 00 376 | .hword 00, -1, -1, 00, -1, -1, 00, 00 377 | .hword 00, -1, -1, 00, 00, -1, -1, 00 378 | 379 | .align 4 380 | char_S: 381 | .hword 00, 00, -1, -1, -1, -1, 00, 00 382 | .hword 00, -1, -1, -1, -1, -1, -1, 00 383 | .hword 00, -1, -1, 00, 00, 00, 00, 00 384 | .hword 00, -1, -1, -1, -1, -1, 00, 00 385 | .hword 00, 00, -1, -1, -1, -1, -1, 00 386 | .hword 00, 00, 00, 00, 00, -1, -1, 00 387 | .hword 00, -1, -1, -1, -1, -1, -1, 00 388 | .hword 00, 00, -1, -1, -1, -1, 00, 00 389 | 390 | .align 4 391 | char_T: 392 | .hword 00, -1, -1, -1, -1, -1, -1, 00 393 | .hword 00, -1, -1, -1, -1, -1, -1, 00 394 | .hword 00, 00, 00, -1, -1, 00, 00, 00 395 | .hword 00, 00, 00, -1, -1, 00, 00, 00 396 | .hword 00, 00, 00, -1, -1, 00, 00, 00 397 | .hword 00, 00, 00, -1, -1, 00, 00, 00 398 | .hword 00, 00, 00, -1, -1, 00, 00, 00 399 | .hword 00, 00, 00, -1, -1, 00, 00, 00 400 | 401 | .align 4 402 | char_U: 403 | .hword 00, -1, -1, 00, 00, -1, -1, 00 404 | .hword 00, -1, -1, 00, 00, -1, -1, 00 405 | .hword 00, -1, -1, 00, 00, -1, -1, 00 406 | .hword 00, -1, -1, 00, 00, -1, -1, 00 407 | .hword 00, -1, -1, 00, 00, -1, -1, 00 408 | .hword 00, -1, -1, 00, 00, -1, -1, 00 409 | .hword 00, -1, -1, -1, -1, -1, -1, 00 410 | .hword 00, 00, -1, -1, -1, -1, 00, 00 411 | 412 | .align 4 413 | char_V: 414 | .hword 00, -1, -1, 00, 00, -1, -1, 00 415 | .hword 00, -1, -1, 00, 00, -1, -1, 00 416 | .hword 00, -1, -1, 00, 00, -1, -1, 00 417 | .hword 00, -1, -1, 00, 00, -1, -1, 00 418 | .hword 00, -1, -1, 00, 00, -1, -1, 00 419 | .hword 00, 00, -1, -1, -1, -1, 00, 00 420 | .hword 00, 00, -1, -1, -1, -1, 00, 00 421 | .hword 00, 00, 00, -1, -1, 00, 00, 00 422 | 423 | .align 4 424 | char_W: 425 | .hword -1, -1, 00, 00, 00, 00, -1, -1 426 | .hword -1, -1, 00, -1, -1, 00, -1, -1 427 | .hword -1, -1, 00, -1, -1, 00, -1, -1 428 | .hword -1, -1, -1, -1, -1, -1, -1, -1 429 | .hword -1, -1, -1, -1, -1, -1, -1, -1 430 | .hword -1, -1, -1, 00, 00, -1, -1, -1 431 | .hword -1, -1, -1, 00, 00, -1, -1, -1 432 | .hword -1, -1, 01, 00, 00, 00, -1, -1 433 | 434 | .align 4 435 | char_X: 436 | .hword 00, -1, -1, 00, 00, -1, -1, 00 437 | .hword 00, -1, -1, 00, 00, -1, -1, 00 438 | .hword 00, -1, -1, 00, 00, -1, -1, 00 439 | .hword 00, 00, -1, -1, -1, -1, 00, 00 440 | .hword 00, 00, -1, -1, -1, -1, 00, 00 441 | .hword 00, -1, -1, 00, 00, -1, -1, 00 442 | .hword 00, -1, -1, 00, 00, -1, -1, 00 443 | .hword 00, -1, -1, 00, 00, -1, -1, 00 444 | 445 | .align 4 446 | char_Y: 447 | .hword 00, -1, -1, 00, 00, -1, -1, 00 448 | .hword 00, -1, -1, 00, 00, -1, -1, 00 449 | .hword 00, -1, -1, 00, 00, -1, -1, 00 450 | .hword 00, 00, -1, -1, -1, -1, 00, 00 451 | .hword 00, 00, -1, -1, -1, -1, 00, 00 452 | .hword 00, 00, 00, -1, -1, 00, 00, 00 453 | .hword 00, 00, 00, -1, -1, 00, 00, 00 454 | .hword 00, 00, 00, -1, -1, 00, 00, 00 455 | 456 | .align 4 457 | char_Z: 458 | .hword 00, -1, -1, -1, -1, -1, -1, 00 459 | .hword 00, -1, -1, -1, -1, -1, -1, 00 460 | .hword 00, 00, 00, 00, 00, -1, -1, 00 461 | .hword 00, 00, 00, 00, -1, -1, 00, 00 462 | .hword 00, 00, 00, -1, -1, 00, 00, 00 463 | .hword 00, 00, -1, -1, 00, 00, 00, 00 464 | .hword 00, -1, -1, -1, -1, -1, -1, 00 465 | .hword 00, -1, -1, -1, -1, -1, -1, 00 466 | -------------------------------------------------------------------------------- /src/input.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "defines.h" 21 | 22 | .globl input_init 23 | .globl input_update 24 | .globl get_pressed 25 | .globl read_joyx 26 | .globl read_joyxl 27 | .globl read_joyxh 28 | 29 | .data 30 | 31 | .align 4 32 | btn_state: .byte 0xFF 33 | 34 | .align 4 35 | joybus_cmd: // Reads controller status when copied to PIF RAM 36 | .dword 0xFF010401FFFFFFFF // Read controller 1 37 | .dword 0xFF010401FFFFFFFF // Read controller 2 38 | .dword 0xFF010401FFFFFFFF // Read controller 3 39 | .dword 0xFF010401FFFFFFFF // Read controller 4 40 | .dword 0xFE00000000000000 // Finish execution 41 | .dword 0x0000000000000000 // Nothing 42 | .dword 0x0000000000000000 // Nothing 43 | .dword 0x0000000000000001 // Run command 44 | 45 | .text 46 | .set noreorder 47 | 48 | .align 5 49 | input_init: 50 | // Send the initial joybus command to PIF RAM via DMA 51 | lui t0, 0xA480 // SI register upper address 52 | la t1, joybus_cmd // Input buffer 53 | sw t1, 0x0000(t0) // SI_DRAM_ADDR 54 | li t1, 0x000007C0 // PIF RAM 55 | jr ra 56 | sw t1, 0x0010(t0) // SI_PIF_AD_WR64B 57 | 58 | .align 5 59 | input_update: 60 | // Read the output of the joybus command via DMA 61 | lui k0, 0xA480 // SI register upper address 62 | la k1, joybus_cmd // Output buffer 63 | sw k1, 0x0000(k0) // SI_DRAM_ADDR 64 | li k1, 0x000007C0 // PIF RAM 65 | sw k1, 0x0004(k0) // SI_PIF_AD_RD64B 66 | eret 67 | 68 | .align 5 69 | get_pressed: // v0: button mask 70 | // Check if controller 1 is connected 71 | lbu t1, UNCACHED(joybus_cmd + 2) 72 | andi t1, t1, 0xC0 // Error bits 73 | bnez t1, end_pressed 74 | li v0, 0 75 | 76 | // Get the newly-pressed state of certain buttons 77 | lbu t0, btn_state 78 | lbu t1, UNCACHED(joybus_cmd + 4) 79 | xori t0, t0, 0xFF 80 | and v0, t0, t1 81 | sb t1, btn_state 82 | end_pressed: 83 | jr ra 84 | nop 85 | 86 | .align 5 87 | read_joyx: // a0: address 88 | // Stub to fix input in games like Donkey Kong Country 89 | // TODO: actually implement manual controller ports 90 | jr ra 91 | li v0, 0x1 92 | 93 | .align 5 94 | read_joyxl: // a0: address 95 | // Check if the controller is connected, and return 0 if not 96 | andi t2, a0, 0x6 97 | sll t2, t2, 2 98 | lbu t0, UNCACHED(joybus_cmd + 2)(t2) 99 | andi t0, t0, 0xC0 // Error bits 100 | beqz t0, joyxl_conn 101 | li v0, 0 102 | jr ra 103 | nop 104 | 105 | joyxl_conn: 106 | // Read button data for the controller 107 | lhu t0, UNCACHED(joybus_cmd + 4)(t2) 108 | 109 | // Map N64 LR to SNES LR 110 | andi v0, t0, 0x30 111 | 112 | // Map N64 C-up to SNES X 113 | andi t1, t0, 0x8 114 | sll t1, t1, 3 115 | or v0, v0, t1 116 | 117 | // Map N64 C-right to SNES A 118 | andi t1, t0, 0x1 119 | sll t1, t1, 7 120 | jr ra 121 | or v0, v0, t1 122 | 123 | .align 5 124 | read_joyxh: // a0: address 125 | // Check if the controller is connected, and return 0 if not 126 | andi t2, a0, 0x6 127 | sll t2, t2, 2 128 | lbu t0, UNCACHED(joybus_cmd + 2)(t2) 129 | andi t0, t0, 0xC0 // Error bits 130 | beqz t0, joyxh_conn 131 | li v0, 0 132 | jr ra 133 | nop 134 | 135 | joyxh_conn: 136 | // Read button data for the controller 137 | lhu t0, UNCACHED(joybus_cmd + 4)(t2) 138 | 139 | // Map N64 D-pad to SNES D-pad 140 | srl t1, t0, 8 141 | andi v0, t1, 0xF 142 | 143 | // Map N64 A to SNES Start 144 | andi t1, t0, 0x8000 145 | srl t1, t1, 11 146 | or v0, v0, t1 147 | 148 | // Map N64 B to SNES Select 149 | andi t1, t0, 0x4000 150 | srl t1, t1, 9 151 | or v0, v0, t1 152 | 153 | // Map N64 C-left and C-down to SNES Y and B 154 | andi t1, t0, 0x6 155 | sll t1, t1, 5 156 | jr ra 157 | or v0, v0, t1 158 | -------------------------------------------------------------------------------- /src/macros.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "defines.h" 21 | 22 | .macro MEM_READ8 addr=0(s0), lb=lbu // v0: value 23 | // Read a byte from memory and decrease the cycle count 24 | \lb v0, \addr 25 | addi s5, s5, -RAM_CYCLE 26 | .endm 27 | 28 | .macro MEM_READ16 addr=0(s0) // v0: value 29 | // Read 2 bytes from memory and form a 16-bit value 30 | lbu v0, 0 + \addr 31 | addi s5, s5, -RAM_CYCLE * 2 32 | move a2, v0 33 | lbu v0, 1 + \addr 34 | sll v0, v0, 8 35 | or v0, v0, a2 36 | .endm 37 | 38 | .macro MEM_WRITE8 addr=0(s0) // a1: value 39 | // Write a byte to memory and decrease the cycle count 40 | sb a1, \addr 41 | addi s5, s5, -RAM_CYCLE 42 | .endm 43 | 44 | .macro MEM_WRITE16 addr=0(s0) // a1: value 45 | // Write a 16-bit value to memory as 2 bytes 46 | move a2, a1 47 | sb a1, 0 + \addr 48 | addi s5, s5, -RAM_CYCLE * 2 49 | srl a1, a2, 8 50 | sb a1, 1 + \addr 51 | move a1, a2 52 | .endm 53 | 54 | .macro EMIT_OP opcode 55 | // Emit a static JIT opcode, optimized for delay slots 56 | lui t0, (\opcode) >> 16 57 | jal emit_op 58 | ori t0, t0, (\opcode) & 0xFFFF 59 | .endm 60 | -------------------------------------------------------------------------------- /src/main.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "defines.h" 21 | 22 | .globl framebuffer 23 | .globl fps_emulate 24 | .globl fps_display 25 | .globl skip_native 26 | .globl skip_emulate 27 | .globl frame_count 28 | 29 | .globl _start 30 | 31 | .data 32 | 33 | .align 4 34 | framebuffer: .word FRAMEBUFFER1 35 | fps_native: .byte 0 36 | fps_emulate: .byte 0 37 | fps_display: .byte 0 38 | skip_native: .byte 0 39 | skip_emulate: .byte 0 40 | frame_count: .byte 0 41 | 42 | .section .boot 43 | .set noreorder 44 | 45 | _start: 46 | // Jump to the main function (entry point) 47 | j main 48 | nop 49 | 50 | exception_vector: 51 | // Jump to the exception handler (copied to exception vectors) 52 | j exception_handler 53 | nop 54 | 55 | .text 56 | 57 | .align 5 58 | main: 59 | // Disable interrupts and do the magic hardware init 60 | mtc0 zero, $12 // Status 61 | li t0, 8 62 | sw t0, 0xBFC007FC 63 | 64 | // Clear framebuffers and other hardcoded data 65 | li t0, FRAMEBUFFER1 66 | li t1, JIT_BUFFER - 8 67 | clear_vram: 68 | sd zero, (t0) 69 | bne t0, t1, clear_vram 70 | addi t0, t0, 8 71 | 72 | // Initialize the VI to display a 16-bit 256x224 framebuffer 73 | lui t0, 0xA440 // VI register upper address 74 | li t1, 0x00000202 // 16-bit pixel format, no anti-aliasing 75 | sw t1, 0x0000(t0) // VI_CONTROL 76 | li t1, FRAMEBUFFER1 // Framebuffer address 77 | sw t1, 0x0004(t0) // VI_ORIGIN 78 | li t1, 0x00000118 // Framebuffer width 79 | sw t1, 0x0008(t0) // VI_WIDTH 80 | li t1, 0x00000200 // Line to trigger interrupt 81 | sw t1, 0x000C(t0) // VI_V_INTR 82 | li t1, 0x00000000 // Clear interrupt 83 | sw t1, 0x0010(t0) // VI_V_CURRENT 84 | li t1, 0x03E52239 // Various timing parameters 85 | sw t1, 0x0014(t0) // VI_BURST 86 | li t1, 0x0000020D // Lines per frame (NTSC standard) 87 | sw t1, 0x0018(t0) // VI_V_SYNC 88 | li t1, 0x00000C15 // Quarter-pixels per line 89 | sw t1, 0x001C(t0) // VI_H_SYNC 90 | li t1, 0x0C150C15 // Same as above 91 | sw t1, 0x0020(t0) // VI_H_SYNC_LEAP 92 | li t1, 0x006C02EC // Active horizontal pixel range 93 | sw t1, 0x0024(t0) // VI_H_VIDEO 94 | li t1, 0x002501FF // Active vertical pixel range 95 | sw t1, 0x0028(t0) // VI_V_VIDEO 96 | li t1, 0x000E0204 // Vertical color burst range 97 | sw t1, 0x002C(t0) // VI_V_BURST 98 | li t1, 0x000001C0 // Horizontal scale (10-bit fraction, 280/640) 99 | sw t1, 0x0030(t0) // VI_X_SCALE 100 | li t1, 0x00000400 // Vertical scale (10-bit fraction, 240/240) 101 | sw t1, 0x0034(t0) // VI_Y_SCALE 102 | 103 | // Initialize the AI to output 16-bit samples at the DSP rate 104 | lui t0, 0xA450 // AI register upper address 105 | li t1, 0x00000001 // DMA enable 106 | sw t1, 0x0008(t0) // AI_CONTROL 107 | li t1, 48681812 / (60 * 262 * 341 * 4 / DSP_SAMPLE) + 1 108 | sw t1, 0x0010(t0) // AI_DAC_RATE 109 | li t1, 0x0000000F // 16-bit - 1 110 | sw t1, 0x0014(t0) // AI_BIT_RATE 111 | 112 | // Update the exception vectors 113 | lui t0, 0xA000 114 | ld t1, exception_vector 115 | sd t1, 0x0000(t0) 116 | sd t1, 0x0080(t0) 117 | sd t1, 0x0100(t0) 118 | sd t1, 0x0180(t0) 119 | cache 0x10, 0x0000(t0) 120 | cache 0x10, 0x0080(t0) 121 | cache 0x10, 0x0100(t0) 122 | cache 0x10, 0x0180(t0) 123 | 124 | // Use DMA to copy SRAM from the cart to memory 125 | lui t0, 0xA460 // PI register upper address 126 | la t1, sram // Local SRAM 127 | sw t1, 0x0000(t0) // PI_DRAM_ADDR 128 | li t1, 0x08000000 // Cart SRAM 129 | sw t1, 0x0004(t0) // PI_CART_ADDR 130 | li t1, 0x00007FFF // 32KB size 131 | sw t1, 0x000C(t0) // PI_WR_LEN 132 | 133 | pi_wait1: 134 | // Wait for the DMA to complete 135 | lw t1, 0x0010(t0) // PI_STATUS 136 | andi t1, t1, 0x1 // DMA busy 137 | bnez t1, pi_wait1 138 | nop 139 | 140 | // Initialize the emulator 141 | jal reset_tlb 142 | nop 143 | jal input_init 144 | nop 145 | jal memory_init 146 | nop 147 | jal section_init 148 | nop 149 | jal cpu_init 150 | nop 151 | 152 | // Upload the RSP code and data 153 | la a1, rsp_main_text_start 154 | jal rsp_upload 155 | li a0, 0x1000 // IMEM address 156 | la a1, rsp_main_data_start 157 | jal rsp_upload 158 | li a0, 0x0000 // DMEM address 159 | sw zero, 0xA4080000 // SP_PC 160 | 161 | // Enable VI interrupts and start execution 162 | li t0, 0x00000595 // Set VI mask 163 | sw t0, 0xA430000C // MI_MASK 164 | li t0, 0x00000401 // Enable interrupts 165 | j cpu_execute 166 | mtc0 t0, $12 // Status 167 | 168 | .align 5 169 | rsp_upload: // a0: RSP address, a1: DRAM address 170 | // Transfer data to the RSP via DMA 171 | lui t0, 0xA404 // SP register upper address 172 | sw a0, 0x0000(t0) // SP_MEM_ADDR 173 | sw a1, 0x0004(t0) // SP_DRAM_ADDR 174 | li t1, 0x00000FFF // Data size 175 | sw t1, 0x0008(t0) // SP_RD_LEN 176 | 177 | dma_wait: 178 | // Wait for the DMA to complete 179 | lw t1, 0x0018(t0) // SP_DMA_BUSY 180 | bnez t1, dma_wait 181 | nop 182 | jr ra 183 | nop 184 | 185 | .align 5 186 | reset_tlb: 187 | // Map all TLB entries to inaccessible locations 188 | mtc0 zero, $5 // PageMask 189 | li t0, 0x80000000 190 | mtc0 t0, $10 // EntryHi 191 | li t0, 32 192 | next_tlb: 193 | addi t0, t0, -1 194 | mtc0 t0, $0 // Index 195 | bnez t0, next_tlb 196 | tlbwi 197 | jr ra 198 | nop 199 | 200 | .align 5 201 | exception_handler: 202 | // Jump to the appropriate handler for supported exceptions 203 | mfc0 k0, $13 // Cause 204 | andi k0, k0, 0x7C 205 | addi k1, k0, -0x8 // TLB load miss 206 | beqz k1, tlbl_exception 207 | addi k1, k0, -0xC // TLB store miss 208 | beqz k1, tlbs_exception 209 | addi k1, k0, -0x4 // TLB modification 210 | beqz k1, tlbm_exception 211 | 212 | // Jump to the appropriate handler for supported interrupts 213 | mfc0 k0, $13 // Cause 214 | andi k1, k0, 0x1000 215 | bnez k1, reset_interrupt 216 | andi k1, k0, 0x400 217 | bnez k1, vi_interrupt 218 | nop 219 | eret 220 | nop 221 | 222 | .align 5 223 | vi_interrupt: 224 | // Acknowledge the VI interrupt 225 | lui k0, 0xA440 226 | sw zero, 0x0010(k0) // VI_CURRENT 227 | 228 | // Wait one second by counting 60 frames at 60Hz 229 | la k0, fps_native 230 | lbu k1, (k0) 231 | addi k1, k1, -59 232 | beqz k1, update_fps 233 | addi k1, k1, 60 234 | b check_frame 235 | sb k1, (k0) 236 | 237 | update_fps: 238 | // Update the display FPS and reset the counters 239 | la k0, fps_emulate 240 | lbu k1, (k0) 241 | sb zero, (k0) 242 | la k0, fps_native 243 | sb zero, (k0) 244 | la k0, fps_display 245 | sb k1, (k0) 246 | 247 | // Check if SRAM is dirty 248 | la k0, sram_dirty 249 | lbu k1, (k0) 250 | beqz k1, check_frame 251 | sb zero, (k0) 252 | 253 | // Use DMA to copy SRAM from memory to the cart 254 | lui k0, 0xA460 // PI register upper address 255 | la k1, sram // Local SRAM 256 | sw k1, 0x0000(k0) // PI_DRAM_ADDR 257 | li k1, 0x08000000 // Cart SRAM 258 | sw k1, 0x0004(k0) // PI_CART_ADDR 259 | li k1, 0x00007FFF // 32KB size 260 | sw k1, 0x0008(k0) // PI_RD_LEN 261 | 262 | pi_wait2: 263 | // Wait for the DMA to complete 264 | lw k1, 0x0010(k0) // PI_STATUS 265 | andi k1, k1, 0x1 // DMA busy 266 | bnez k1, pi_wait2 267 | nop 268 | 269 | check_frame: 270 | // Ignore V-blanks to match the frames being skipped 271 | la k1, skipped_set 272 | lbu k0, (k1) 273 | srl k0, k0, 2 274 | la k1, skip_native 275 | lbu k1, (k1) 276 | sub k0, k0, k1 277 | blez k0, skip_reset 278 | li k0, 0 279 | addi k0, k1, 1 280 | skip_reset: 281 | la k1, skip_native 282 | bnez k0, input_update 283 | sb k0, (k1) 284 | 285 | // Continue if the next frame is ready; otherwise just update input 286 | la k0, frame_count 287 | lbu k1, (k0) 288 | beqz k1, input_update 289 | nop 290 | 291 | // Get the address of the next of three framebuffers 292 | la k0, framebuffer 293 | lw k0, (k0) 294 | li k1, FRAMEBUFFER2 - FRAMEBUFFER1 295 | add k0, k0, k1 296 | li k1, MODE7_TEXTURE 297 | bne k0, k1, set_buffer 298 | nop 299 | li k0, FRAMEBUFFER1 300 | 301 | set_buffer: 302 | // Set the framebuffer address 303 | lui k1, 0xA440 304 | sw k0, 0x0004(k1) // VI_ORIGIN 305 | la k1, framebuffer 306 | sw k0, (k1) 307 | 308 | // Decrement the frame counter and update input 309 | la k0, frame_count 310 | lbu k1, (k0) 311 | addi k1, k1, -1 312 | j input_update 313 | sb k1, (k0) 314 | 315 | .align 5 316 | reset_interrupt: 317 | // Stop the RSP and prepare the RDP for reset 318 | li t0, 0x00000002 // Set halt 319 | sw t0, 0xA4040010 // SP_STATUS 320 | li t0, 0x00000001 // Use RDRAM 321 | sw t0, 0xA410000C // DP_STATUS 322 | 323 | loop: 324 | // Wait until the system resets 325 | b loop 326 | nop 327 | -------------------------------------------------------------------------------- /src/menu.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include "defines.h" 21 | 22 | .globl layer_set 23 | .globl precision_set 24 | .globl skipped_set 25 | .globl audio_set 26 | 27 | .globl update_menu 28 | 29 | .data 30 | 31 | .align 4 32 | counter_set: .byte 0 << 2 33 | layer_set: .byte 0 << 2 34 | precision_set: .byte 2 << 2 35 | skipped_set: .byte 0 << 2 36 | audio_set: .byte 1 << 2 37 | underclk_set: .byte 1 << 2 38 | left_str: .string "<" 39 | right_str: .string ">" 40 | 41 | .align 4 42 | title_str: .string "SODIUM64 SETTINGS" 43 | counter_str: .string "FPS COUNTER" 44 | layer_str: .string "SUB LAYER ORDER" 45 | precision_str: .string "FRAME PRECISION" 46 | skipped_str: .string "FRAMES SKIPPED" 47 | audio_str: .string "AUDIO OUTPUT" 48 | underclk_str: .string "APU UNDERCLOCK" 49 | 50 | .align 4 51 | on_str: .string " ON " 52 | off_str: .string " OFF " 53 | back_str: .string " BACK " 54 | front_str: .string " FRONT " 55 | lower_str: .string " LOWER " 56 | low_str: .string " LOW " 57 | medium_str: .string " MEDIUM " 58 | high_str: .string " HIGH " 59 | higher_str: .string " HIGHER " 60 | max_str: .string " MAX " 61 | zero_str: .string " 0 " 62 | one_str: .string " 1 " 63 | two_str: .string " 2 " 64 | three_str: .string " 3 " 65 | 66 | .align 4 67 | menu_items: .word counter_str, layer_str, precision_str, skipped_str, audio_str, underclk_str, 0 68 | menu_opts: .word toggle_opts, layer_opts, precision_opts, skipped_opts, toggle_opts, toggle_opts 69 | menu_sets: .word counter_set, layer_set, precision_set, skipped_set, audio_set, underclk_set 70 | 71 | .align 4 72 | toggle_opts: .word off_str, on_str, 0 73 | layer_opts: .word back_str, front_str, 0 74 | precision_opts: .word lower_str, low_str, medium_str, high_str, higher_str, max_str, 0 75 | skipped_opts: .word zero_str, one_str, two_str, three_str, 0 76 | 77 | .text 78 | .set noreorder 79 | 80 | .macro DRAW line 81 | // Copy a line of character pixels to the framebuffer 82 | ld t0, \line * 16 + 0(t2) 83 | ld t1, \line * 16 + 8(t2) 84 | sd t0, \line * 560 + 0(a1) 85 | sd t1, \line * 560 + 8(a1) 86 | .endm 87 | 88 | .align 5 89 | draw_char: // a0: char, a1: top-left offset 90 | // Look up a character sprite if it's within bounds 91 | la t2, char_sp 92 | addi t0, a0, -0x20 93 | bgtu t0, 0x40, unk_char 94 | sll t0, t0, 2 95 | lw t2, char_lookup(t0) 96 | 97 | unk_char: 98 | // Copy the sprite to the framebuffer 99 | DRAW 0 100 | DRAW 1 101 | DRAW 2 102 | DRAW 3 103 | DRAW 4 104 | DRAW 5 105 | DRAW 6 106 | DRAW 7 107 | jr ra 108 | nop 109 | 110 | .align 5 111 | draw_value: // a0: value, a1: top-right offset 112 | // Isolate the lowest base-10 digit and draw its character 113 | li t1, 10 114 | div a0, t1 115 | addi a1, a1, -16 116 | move t9, ra 117 | mfhi t2 118 | jal draw_char 119 | addi a0, t2, 0x30 120 | 121 | // Move to the next digit until they're all drawn 122 | mflo a0 123 | bnez a0, draw_value 124 | move ra, t9 125 | jr ra 126 | nop 127 | 128 | .align 5 129 | draw_string: // a0: string, a1: top-left offset 130 | // Initialize values for drawing a string 131 | move t9, ra 132 | move t8, a0 133 | addi a1, a1, -16 134 | la ra, string_loop 135 | 136 | string_loop: 137 | // Draw each character in the string until terminated 138 | lbu a0, (t8) 139 | beqz a0, end_string 140 | addi t8, t8, 1 141 | j draw_char 142 | addi a1, a1, 16 143 | end_string: 144 | jr t9 145 | nop 146 | 147 | .align 5 148 | update_menu: // a0: framebuffer 149 | // Open the settings menu if start is pressed 150 | jal get_pressed 151 | li v1, 0 // Index 152 | andi t0, v0, 0x10 // Start 153 | bnez t0, start_menu 154 | move t7, a0 155 | 156 | // Draw the FPS counter if running and enabled 157 | lbu t0, counter_set 158 | beqz t0, menu_return 159 | nop 160 | lbu a0, fps_display 161 | jal draw_value 162 | addi a1, t7, 23 * 560 + 44 * 2 163 | b menu_return 164 | nop 165 | 166 | start_menu: 167 | // Wait until there are no extra frames queued 168 | lbu t0, frame_count 169 | bnez t0, start_menu 170 | nop 171 | 172 | // Clear the current framebuffer 173 | lw t0, framebuffer 174 | add t1, t0, FRAMEBUFFER2 - FRAMEBUFFER1 175 | clear_buffer: 176 | sd zero, (t0) 177 | bne t0, t1, clear_buffer 178 | addi t0, t0, 8 179 | 180 | draw_menu: 181 | // Draw the title and initialize values for menu items 182 | lw t7, framebuffer 183 | la a0, title_str 184 | jal draw_string 185 | addi a1, t7, 39 * 560 + 72 * 2 186 | addi t7, t7, 39 * 560 + 48 * 2 187 | li t3, 0 188 | 189 | menu_loop: 190 | // Draw a menu item on the left 191 | lw a0, menu_items(t3) 192 | beqz a0, menu_idle 193 | addi t7, t7, 24 * 560 194 | jal draw_string 195 | move a1, t7 196 | 197 | // Draw the item's setting on the right 198 | lw t0, menu_sets(t3) 199 | lw t1, menu_opts(t3) 200 | lbu t0, (t0) 201 | add t1, t1, t0 202 | lw a0, (t1) 203 | jal draw_string 204 | addi a1, t7, 128 * 2 205 | 206 | // Move to the next item if not selected 207 | bne t3, v1, menu_loop 208 | addi t3, t3, 4 209 | 210 | // Draw selectors around the current item 211 | la a0, left_str 212 | jal draw_string 213 | addi a1, t7, 128 * 2 214 | la a0, right_str 215 | jal draw_string 216 | addi a1, t7, 184 * 2 217 | b menu_loop 218 | nop 219 | 220 | menu_idle: 221 | // Close the menu if start is pressed 222 | jal get_pressed 223 | nop 224 | andi t0, v0, 0x10 // Start 225 | bnez t0, menu_close 226 | 227 | // Move to an upper item if up is pressed 228 | andi t0, v0, 0x8 // Up 229 | beqz t0, check_down 230 | sltu t0, zero, v1 231 | sll t0, t0, 2 232 | b draw_menu 233 | sub v1, v1, t0 234 | 235 | check_down: 236 | // Move to a lower item if down is pressed 237 | andi t0, v0, 0x4 // Down 238 | beqz t0, check_left 239 | nop 240 | lw t0, menu_items + 4(v1) 241 | beqz t0, menu_idle 242 | nop 243 | b draw_menu 244 | addi v1, v1, 4 245 | 246 | check_left: 247 | // Decrease a setting value if left is pressed 248 | andi t0, v0, 0x2 // Left 249 | beqz t0, check_right 250 | nop 251 | lw t0, menu_sets(v1) 252 | lbu t1, (t0) 253 | sltu t2, zero, t1 254 | sll t2, t2, 2 255 | sub t1, t1, t2 256 | b draw_menu 257 | sb t1, (t0) 258 | 259 | check_right: 260 | // Increase a setting value if left is pressed 261 | andi t0, v0, 0x1 // Right 262 | beqz t0, menu_idle 263 | nop 264 | lw t0, menu_sets(v1) 265 | lbu t1, (t0) 266 | lw t2, menu_opts(v1) 267 | add t2, t2, t1 268 | lw t2, 4(t2) 269 | beqz t2, menu_idle 270 | nop 271 | addi t1, t1, 4 272 | b draw_menu 273 | sb t1, (t0) 274 | 275 | menu_close: 276 | // Check if the APU underclock setting changed 277 | lbu t0, underclk_set 278 | lbu t1, apu_clock 279 | li t2, APU_CYCLE 280 | srl t0, t0, 2 281 | sll t2, t2, t0 282 | beq t1, t2, menu_return 283 | nop 284 | 285 | // Update the clock and invalidate the JIT buffer if changed 286 | sb t2, apu_clock 287 | li t0, ROM_BUFFER 288 | sw t0, jit_pointer 289 | b menu_return 290 | nop 291 | -------------------------------------------------------------------------------- /src/mul_div.S: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021-2025 Hydr8gon 3 | 4 | This file is part of sodium64. 5 | 6 | sodium64 is free software: you can redistribute it and/or modify 7 | it under the terms of the GNU General Public License as published 8 | by the Free Software Foundation, either version 3 of the License, 9 | or (at your option) any later version. 10 | 11 | sodium64 is distributed in the hope that it will be useful, but 12 | WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | General Public License for more details. 15 | 16 | You should have received a copy of the GNU General Public License 17 | along with sodium64. If not, see . 18 | */ 19 | 20 | #include 21 | 22 | .globl read_rddivl 23 | .globl read_rddivh 24 | .globl read_rdmpyl 25 | .globl read_rdmpyh 26 | .globl write_wrmpya 27 | .globl write_wrmpyb 28 | .globl write_wrdivl 29 | .globl write_wrdivh 30 | .globl write_wrdivb 31 | 32 | .data 33 | 34 | .align 4 35 | wrdiva: .hword 0 36 | rddiv: .hword 0 37 | rdmpy: .hword 0 38 | wrmpya: .byte 0 39 | 40 | .text 41 | .set noreorder 42 | 43 | .align 5 44 | read_rddivl: // v0: value 45 | // Read the low byte of the division quotient 46 | lbu v0, rddiv + 1 47 | jr ra 48 | nop 49 | 50 | .align 5 51 | read_rddivh: // v0: value 52 | // Read the high byte of the division quotient 53 | lbu v0, rddiv 54 | jr ra 55 | nop 56 | 57 | .align 5 58 | read_rdmpyl: // v0: value 59 | // Read the low byte of the multiply product/division remainder 60 | lbu v0, rdmpy + 1 61 | jr ra 62 | nop 63 | 64 | .align 5 65 | read_rdmpyh: // v0: value 66 | // Read the high byte of the multiply product/division remainder 67 | lbu v0, rdmpy 68 | jr ra 69 | nop 70 | 71 | .align 5 72 | write_wrmpya: // a1: value 73 | // Set the 8-bit multiplicand 74 | sb a1, wrmpya 75 | jr ra 76 | nop 77 | 78 | .align 5 79 | write_wrmpyb: // a1: value 80 | // Multiply the multiplicand by the value and set the product 81 | lbu t0, wrmpya 82 | mult t0, a1 83 | mflo t0 84 | sh t0, rdmpy 85 | sh a1, rddiv 86 | jr ra 87 | nop 88 | 89 | .align 5 90 | write_wrdivl: // a1: value 91 | // Set the low byte of the 16-bit dividend 92 | sb a1, wrdiva + 1 93 | jr ra 94 | nop 95 | 96 | .align 5 97 | write_wrdivh: // a1: value 98 | // Set the high byte of the 16-bit dividend 99 | sb a1, wrdiva 100 | jr ra 101 | nop 102 | 103 | .align 5 104 | write_wrdivb: // a1: value 105 | // Divide the dividend by the value and set the quotient and remainder 106 | beqz a1, div_zero 107 | li t0, 0xFFFF 108 | lhu t0, wrdiva 109 | div t0, a1 110 | mflo t0 111 | sh t0, rddiv 112 | mfhi t1 113 | sh t1, rdmpy 114 | jr ra 115 | nop 116 | 117 | div_zero: 118 | // Handle division by zero cases 119 | sh t0, rddiv 120 | lhu t1, wrdiva 121 | sh t1, rdmpy 122 | jr ra 123 | nop 124 | --------------------------------------------------------------------------------