├── test ├── requirements.txt ├── tb.gtkw ├── README.md ├── test.py ├── tb.v └── Makefile ├── .vscode ├── extensions.json └── settings.json ├── .devcontainer ├── copy_tt_support_tools.sh ├── devcontainer.json └── Dockerfile ├── .gitignore ├── .github └── workflows │ ├── docs.yaml │ ├── fpga.yaml │ ├── gds.yaml │ └── test.yaml ├── src ├── color_palette.v ├── vga.v ├── config.json ├── dot4.v ├── sine_rom.v ├── slowmpy.v ├── div.v ├── gamepad_pmod.v ├── spi_flash_controller.v ├── project.v └── vsfs.v ├── docs └── info.md ├── info.yaml ├── README.md └── LICENSE /test/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest==8.4.2 2 | cocotb==2.0.0 3 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "mshr-h.veriloghdl", 4 | "surfer-project.surfer" 5 | ] 6 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "verilog.linting.linter": "verilator", 3 | "verilog.formatting.verilogHDL.formatter": "verible-verilog-format" 4 | } 5 | -------------------------------------------------------------------------------- /.devcontainer/copy_tt_support_tools.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | if [ ! -L tt ]; then 4 | cp -R /ttsetup/tt-support-tools tt 5 | cd tt && git pull && cd .. 
//`timescale 1ns / 1ps

// 16-entry colour lookup table: a 4-bit index selects a 6-bit RGB value.
//
// Output bit order (as noted by the original author):
//   color[5:0] = R1 R2 G1 G2 B1 B2   (two bits per channel)
//
// Index layout, as visible in the table below:
//   spi_data[3:2] selects one of four ramps: white, pink, cyan, green
//   spi_data[1:0] selects the brightness step within the ramp
//                 (step 0 of every ramp is black)
module color_palette (
    input  wire [3:0] spi_data,
    output wire [5:0] color
    );

    // Combinational lookup result, driven straight onto the output port.
    reg [5:0] rgb;

    always @(*) begin
        case (spi_data)
            // Step 0 of each ramp: black
            4'h0, 4'h4, 4'h8, 4'hC: rgb = 6'b00_00_00;

            // White ramp (R = G = B)
            4'h1: rgb = 6'b01_01_01;
            4'h2: rgb = 6'b10_10_10;
            4'h3: rgb = 6'b11_11_11;

            // Pink ramp (R and B)
            4'h5: rgb = 6'b01_00_01;
            4'h6: rgb = 6'b10_00_10;
            4'h7: rgb = 6'b11_00_11;

            // Cyan ramp (G and B)
            4'h9: rgb = 6'b00_01_01;
            4'hA: rgb = 6'b00_10_10;
            4'hB: rgb = 6'b00_11_11;

            // Green ramp (G only)
            4'hD: rgb = 6'b00_01_00;
            4'hE: rgb = 6'b00_10_00;
            4'hF: rgb = 6'b00_11_00;

            // Only reachable in simulation when spi_data contains X/Z bits.
            default: rgb = 6'b00_00_00;
        endcase
    end

    assign color = rgb;

endmodule
8 | }, 9 | "runArgs": [ 10 | "--memory=10GB" 11 | ], 12 | "customizations": { 13 | "vscode": { 14 | "settings": { 15 | "terminal.integrated.defaultProfile.linux": "bash" 16 | }, 17 | "extensions": ["mshr-h.veriloghdl", "surfer-project.surfer"] 18 | } 19 | }, 20 | "features": { 21 | "ghcr.io/devcontainers/features/docker-in-docker:2": { 22 | "moby": true, 23 | "azureDnsAutoDetection": true, 24 | "version": "latest", 25 | "dockerDashComposeVersion": "none" 26 | } 27 | }, 28 | "postStartCommand": "/ttsetup/copy_tt_support_tools.sh" 29 | } 30 | -------------------------------------------------------------------------------- /test/tb.gtkw: -------------------------------------------------------------------------------- 1 | [*] 2 | [*] GTKWave Analyzer v3.4.0 (w)1999-2022 BSI 3 | [*] Mon Nov 20 16:00:28 2023 4 | [*] 5 | [dumpfile] "/home/uri/p/tt-new-template-proto/test/tb.vcd" 6 | [dumpfile_mtime] "Mon Nov 20 15:58:34 2023" 7 | [dumpfile_size] 1110 8 | [savefile] "/home/uri/p/tt-new-template-proto/test/tb.gtkw" 9 | [timestart] 0 10 | [size] 1376 600 11 | [pos] -1 -1 12 | *-24.534533 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 13 | [treeopen] tb. 
14 | [sst_width] 297 15 | [signals_width] 230 16 | [sst_expanded] 1 17 | [sst_vpaned_height] 158 18 | @28 19 | tb.user_project.ena 20 | @29 21 | tb.user_project.clk 22 | @28 23 | tb.user_project.rst_n 24 | @200 25 | -Inputs 26 | @22 27 | tb.user_project.ui_in[7:0] 28 | @200 29 | -Bidirectional Pins 30 | @22 31 | tb.user_project.uio_in[7:0] 32 | tb.user_project.uio_oe[7:0] 33 | tb.user_project.uio_out[7:0] 34 | @200 35 | -Output Pins 36 | @22 37 | tb.user_project.uo_out[7:0] 38 | [pattern_trace] 1 39 | [pattern_trace] 0 40 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | # Sample testbench for a Tiny Tapeout project 2 | 3 | This is a sample testbench for a Tiny Tapeout project. It uses [cocotb](https://docs.cocotb.org/en/stable/) to drive the DUT and check the outputs. 4 | See below to get started or for more information, check the [website](https://tinytapeout.com/hdl/testing/). 5 | 6 | ## Setting up 7 | 8 | 1. Edit [Makefile](Makefile) and modify `PROJECT_SOURCES` to point to your Verilog files. 9 | 2. Edit [tb.v](tb.v) and replace `tt_um_example` with your module name. 10 | 11 | ## How to run 12 | 13 | To run the RTL simulation: 14 | 15 | ```sh 16 | make -B 17 | ``` 18 | 19 | To run gatelevel simulation, first harden your project and copy `../runs/wokwi/results/final/verilog/gl/{your_module_name}.v` to `gate_level_netlist.v`. 
//`timescale 1ns / 1ps
//`default_nettype none

// VGA sync/coordinate generator for 640x480 @ 60 Hz (25 MHz pixel clock).
//
//   clk    pixel clock
//   reset  synchronous, ACTIVE-LOW: counters are cleared while reset == 0
//   HS/VS  active-low horizontal / vertical sync pulses
//   x, y   raw pixel/line counters (0..799, 0..524); they keep counting
//          through the blanking intervals
//   blank  high whenever (x, y) is outside the 640x480 visible area
//
// Horizontal: 640 visible + 16 front porch + 96 sync + 48 back porch = 800 clocks
// Vertical:   480 visible + 10 front porch +  2 sync + 33 back porch = 525 lines
module vga(
    input clk,
    input reset,
    output HS, VS,
    output [9:0] x,
    output [9:0] y,
    output blank
    );

    localparam [9:0] H_VISIBLE    = 10'd640;
    localparam [9:0] H_SYNC_BEGIN = 10'd656;   // 640 + 16
    localparam [9:0] H_SYNC_END   = 10'd752;   // 656 + 96 (exclusive)
    localparam [9:0] H_LAST       = 10'd799;

    localparam [9:0] V_VISIBLE    = 10'd480;
    localparam [9:0] V_SYNC_BEGIN = 10'd490;   // 480 + 10
    localparam [9:0] V_SYNC_END   = 10'd492;   // 490 + 2 (exclusive)
    localparam [9:0] V_LAST       = 10'd524;

    // Pixel (column) and line (row) counters.
    reg [9:0] h_cnt, v_cnt;

    wire end_of_line  = (h_cnt == H_LAST);
    wire end_of_frame = (v_cnt == V_LAST);

    assign x = h_cnt;
    assign y = v_cnt;

    assign blank = (h_cnt >= H_VISIBLE) | (v_cnt >= V_VISIBLE);
    assign HS    = ~((h_cnt >= H_SYNC_BEGIN) && (h_cnt < H_SYNC_END));
    assign VS    = ~((v_cnt >= V_SYNC_BEGIN) & (v_cnt < V_SYNC_END));

    always @(posedge clk) begin
        if (!reset) begin
            h_cnt <= 0;
            v_cnt <= 0;
        end
        else if (end_of_line) begin
            // Wrap the column counter and advance (or wrap) the line counter.
            h_cnt <= 0;
            v_cnt <= end_of_frame ? 10'd0 : v_cnt + 10'd1;
        end
        else begin
            h_cnt <= h_cnt + 10'd1;
        end
    end

endmodule
14 | 15 | Spec: 16 | - GPU performs transformation & lighting and rasterization 17 | - 4-bit double buffer, 8-bit depth buffer stored in QSPI RAM 18 | - up to 1K triangles 19 | - backface culling 20 | - 1 dynamic directional light, flat shading 21 | - use the gamepad to transform the model and rotate the light 22 | - runs at 25 MHz; when fabricated, it will use around 200k transistors 23 | 24 | ## How to test 25 | 26 | This section is still being written; please check back later. 27 | 28 | ## External hardware 29 | 30 | - QSPI PMOD 31 | - TinyVGA PMOD 32 | - GamePad PMOD 33 | - SNES controller 34 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG VARIANT=ubuntu-22.04 2 | FROM mcr.microsoft.com/vscode/devcontainers/base:${VARIANT} 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | ENV PDK_ROOT=/home/vscode/ttsetup/pdk 6 | ENV PDK=sky130A 7 | 8 | RUN apt update 9 | RUN apt install -y iverilog gtkwave python3 python3-pip python3-venv python3-tk python-is-python3 libcairo2 verilator libpng-dev libqhull-dev 10 | 11 | # Clone tt-support-tools 12 | RUN mkdir -p /ttsetup 13 | RUN git clone https://github.com/TinyTapeout/tt-support-tools /ttsetup/tt-support-tools 14 | 15 | COPY test/requirements.txt /ttsetup/test_requirements.txt 16 | COPY .devcontainer/copy_tt_support_tools.sh /ttsetup 17 | 18 | RUN pip3 install -r /ttsetup/test_requirements.txt -r /ttsetup/tt-support-tools/requirements.txt 19 | 20 | # Install verible (for formatting) 21 | RUN umask 022 && \ 22 | curl -L https://github.com/chipsalliance/verible/releases/download/v0.0-3795-gf4d72375/verible-v0.0-3795-gf4d72375-linux-static-x86_64.tar.gz | \ 23 | tar zxf - -C /usr/local --strip-components=1 && \ 24 | chmod 755 /usr/local/bin 25 | 26 | # Install LibreLane 27 | RUN pip3 install librelane==2.4.2 28 | -------------------------------------------------------------------------------- /.github/workflows/gds.yaml: 
@cocotb.test()
async def test_project(dut):
    """Template smoke test: start a clock, pulse reset, then drive sample inputs.

    Nothing is checked yet -- the example assertion at the end is left
    commented out until it is replaced with the design's real expected output.
    """
    log = dut._log
    log.info("Start")

    # Free-running clock on dut.clk with a 10 us period (100 kHz).
    cocotb.start_soon(Clock(dut.clk, 10, unit="us").start())

    # Reset: enable the design, zero both input buses and hold rst_n low
    # for ten clock cycles before releasing it.
    log.info("Reset")
    for signal, level in (
        (dut.ena, 1),
        (dut.ui_in, 0),
        (dut.uio_in, 0),
        (dut.rst_n, 0),
    ):
        signal.value = level
    await ClockCycles(dut.clk, 10)
    dut.rst_n.value = 1

    log.info("Test project behavior")

    # Example stimulus: set the input values you want to test.
    dut.ui_in.value, dut.uio_in.value = 20, 30

    # Give the design one clock cycle to react before sampling outputs.
    await ClockCycles(dut.clk, 1)

    # The following assertion is just an example of how to check the output values.
    # Change it to match the actual expected output of your module:
    #assert dut.uo_out.value == 50

    # Keep testing the module by changing the input values, waiting for
    # one or more clock cycles, and asserting the expected output values.
`default_nettype none
`timescale 1ns / 1ps

/* Thin testbench wrapper: instantiates the design under test and exposes its
   pins as plain regs/wires so that the cocotb test (test.py) can drive and
   sample them.
*/
module tb ();

  // Signals driven by cocotb
  reg       clk, rst_n, ena;
  reg [7:0] ui_in, uio_in;

  // Signals observed by cocotb
  wire [7:0] uo_out, uio_out, uio_oe;

`ifdef GL_TEST
  // Constant power rails, only needed by the gate-level netlist.
  wire VPWR, VGND;
  assign VPWR = 1'b1;
  assign VGND = 1'b0;
`endif

  // Dump the signals to a VCD file. You can view it with gtkwave or surfer.
  initial begin
    $dumpfile("tb.vcd");
    $dumpvars(0, tb);
    #1;
  end

  // Design under test (top module of this project).
  tt_um_pongsagon_tinygpu_v2 user_project (

      // Include power ports for the Gate Level test:
`ifdef GL_TEST
      .VPWR(VPWR),
      .VGND(VGND),
`endif

      .clk    (clk),      // clock
      .rst_n  (rst_n),    // not reset
      .ena    (ena),      // enable - goes high when design is selected
      .ui_in  (ui_in),    // Dedicated inputs
      .uo_out (uo_out),   // Dedicated outputs
      .uio_in (uio_in),   // IOs: Input path
      .uio_out(uio_out),  // IOs: Output path
      .uio_oe (uio_oe)    // IOs: Enable path (active high: 0=input, 1=output)
  );

endmodule
-------------------------------------------------------------------------------- 1 | # Makefile 2 | # See https://docs.cocotb.org/en/stable/quickstart.html for more info 3 | 4 | # defaults 5 | SIM ?= icarus 6 | TOPLEVEL_LANG ?= verilog 7 | SRC_DIR = $(PWD)/../src 8 | PROJECT_SOURCES = project.v div.v dot4.v slowmpy.v vga.v vsfs.v spi_flash_controller.v sine_rom.v color_palette.v gamepad_pmod.v 9 | 10 | ifneq ($(GATES),yes) 11 | 12 | # RTL simulation: 13 | SIM_BUILD = sim_build/rtl 14 | VERILOG_SOURCES += $(addprefix $(SRC_DIR)/,$(PROJECT_SOURCES)) 15 | 16 | else 17 | 18 | # Gate level simulation: 19 | SIM_BUILD = sim_build/gl 20 | COMPILE_ARGS += -DGL_TEST 21 | COMPILE_ARGS += -DFUNCTIONAL 22 | COMPILE_ARGS += -DUSE_POWER_PINS 23 | COMPILE_ARGS += -DSIM 24 | COMPILE_ARGS += -DUNIT_DELAY=\#1 25 | VERILOG_SOURCES += $(PDK_ROOT)/sky130A/libs.ref/sky130_fd_sc_hd/verilog/primitives.v 26 | VERILOG_SOURCES += $(PDK_ROOT)/sky130A/libs.ref/sky130_fd_sc_hd/verilog/sky130_fd_sc_hd.v 27 | 28 | # this gets copied in by the GDS action workflow 29 | VERILOG_SOURCES += $(PWD)/gate_level_netlist.v 30 | 31 | endif 32 | 33 | # Allow sharing configuration between design and testbench via `include`: 34 | COMPILE_ARGS += -I$(SRC_DIR) 35 | 36 | # Include the testbench sources: 37 | VERILOG_SOURCES += $(PWD)/tb.v 38 | TOPLEVEL = tb 39 | 40 | # List test modules to run, separated by commas and without the .py suffix: 41 | COCOTB_TEST_MODULES = test 42 | 43 | # include cocotb's make rules to take care of the simulator setup 44 | include $(shell cocotb-config --makefiles)/Makefile.sim 45 | -------------------------------------------------------------------------------- /info.yaml: -------------------------------------------------------------------------------- 1 | # Tiny Tapeout project information 2 | project: 3 | title: "Tiniest GPU V2" # Project title 4 | author: "Matt Pongsagon" # Your name 5 | discord: "pongsagon" # Your discord username, for communication and automatically assigning 
you a Tapeout role (optional) 6 | description: "A GPU with transformation & lighting, rasterization, flat shading, double buffer, z-buffer" # One line description of what your project does 7 | language: "Verilog" # other examples include SystemVerilog, Amaranth, VHDL, etc 8 | clock_hz: 25175000 # Clock frequency in Hz (or 0 if not applicable) 9 | 10 | # How many tiles your design occupies? A single tile is about 167x108 uM. 11 | tiles: "4x4" # Valid values: 1x1, 1x2, 2x2, 3x2, 4x2, 6x2 or 8x2 12 | 13 | # Your top module name must start with "tt_um_". Make it unique by including your github username: 14 | top_module: "tt_um_pongsagon_tinygpu_v2" 15 | 16 | # List your project's source files here. 17 | # Source files must be in ./src and you must list each source file separately, one per line. 18 | # Don't forget to also update `PROJECT_SOURCES` in test/Makefile. 19 | source_files: 20 | - "project.v" 21 | - "div.v" 22 | - "dot4.v" 23 | - "slowmpy.v" 24 | - "vga.v" 25 | - "vsfs.v" 26 | - "spi_flash_controller.v" 27 | - "sine_rom.v" 28 | - "color_palette.v" 29 | - "gamepad_pmod.v" 30 | 31 | # The pinout of your project. Leave unused pins blank. DO NOT delete or add any pins. 32 | # This section is for the datasheet/website. Use descriptive names (e.g., RX, TX, MOSI, SCL, SEG_A, etc.). 33 | pinout: 34 | # Inputs 35 | ui[0]: "SPI latency[0]" 36 | ui[1]: "SPI latency[1]" 37 | ui[2]: "SPI latency[2]" 38 | ui[3]: "" 39 | ui[4]: "gamepad_latch" 40 | ui[5]: "gamepad_clk" 41 | ui[6]: "gamepad_data" 42 | ui[7]: "" 43 | 44 | # Outputs 45 | uo[0]: "R1" 46 | uo[1]: "G1" 47 | uo[2]: "B1" 48 | uo[3]: "VSync" 49 | uo[4]: "R0" 50 | uo[5]: "G0" 51 | uo[6]: "B0" 52 | uo[7]: "HSync" 53 | 54 | # Bidirectional pins 55 | uio[0]: "CS" 56 | uio[1]: "SD0" 57 | uio[2]: "SD1" 58 | uio[3]: "SCK" 59 | uio[4]: "SD2" 60 | uio[5]: "SD3" 61 | uio[6]: "CS1" 62 | uio[7]: "CS2" 63 | 64 | # Do not change! 
65 | yaml_version: 6 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](../../workflows/gds/badge.svg) ![](../../workflows/docs/badge.svg) ![](../../workflows/test/badge.svg) ![](../../workflows/fpga/badge.svg) 2 | 3 | # Tiny Tapeout Verilog Project Template 4 | 5 | - [Read the documentation for project](docs/info.md) 6 | 7 | ## What is Tiny Tapeout? 8 | 9 | Tiny Tapeout is an educational project that aims to make it easier and cheaper than ever to get your digital and analog designs manufactured on a real chip. 10 | 11 | To learn more and get started, visit https://tinytapeout.com. 12 | 13 | ## Set up your Verilog project 14 | 15 | 1. Add your Verilog files to the `src` folder. 16 | 2. Edit the [info.yaml](info.yaml) and update information about your project, paying special attention to the `source_files` and `top_module` properties. If you are upgrading an existing Tiny Tapeout project, check out our [online info.yaml migration tool](https://tinytapeout.github.io/tt-yaml-upgrade-tool/). 17 | 3. Edit [docs/info.md](docs/info.md) and add a description of your project. 18 | 4. Adapt the testbench to your design. See [test/README.md](test/README.md) for more information. 19 | 20 | The GitHub action will automatically build the ASIC files using [LibreLane](https://www.zerotoasiccourse.com/terminology/librelane/). 
21 | 22 | ## Enable GitHub actions to build the results page 23 | 24 | - [Enabling GitHub Pages](https://tinytapeout.com/faq/#my-github-action-is-failing-on-the-pages-part) 25 | 26 | ## Resources 27 | 28 | - [FAQ](https://tinytapeout.com/faq/) 29 | - [Digital design lessons](https://tinytapeout.com/digital_design/) 30 | - [Learn how semiconductors work](https://tinytapeout.com/siliwiz/) 31 | - [Join the community](https://tinytapeout.com/discord) 32 | - [Build your design locally](https://www.tinytapeout.com/guides/local-hardening/) 33 | 34 | ## What next? 35 | 36 | - [Submit your design to the next shuttle](https://app.tinytapeout.com/). 37 | - Edit [this README](README.md) and explain your design, how it works, and how to test it. 38 | - Share your project on your social network of choice: 39 | - LinkedIn [#tinytapeout](https://www.linkedin.com/search/results/content/?keywords=%23tinytapeout) [@TinyTapeout](https://www.linkedin.com/company/100708654/) 40 | - Mastodon [#tinytapeout](https://chaos.social/tags/tinytapeout) [@matthewvenn](https://chaos.social/@matthewvenn) 41 | - X (formerly Twitter) [#tinytapeout](https://twitter.com/hashtag/tinytapeout) [@tinytapeout](https://twitter.com/tinytapeout) 42 | - Bluesky [@tinytapeout.com](https://bsky.app/profile/tinytapeout.com) 43 | -------------------------------------------------------------------------------- /src/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "//": "DO NOT EDIT THIS FILE before reading the comments below:", 3 | 4 | "//": "This is the default configuration for Tiny Tapeout projects. It should fit most designs.", 5 | "//": "If you change it, please make sure you understand what you are doing. We are not responsible", 6 | "//": "if your project fails because of a bad configuration.", 7 | 8 | "//": "!!! 
DO NOT EDIT THIS FILE unless you know what you are doing !!!", 9 | 10 | "//": "If you get stuck with this config, please open an issue or get in touch via the discord.", 11 | 12 | "//": "Here are some of the variables you may want to change:", 13 | 14 | "//": "PL_TARGET_DENSITY_PCT - You can increase this if Global Placement fails with error GPL-0302.", 15 | "//": "Users have reported that values up to 80 worked well for them.", 16 | "PL_TARGET_DENSITY_PCT": 70, 17 | 18 | "//": "CLOCK_PERIOD - Increase this in case you are getting setup time violations.", 19 | "//": "The value is in nanoseconds, so 20ns == 50MHz.", 20 | "CLOCK_PERIOD": 20, 21 | 22 | "//": "Hold slack margin - Increase them in case you are getting hold violations.", 23 | "PL_RESIZER_HOLD_SLACK_MARGIN": 0.1, 24 | "GRT_RESIZER_HOLD_SLACK_MARGIN": 0.05, 25 | 26 | "//": "RUN_LINTER, LINTER_INCLUDE_PDK_MODELS - Disabling the linter is not recommended!", 27 | "RUN_LINTER": 1, 28 | "LINTER_INCLUDE_PDK_MODELS": 1, 29 | 30 | "//": "If you need a custom clock configuration, read the following documentation first:", 31 | "//": "https://tinytapeout.com/faq/#how-can-i-map-an-additional-external-clock-to-one-of-the-gpios", 32 | "CLOCK_PORT": "clk", 33 | 34 | "//": "Configuration docs: https://librelane.readthedocs.io/en/latest/reference/configuration.html", 35 | 36 | "//": "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", 37 | "//": "!!! 
//`timescale 1ns / 1ps

/*
 * Copyright (c) 2024 Matt Pongsagon Vichitvejpaisal
 * SPDX-License-Identifier: Apache-2.0
 */

// Sequential 4-component dot product in Q8.8 fixed point:
//
//   result = v1.x*v2.x + v1.y*v2.y + v1.z*v2.z + v1.w*v2.w
//
// A single shared `slowmpy` multiplier is used for the four products, one
// after another. Each 32-bit product is reduced back to Q8.8 by keeping
// bits [23:8] before it is accumulated (no saturation; the sum wraps at
// 16 bits).
//
// Interface:
//   clk     clock
//   reset   synchronous, ACTIVE-LOW (all registers are cleared while reset == 0)
//   start   sampled only while idle; begins a new computation
//   v1_*, v2_*  operands. The y/z/w components are read when the previous
//           product completes, so all inputs must be held stable until `done`.
//   done    high for one clock when `result` is valid
//   result  Q8.8 dot product, holds its value until the next completion
module dot4(
    input clk,
    input reset,
    input start,
    input signed [15:0] v1_x,
    input signed [15:0] v1_y,
    input signed [15:0] v1_z,
    input signed [15:0] v1_w,
    input signed [15:0] v2_x,
    input signed [15:0] v2_y,
    input signed [15:0] v2_z,
    input signed [15:0] v2_w,
    output reg done,
    output reg signed [15:0] result
    );

    // Shared multiplier
    reg  signed [15:0] mul_a;
    reg  signed [15:0] mul_b;
    wire signed [31:0] mul_result;
    reg  mul_start;
    wire mul_done;
    wire mul_busy;   // unused, kept so every multiplier output is connected
    wire mul_aux;    // unused, kept so every multiplier output is connected
    slowmpy #(.LGNA(4),.NA(16)) mul2 (.i_clk (clk), .i_reset(reset), .i_stb(mul_start),.i_a(mul_a)
                ,.i_b(mul_b),.i_aux(1'b0),.o_done(mul_done),.o_p(mul_result)
                ,.o_busy(mul_busy),.o_aux(mul_aux));

    // FSM states: each S_WAIT_* state waits for the named product.
    localparam [2:0] S_IDLE   = 3'd0,
                     S_WAIT_X = 3'd1,
                     S_WAIT_Y = 3'd2,
                     S_WAIT_Z = 3'd3,
                     S_WAIT_W = 3'd4;

    reg [2:0] state;                    // 5 states used out of 8 encodings
    reg signed [15:0] tmp_vs_mul1;      // running Q8.8 sum of the products

    always @(posedge clk) begin
        if (!reset) begin
            mul_a <= 0;
            mul_b <= 0;
            mul_start <= 0;
            //
            state <= S_IDLE;
            tmp_vs_mul1 <= 0;
            //
            done <= 0;
            result <= 0;
        end
        else begin
            case (state)
                S_IDLE: begin
                    done <= 0;          // ends the one-cycle done pulse
                    if (start) begin
                        mul_a <= v1_x;
                        mul_b <= v2_x;
                        mul_start <= 1;
                        state <= S_WAIT_X;
                    end
                end
                S_WAIT_X: begin
                    mul_start <= 0;     // strobe is a single-cycle pulse
                    if (mul_done) begin
                        tmp_vs_mul1 <= mul_result[23:8];
                        mul_a <= v1_y;
                        mul_b <= v2_y;
                        mul_start <= 1;
                        state <= S_WAIT_Y;
                    end
                end
                S_WAIT_Y: begin
                    mul_start <= 0;
                    if (mul_done) begin
                        tmp_vs_mul1 <= tmp_vs_mul1 + mul_result[23:8];
                        mul_a <= v1_z;
                        mul_b <= v2_z;
                        mul_start <= 1;
                        state <= S_WAIT_Z;
                    end
                end
                S_WAIT_Z: begin
                    mul_start <= 0;
                    if (mul_done) begin
                        tmp_vs_mul1 <= tmp_vs_mul1 + mul_result[23:8];
                        mul_a <= v1_w;
                        mul_b <= v2_w;
                        mul_start <= 1;
                        state <= S_WAIT_W;
                    end
                end
                S_WAIT_W: begin
                    mul_start <= 0;
                    if (mul_done) begin
                        result <= tmp_vs_mul1 + mul_result[23:8];
                        done <= 1;
                        state <= S_IDLE;
                    end
                end
                default: begin
                    // Encodings 5..7 are never entered in normal operation.
                    // If the register is ever disturbed, fall back to idle
                    // instead of hanging until the next reset.
                    mul_start <= 0;
                    state <= S_IDLE;
                end
            endcase
        end

    end


endmodule
sinTable[] = { 9 | // 0,4,8,13,17,22,26,31,35,40,44,48,53,57,61,66,70,74,79,83,87,91,95,100,104,108,112, 10 | // 116,120,124,128,131,135,139,143,146,150,154,157,161,164,167,171,174,177,181,184,187, 11 | // 190,193,196,198,201,204,207,209,212,214,217,219,221,223,226,228,230,232,233,235,237, 12 | // 238,240,242,243,244,246,247,248,249,250,251,252,252,253,254,254,255,255,255,255,255}; 13 | 14 | 15 | 16 | module sine_rom ( 17 | input wire [6:0] angle, 18 | output wire [7:0] value 19 | ); 20 | 21 | reg [7:0] mem; 22 | assign value = mem; 23 | 24 | always @(*) begin 25 | case(angle) 26 | 0: mem = 0; 27 | 1: mem = 4; 28 | 2: mem = 8; 29 | 3: mem = 13; 30 | 4: mem = 17; 31 | 5: mem = 22; 32 | 6: mem = 26; 33 | 7: mem = 31; 34 | 8: mem = 35; 35 | 9: mem = 40; 36 | 10: mem = 44; 37 | 11: mem = 48; 38 | 12: mem = 53; 39 | 13: mem = 57; 40 | 14: mem = 61; 41 | 15: mem = 66; 42 | 16: mem = 70; 43 | 17: mem = 74; 44 | 18: mem = 79; 45 | 19: mem = 83; 46 | 20: mem = 87; 47 | 21: mem = 91; 48 | 22: mem = 95; 49 | 23: mem = 100; 50 | 24: mem = 104; 51 | 25: mem = 108; 52 | 26: mem = 112; 53 | 27: mem = 116; 54 | 28: mem = 120; 55 | 29: mem = 124; 56 | 30: mem = 128; 57 | 31: mem = 131; 58 | 32: mem = 135; 59 | 33: mem = 139; 60 | 34: mem = 143; 61 | 35: mem = 146; 62 | 36: mem = 150; 63 | 37: mem = 154; 64 | 38: mem = 157; 65 | 39: mem = 161; 66 | 40: mem = 164; 67 | 41: mem = 167; 68 | 42: mem = 171; 69 | 43: mem = 174; 70 | 44: mem = 177; 71 | 45: mem = 181; 72 | 46: mem = 184; 73 | 47: mem = 187; 74 | 48: mem = 190; 75 | 49: mem = 193; 76 | 50: mem = 196; 77 | 51: mem = 198; 78 | 52: mem = 201; 79 | 53: mem = 204; 80 | 54: mem = 207; 81 | 55: mem = 209; 82 | 56: mem = 212; 83 | 57: mem = 214; 84 | 58: mem = 217; 85 | 59: mem = 219; 86 | 60: mem = 221; 87 | 61: mem = 223; 88 | 62: mem = 226; 89 | 63: mem = 228; 90 | 64: mem = 230; 91 | 65: mem = 232; 92 | 66: mem = 233; 93 | 67: mem = 235; 94 | 68: mem = 237; 95 | 69: mem = 238; 96 | 70: mem = 240; 97 | 71: mem = 242; 98 | 72: 
mem = 243; 99 | 73: mem = 244; 100 | 74: mem = 246; 101 | 75: mem = 247; 102 | 76: mem = 248; 103 | 77: mem = 249; 104 | 78: mem = 250; 105 | 79: mem = 251; 106 | 80: mem = 252; 107 | 81: mem = 252; 108 | 82: mem = 253; 109 | 83: mem = 254; 110 | 84: mem = 254; 111 | 85: mem = 255; 112 | 86: mem = 255; 113 | 87: mem = 255; 114 | 88: mem = 255; 115 | 89: mem = 255; 116 | default: mem = 0; 117 | endcase 118 | end 119 | 120 | 121 | 122 | 123 | endmodule 124 | -------------------------------------------------------------------------------- /src/slowmpy.v: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////// 2 | // 3 | // Filename: slowmpy.v 4 | // {{{ 5 | // Project: Zip CPU -- a small, lightweight, RISC CPU soft core 6 | // 7 | // Purpose: This is a signed (OPT_SIGNED=1) or unsigned (OPT_SIGNED=0) 8 | // multiply designed for low logic and slow data signals. It 9 | // takes one clock per bit plus two more to complete the multiply. 10 | // 11 | // The OPT_SIGNED version of this algorithm was found on Wikipedia at 12 | // https://en.wikipedia.org/wiki/Binary_multiplier. 13 | // 14 | // Creator: Dan Gisselquist, Ph.D. 15 | // Gisselquist Technology, LLC 16 | // 17 | //////////////////////////////////////////////////////////////////////////////// 18 | // }}} 19 | // Copyright (C) 2018-2024, Gisselquist Technology, LLC 20 | // {{{ 21 | // This program is free software (firmware): you can redistribute it and/or 22 | // modify it under the terms of the GNU General Public License as published 23 | // by the Free Software Foundation, either version 3 of the License, or (at 24 | // your option) any later version. 25 | // 26 | // This program is distributed in the hope that it will be useful, but WITHOUT 27 | // ANY WARRANTY; without even the implied warranty of MERCHANTIBILITY or 28 | // FITNESS FOR A PARTICULAR PURPOSE. 
// See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program.  (It's in the $(ROOT)/doc directory.  Run make with no
// target there if the PDF file isn't present.)  If not, see
// <http://www.gnu.org/licenses/> for a copy.
// }}}
// License:	GPL, v3, as defined and found on www.gnu.org,
// {{{
//		http://www.gnu.org/licenses/gpl.html
//
////////////////////////////////////////////////////////////////////////////////
//

// edit by Pongsagon Vichitvejpaisal, 14 May 2024
//  - to be use with TinyTapeout
//  - changed list:
//      - use i_reset to init all reg
//      - LGNA = 4, NA = 16
//
// NOTE(review): the parameter defaults below are LGNA = 5 / NA = 20, not the
// 4 / 16 listed above -- confirm which values the instantiating module uses.

//`timescale 1ns / 1ps
//`default_nettype none

// slowmpy -- bit-serial shift-and-add multiplier.
//
// Handshake (as implemented below):
//   * i_reset is ACTIVE LOW in this edited version: while it is 0 the
//     aux / o_done / o_busy registers are cleared.
//   * While idle (o_busy == 0) the operands are sampled every clock; raising
//     i_stb sets o_busy on the next clock and starts the multiply.
//   * One partial-product step is taken per clock while busy. When `count`
//     reaches zero, `almost_done` is set, and one clock later o_done pulses
//     for a single cycle with the product in o_p and the saved i_aux in o_aux.
module slowmpy #(
        parameter               LGNA         = 5,
        parameter   [LGNA:0]    NA           = 20,
        parameter   [0:0]       OPT_SIGNED   = 1'b1,
        parameter   [0:0]       OPT_LOWPOWER = 1'b0,
        localparam              NB           = NA   // Must be = NA for OPT_SIGNED to work
    ) (
        input   wire                        i_clk, i_reset,
        //
        input   wire                        i_stb,
        input   wire  signed  [(NA-1):0]    i_a,
        input   wire  signed  [(NB-1):0]    i_b,
        // verilator coverage_off
        input   wire                        i_aux,
        // verilator coverage_on
        output  reg                         o_busy, o_done,
        output  reg   signed  [(NA+NB-1):0] o_p,
        // verilator coverage_off
        output  reg                         o_aux
        // verilator coverage_on
    );

    // Constant added to the raw accumulator at the end of a signed multiply
    // (ones at bit NA+NB-1 and at bit NB).
    localparam [NA+NB-1:0] SIGNED_FIXUP = { 1'b1, {(NA-2){1'b0}}, 1'b1, {(NB){1'b0}} };

    // State
    reg   [LGNA-1:0]    count;          // remaining steps, counts down to 0
    reg   [NA-1:0]      p_a;            // latched multiplicand
    reg   [NB-1:0]      p_b;            // latched multiplier, shifted right each step
    reg   [NA+NB-1:0]   partial;        // running product accumulator
    // verilator coverage_off
    reg                 aux;
    // verilator coverage_on
    reg                 almost_done;

    // The last step is in progress when the counter has run out.
    wire                pre_done = (count == 0);

    // Multiplicand gated by the current multiplier bit.
    wire  [NA-1:0]      pwire    = (p_b[0] ? p_a : 0);

    // Upper half of the accumulator, widened by one bit for the carry.
    wire  [NA:0]        acc_hi   = { 1'b0, partial[NA+NB-1:NB] };

    // Value added into the upper half on this step.
    reg   [NA:0]        addend;
    always @(*) begin
        if ((OPT_SIGNED)&&(pre_done))
            // final signed step: keep the MSB, invert the rest
            addend = { 1'b0, pwire[NA-1], ~pwire[NA-2:0] };
        else if (OPT_SIGNED)
            // ordinary signed step: invert the MSB only
            addend = { 1'b0, !pwire[NA-1], pwire[NA-2:0] };
        else
            addend = (p_b[0]) ? {1'b0, p_a} : 0;
    end

    // almost_done: one-cycle flag raised after the final step, only while
    // out of reset (i_reset high) and busy.
    always @(posedge i_clk) begin
        almost_done <= (i_reset)&&(o_busy)&&(pre_done);
    end

    // aux, o_done, o_busy
    always @(posedge i_clk) begin
        if (!i_reset) begin
            // active-low reset
            o_busy <= 0;
            o_done <= 0;
            aux    <= 0;
        end
        else if (!o_busy) begin
            // idle: accept a new request
            o_busy <= i_stb;
            o_done <= 0;
            aux    <= (!OPT_LOWPOWER || i_stb) ? i_aux : 0;
        end
        else if (almost_done) begin
            // result is being written to o_p this cycle
            o_busy <= 0;
            o_done <= 1;
        end
        else begin
            o_done <= 0;
        end
    end

    // count, partial, p_a, p_b (not reset; reloaded whenever idle)
    always @(posedge i_clk) begin
        if (!o_busy) begin
            count   <= NA[LGNA-1:0]-1;
            partial <= 0;

            if (OPT_LOWPOWER && !i_stb) begin
                // low-power option: hold the operand registers at zero when idle
                p_a <= 0;
                p_b <= 0;
            end
            else begin
                p_a <= i_a;
                p_b <= i_b;
            end
        end
        else begin
            count <= count - 1;
            p_b   <= (p_b >> 1);

            // shift the low half right, add into the high half
            partial[NB-2:0]       <= partial[NB-1:1];
            partial[NA+NB-1:NB-1] <= acc_hi + addend;
        end
    end

    // o_p, o_aux: captured in the cycle almost_done is high
    always @(posedge i_clk) begin
        if (almost_done) begin
            o_aux <= aux;
            if (OPT_SIGNED)
                o_p <= partial[NA+NB-1:0] + SIGNED_FIXUP;
            else
                o_p <= partial[NA+NB-1:0];
        end
    end


endmodule

--------------------------------------------------------------------------------
/src/div.v:
--------------------------------------------------------------------------------
// Project F Library - Division: Signed Fixed-Point with Gaussian Rounding
// (C)2023 Will Green, Open source hardware released under the MIT License
// Learn more at https://projectf.io/verilog-lib/
//`timescale 1ns / 1ps

// div -- sequential signed fixed-point divider.
//
// A request is accepted when `start` is high in the idle state. The result
// (`val`, with `valid` high) or an error flag (`dbz` for b == 0, `ovf` for
// overflow) is reported together with a one-cycle `done` pulse.
// In this version `rst` is ACTIVE LOW: while it is 0 the state machine and
// all outputs are forced to their idle/zero values.
module div #(
    parameter WIDTH=32,  // width of numbers in bits (integer and fractional)
    parameter FBITS=16   // fractional bits within WIDTH
    ) (
    input clk,                          // clock
    input rst,                          // reset (active low, see above)
    input start,                        // start calculation
    output reg busy,                    // calculation in progress
    output reg done,                    // calculation is complete (high for one tick)
    output reg valid,                   // result is valid
    output reg dbz,                     // divide by zero
    output reg ovf,                     // overflow
    input signed [WIDTH-1:0] a,         // dividend (numerator)
    input signed [WIDTH-1:0] b,         // divisor (denominator)
    output reg signed [WIDTH-1:0] val   // result value: quotient
    );

    localparam WIDTHU   = WIDTH - 1;                    // unsigned widths are 1 bit narrower
    localparam FBITSW   = (FBITS == 0) ? 1 : FBITS;     // avoid negative vector width when FBITS=0
    localparam SMALLEST = {1'b1, {WIDTHU{1'b0}}};       // smallest negative number
    localparam ITER     = WIDTHU + FBITS;               // iterations: unsigned input width + fractional bits

    // State encoding (plain constants; an enum is not used for ASIC synthesis)
    localparam [2:0] IDLE  = 3'd0;
    localparam [2:0] INIT  = 3'd1;
    localparam [2:0] CALC  = 3'd2;
    localparam [2:0] ROUND = 3'd3;
    localparam [2:0] SIGN  = 3'd4;

    reg [2:0]            state;
    reg [$clog2(ITER):0] i;                 // iteration counter (allow ITER+1 iterations for rounding)
    reg                  sig_diff;          // registered: input signs differ
    reg [WIDTHU-1:0]     au, bu;            // absolute version of inputs (unsigned)
    reg [WIDTHU-1:0]     quo, quo_next;     // intermediate quotients (unsigned)
    reg [WIDTHU:0]       acc, acc_next;     // accumulator (unsigned but 1 bit wider)

    // Sign bits of the inputs
    wire a_sig = a[WIDTH-1];
    wire b_sig = b[WIDTH-1];

    // One step of the division: subtract the divisor when it fits and shift
    // a 1 into the quotient, otherwise just shift left.
    wire [WIDTHU:0] acc_minus_bu = acc - bu;
    always @(*) begin
        if (acc >= {1'b0, bu})
            {acc_next, quo_next} = {acc_minus_bu[WIDTHU-1:0], quo, 1'b1};
        else
            {acc_next, quo_next} = {acc, quo} << 1;
    end

    // Calculation state machine
    always @(posedge clk) begin
        done <= 0;      // default: done is a single-cycle pulse

        case (state)
            INIT: begin
                // load the dividend into {acc, quo}
                {acc, quo} <= {{WIDTHU{1'b0}}, au, 1'b0};
                i     <= 0;
                ovf   <= 0;
                state <= CALC;
            end

            CALC: begin
                if (i == WIDTHU-1 && quo_next[WIDTHU-1:WIDTHU-FBITSW] != 0) begin
                    // overflow: abort and report
                    ovf   <= 1;
                    done  <= 1;
                    busy  <= 0;
                    state <= IDLE;
                end else begin
                    acc <= acc_next;
                    quo <= quo_next;
                    i   <= i + 1;
                    if (i == ITER-1) state <= ROUND;  // calculation complete after next iteration
                end
            end

            ROUND: begin  // Gaussian rounding
                state <= SIGN;
                if (quo_next[0] == 1'b1) begin  // next digit is 1, so consider rounding
                    // round up if quotient is odd or remainder is non-zero
                    if (quo[0] == 1'b1 || acc_next[WIDTHU:1] != 0) quo <= quo + 1;
                end
            end

            SIGN: begin  // adjust quotient sign if non-zero and input signs differ
                if (quo != 0) val <= (sig_diff) ? {1'b1, -quo} : {1'b0, quo};
                valid <= 1;
                done  <= 1;
                busy  <= 0;
                state <= IDLE;
            end

            default: begin  // IDLE
                if (start) begin
                    valid <= 0;
                    val   <= 0;
                    if (b == 0) begin  // divide by zero
                        dbz   <= 1;
                        ovf   <= 0;
                        done  <= 1;
                        busy  <= 0;
                        state <= IDLE;
                    end else if (a == SMALLEST || b == SMALLEST) begin  // overflow
                        dbz   <= 0;
                        ovf   <= 1;
                        done  <= 1;
                        busy  <= 0;
                        state <= IDLE;
                    end else begin
                        au       <= (a_sig) ? -a[WIDTHU-1:0] : a[WIDTHU-1:0];  // register abs(a)
                        bu       <= (b_sig) ? -b[WIDTHU-1:0] : b[WIDTHU-1:0];  // register abs(b)
                        sig_diff <= (a_sig ^ b_sig);  // register input sign difference
                        dbz      <= 0;
                        ovf      <= 0;
                        busy     <= 1;
                        state    <= INIT;
                    end
                end
            end
        endcase

        // Active-low reset. Placed last so it overrides everything above.
        if (!rst) begin
            state <= IDLE;
            busy  <= 0;
            done  <= 0;
            valid <= 0;
            dbz   <= 0;
            ovf   <= 0;
            val   <= 0;
        end
    end
endmodule

--------------------------------------------------------------------------------
/src/gamepad_pmod.v:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2025 Pat Deegan, https://psychogenic.com
 * SPDX-License-Identifier: Apache-2.0
 * Version: 1.0.0
 *
 * Interfacing code for the Gamepad Pmod from Psychogenic Technologies,
 * designed for Tiny Tapeout.
 *
 * There are two high-level modules that most users will be interested in:
 * - gamepad_pmod_single: for a single controller;
 * - gamepad_pmod_dual: for two controllers.
12 | * 13 | * There are also two lower-level modules that you can use if you want to 14 | * handle the interfacing yourself: 15 | * - gamepad_pmod_driver: interfaces with the Pmod and provides the raw data; 16 | * - gamepad_pmod_decoder: decodes the raw data into button states. 17 | * 18 | * The docs, schematics, PCB files, and firmware code for the Gamepad Pmod 19 | * are available at https://github.com/psychogenic/gamepad-pmod. 20 | */ 21 | 22 | /** 23 | * gamepad_pmod_driver -- Serial interface for the Gamepad Pmod. 24 | * 25 | * This module reads raw data from the Gamepad Pmod *serially* 26 | * and stores it in a shift register. When the latch signal is received, 27 | * the data is transferred into `data_reg` for further processing. 28 | * 29 | * Functionality: 30 | * - Synchronizes the `pmod_data`, `pmod_clk`, and `pmod_latch` signals 31 | * to the system clock domain. 32 | * - Captures serial data on each rising edge of `pmod_clk`. 33 | * - Transfers the shifted data into `data_reg` on the rising edge of `pmod_latch`. 34 | * 35 | * Parameters: 36 | * - `BIT_WIDTH`: Defines the width of `data_reg` (default: 24 bits). 37 | * 38 | * Inputs: 39 | * - `rst_n`: Active-low reset. 40 | * - `clk`: System clock. 41 | * - `pmod_data`: Serial data input from the Pmod. 42 | * - `pmod_clk`: Serial clock from the Pmod. 43 | * - `pmod_latch`: Latch signal indicating the end of data transmission. 44 | * 45 | * Outputs: 46 | * - `data_reg`: Captured parallel data after shifting is complete.
47 | */ 48 | module gamepad_pmod_driver #( 49 | parameter BIT_WIDTH = 24 50 | ) ( 51 | input wire rst_n, 52 | input wire clk, 53 | input wire pmod_data, 54 | input wire pmod_clk, 55 | input wire pmod_latch, 56 | output reg [BIT_WIDTH-1:0] data_reg 57 | ); 58 | 59 | reg pmod_clk_prev; 60 | reg pmod_latch_prev; 61 | reg [BIT_WIDTH-1:0] shift_reg; 62 | 63 | // Sync Pmod signals to the clk domain: 64 | reg [1:0] pmod_data_sync; 65 | reg [1:0] pmod_clk_sync; 66 | reg [1:0] pmod_latch_sync; 67 | 68 | always @(posedge clk) begin 69 | if (~rst_n) begin 70 | pmod_data_sync <= 2'b0; 71 | pmod_clk_sync <= 2'b0; 72 | pmod_latch_sync <= 2'b0; 73 | end else begin 74 | pmod_data_sync <= {pmod_data_sync[0], pmod_data}; 75 | pmod_clk_sync <= {pmod_clk_sync[0], pmod_clk}; 76 | pmod_latch_sync <= {pmod_latch_sync[0], pmod_latch}; 77 | end 78 | end 79 | 80 | always @(posedge clk) begin 81 | if (~rst_n) begin 82 | /* Initialize data and shift registers to all 1s so they're detected as "not present". 83 | * This accounts for cases where we have: 84 | * - setup for 2 controllers; 85 | * - only a single controller is connected; and 86 | * - the driver in those cases only sends bits for a single controller. 87 | */ 88 | data_reg <= {BIT_WIDTH{1'b1}}; 89 | shift_reg <= {BIT_WIDTH{1'b1}}; 90 | pmod_clk_prev <= 1'b0; 91 | pmod_latch_prev <= 1'b0; 92 | end 93 | begin 94 | pmod_clk_prev <= pmod_clk_sync[1]; 95 | pmod_latch_prev <= pmod_latch_sync[1]; 96 | 97 | // Capture data on rising edge of pmod_latch: 98 | if (pmod_latch_sync[1] & ~pmod_latch_prev) begin 99 | data_reg <= shift_reg; 100 | end 101 | 102 | // Sample data on rising edge of pmod_clk: 103 | if (pmod_clk_sync[1] & ~pmod_clk_prev) begin 104 | shift_reg <= {shift_reg[BIT_WIDTH-2:0], pmod_data_sync[1]}; 105 | end 106 | end 107 | end 108 | 109 | endmodule 110 | 111 | 112 | /** 113 | * gamepad_pmod_decoder -- Decodes raw data from the Gamepad Pmod. 
114 | * 115 | * This module takes a 12-bit parallel data register (`data_reg`) 116 | * and decodes it into individual button states. It also determines 117 | * whether a controller is connected. 118 | * 119 | * Functionality: 120 | * - If `data_reg` contains all `1's` (`0xFFF`), it indicates that no controller is connected. 121 | * - Otherwise, it extracts individual button states from `data_reg`. 122 | * 123 | * Inputs: 124 | * - `data_reg [11:0]`: Captured button state data from the gamepad. 125 | * 126 | * Outputs: 127 | * - `b, y, select, start, up, down, left, right, a, x, l, r`: Individual button states (`1` = pressed, `0` = released). 128 | * - `is_present`: Indicates whether a controller is connected (`1` = connected, `0` = not connected). 129 | */ 130 | module gamepad_pmod_decoder ( 131 | input wire [11:0] data_reg, 132 | output wire b, 133 | output wire y, 134 | output wire select, 135 | output wire start, 136 | output wire up, 137 | output wire down, 138 | output wire left, 139 | output wire right, 140 | output wire a, 141 | output wire x, 142 | output wire l, 143 | output wire r, 144 | output wire is_present 145 | ); 146 | 147 | // When the controller is not connected, the data register will be all 1's 148 | wire reg_empty = (data_reg == 12'hfff); 149 | assign is_present = reg_empty ? 0 : 1'b1; 150 | assign {b, y, select, start, up, down, left, right, a, x, l, r} = reg_empty ? 0 : data_reg; 151 | 152 | endmodule 153 | 154 | 155 | /** 156 | * gamepad_pmod_single -- Main interface for a single Gamepad Pmod controller. 157 | * 158 | * This module provides button states for a **single controller**, reducing 159 | * resource usage (fewer flip-flops) compared to a dual-controller version. 160 | * 161 | * Inputs: 162 | * - `pmod_data`, `pmod_clk`, and `pmod_latch` are the signals from the PMOD interface. 163 | * 164 | * Outputs: 165 | * - Each button's state is provided as a single-bit wire (e.g., `start`, `up`, etc.). 
166 | * - `is_present` indicates whether the controller is connected (`1` = connected, `0` = not detected). 167 | */ 168 | module gamepad_pmod_single ( 169 | input wire rst_n, 170 | input wire clk, 171 | input wire pmod_data, 172 | input wire pmod_clk, 173 | input wire pmod_latch, 174 | 175 | output wire b, 176 | output wire y, 177 | output wire select, 178 | output wire start, 179 | output wire up, 180 | output wire down, 181 | output wire left, 182 | output wire right, 183 | output wire a, 184 | output wire x, 185 | output wire l, 186 | output wire r, 187 | output wire is_present 188 | ); 189 | 190 | wire [11:0] gamepad_pmod_data; 191 | 192 | gamepad_pmod_driver #( 193 | .BIT_WIDTH(12) 194 | ) driver ( 195 | .rst_n(rst_n), 196 | .clk(clk), 197 | .pmod_data(pmod_data), 198 | .pmod_clk(pmod_clk), 199 | .pmod_latch(pmod_latch), 200 | .data_reg(gamepad_pmod_data) 201 | ); 202 | 203 | gamepad_pmod_decoder decoder ( 204 | .data_reg(gamepad_pmod_data), 205 | .b(b), 206 | .y(y), 207 | .select(select), 208 | .start(start), 209 | .up(up), 210 | .down(down), 211 | .left(left), 212 | .right(right), 213 | .a(a), 214 | .x(x), 215 | .l(l), 216 | .r(r), 217 | .is_present(is_present) 218 | ); 219 | 220 | endmodule 221 | 222 | 223 | /** 224 | * gamepad_pmod_dual -- Main interface for the Pmod gamepad. 225 | * This module provides button states for two controllers using 226 | * 2-bit vectors for each button (e.g., start[1:0], up[1:0], etc.). 227 | * 228 | * Each button state is represented as a 2-bit vector: 229 | * - Index 0 corresponds to the first controller (e.g., up[0], y[0], etc.). 230 | * - Index 1 corresponds to the second controller (e.g., up[1], y[1], etc.). 231 | * 232 | * The `is_present` signal indicates whether a controller is connected: 233 | * - `is_present[0] == 1` when the first controller is connected. 234 | * - `is_present[1] == 1` when the second controller is connected. 
235 | * 236 | * Inputs: 237 | * - `pmod_data`, `pmod_clk`, and `pmod_latch` are the 3 wires coming from the Pmod interface. 238 | * 239 | * Outputs: 240 | * - Button state vectors for each controller. 241 | * - Presence detection via `is_present`. 242 | */ 243 | -------------------------------------------------------------------------------- /src/spi_flash_controller.v: -------------------------------------------------------------------------------- 1 | /* re-implementation and extension of from 2 | https://github.com/MichaelBell/tt07-rle-vga 3 | 4 | Copyright 2023-2024 (c) Michael Bell 5 | SPDX-License-Identifier: Apache-2.0 6 | 7 | 8 | To perform a read: 9 | - Set addr_in and set start_read high for 1 cycle 10 | - Wait for ready to go hi 11 | - The read data is now available on data_out 12 | 13 | If the controller is configured to transfer multiple bytes, then 14 | note that the word transferred in data_out is in big 15 | endian order, i.e. the byte with the lowest address is aligned to 16 | the MSB of the word. 17 | 18 | */ 19 | 20 | 21 | 22 | /* Matt Pongsagon 23 | 24 | Flash 25 | - use EBh command instead of 6Bh, change timing in addr/wait state 26 | cmd 8, addr 6, wait 2+4, 4bit data each clk,... 
27 | - only support 4-bit data read 28 | - sclk only stop in the idle state -> will read data more than needed, 29 | just ignore excess data 30 | - FSM will finally stay at hold state and output data until user stop read 31 | 32 | RAM (use only RAM A) 33 | - enter quad mode, h35: cmd 8 34 | after get at_quadmode=1, caller must send stop_txn 35 | - read hOB: cmd 2, addr 6, wait 4, 4bit data 36 | - write h02: cmd 2, addr 6, -, 4bit data 37 | */ 38 | 39 | //`default_nettype wire 40 | //`timescale 1ns / 1ps 41 | 42 | module spi_flash_controller #(parameter DATA_WIDTH_BITS=4, parameter ADDR_BITS=24) ( 43 | input clk, 44 | input rstn, 45 | 46 | // External SPI interface 47 | input [3:0] spi_data_in, 48 | output reg [3:0] spi_data_out, 49 | output reg [3:0] spi_data_oe, 50 | output spi_clk_out, 51 | // low to select 52 | output reg spi_flash_select, 53 | output reg spi_ram_a_select, 54 | output reg spi_ram_b_select, // always=1, use only RAM A 55 | 56 | 57 | // Configuration 58 | input [2:0] latency, 59 | 60 | // Internal interface for reading data 61 | // cmd part 62 | input select_ROM, // 1 = ROM, 0 = RAM 63 | input enter_quadmode, 64 | input start_read, 65 | input start_write, 66 | input stop_txn, // stop read or write 67 | // 68 | input [ADDR_BITS-1:0] addr_in, 69 | input [DATA_WIDTH_BITS-1:0] data_in, // for writing 70 | // 71 | output [DATA_WIDTH_BITS-1:0] data_out, // for reading 72 | output reg data_req, // for writing 73 | output reg data_ready, // for reading 74 | output reg at_quadmode 75 | 76 | // debug 77 | //output [3:0] debug_spi_data_in, 78 | //output [3:0] debug_spi_miso_in 79 | ); 80 | 81 | `define max(a, b) (a > b) ? 
a : b 82 | 83 | //localparam DATA_WIDTH_BITS = DATA_WIDTH_BYTES * 8; 84 | localparam BITS_REM_BITS = $clog2(`max(DATA_WIDTH_BITS,ADDR_BITS)); 85 | 86 | // read ROM / RAM / write RAM 87 | localparam FSM_IDLE = 0; 88 | localparam FSM_CMD = 1; 89 | localparam FSM_ADDR = 2; 90 | localparam FSM_DUMMY = 3; 91 | localparam FSM_DATA = 4; 92 | localparam FSM_LAT1 = 5; 93 | localparam FSM_LAT2 = 6; 94 | localparam FSM_STREAM = 7; 95 | // enter quad 96 | 97 | 98 | reg [2:0] fsm_state; 99 | reg doing_quadmode; 100 | reg is_writing; 101 | reg [7:0] spi_miso_buf_n; 102 | reg [7:0] spi_miso_buf_p; 103 | reg [ADDR_BITS-1:0] addr; 104 | reg [DATA_WIDTH_BITS-1:0] data; 105 | reg [BITS_REM_BITS-1:0] bits_remaining; // of each state 106 | 107 | 108 | assign data_out = data; 109 | //assign spi_select = fsm_state == FSM_IDLE; 110 | assign spi_clk_out = !clk && (fsm_state > 0); 111 | 112 | 113 | always @(posedge clk) begin 114 | if (!rstn || stop_txn ) begin 115 | fsm_state <= FSM_IDLE; 116 | is_writing <= 0; 117 | bits_remaining <= 0; 118 | data_ready <= 0; 119 | data_req <= 0; 120 | at_quadmode <= 0; 121 | doing_quadmode <= 0; 122 | spi_data_oe <= 4'b0000; 123 | spi_flash_select <= 1; 124 | spi_ram_a_select <= 1; 125 | spi_ram_b_select <= 1; 126 | end else begin 127 | data_ready <= 0; 128 | data_req <= 0; 129 | if (fsm_state == FSM_IDLE) begin 130 | if (start_read || start_write || enter_quadmode) begin 131 | if(select_ROM || enter_quadmode)begin 132 | spi_data_oe <= 4'b0001; 133 | bits_remaining <= 8-1; 134 | doing_quadmode <= (enter_quadmode)? 1:0; 135 | end else begin 136 | is_writing <= !start_read; 137 | spi_data_oe <= 4'b1111; 138 | bits_remaining <= 2-1; 139 | end 140 | fsm_state <= FSM_CMD; 141 | // 142 | spi_flash_select <= (select_ROM)? 0:1; 143 | spi_ram_a_select <= (select_ROM)? 
1:0; 144 | end 145 | end else if (fsm_state == FSM_STREAM) begin 146 | // do nothing, keep reading/writing data until stop_txn 147 | data_ready <= !is_writing; 148 | data_req <= is_writing; 149 | end else begin 150 | if (bits_remaining == 0) begin 151 | fsm_state <= fsm_state + 1; 152 | if (fsm_state == FSM_CMD) begin 153 | if(doing_quadmode)begin 154 | at_quadmode <= 1; 155 | fsm_state <= FSM_IDLE; 156 | spi_ram_a_select <= 1; 157 | end else begin 158 | // change to FSM_ADDR 159 | bits_remaining <= (ADDR_BITS >> 2)-1; 160 | spi_data_oe <= 4'b1111; 161 | end 162 | end 163 | else if (fsm_state == FSM_ADDR) begin 164 | // change to FSM_DUMMY or FSM_STREAM 165 | if(select_ROM)begin 166 | bits_remaining <= 6-1; 167 | end else if(is_writing)begin 168 | data_req <= 1; 169 | fsm_state <= FSM_STREAM; 170 | end else begin 171 | bits_remaining <= 4-1; 172 | end 173 | end 174 | else if (fsm_state == FSM_DUMMY) begin 175 | spi_data_oe <= 4'b0000; 176 | bits_remaining <= 0; 177 | end 178 | else if (fsm_state == FSM_DATA) begin 179 | bits_remaining <= 0; 180 | end 181 | else if (fsm_state == FSM_LAT1) begin 182 | bits_remaining <= 0; 183 | end 184 | else if (fsm_state == FSM_LAT2) begin 185 | // 1st 4-bit data ready after this state 186 | data_ready <= 1; 187 | end 188 | end else begin 189 | bits_remaining <= bits_remaining - 1; 190 | end 191 | end 192 | end 193 | end 194 | 195 | 196 | // spi_data_out 197 | always @(*) begin 198 | case (fsm_state) 199 | FSM_CMD: begin // CMD only used for the PSRAM, the flash is always in continuous read mode 200 | if (is_writing) begin 201 | // RAM Write command is 02h 202 | spi_data_out = (bits_remaining == 1)? 
4'b0000 : 4'b0010; 203 | end else begin 204 | if(select_ROM) begin 205 | // ROM Read command is EBh, single mode 206 | spi_data_out = {3'b000, !(bits_remaining == 4 || bits_remaining == 2)}; 207 | end else if(doing_quadmode) begin 208 | // Quadmode command is 35h, single mode 209 | spi_data_out = {3'b000, (bits_remaining == 0 || bits_remaining == 2 || bits_remaining == 4 || bits_remaining == 5)}; 210 | end else begin 211 | // RAM Read command is 0Bh 212 | spi_data_out = (bits_remaining == 1)? 4'b0000 : 4'b1011; 213 | end 214 | end 215 | end 216 | FSM_ADDR: spi_data_out = addr[ADDR_BITS-1:ADDR_BITS-4]; 217 | // for write only, chk spi_data_oe 218 | FSM_STREAM: spi_data_out = data_in; 219 | default: spi_data_out = 4'b0000; 220 | endcase 221 | end 222 | 223 | 224 | // addr used in spi_data_out 225 | always @(posedge clk) begin 226 | if (fsm_state == FSM_IDLE && (start_read || start_write)) begin 227 | addr <= addr_in; 228 | end else if (fsm_state == FSM_ADDR) begin 229 | addr <= {addr[ADDR_BITS-5:0], 4'b0000}; 230 | end 231 | end 232 | 233 | 234 | // handle read latency 235 | always @(negedge clk) begin 236 | spi_miso_buf_n <= {spi_miso_buf_n[3:0], spi_data_in}; 237 | end 238 | always @(posedge clk) begin 239 | spi_miso_buf_p <= {spi_miso_buf_p[3:0], spi_data_in}; 240 | end 241 | reg [3:0] spi_miso_in; 242 | always @(*) begin 243 | if (latency[0]) begin 244 | if (latency[1]) spi_miso_in = spi_miso_buf_p[3:0]; 245 | else spi_miso_in = spi_miso_buf_p[7:4]; 246 | end else begin 247 | if (latency[2]) spi_miso_in = spi_miso_buf_n[3:0]; 248 | else spi_miso_in = spi_miso_buf_n[7:4]; 249 | end 250 | end 251 | 252 | // data_out to read caller 253 | always @(posedge clk) begin 254 | // only valid when data_ready == 1 255 | data <= spi_miso_in; 256 | end 257 | 258 | 259 | 260 | //assign debug_spi_miso_in = spi_miso_in; 261 | //assign debug_spi_data_in = spi_data_in; 262 | endmodule -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/project.v: -------------------------------------------------------------------------------- 1 | 2 | /* Matt pongsagon 3 | 4 | Task: 5 | 0. enter Quad mode 6 | 0. read 3 byte 7 | - #tri, tex? -> save as reg 8 | 1. copy flash->ram 9 | - 32768 byte: 256x256 tex, 4-bit/texel 10 | - each tri: 22/28 byte 11 | 2. read front, clear back 12 | 3. clear z 13 | 4. swap 14 | 15 | Include module 16 | - vga 17 | - gamepad 18 | - vsfs 19 | - spi_flash_controller 20 | - color_palette 21 | 22 | 23 | Addr flash 24 | - byte 0,1: #tri 25 | - byte 2: tex? (bit 0 of the byte; tex/tri data starts at byte 3)
26 | // has tex 27 | - byte 32768: 256x256 tex 28 | - each tri: 28 byte 29 | // no tex 30 | - each tri: 22 byte 31 | 32 | 33 | Addr RAM space 34 | - after reset, FSM will drive start read 35 | - 0: front, FLASH tri 36 | - 38400: back (4bit x 320x240) 37 | - 76800: z (8bit x 320x240) 38 | - 153600: tri (<1024) 39 | // has tex 40 | - 32768 byte: 256x256 tex 41 | - each tri: 28 byte 42 | // no tex 43 | - each tri: 22 byte 44 | 45 | 46 | 47 | - ui_in[2:0]: latency, 48 | - ui_in[6:4]: gamepad pmod 49 | 50 | 51 | */

//`timescale 1ns / 1ps

// Top level of the tiny GPU.
//
// This module owns the "display" FSM, which
//   0. puts the QSPI controller into quad mode,
//   1. reads the 3-byte header (#tri, has_tex) from flash and copies the
//      texture/triangle payload from flash into RAM,
//   2. streams the front buffer to the VGA pins line by line and, on the
//      first frame after a swap, clears the back buffer line by line,
//   3. clears the Z buffer and then pulses start_vsfs,
//   4. swaps front/back when the vsfs block pulses do_swap.
// The single SPI controller is shared between this FSM and the vsfs block
// through the ram_notbusy_end/vsfs_running mux below.
module tt_um_pongsagon_tinygpu_v2 (
    input  wire [7:0] ui_in,    // Dedicated inputs
    output wire [7:0] uo_out,   // Dedicated outputs
    input  wire [7:0] uio_in,   // IOs: Input path
    output wire [7:0] uio_out,  // IOs: Output path
    output wire [7:0] uio_oe,   // IOs: Enable path (active high: 0=input, 1=output)
    input  wire       ena,      // always 1 when the design is powered, so you can ignore it
    input  wire       clk,      // clock

    // // sim version
    // output wire [9:0] sim_x,
    // output wire [9:0] sim_y,
    // output wire sim_blank,

    // // things to watch
    // output wire debug_do_swap,
    // // output wire debug_ram_notbusy,
    // output wire [15:0] debug_x_model_v0,
    // output wire [15:0] debug_x_model_v1,
    // output wire [15:0] debug_x_model_v2,
    // output wire [15:0] debug_y_model_v0,
    // output wire [15:0] debug_y_model_v1,
    // output wire [15:0] debug_y_model_v2,
    // output wire [15:0] debug_z_model_v0,
    // output wire [15:0] debug_z_model_v1,
    // output wire [15:0] debug_z_model_v2,
    // output wire [15:0] debug_nx,
    // output wire [15:0] debug_ny,
    // output wire [15:0] debug_nz,
    // output wire [1:0] debug_tri_color,
    // output wire [7:0] debug_vsfs_fsm_state,
    // // output wire [9:0] debug_numtri,
    // // output wire [4:0] debug_fsm_state,
    // // output wire debug_start_printing,
    // // output wire [3:0] debug_spi_data,
    // output wire [7:0] debug_sub_frame,
    // output reg [21:0] debug_clk,   // >2.11M

    input  wire       rst_n     // reset_n - low to reset
);

    // ------------------------------------------------------------------
    // Memory map / timing constants (values unchanged from the header comment)
    // ------------------------------------------------------------------
    localparam [23:0] RAM_BACK_BASE    = 24'd38400;   // second 4-bit colour buffer
    localparam [23:0] RAM_Z_BASE       = 24'd76800;   // 8-bit Z buffer
    localparam [23:0] RAM_TRI_BASE     = 24'd153600;  // texture + triangle data
    localparam [4:0]  FLASH_READ_DELAY = 5'd24;       // clks before first flash nibble is sampled
    localparam [4:0]  RAM_READ_DELAY   = 5'd16;       // clks before first RAM nibble is sampled

    // ------------------------------------------------------------------
    // vga
    // ------------------------------------------------------------------
    wire HS, VS;
    wire setblack;
    wire setblack_;
    wire [9:0] x;
    wire [9:0] y;
    wire blank;
    // setblack gates RAM arbitration, setblack_ gates the pixel outputs;
    // they intentionally differ by one pixel in x (320 vs 319).
    assign setblack  = ((x > 320) || (y > 239));
    assign setblack_ = ((x > 319) || (y > 239));

    vga v(
        .clk(clk),
        .reset(rst_n),
        .HS(HS),
        .VS(VS),
        .blank(blank),
        .x(x),
        .y(y)
    );

    // ------------------------------------------------------------------
    // external SPI interface
    //   uio[0]   flash select      uio[3]   clock
    //   uio[2:1] data[1:0]         uio[5:4] data[3:2]
    //   uio[6]   RAM A select      uio[7]   RAM B select
    // ------------------------------------------------------------------
    wire [3:0] qspi_data_in = {uio_in[5:4], uio_in[2:1]};
    wire [3:0] qspi_data_out;
    wire [3:0] qspi_data_oe;
    wire       qspi_clk_out;
    wire       qspi_flash_select;
    wire       qspi_ram_a_select;
    wire       qspi_ram_b_select;
    assign uio_out = {qspi_ram_b_select, qspi_ram_a_select, qspi_data_out[3:2],
                      qspi_clk_out, qspi_data_out[1:0], qspi_flash_select};
    // All bidirectional pins are released (inputs) while in reset.
    assign uio_oe  = rst_n ? {2'b11, qspi_data_oe[3:2], 1'b1, qspi_data_oe[1:0], 1'b1} : 8'h00;

    // ------------------------------------------------------------------
    // internal SPI
    // ------------------------------------------------------------------
    reg         spi_select_ROM;
    reg         spi_enter_quadmode;
    wire        spi_start_read;
    wire        spi_start_write;
    wire        spi_stop_txn;
    wire [23:0] spi_addr;
    wire [3:0]  spi_data_in;
    wire [3:0]  spi_data;
    wire        spi_data_req;
    wire        spi_data_ready;
    wire        spi_at_quadmode;

    // ------------------------------------------------------------------
    // vsfs
    // ------------------------------------------------------------------
    wire        vsfs_start_read;
    wire        vsfs_start_write;
    wire        vsfs_stop_txn;
    wire [23:0] vsfs_addr;
    wire [3:0]  vsfs_data_in;
    wire        do_swap;
    reg         start_vsfs;
    reg  [9:0]  numtri;             // <1024 tri

    // ------------------------------------------------------------------
    // main FSM state
    // ------------------------------------------------------------------
    reg         has_tex;
    reg  [4:0]  fsm_state;          // 0-31: state
    reg  [4:0]  read_delay;         // flash: 24, RAM R: 16
    reg  [17:0] numread;            // #4bit read, >153600 (#z pixel)
    reg  [3:0]  pixels [5:0];       // header nibbles; pixels[5:4] hold the has_tex byte
    reg  [3:0]  buffer [3:0];       // 2-byte (4-nibble) flash->RAM copy buffer
    reg  [7:0]  sub_frame;          // frames since last swap; saturates at 255
    reg         evenframe;          // 1: front -> 0, 0: front -> 38400
    wire [9:0]  yplus1;             // to set addr for the next line
    assign yplus1 = y + 1;

    // Byte offset of a scan line inside a colour buffer: line * 160 = line*128 + line*32
    wire [23:0] line_addr      = {7'b0, y,      7'b0} + {9'b0, y,      5'b0};
    wire [23:0] next_line_addr = {7'b0, yplus1, 7'b0} + {9'b0, yplus1, 5'b0};

    //
    reg  [15:0] i_numtri_byte;      // for loop read numtri, 1024x28=28672+32768 <65536
    wire [15:0] numtri_byte;        // numtri * 22 (2 x 3xyz x 3vert + 4 (normal/color)), 22528
    wire [15:0] numtri_byte_tex;    // numtri * 28 (22 + 2x3 (uv x 3vert)) + 32768
    assign numtri_byte     = {2'b0,numtri,4'b0} + {4'b0,numtri,2'b0} + {5'b0,numtri,1'b0};  // 16+4+2
    assign numtri_byte_tex = {2'b0,numtri,4'b0} + {3'b0,numtri,3'b0}
                           + {4'b0,numtri,2'b0} + {16'h8000};                               // 16+8+4

    // SPI requests issued by the display FSM
    reg         display_start_read;
    reg         display_start_write;
    reg         display_stop_txn;
    reg  [23:0] display_addr;
    reg  [3:0]  display_data_in;

    //
    wire        vsfs_running;
    wire        eol;                // stop access mem to display image
    wire        eof;                // stop access mem to display image
    wire        eol_e;
    wire        eof_e;
    wire [9:0]  SLOWEST_STATE;
    assign SLOWEST_STATE = (has_tex)? 97 : 85;   // 65/77 RW time + 16 wait + 4 safe
    assign eol   = (y < 239)  && (x > (10'd799-SLOWEST_STATE)) && (sub_frame > 1);
    assign eof   = (y == 524) && (x > (10'd799-SLOWEST_STATE));    // true(725,524)->, false(0,0)
    assign eol_e = (y < 239)  && (x > 782) && (sub_frame > 1);
    assign eof_e = (y == 524) && (x > 782);

    // mux between VSFS and display to ram
    // ram_notbusy goes to vsfs; ram_notbusy_end (later cut-off at x > 782)
    // decides who actually drives the SPI controller.
    wire ram_notbusy;
    wire ram_notbusy_end;
    assign ram_notbusy     = ((sub_frame > 1) && setblack && !eol && !eof) ||
                             ((sub_frame < 2) && (!eof) && (y > 431));
    assign ram_notbusy_end = ((sub_frame > 1) && setblack && !eol_e && !eof_e) ||
                             ((sub_frame < 2) && (!eof_e) && (y > 431));

    wire vsfs_owns_spi = ram_notbusy_end && vsfs_running;
    assign spi_addr        = vsfs_owns_spi ? vsfs_addr        : display_addr;
    assign spi_data_in     = vsfs_owns_spi ? vsfs_data_in     : display_data_in;
    assign spi_start_read  = vsfs_owns_spi ? vsfs_start_read  : display_start_read;
    assign spi_start_write = vsfs_owns_spi ? vsfs_start_write : display_start_write;
    assign spi_stop_txn    = vsfs_owns_spi ? vsfs_stop_txn    : display_stop_txn;

    // ------------------------------------------------------------------
    // gamepad
    //   gamepad_input = {y, x, b, a, left, right, up, down}
    // ------------------------------------------------------------------
    wire inp_select, inp_start, inp_l, inp_r, gamepad_present;
    wire [7:0] gamepad_input;
    gamepad_pmod_single driver (
        // Inputs:
        .rst_n(rst_n),
        .clk(clk),
        .pmod_data(ui_in[6]),
        .pmod_clk(ui_in[5]),
        .pmod_latch(ui_in[4]),
        // Outputs:
        .b(gamepad_input[5]),
        .y(gamepad_input[7]),
        .select(inp_select),
        .start(inp_start),
        .up(gamepad_input[1]),
        .down(gamepad_input[0]),
        .left(gamepad_input[3]),
        .right(gamepad_input[2]),
        .a(gamepad_input[4]),
        .x(gamepad_input[6]),
        .l(inp_l),
        .r(inp_r),
        .is_present(gamepad_present)
    );

    vsfs _vsfs(
        .clk(clk),
        .reset(rst_n),
        .vsfs_addr(vsfs_addr),
        .vsfs_data_in(vsfs_data_in),
        .vsfs_start_read(vsfs_start_read),
        .vsfs_start_write(vsfs_start_write),
        .vsfs_stop_txn(vsfs_stop_txn),
        .spi_data(spi_data),
        .spi_data_req(spi_data_req),
        .start_vsfs(start_vsfs),
        .do_swap(do_swap),
        .vsfs_running(vsfs_running),
        .x(x),
        .y(y),
        .numtri(numtri),
        .evenframe(evenframe),
        .gamepad_input(gamepad_input),
        .has_tex(has_tex),

        //
        // .debug_x_model_v0(debug_x_model_v0),
        // .debug_x_model_v1(debug_x_model_v1),
        // .debug_x_model_v2(debug_x_model_v2),
        // .debug_y_model_v0(debug_y_model_v0),
        // .debug_y_model_v1(debug_y_model_v1),
        // .debug_y_model_v2(debug_y_model_v2),
        // .debug_z_model_v0(debug_z_model_v0),
        // .debug_z_model_v1(debug_z_model_v1),
        // .debug_z_model_v2(debug_z_model_v2),
        // .debug_nx(debug_nx),
        // .debug_ny(debug_ny),
        // .debug_nz(debug_nz),
        // .debug_tri_color(debug_tri_color),
        // .debug_vsfs_fsm_state(debug_vsfs_fsm_state),

        .ram_notbusy(ram_notbusy)
    );

    spi_flash_controller i_spi (
        .clk                (clk),
        .rstn               (rst_n),

        .spi_data_in        (qspi_data_in),
        .spi_data_out       (qspi_data_out),
        .spi_data_oe        (qspi_data_oe),
        .spi_clk_out        (qspi_clk_out),
        .spi_flash_select   (qspi_flash_select),
        .spi_ram_a_select   (qspi_ram_a_select),
        .spi_ram_b_select   (qspi_ram_b_select),

        .latency            (ui_in[2:0]),

        .select_ROM         (spi_select_ROM),
        .enter_quadmode     (spi_enter_quadmode),
        .start_read         (spi_start_read),
        .start_write        (spi_start_write),
        .stop_txn           (spi_stop_txn),
        .addr_in            (spi_addr),
        .data_in            (spi_data_in),
        .data_out           (spi_data),
        .data_req           (spi_data_req),
        .data_ready         (spi_data_ready),
        .at_quadmode        (spi_at_quadmode)
    );

    wire [5:0] color;
    color_palette _color(.spi_data(spi_data), .color(color));

    // pixel spi data to uo_out
    // With a texture the 4-bit nibble drives the pins directly; without one the
    // nibble is looked up in color_palette. Outside the 320x240 window the
    // colour pins are forced to 0.
    assign uo_out[0] = (setblack_ ? 1'b0 : (has_tex) ? spi_data[3] : color[5]);  // R1
    assign uo_out[1] = (setblack_ ? 1'b0 : (has_tex) ? spi_data[2] : color[3]);  // G1
    assign uo_out[2] = (setblack_ ? 1'b0 : (has_tex) ? spi_data[0] : color[1]);  // B1
    assign uo_out[3] = VS;
    assign uo_out[4] = (setblack_ ? 1'b0 : (has_tex) ? spi_data[3] : color[4]);  // R0
    assign uo_out[5] = (setblack_ ? 1'b0 : (has_tex) ? spi_data[1] : color[2]);  // G0
    assign uo_out[6] = (setblack_ ? 1'b0 : (has_tex) ? spi_data[0] : color[0]);  // B0
    assign uo_out[7] = HS;

    // Tie off inputs/nets that this module does not use so lint stays clean.
    wire _unused = &{ena, ui_in[7], ui_in[3], uio_in[7:6], uio_in[3], uio_in[0],
                     blank, inp_select, inp_start, inp_l, inp_r, gamepad_present,
                     spi_data_ready, 1'b0};

    always @(posedge clk) begin
        if (!rst_n) begin
            has_tex             <= 0;
            fsm_state           <= 0;
            read_delay          <= 0;
            numread             <= 0;
            sub_frame           <= 0;
            evenframe           <= 1;
            i_numtri_byte       <= 0;
            // pixels [5:0], buffer[3:0] are not reset
            //
            //debug_clk <= 0;
            // SPI
            spi_select_ROM      <= 0;
            spi_enter_quadmode  <= 0;
            display_start_read  <= 0;
            display_start_write <= 0;
            display_stop_txn    <= 0;
            display_addr        <= 0;
            display_data_in     <= 0;
            //
            start_vsfs          <= 0;
            numtri              <= 0;
        end else begin
            //debug_clk <= debug_clk + 1;
            case (fsm_state)
                // ---------------------------------------------------------
                // 0. enter Quad mode (fsm 0-1)
                // ---------------------------------------------------------
                0: begin
                    if ((y == 524) && (x == 775)) begin
                        spi_enter_quadmode <= 1;
                        fsm_state          <= 1;
                    end
                end
                // wait for at_quadmode, around 8 clk
                1: begin
                    spi_enter_quadmode <= 0;
                    if (spi_at_quadmode) begin
                        display_stop_txn <= 1;
                        fsm_state        <= 2;
                    end
                end

                // ---------------------------------------------------------
                // 1. copy Flash -> RAM (fsm 2 - 10)
                // - read numtri (2 byte) + has_tex (1 byte) from flash addr 0
                // ---------------------------------------------------------
                2: begin
                    display_stop_txn   <= 0;
                    numread            <= 0;
                    spi_select_ROM     <= 1;     // flash
                    display_start_read <= 1;
                    display_addr       <= 0;
                    fsm_state          <= 3;
                end
                // -- wait for the first flash data to be ready
                3: begin
                    display_start_read <= 0;
                    if (read_delay == FLASH_READ_DELAY) begin
                        read_delay               <= 0;
                        pixels[numread[2:0]]     <= spi_data;
                        numread                  <= 1;
                        fsm_state                <= 4;
                    end else begin
                        read_delay <= read_delay + 1;
                    end
                end
                // -- read 5 more 4bit
                4: begin
                    pixels[numread[2:0]] <= spi_data;
                    numread              <= numread + 1;
                    if (numread == 5) begin
                        numread          <= 0;   // overrides the increment above
                        display_stop_txn <= 1;
                        fsm_state        <= 5;
                    end
                end
                // -- save to numtri, little endian -> big endian
                5: begin
                    display_stop_txn <= 0;
                    has_tex          <= pixels[5][0];
                    numtri[9:8]      <= pixels[3][1:0];
                    numtri[3:0]      <= pixels[1];
                    numtri[7:4]      <= pixels[0];
                    i_numtri_byte    <= 0;
                    display_addr     <= 3;       // 1st addr of tex / tri
                    fsm_state        <= 6;
                end

                // 2 cases
                // tex:
                //  - for loop copy tex from flash->ram
                //  - for loop copy each tri: 28 byte
                // notex:
                //  - for loop copy each tri: 22 byte
                // The loop moves 2 bytes per iteration (states 6-10).
                6: begin
                    display_stop_txn <= 0;
                    numread          <= 0;
                    if (i_numtri_byte == ((has_tex) ? numtri_byte_tex : numtri_byte)) begin
                        i_numtri_byte <= 0;
                        fsm_state     <= 11;
                    end else begin
                        spi_select_ROM     <= 1;
                        display_start_read <= 1;
                        fsm_state          <= 7;
                    end
                end
                // -- wait for the first flash data to be ready
                7: begin
                    display_start_read <= 0;
                    if (read_delay == FLASH_READ_DELAY) begin
                        read_delay           <= 0;
                        buffer[numread[1:0]] <= spi_data;
                        numread              <= 1;
                        fsm_state            <= 8;
                    end else begin
                        read_delay <= read_delay + 1;
                    end
                end
                // -- read 3 more 4bit
                8: begin
                    buffer[numread[1:0]] <= spi_data;
                    numread              <= numread + 1;
                    if (numread == 3) begin
                        numread          <= 0;
                        display_stop_txn <= 1;
                        fsm_state        <= 9;
                    end
                end
                // -- init to write to RAM tri, little endian -> big endian
                //    (2-bit index + 2 wraps, so nibbles go out in order 2,3,0,1)
                9: begin
                    display_stop_txn    <= 0;
                    spi_select_ROM      <= 0;
                    display_start_write <= 1;
                    // addr of ram
                    display_addr        <= RAM_TRI_BASE + {8'b0, i_numtri_byte};
                    display_data_in     <= buffer[numread[1:0] + 2'b10];   // little->big
                    numread             <= numread + 1;
                    fsm_state           <= 10;
                end
                // -- wait and write 2 byte
                10: begin
                    display_start_write <= 0;
                    if (spi_data_req) begin
                        display_data_in <= buffer[numread[1:0] + 2'b10];
                        numread         <= numread + 1;
                        if (numread == 3) begin
                            numread          <= 0;
                            display_stop_txn <= 1;
                            // addr of flash
                            // 5 = offset #tri+hastex 3byte + next 2 byte
                            display_addr     <= {8'b0, i_numtri_byte} + 5;
                            i_numtri_byte    <= i_numtri_byte + 2;
                            fsm_state        <= 6;
                        end
                    end
                end

                // ---------------------------------------------------------
                // 2. RAM front -> vga uo_out[] + clear Back (fsm 11 - 16)
                // - wait for eof last line y and x 16 clk ahead to read the first pixel
                // ---------------------------------------------------------
                11: begin
                    display_stop_txn <= 0;
                    start_vsfs       <= 0;
                    if (do_swap) begin
                        // 4. swap, set for 1clk from VSFS
                        evenframe <= !evenframe;
                        sub_frame <= 0;
                    end else if ((y == 524) && (x == 783)) begin   // mark1: eof
                        spi_select_ROM     <= 0;
                        display_start_read <= 1;
                        display_addr       <= (evenframe) ? 24'd0 : RAM_BACK_BASE;
                        //debug_clk <= 0;
                        // Saturate instead of wrapping: only ==1, <2 and >1 are
                        // ever tested, and a wrap to 0/1 during a long render
                        // would re-trigger the clear-back/clear-Z/start_vsfs
                        // sequence on the buffer that is still being drawn.
                        if (sub_frame != 8'hFF) begin
                            sub_frame <= sub_frame + 1;
                        end
                        fsm_state          <= 12;
                    end
                end
                // - wait for the first data to be ready, display the first pixel
                12: begin
                    display_start_read <= 0;
                    if (read_delay == RAM_READ_DELAY) begin
                        read_delay <= 0;
                        numread    <= 1;
                        fsm_state  <= 13;
                    end else begin
                        read_delay <= read_delay + 1;
                    end
                end
                // - keep display pixels for 1 line, until hblank
                13: begin
                    numread <= numread + 1;
                    if (numread == 319) begin                      // mark2: fin draw 1 line
                        numread          <= 0;
                        display_stop_txn <= 1;
                        if (sub_frame == 1) begin
                            // do clear back
                            fsm_state <= 14;
                        end else if (y < 239) begin
                            // go to wait for eol
                            fsm_state <= 16;
                        end else begin
                            // go to wait for eof
                            fsm_state <= 11;
                        end
                    end
                end
                // - switch to clear back 1 line
                14: begin
                    display_stop_txn    <= 0;
                    display_start_write <= 1;
                    display_data_in     <= 4'b0000;                // black
                    // addr <= y * 160 + offset of back[0];
                    display_addr        <= (evenframe) ? line_addr + RAM_BACK_BASE
                                                       : line_addr;
                    numread             <= numread + 1;
                    fsm_state           <= 15;
                end
                // - wait and clear back 1 line
                15: begin
                    display_start_write <= 0;
                    if (spi_data_req) begin
                        display_data_in <= 4'b0000;
                        numread         <= numread + 1;
                        if (numread == 319) begin
                            numread          <= 0;
                            display_stop_txn <= 1;
                            if (y == 239) begin                    // mark5: to clearZ
                                fsm_state <= 17;
                            end else begin                         // mark3: fin clear back 1 line
                                fsm_state <= 16;
                            end
                        end
                    end
                end
                // - wait for eol state, start read 16 clk ahead of the next line
                //   enter here from 13 OR 15
                16: begin
                    display_stop_txn <= 0;
                    if (x == 783) begin                            // mark4: eol
                        display_start_read <= 1;
                        display_addr       <= (evenframe) ? next_line_addr
                                                          : next_line_addr + RAM_BACK_BASE;
                        fsm_state          <= 12;
                    end
                end

                // ---------------------------------------------------------
                // 3. clearZ (fsm 17-18)
                // ---------------------------------------------------------
                17: begin
                    display_stop_txn    <= 0;
                    display_start_write <= 1;
                    display_addr        <= RAM_Z_BASE;
                    display_data_in     <= 4'b1111;                // 0.996 farthest
                    numread             <= numread + 1;
                    fsm_state           <= 18;
                end
                18: begin
                    display_start_write <= 0;
                    if (spi_data_req) begin
                        display_data_in <= 4'b1111;
                        numread         <= numread + 1;
                        if (numread == 153599) begin               // mark6: fin clear z
                            numread          <= 0;
                            display_stop_txn <= 1;
                            start_vsfs       <= 1;
                            fsm_state        <= 11;
                        end
                    end
                end

                default: begin
                    fsm_state <= 0;
                end
            endcase
        end
    end

    // need for sim with verilator
    // assign sim_x = x;
    // assign sim_y = y;
    // assign sim_blank = blank;
    // // // debug
    // // assign debug_numtri = numtri;
    // // assign debug_fsm_state = fsm_state;
    // // assign debug_start_printing = ((fsm_state == 11) && ((y == 524) && (x == 783)));
    // // assign debug_spi_data = spi_data;
    // assign debug_sub_frame = sub_frame;
    // // assign debug_ram_notbusy = ram_notbusy;
    // assign debug_do_swap = do_swap;

endmodule
-------------------------------------------------------------------------------- /src/vsfs.v: -------------------------------------------------------------------------------- 1 | 2 | /* Matt pongsagon 3 | 4 | - addr Flash and RAM front start at 0 5 | - addr RAM back start at 38400 (320x240=76800/2, 2pixel:1byte) 6 | - addr RAM z start at 76800 (1pixel:1byte) 7 | - front 4bit, back 4bit, z 8bit 8 | - cal z in Q2.20 save
in Q0.8 [0,1] 9 | - addr Tri 10 | // has tex 11 | - start 153600: 32768 byte: 256x256 tex 12 | - start 186368: each tri: 28 byte 13 | // no tex 14 | - start 153600: each tri: 22 byte 15 | 16 | gamepad_input 8 bit 17 | 0,1: +-rotX 18 | 2,3: +-rotY 19 | 4,5: +-Tz 20 | 6,7: +-rotY light 21 | 22 | color mode 1 tex 23 | 0: black 24 | 1: blue 25 | 2: dark green 26 | 15: 27 | 28 | color mode 2 flat shade 29 | 0: white 30 | 1: cyan 31 | 2: magenta 32 | 3: yellow 33 | 34 | VSFS 35 | // free state [22-30, 130-139, 179-189, 224-229,233-239,243-249] 36 | 4.5: use state 2-21 for uv*bar 37 | tex: use state 211-223 38 | 0. perframe, state [140-250], quota 800x430 = 340,000 clk 39 | - Tz, rotx2 -> [M] ([Tz*Rx*Ry]) // cos table, set premul mat manually 40 | - dir light rot 1axis -> update formula 41 | - [M]*[VP] // no cam fix [VP] 42 | - [M]-1 ([Ryt*Rxt*Tz-1]) // override [M], for light-world, campos-world -> model 43 | - [M-1]*campos 44 | - [M-1]*lightdir 45 | 46 | 1. for each tri, state [31-129, 254,255] 47 | - READ tri (pos.xyz x3 Q8.8, face normal Q2.8 x3, color 2bit) 48 | 49 | 2. VS 50 | - x1: backface culling (view-model dot faceNormal-model) 51 | - x3: [MVP]*v, clip->NDC (div), NDC->screen 52 | - x1: light-model dot faceNormal-model 53 | 54 | 3. bbox 55 | - bbox (mul of 4, not chk outofrange, 3clk) 56 | 4. e0, bar, zbar 57 | 4.1 x3: e_init 58 | 4.2 denom 59 | 4.3 x3: bar_init, bar_dx, bar_dy 60 | 4.4 x3: z_bar, z_bar_dx, z_bar_dy 61 | 4.5 x2: uv*bar 62 | 5. for pixel y in bbox (y < bboxMax_Y) 63 | - e0 = e0_init, z = z_bar / e0_init += dy, z_bar += z_bar_dy 64 | - for x in bbox (x < bboxMax_X) 65 | - READ x4 Z+B (42 clk) 66 | - READ x4 texel (35 clk x2) 67 | // x4 pixel 68 | - if ((e0 < 0) && (e1 < 0) && (e2 < 0)) / e0 += dx, z += z_bar_dx 69 | - pixel[0-4].cz = (Z < Zbuffer)? 
cz: pixel[0-4].cz 70 | // 71 | - WRITE x4 Z+B (28 clk) 72 | 73 | 74 | */ 75 | 76 | //`timescale 1ns / 1ps 77 | 78 | 79 | module vsfs ( 80 | input wire clk, 81 | input wire reset, 82 | output reg [23:0] vsfs_addr, 83 | output reg [3:0] vsfs_data_in, 84 | output reg vsfs_start_read, 85 | output reg vsfs_start_write, 86 | output reg vsfs_stop_txn, 87 | input wire [3:0] spi_data, 88 | input wire spi_data_req, 89 | input wire start_vsfs, 90 | output reg do_swap, 91 | output reg vsfs_running, 92 | input wire [9:0] x, 93 | input wire [9:0] y, 94 | input wire [9:0] numtri, 95 | input wire evenframe, 96 | 97 | // gamepad input 98 | input wire [7:0] gamepad_input, 99 | input wire has_tex, 100 | 101 | 102 | // things to watch 103 | // output wire [15:0] debug_x_model_v0, 104 | // output wire [15:0] debug_x_model_v1, 105 | // output wire [15:0] debug_x_model_v2, 106 | // output wire [15:0] debug_y_model_v0, 107 | // output wire [15:0] debug_y_model_v1, 108 | // output wire [15:0] debug_y_model_v2, 109 | // output wire [15:0] debug_z_model_v0, 110 | // output wire [15:0] debug_z_model_v1, 111 | // output wire [15:0] debug_z_model_v2, 112 | // output wire [15:0] debug_nx, 113 | // output wire [15:0] debug_ny, 114 | // output wire [15:0] debug_nz, 115 | // output wire [1:0] debug_tri_color, 116 | // output wire [7:0] debug_vsfs_fsm_state, 117 | 118 | input wire ram_notbusy 119 | 120 | ); 121 | 122 | 123 | // used by div w 124 | reg signed [31:0] div_a; 125 | reg signed [31:0] div_b; 126 | wire signed [31:0] div_result; 127 | reg div_start; 128 | wire div_done; 129 | wire div_busy; 130 | wire div_valid; 131 | wire div_dbz; 132 | wire div_ovf; 133 | // Q16.16 134 | div div1 (.clk (clk), .rst(reset),.start(div_start),.done(div_done) 135 | ,.a(div_a),.b(div_b),.val(div_result) 136 | ,.busy(div_busy),.valid(div_valid),.dbz(div_dbz),.ovf(div_ovf)); 137 | 138 | // used by denom, (div w and denom can be computed at the same time) 139 | // Q20.20 140 | reg signed [39:0] div2_a; 141 | reg 
signed [39:0] div2_b; 142 | wire signed [39:0] div2_result; 143 | reg div2_start; 144 | wire div2_done; 145 | wire div2_busy; 146 | wire div2_valid; 147 | wire div2_dbz; 148 | wire div2_ovf; 149 | div #(.WIDTH(40),.FBITS(20)) div2 150 | (.clk (clk), .rst(reset),.start(div2_start),.done(div2_done) 151 | ,.a(div2_a),.b(div2_b),.val(div2_result) 152 | ,.busy(div2_busy),.valid(div2_valid),.dbz(div2_dbz),.ovf(div2_ovf)); 153 | 154 | // mul 22-bit used to compute ei_init, bar 155 | reg signed [21:0] mul_a; 156 | reg signed [21:0] mul_b; 157 | wire signed [43:0] mul_result; 158 | reg mul_start; 159 | wire mul_done; 160 | wire mul_busy; 161 | wire mul_aux; 162 | slowmpy #(.LGNA(5),.NA(22)) mul 163 | (.i_clk (clk), .i_reset(reset), .i_stb(mul_start),.i_a(mul_a) 164 | ,.i_b(mul_b),.i_aux(1'b0),.o_done(mul_done),.o_p(mul_result) 165 | ,.o_busy(mul_busy),.o_aux(mul_aux)); 166 | 167 | reg dot_start; 168 | wire dot_done; 169 | // use in always @(*), not infer registers (that's not what declare the signal of reg type means), 170 | // it infers a multiplexer with constant assignment 171 | reg signed [15:0] v1_x; 172 | reg signed [15:0] v1_y; 173 | reg signed [15:0] v1_z; 174 | reg signed [15:0] v1_w; 175 | reg signed [15:0] v2_x; 176 | reg signed [15:0] v2_y; 177 | reg signed [15:0] v2_z; 178 | reg signed [15:0] v2_w; 179 | wire signed [15:0] dot_result; 180 | dot4 dot (.clk (clk), .reset(reset),.start(dot_start) 181 | ,.v1_x(v1_x),.v1_y(v1_y),.v1_z(v1_z),.v1_w(v1_w) 182 | ,.v2_x(v2_x),.v2_y(v2_y),.v2_z(v2_z),.v2_w(v2_w) 183 | ,.done(dot_done),.result(dot_result)); 184 | 185 | 186 | // main FSM 187 | reg [7:0] fsm_state; 188 | // [M]/[M-1] (use the same reg), [MVP] (row major) 189 | reg signed [15:0] M_00; // Q8.8 190 | reg signed [15:0] M_01; 191 | reg signed [15:0] M_02; 192 | reg signed [15:0] M_03; 193 | reg signed [15:0] M_10; 194 | reg signed [15:0] M_11; 195 | reg signed [15:0] M_12; 196 | reg signed [15:0] M_13; 197 | reg signed [15:0] M_20; 198 | reg signed [15:0] M_21; 
199 | reg signed [15:0] M_22; 200 | reg signed [15:0] M_23; 201 | reg signed [15:0] M_30; 202 | reg signed [15:0] M_31; 203 | reg signed [15:0] M_32; 204 | reg signed [15:0] M_33; 205 | reg signed [15:0] MVP_00; // Q8.8 206 | reg signed [15:0] MVP_01; 207 | reg signed [15:0] MVP_02; 208 | reg signed [15:0] MVP_03; 209 | reg signed [15:0] MVP_10; 210 | reg signed [15:0] MVP_11; 211 | reg signed [15:0] MVP_12; 212 | reg signed [15:0] MVP_13; 213 | reg signed [15:0] MVP_20; 214 | reg signed [15:0] MVP_21; 215 | reg signed [15:0] MVP_22; 216 | reg signed [15:0] MVP_23; 217 | reg signed [15:0] MVP_30; 218 | reg signed [15:0] MVP_31; 219 | reg signed [15:0] MVP_32; 220 | reg signed [15:0] MVP_33; 221 | 222 | // read tri from RAM 223 | reg [4:0] read_delay; 224 | reg [17:0] numread; 225 | reg [9:0] tri_idx; // max 1024 tri 226 | wire [15:0] tri_idx_addr; // in byte: numtri * 22 (2 x 3xyz x 3vert + 4 (normal/color)); 227 | wire [15:0] tri_idx_addr_tex; 228 | assign tri_idx_addr = {2'b0,tri_idx,4'b0} + {4'b0,tri_idx,2'b0} + {5'b0,tri_idx,1'b0}; 229 | assign tri_idx_addr_tex = {2'b0,tri_idx,4'b0} + {3'b0,tri_idx,3'b0} + {4'b0,tri_idx,2'b0} + {16'h8000}; 230 | 231 | reg signed [15:0] tri_xyz [13:0]; 232 | 233 | // model space/NDC (use the same reg) 234 | reg signed [15:0] x_model_v0; // Q8.8 from file 235 | reg signed [15:0] x_model_v1; 236 | reg signed [15:0] x_model_v2; 237 | reg signed [15:0] y_model_v0; 238 | reg signed [15:0] y_model_v1; 239 | reg signed [15:0] y_model_v2; 240 | reg signed [15:0] z_model_v0; 241 | reg signed [15:0] z_model_v1; 242 | reg signed [15:0] z_model_v2; 243 | reg [7:0] v0_u; // Q8.0 244 | reg [7:0] v0_v; 245 | reg [7:0] v1_u; 246 | reg [7:0] v1_v; 247 | reg [7:0] v2_u; 248 | reg [7:0] v2_v; 249 | reg signed [15:0] x_clip_v0; // Q8.8 250 | reg signed [15:0] x_clip_v1; 251 | reg signed [15:0] x_clip_v2; 252 | reg signed [15:0] y_clip_v0; 253 | reg signed [15:0] y_clip_v1; 254 | reg signed [15:0] y_clip_v2; 255 | reg signed [15:0] z_clip_v0; 256 | 
// ---------------------------------------------------------------------------
// Datapath registers (continuation of the declaration list above).
// Fixed-point formats are the ones given by the original annotations.
// ---------------------------------------------------------------------------

// Clip-space z / w per vertex (written from dot_result by the [MVP]*v states).
reg signed [15:0] z_clip_v1, z_clip_v2;
reg signed [15:0] w_clip_v0, w_clip_v1, w_clip_v2;

// Camera position in model space, Q8.8. Recomputed in states 230-232 as
// [M-1] * (fixed world-space camera position).
reg signed [15:0] campos_x, campos_y, campos_z;

// Triangle normal, Q8.8; sign-extended in state 34 from the 10-bit fields
// read from the file (Q2.8).
reg signed [15:0] nx, ny, nz;

// Directional light, Q8.8:
//   light_*  : initial direction, loaded at reset
//   lightW_* : world space (light_* rotated in states 176-178)
//   lightM_* : model space ([M-1] * lightW, states 240-242)
reg signed [15:0] light_x,  light_y,  light_z;
reg signed [15:0] lightW_x, lightW_y, lightW_z;
reg signed [15:0] lightM_x, lightM_y, lightM_z;

// View direction in model space, Q8.8 (campos - vertex 0, state 35).
reg signed [15:0] viewdir_x, viewdir_y, viewdir_z;

reg [1:0] tri_color;    // taken from tri_xyz[10][15:14] in state 34
reg [3:0] shade_color;

// Screen space.
reg signed [19:0] x_screen_v0, x_screen_v1, x_screen_v2;   // Q20.0
reg signed [19:0] y_screen_v0, y_screen_v1, y_screen_v2;   // Q20.0
reg signed [21:0] z_screen_v0, z_screen_v1, z_screen_v2;   // Q2.20, to match the barycentric terms
reg signed [19:0] bboxMin_X, bboxMin_Y, bboxMax_X, bboxMax_Y;   // Q20.0

// Initial e0..e2 values, Q20.0, and the two products used to compute them.
// NOTE(review): presumably edge-function values - confirm against the
// rasterizer states, which are outside this excerpt.
reg signed [19:0] e0_init, e1_init, e2_init;
reg signed [19:0] tmp_ei_mul1, tmp_ei_mul2;

// Barycentric setup: denominator plus per-term value and x / y increments.
reg signed [21:0] denom;                            // Q2.20, [-1, 0.999]
reg signed [21:0] bar_ix, bar_ix_dx, bar_ix_dy;     // Q6.16 (Q2.20 would overflow)
reg signed [21:0] bar_iy, bar_iy_dx, bar_iy_dy;     // Q6.16
reg signed [21:0] bar_iz, bar_iz_dx, bar_iz_dy;

// Barycentric-interpolated depth.
reg signed [21:0] z_bar, z_bar_dx, z_bar_dy;        // Q6.16

// Barycentric-interpolated texture coordinates.
// Q14.8, so values below 0 / above 1 (outside the triangle) still fit.
reg signed [21:0] u_bar, u_bar_dx, u_bar_dy;
reg signed [21:0] v_bar, v_bar_dx, v_bar_dy;

// Per-pixel loop state.
reg [9:0] pixel_y, pixel_x;                         // Q10.0
reg signed [21:0] pixel_z;                          // Q6.16
// Annotated Q10.12 -> Q8.0 when sampling the texture.
// NOTE(review): u_bar / v_bar above are annotated Q14.8 - confirm which
// format is the intended one.
reg signed [21:0] pixel_u, pixel_v;
reg pixel_u8;
reg signed [19:0] e0, e1, e2;

// 4-pixel Z / colour buffers.
reg [7:0] db_texel;          // used for reading from RAM
reg [3:0] texel    [3:0];    // Q4.0
reg [7:0] Z_buffer [3:0];    // Q0.8
reg [3:0] C_buffer [3:0];    // Q4.0

// Gamepad-controlled transform parameters.
reg manualRot;                                      // 0: auto-rotate, 1: manual
reg [8:0] rotY_angle, rotY_angle_90;                // degrees 0-359; *_90 = angle + 90 (cos(i) = sin(i + 90))
reg [8:0] rotX_angle, rotX_angle_90;
reg signed [7:0] translate_z;                       // [-127, 127]
reg [8:0] rotY_light, rotY_light_90;

// Trigonometry used to build [M] and [M-1].
wire [7:0] sine_value;
reg  [8:0] sine_angle;
// Q8.8 ([M] uses Q8.8; may be reduced to Q2.8 to save area).
reg signed [15:0] cosTheta, sinTheta;
reg signed [15:0] cosPhi, sinPhi;
reg signed [15:0] cosLight, sinLight;
// Q8.8 products of the terms above.
reg signed [15:0] cosTheta_sinPhi, sinTheta_cosPhi;
reg signed [15:0] cosTheta_cosPhi, sinTheta_sinPhi;
reg signed [15:0] sinTheta_Tz, cosTheta_sinPhi_Tz, cosTheta_cosPhi_Tz;

// Only the low 7 bits of sine_angle reach the ROM: the FSM folds every angle
// into 0..90 degrees before a lookup and restores the sign afterwards.
sine_rom sine_rom1 (
    .angle (sine_angle[6:0]),
    .value (sine_value)
);


// ---------------------------------------------------------------------------
// Operand select for the shared dot4 unit.
//
// Purely combinational: fsm_state alone picks the two 4-element vectors
// (v1_*, v2_*) that are multiplied and summed. The two operands are selected
// by independent case statements because many states share one of them:
// v1 is common to all rows of a matrix * vector product, v2 is common to all
// vectors multiplied by the same matrix row. Any state not listed drives
// zeros on both operands.
// ---------------------------------------------------------------------------
always @(*) begin
    // ---- operand 1 ----
    case (fsm_state)
        // [M-1]*campos: camera position in world space (fixed), w = 1.0
        230, 231, 232: begin
            v1_x = 0;
            v1_y = 0;
            v1_z = 16'sb0010_1000_0000_0000;
            v1_w = 16'sb0000_0001_0000_0000;
        end
        // [M-1]*light: world-space light direction, w = 0
        240, 241, 242: begin
            v1_x = lightW_x;
            v1_y = lightW_y;
            v1_z = lightW_z;
            v1_w = 16'sb0000_0000_0000_0000;
        end
        // backface culling (36) and dot(light,n) (63): triangle normal
        36, 63: begin
            v1_x = nx;
            v1_y = ny;
            v1_z = nz;
            v1_w = 16'sb0000_0000_0000_0000;
        end
        // [MVP]*v, vertex 0, w = 1.0
        38, 39, 40, 41: begin
            v1_x = x_model_v0;
            v1_y = y_model_v0;
            v1_z = z_model_v0;
            v1_w = 16'sb0000_0001_0000_0000;
        end
        // [MVP]*v, vertex 1
        42, 43, 44, 45: begin
            v1_x = x_model_v1;
            v1_y = y_model_v1;
            v1_z = z_model_v1;
            v1_w = 16'sb0000_0001_0000_0000;
        end
        // [MVP]*v, vertex 2
        46, 47, 48, 49: begin
            v1_x = x_model_v2;
            v1_y = y_model_v2;
            v1_z = z_model_v2;
            v1_w = 16'sb0000_0001_0000_0000;
        end
        // rotate light in world space: initial light direction
        177, 178: begin
            v1_x = light_x;
            v1_y = light_y;
            v1_z = light_z;
            v1_w = 16'sb0;
        end
        // [M*VP]: column 0 of [M]
        191, 195, 199, 203: begin
            v1_x = M_00;
            v1_y = M_10;
            v1_z = M_20;
            v1_w = M_30;
        end
        // [M*VP]: column 1 of [M]
        192, 196, 200, 204: begin
            v1_x = M_01;
            v1_y = M_11;
            v1_z = M_21;
            v1_w = M_31;
        end
        // [M*VP]: column 2 of [M]
        193, 197, 201, 205: begin
            v1_x = M_02;
            v1_y = M_12;
            v1_z = M_22;
            v1_w = M_32;
        end
        // [M*VP]: column 3 of [M]
        194, 198, 202, 206: begin
            v1_x = M_03;
            v1_y = M_13;
            v1_z = M_23;
            v1_w = M_33;
        end
        default: begin
            v1_x = 0;
            v1_y = 0;
            v1_z = 0;
            v1_w = 0;
        end
    endcase

    // ---- operand 2 ----
    case (fsm_state)
        // row 0 of the matrix held in M_* ([M-1] while these states run)
        230, 240: begin
            v2_x = M_00;
            v2_y = M_01;
            v2_z = M_02;
            v2_w = M_03;
        end
        // row 1
        231, 241: begin
            v2_x = M_10;
            v2_y = M_11;
            v2_z = M_12;
            v2_w = M_13;
        end
        // row 2
        232, 242: begin
            v2_x = M_20;
            v2_y = M_21;
            v2_z = M_22;
            v2_w = M_23;
        end
        // backface culling: view direction
        36: begin
            v2_x = viewdir_x;
            v2_y = viewdir_y;
            v2_z = viewdir_z;
            v2_w = 16'sb0000_0000_0000_0000;
        end
        // dot(light,n): model-space light direction
        63: begin
            v2_x = lightM_x;
            v2_y = lightM_y;
            v2_z = lightM_z;
            v2_w = 16'sb0000_0000_0000_0000;
        end
        // [MVP] row 0
        38, 42, 46: begin
            v2_x = MVP_00;
            v2_y = MVP_01;
            v2_z = MVP_02;
            v2_w = MVP_03;
        end
        // [MVP] row 1
        39, 43, 47: begin
            v2_x = MVP_10;
            v2_y = MVP_11;
            v2_z = MVP_12;
            v2_w = MVP_13;
        end
        // [MVP] row 2
        40, 44, 48: begin
            v2_x = MVP_20;
            v2_y = MVP_21;
            v2_z = MVP_22;
            v2_w = MVP_23;
        end
        // [MVP] row 3
        41, 45, 49: begin
            v2_x = MVP_30;
            v2_y = MVP_31;
            v2_z = MVP_32;
            v2_w = MVP_33;
        end
        // light rotation, x result: ( cosLight, 0, sinLight, 0)
        177: begin
            v2_x = cosLight;
            v2_y = 16'sb0;
            v2_z = sinLight;
            v2_w = 16'sb0;
        end
        // light rotation, z result: (-sinLight, 0, cosLight, 0)
        178: begin
            v2_x = ~sinLight + 16'sb0000_0000_0000_0001;   // two's-complement negate
            v2_y = 16'sb0;
            v2_z = cosLight;
            v2_w = 16'sb0;
        end
        // fixed [VP] constants producing MVP row 0
        191, 192, 193, 194: begin
            v2_x = 16'sh020f;
            v2_y = 0;
            v2_z = 0;
            v2_w = 0;
        end
        // fixed [VP] constants producing MVP row 1
        195, 196, 197, 198: begin
            v2_x = 0;
            v2_y = 16'sh02c0;
            v2_z = 0;
            v2_w = 0;
        end
        // fixed [VP] constants producing MVP row 2
        199, 200, 201, 202: begin
            v2_x = 0;
            v2_y = 0;
            v2_z = 16'shfec9;
            v2_w = 16'sh1a57;
        end
        // fixed [VP] constants producing MVP row 3
        203, 204, 205, 206: begin
            v2_x = 0;
            v2_y = 0;
            v2_z = 16'shff01;
            v2_w = 16'sh27d8;
        end
        default: begin
            v2_x = 0;
            v2_y = 0;
            v2_z = 0;
            v2_w = 0;
        end
    endcase
end



// ---------------------------------------------------------------------------
// Main FSM. The reset branch is taken on a clock edge while `reset` is low.
// The reset list continues past this point.
// ---------------------------------------------------------------------------
always @(posedge clk) begin
    if(!reset) begin
        fsm_state <= 0;
        //
        vsfs_addr <= 0;
        vsfs_data_in <= 0;
        vsfs_start_read <= 0;
        vsfs_start_write <= 0;
        vsfs_stop_txn <= 0;
        do_swap <= 0;
        vsfs_running <= 0;
        // mul, div
        div_a <= 0;
        div_b <= 0;
        div_start <= 0;
        div2_a <= 0;
        div2_b <= 0;
        div2_start <= 0;
        mul_a <= 0;
        mul_b <= 0;
        mul_start <= 0;
        dot_start <= 0;
        //
        M_00 <= 0; // Q8.8
        M_01 <= 0;
        M_02 <= 0;
        M_03 <= 0;
        M_10 <= 0;
        M_11 <= 0;
        M_12 <= 0;
        M_13 <= 0;
        M_20 <= 0;
        M_21 <= 0;
        M_22 <= 0;
        M_23 <= 0;
        M_30 <= 0;
        M_31 <= 0;
        M_32 <= 0;
        M_33 <= 0;
        MVP_00 <= 0; // Q8.8
        MVP_01 <= 0;
        MVP_02 <= 0;
        MVP_03 <= 0;
        MVP_10 <= 0;
        MVP_11 <= 0;
        MVP_12 <= 0;
        MVP_13 <= 0;
        MVP_20 <= 0;
        MVP_21 <= 0;
        MVP_22 <= 0;
        MVP_23 <= 0;
        MVP_30 <= 0;
        MVP_31 <= 0;
        MVP_32 <= 0;
        MVP_33 <= 0;
        //
        manualRot <= 0;
        sine_angle <= 0;
        rotY_angle <= 0;
        rotY_angle_90 <= 0;
        rotX_angle <= 0;
        rotX_angle_90 <= 0;
        rotY_light <= 0;
        rotY_light_90 <= 0;
        translate_z <= 0;
        cosTheta <= 0;
        sinTheta <= 0;
        cosPhi <= 0;
        sinPhi <= 0;
        cosLight <= 0;
        sinLight <= 0;
        cosTheta_sinPhi <= 0; // Q8.8
        sinTheta_cosPhi <= 0;
        cosTheta_cosPhi <= 0;
sinTheta_sinPhi <= 0; 853 | sinTheta_Tz <= 0; 854 | cosTheta_sinPhi_Tz <= 0; 855 | cosTheta_cosPhi_Tz <= 0; 856 | // 857 | read_delay <= 0; 858 | numread <= 0; 859 | tri_idx <= 0; 860 | // tri_xyz[10:0] 861 | // 862 | x_model_v0 <= 0; // Q8.8 from file 863 | x_model_v1 <= 0; 864 | x_model_v2 <= 0; 865 | y_model_v0 <= 0; 866 | y_model_v1 <= 0; 867 | y_model_v2 <= 0; 868 | z_model_v0 <= 0; 869 | z_model_v1 <= 0; 870 | z_model_v2 <= 0; 871 | v0_u <= 0; 872 | v0_v <= 0; 873 | v1_u <= 0; 874 | v1_v <= 0; 875 | v2_u <= 0; 876 | v2_v <= 0; 877 | x_clip_v0 <= 0; 878 | x_clip_v1 <= 0; 879 | x_clip_v2 <= 0; 880 | y_clip_v0 <= 0; 881 | y_clip_v1 <= 0; 882 | y_clip_v2 <= 0; 883 | z_clip_v0 <= 0; 884 | z_clip_v1 <= 0; 885 | z_clip_v2 <= 0; 886 | w_clip_v0 <= 0; 887 | w_clip_v1 <= 0; 888 | w_clip_v2 <= 0; 889 | campos_x <= 0; 890 | campos_y <= 0; 891 | campos_z <= 16'sb0010_1000_0000_0000; // cam in model, init to cam in world 892 | nx <= 0; 893 | ny <= 0; 894 | nz <= 0; 895 | //light_x <= 16'sb0000_0000_1011_0101; // light in world, init to (0.707,0.707,0) 896 | //light_y <= 16'sb0000_0000_1011_0101; 897 | //light_z <= 16'sb0000_0000_0000_0000; 898 | light_x <= 16'sb0000_0000_0000_0000; // light in world, init to (0,0,1) 899 | light_y <= 16'sb0000_0000_0000_0000; 900 | light_z <= 16'sb0000_0001_0000_0000; 901 | lightW_x <= 0; 902 | lightW_y <= 0; 903 | lightW_z <= 0; 904 | lightM_x <= 0; 905 | lightM_y <= 0; 906 | lightM_z <= 0; 907 | viewdir_x <= 0; 908 | viewdir_y <= 0; 909 | viewdir_z <= 0; 910 | tri_color <= 0; 911 | shade_color <= 0; 912 | x_screen_v0 <= 0; 913 | x_screen_v1 <= 0; 914 | x_screen_v2 <= 0; 915 | y_screen_v0 <= 0; 916 | y_screen_v1 <= 0; 917 | y_screen_v2 <= 0; 918 | z_screen_v0 <= 0; 919 | z_screen_v1 <= 0; 920 | z_screen_v2 <= 0; 921 | bboxMin_X <= 0; 922 | bboxMin_Y <= 0; 923 | bboxMax_X <= 0; 924 | bboxMax_Y <= 0; 925 | // 926 | e0_init <= 0; 927 | e1_init <= 0; 928 | e2_init <= 0; 929 | // compute e0_init 930 | tmp_ei_mul1 <= 0; 931 | tmp_ei_mul2 <= 0; 
932 | // bar, denom 933 | denom <= 0; 934 | bar_ix <= 0; 935 | bar_ix_dy <= 0; 936 | bar_ix_dx <= 0; 937 | bar_iy <= 0; 938 | bar_iy_dy <= 0; 939 | bar_iy_dx <= 0; 940 | bar_iz <= 0; 941 | bar_iz_dy <= 0; 942 | bar_iz_dx <= 0; 943 | // 944 | z_bar <= 0; 945 | z_bar_dx <= 0; 946 | z_bar_dy <= 0; 947 | u_bar <= 0;// Q14.8 (for neg/>1 out of tri bar) -> Q8.0 when sampling tex 948 | u_bar_dx <= 0; 949 | u_bar_dy <= 0; 950 | v_bar <= 0; 951 | v_bar_dx <= 0; 952 | v_bar_dy <= 0; 953 | db_texel <= 0; 954 | //Z_buffer[3:0], C_buffer[3:0], texel [3:0] 955 | pixel_y <= 0; 956 | pixel_x <= 0; 957 | pixel_z <= 0; 958 | pixel_u <= 0; 959 | pixel_v <= 0; 960 | pixel_u8 <= 0; 961 | e0 <= 0; 962 | e1 <= 0; 963 | e2 <= 0; 964 | end else begin 965 | case (fsm_state) 966 | /////////////////////////////// 967 | // 0. perframe, state [140-250] 968 | 0: begin 969 | do_swap <= 0; 970 | vsfs_running <= 0; 971 | 972 | // enable manualRot 973 | if ((manualRot == 0) && (gamepad_input != 0)) begin 974 | manualRot <= 1; 975 | end 976 | fsm_state <= 140; 977 | end 978 | 1: begin 979 | // debug black hole state, 980 | fsm_state <= 1; 981 | end 982 | 983 | // - set [M] 984 | 140: begin 985 | // -- set angle from input, manual/auto 986 | if(manualRot == 1) begin 987 | // rotX +- 988 | if(gamepad_input[0] == 1)begin 989 | if(rotX_angle > 355)begin 990 | rotX_angle <= 0; 991 | end else begin 992 | rotX_angle <= rotX_angle + 4; 993 | end 994 | end else if(gamepad_input[1] == 1)begin 995 | if(rotX_angle == 0)begin 996 | rotX_angle <= 356; 997 | end else begin 998 | rotX_angle <= rotX_angle - 4; 999 | end 1000 | end 1001 | // rotY 1002 | if(gamepad_input[2] == 1)begin 1003 | if(rotY_angle > 355)begin 1004 | rotY_angle <= 0; 1005 | end else begin 1006 | rotY_angle <= rotY_angle + 4; 1007 | end 1008 | end else if(gamepad_input[3] == 1)begin 1009 | if(rotY_angle == 0)begin 1010 | rotY_angle <= 356; 1011 | end else begin 1012 | rotY_angle <= rotY_angle - 4; 1013 | end 1014 | end 1015 | // tran Z 1016 | 
if(gamepad_input[4] == 1)begin 1017 | if(translate_z > 120)begin 1018 | translate_z <= 120; 1019 | end else begin 1020 | translate_z <= translate_z + 2; 1021 | end 1022 | end else if(gamepad_input[5] == 1)begin 1023 | if(translate_z < -120)begin 1024 | translate_z <= -120; 1025 | end else begin 1026 | translate_z <= translate_z - 2; 1027 | end 1028 | end 1029 | // rotLight 1030 | if(gamepad_input[6] == 1)begin 1031 | if(rotY_light > 355)begin 1032 | rotY_light <= 0; 1033 | end else begin 1034 | rotY_light <= rotY_light + 4; 1035 | end 1036 | end else if(gamepad_input[7] == 1)begin 1037 | if(rotY_light == 0)begin 1038 | rotY_light <= 356; 1039 | end else begin 1040 | rotY_light <= rotY_light - 4; 1041 | end 1042 | end 1043 | end else begin 1044 | if(rotY_angle > 355)begin 1045 | rotY_angle <= 0; 1046 | end else begin 1047 | rotY_angle <= rotY_angle + 4; 1048 | end 1049 | end 1050 | fsm_state <= 141; 1051 | end 1052 | 141: begin 1053 | // -- fixCos(i) = fixSin(i + 90) 1054 | if(rotY_angle < 270)begin 1055 | rotY_angle_90 <= rotY_angle + 90; 1056 | end else begin 1057 | rotY_angle_90 <= rotY_angle - 270; 1058 | end 1059 | if(rotX_angle < 270)begin 1060 | rotX_angle_90 <= rotX_angle + 90; 1061 | end else begin 1062 | rotX_angle_90 <= rotX_angle - 270; 1063 | end 1064 | if(rotY_light < 270)begin 1065 | rotY_light_90 <= rotY_light + 90; 1066 | end else begin 1067 | rotY_light_90 <= rotY_light - 270; 1068 | end 1069 | fsm_state <= 142; 1070 | end 1071 | 142: begin 1072 | // -- sinPhi 1073 | if(rotX_angle == 90)begin 1074 | sinPhi <= 16'sb0000_0001_0000_0000; 1075 | fsm_state <= 146; 1076 | end else if(rotX_angle == 270) begin 1077 | sinPhi <= 16'sb1111_1111_0000_0000; 1078 | fsm_state <= 146; 1079 | end else begin 1080 | fsm_state <= 143; 1081 | end 1082 | end 1083 | 143: begin 1084 | if(rotX_angle < 90) begin 1085 | sine_angle <= rotX_angle; 1086 | end else if(rotX_angle < 180) begin 1087 | sine_angle <= 180 - rotX_angle; 1088 | end else if(rotX_angle < 270) begin 1089 | 
sine_angle <= rotX_angle - 180; 1090 | end else begin 1091 | sine_angle <= 360 - rotX_angle; 1092 | end 1093 | fsm_state <= 144; 1094 | end 1095 | 144: begin 1096 | sinPhi <= {8'b0,sine_value}; 1097 | fsm_state <= 145; 1098 | end 1099 | 145: begin 1100 | if(rotX_angle >= 180) begin 1101 | sinPhi <= ~sinPhi + 16'sb0000_0000_0000_0001; 1102 | end 1103 | fsm_state <= 146; 1104 | end 1105 | 146: begin 1106 | // -- cosPhi 1107 | if(rotX_angle_90 == 90)begin 1108 | cosPhi <= 16'sb0000_0001_0000_0000; 1109 | fsm_state <= 151; 1110 | end else if(rotX_angle_90 == 270) begin 1111 | cosPhi <= 16'sb1111_1111_0000_0000; 1112 | fsm_state <= 151; 1113 | end else begin 1114 | fsm_state <= 147; 1115 | end 1116 | end 1117 | 147: begin 1118 | if(rotX_angle_90 < 90) begin 1119 | sine_angle <= rotX_angle_90; 1120 | end else if(rotX_angle_90 < 180) begin 1121 | sine_angle <= 180 - rotX_angle_90; 1122 | end else if(rotX_angle_90 < 270) begin 1123 | sine_angle <= rotX_angle_90 - 180; 1124 | end else begin 1125 | sine_angle <= 360 - rotX_angle_90; 1126 | end 1127 | 1128 | fsm_state <= 148; 1129 | end 1130 | 148: begin 1131 | cosPhi <= {8'b0,sine_value}; 1132 | fsm_state <= 149; 1133 | end 1134 | 149: begin 1135 | if(rotX_angle_90 >= 180) begin 1136 | cosPhi <= ~cosPhi + 16'sb0000_0000_0000_0001; 1137 | end 1138 | fsm_state <= 151; 1139 | end 1140 | // free state 150 1141 | 151: begin 1142 | // -- sinTheta 1143 | if(rotY_angle == 90)begin 1144 | sinTheta <= 16'sb0000_0001_0000_0000; 1145 | fsm_state <= 155; 1146 | end else if(rotY_angle == 270) begin 1147 | sinTheta <= 16'sb1111_1111_0000_0000; 1148 | fsm_state <= 155; 1149 | end else begin 1150 | fsm_state <= 152; 1151 | end 1152 | end 1153 | 152: begin 1154 | if(rotY_angle < 90) begin 1155 | sine_angle <= rotY_angle; 1156 | end else if(rotY_angle < 180) begin 1157 | sine_angle <= 180 - rotY_angle; 1158 | end else if(rotY_angle < 270) begin 1159 | sine_angle <= rotY_angle - 180; 1160 | end else begin 1161 | sine_angle <= 360 - rotY_angle; 
1162 | end 1163 | fsm_state <= 153; 1164 | end 1165 | 153: begin 1166 | sinTheta <= {8'b0,sine_value}; 1167 | fsm_state <= 154; 1168 | end 1169 | 154: begin 1170 | if(rotY_angle >= 180) begin 1171 | sinTheta <= ~sinTheta + 16'sb0000_0000_0000_0001; 1172 | end 1173 | fsm_state <= 155; 1174 | end 1175 | 155: begin 1176 | // -- cosTheta 1177 | if(rotY_angle_90 == 90)begin 1178 | cosTheta <= 16'sb0000_0001_0000_0000; 1179 | fsm_state <= 159; 1180 | end else if(rotY_angle_90 == 270) begin 1181 | cosTheta <= 16'sb1111_1111_0000_0000; 1182 | fsm_state <= 159; 1183 | end else begin 1184 | fsm_state <= 156; 1185 | end 1186 | end 1187 | 156: begin 1188 | if(rotY_angle_90 < 90) begin 1189 | sine_angle <= rotY_angle_90; 1190 | end else if(rotY_angle_90 < 180) begin 1191 | sine_angle <= 180 - rotY_angle_90; 1192 | end else if(rotY_angle_90 < 270) begin 1193 | sine_angle <= rotY_angle_90 - 180; 1194 | end else begin 1195 | sine_angle <= 360 - rotY_angle_90; 1196 | end 1197 | 1198 | fsm_state <= 157; 1199 | end 1200 | 157: begin 1201 | cosTheta <= {8'b0,sine_value}; 1202 | fsm_state <= 158; 1203 | end 1204 | 158: begin 1205 | if(rotY_angle_90 >= 180) begin 1206 | cosTheta <= ~cosTheta + 16'sb0000_0000_0000_0001; 1207 | end 1208 | fsm_state <= 159; 1209 | end 1210 | 1211 | // -- 7 mul sin.cos.t 1212 | // - Q8.8 -> Q8.14, Q8.14 x Q8.14 = Q16.28 -> Q8.8 1213 | // - Tz: Q8.0 -> Q8.14 1214 | 159: begin 1215 | mul_a <= {cosTheta,6'b0}; // Q8.8 -> Q8.14 1216 | mul_b <= {sinPhi,6'b0}; 1217 | mul_start <= 1; 1218 | fsm_state <= 160; 1219 | end 1220 | 160: begin 1221 | mul_start <= 0; 1222 | if (mul_done) begin 1223 | cosTheta_sinPhi <= mul_result[35:20]; 1224 | mul_a <= {sinTheta,6'b0}; 1225 | mul_b <= {cosPhi,6'b0}; 1226 | mul_start <= 1; 1227 | fsm_state <= 161; 1228 | end 1229 | end 1230 | 161: begin 1231 | mul_start <= 0; 1232 | if (mul_done) begin 1233 | sinTheta_cosPhi <= mul_result[35:20]; 1234 | mul_a <= {cosTheta,6'b0}; 1235 | mul_b <= {cosPhi,6'b0}; 1236 | mul_start <= 1; 1237 | 
fsm_state <= 162; 1238 | end 1239 | end 1240 | 162: begin 1241 | mul_start <= 0; 1242 | if (mul_done) begin 1243 | cosTheta_cosPhi <= mul_result[35:20]; 1244 | mul_a <= {sinTheta,6'b0}; 1245 | mul_b <= {sinPhi,6'b0}; 1246 | mul_start <= 1; 1247 | fsm_state <= 163; 1248 | end 1249 | end 1250 | 163: begin 1251 | mul_start <= 0; 1252 | if (mul_done) begin 1253 | sinTheta_sinPhi <= mul_result[35:20]; 1254 | mul_a <= {sinTheta,6'b0}; 1255 | mul_b <= {translate_z,14'b0}; // Tz: Q8.0 -> Q8.14 1256 | mul_start <= 1; 1257 | fsm_state <= 164; 1258 | end 1259 | end 1260 | 164: begin 1261 | mul_start <= 0; 1262 | if (mul_done) begin 1263 | sinTheta_Tz <= mul_result[35:20]; 1264 | mul_a <= {cosTheta_sinPhi,6'b0}; 1265 | mul_b <= {translate_z,14'b0}; 1266 | mul_start <= 1; 1267 | fsm_state <= 165; 1268 | end 1269 | end 1270 | 165: begin 1271 | mul_start <= 0; 1272 | if (mul_done) begin 1273 | cosTheta_sinPhi_Tz <= mul_result[35:20]; 1274 | mul_a <= {cosTheta_cosPhi,6'b0}; 1275 | mul_b <= {translate_z,14'b0}; 1276 | mul_start <= 1; 1277 | fsm_state <= 166; 1278 | end 1279 | end 1280 | 166: begin 1281 | mul_start <= 0; 1282 | if (mul_done) begin 1283 | cosTheta_cosPhi_Tz <= mul_result[35:20]; 1284 | fsm_state <= 167; 1285 | end 1286 | end 1287 | 167: begin 1288 | M_00 <= cosTheta; // Q8.8 1289 | M_01 <= sinTheta_sinPhi; 1290 | M_02 <= sinTheta_cosPhi; 1291 | M_03 <= 0; 1292 | M_10 <= 0; 1293 | M_11 <= cosPhi; 1294 | M_12 <= ~sinPhi + 16'sb0000_0000_0000_0001; 1295 | M_13 <= 0; 1296 | M_20 <= ~sinTheta + 16'sb0000_0000_0000_0001; 1297 | M_21 <= cosTheta_sinPhi; 1298 | M_22 <= cosTheta_cosPhi; 1299 | M_23 <= {translate_z,8'sb0}; 1300 | M_30 <= 0; 1301 | M_31 <= 0; 1302 | M_32 <= 0; 1303 | M_33 <= 16'sb0000_0001_0000_0000; 1304 | 1305 | fsm_state <= 168; 1306 | end 1307 | // - dir light in world space, rot 1 axis 1308 | 168: begin 1309 | // -- sinLight 1310 | if(rotY_light == 90)begin 1311 | sinLight <= 16'sb0000_0001_0000_0000; 1312 | fsm_state <= 172; 1313 | end else if(rotY_light 
== 270) begin 1314 | sinLight <= 16'sb1111_1111_0000_0000; 1315 | fsm_state <= 172; 1316 | end else begin 1317 | fsm_state <= 169; 1318 | end 1319 | end 1320 | 169: begin 1321 | if(rotY_light < 90) begin 1322 | sine_angle <= rotY_light; 1323 | end else if(rotY_light < 180) begin 1324 | sine_angle <= 180 - rotY_light; 1325 | end else if(rotY_light < 270) begin 1326 | sine_angle <= rotY_light - 180; 1327 | end else begin 1328 | sine_angle <= 360 - rotY_light; 1329 | end 1330 | fsm_state <= 170; 1331 | end 1332 | 170: begin 1333 | sinLight <= {8'b0,sine_value}; 1334 | fsm_state <= 171; 1335 | end 1336 | 171: begin 1337 | if(rotY_light >= 180) begin 1338 | sinLight <= ~sinLight + 16'sb0000_0000_0000_0001; 1339 | end 1340 | fsm_state <= 172; 1341 | end 1342 | 172: begin 1343 | // -- cosLight 1344 | if(rotY_light_90 == 90)begin 1345 | cosLight <= 16'sb0000_0001_0000_0000; 1346 | fsm_state <= 176; 1347 | end else if(rotY_light_90 == 270) begin 1348 | cosLight <= 16'sb1111_1111_0000_0000; 1349 | fsm_state <= 176; 1350 | end else begin 1351 | fsm_state <= 173; 1352 | end 1353 | end 1354 | 173: begin 1355 | if(rotY_light_90 < 90) begin 1356 | sine_angle <= rotY_light_90; 1357 | end else if(rotY_light_90 < 180) begin 1358 | sine_angle <= 180 - rotY_light_90; 1359 | end else if(rotY_light_90 < 270) begin 1360 | sine_angle <= rotY_light_90 - 180; 1361 | end else begin 1362 | sine_angle <= 360 - rotY_light_90; 1363 | end 1364 | 1365 | fsm_state <= 174; 1366 | end 1367 | 174: begin 1368 | cosLight <= {8'b0,sine_value}; 1369 | fsm_state <= 175; 1370 | end 1371 | 175: begin 1372 | if(rotY_light_90 >= 180) begin 1373 | cosLight <= ~cosLight + 16'sb0000_0000_0000_0001; 1374 | end 1375 | fsm_state <= 176; 1376 | end 1377 | // -- dot x,z to rot light 1378 | 176: begin 1379 | lightW_y <= light_y; 1380 | dot_start <= 1; 1381 | fsm_state <= 177; 1382 | end 1383 | 177: begin 1384 | dot_start <= 0; 1385 | if (dot_done) begin 1386 | lightW_x <= dot_result; 1387 | dot_start <= 1; 1388 | 
fsm_state <= 178; 1389 | end 1390 | end 1391 | 178: begin 1392 | dot_start <= 0; 1393 | if (dot_done) begin 1394 | lightW_z <= dot_result; 1395 | fsm_state <= 190; 1396 | end 1397 | end 1398 | //free state 179-189 1399 | // - set [M*VP] 1400 | 190: begin 1401 | dot_start <= 1; 1402 | fsm_state <= 191; 1403 | end 1404 | 191: begin 1405 | dot_start <= 0; 1406 | if (dot_done) begin 1407 | MVP_00 <= dot_result; 1408 | dot_start <= 1; 1409 | fsm_state <= 192; 1410 | end 1411 | end 1412 | 192: begin 1413 | dot_start <= 0; 1414 | if (dot_done) begin 1415 | MVP_01 <= dot_result; 1416 | dot_start <= 1; 1417 | fsm_state <= 193; 1418 | end 1419 | end 1420 | 193: begin 1421 | dot_start <= 0; 1422 | if (dot_done) begin 1423 | MVP_02 <= dot_result; 1424 | dot_start <= 1; 1425 | fsm_state <= 194; 1426 | end 1427 | end 1428 | 194: begin 1429 | dot_start <= 0; 1430 | if (dot_done) begin 1431 | MVP_03 <= dot_result; 1432 | dot_start <= 1; 1433 | fsm_state <= 195; 1434 | end 1435 | end 1436 | 195: begin 1437 | dot_start <= 0; 1438 | if (dot_done) begin 1439 | MVP_10 <= dot_result; 1440 | dot_start <= 1; 1441 | fsm_state <= 196; 1442 | end 1443 | end 1444 | 196: begin 1445 | dot_start <= 0; 1446 | if (dot_done) begin 1447 | MVP_11 <= dot_result; 1448 | dot_start <= 1; 1449 | fsm_state <= 197; 1450 | end 1451 | end 1452 | 197: begin 1453 | dot_start <= 0; 1454 | if (dot_done) begin 1455 | MVP_12 <= dot_result; 1456 | dot_start <= 1; 1457 | fsm_state <= 198; 1458 | end 1459 | end 1460 | 198: begin 1461 | dot_start <= 0; 1462 | if (dot_done) begin 1463 | MVP_13 <= dot_result; 1464 | dot_start <= 1; 1465 | fsm_state <= 199; 1466 | end 1467 | end 1468 | 199: begin 1469 | dot_start <= 0; 1470 | if (dot_done) begin 1471 | MVP_20 <= dot_result; 1472 | dot_start <= 1; 1473 | fsm_state <= 200; 1474 | end 1475 | end 1476 | 200: begin 1477 | dot_start <= 0; 1478 | if (dot_done) begin 1479 | MVP_21 <= dot_result; 1480 | dot_start <= 1; 1481 | fsm_state <= 201; 1482 | end 1483 | end 1484 | 201: 
begin 1485 | dot_start <= 0; 1486 | if (dot_done) begin 1487 | MVP_22 <= dot_result; 1488 | dot_start <= 1; 1489 | fsm_state <= 202; 1490 | end 1491 | end 1492 | 202: begin 1493 | dot_start <= 0; 1494 | if (dot_done) begin 1495 | MVP_23 <= dot_result; 1496 | dot_start <= 1; 1497 | fsm_state <= 203; 1498 | end 1499 | end 1500 | 203: begin 1501 | dot_start <= 0; 1502 | if (dot_done) begin 1503 | MVP_30 <= dot_result; 1504 | dot_start <= 1; 1505 | fsm_state <= 204; 1506 | end 1507 | end 1508 | 204: begin 1509 | dot_start <= 0; 1510 | if (dot_done) begin 1511 | MVP_31 <= dot_result; 1512 | dot_start <= 1; 1513 | fsm_state <= 205; 1514 | end 1515 | end 1516 | 205: begin 1517 | dot_start <= 0; 1518 | if (dot_done) begin 1519 | MVP_32 <= dot_result; 1520 | dot_start <= 1; 1521 | fsm_state <= 206; 1522 | end 1523 | end 1524 | 206: begin 1525 | dot_start <= 0; 1526 | if (dot_done) begin 1527 | MVP_33 <= dot_result; 1528 | fsm_state <= 210; 1529 | end 1530 | end 1531 | 1532 | // - [M-1] 1533 | 210: begin 1534 | M_00 <= cosTheta; // Q8.8 1535 | M_01 <= 0; 1536 | M_02 <= ~sinTheta + 16'sb0000_0000_0000_0001; 1537 | M_03 <= sinTheta_Tz; 1538 | M_10 <= sinTheta_sinPhi; 1539 | M_11 <= cosPhi; 1540 | M_12 <= cosTheta_sinPhi; 1541 | M_13 <= ~cosTheta_sinPhi_Tz + 16'sb0000_0000_0000_0001; 1542 | M_20 <= sinTheta_cosPhi; 1543 | M_21 <= ~sinPhi + 16'sb0000_0000_0000_0001; 1544 | M_22 <= cosTheta_cosPhi; 1545 | M_23 <= ~cosTheta_cosPhi_Tz + 16'sb0000_0000_0000_0001; 1546 | M_30 <= 0; 1547 | M_31 <= 0; 1548 | M_32 <= 0; 1549 | M_33 <= 16'sb0000_0001_0000_0000; 1550 | 1551 | dot_start <= 1; 1552 | fsm_state <= 230; 1553 | end 1554 | 1555 | // - [M-1]*campos 1556 | 230: begin 1557 | dot_start <= 0; 1558 | if (dot_done) begin 1559 | campos_x <= dot_result; 1560 | dot_start <= 1; 1561 | fsm_state <= 231; 1562 | end 1563 | end 1564 | 231: begin 1565 | dot_start <= 0; 1566 | if (dot_done) begin 1567 | campos_y <= dot_result; 1568 | dot_start <= 1; 1569 | fsm_state <= 232; 1570 | end 1571 | 
end 1572 | 232: begin 1573 | dot_start <= 0; 1574 | if (dot_done) begin 1575 | campos_z <= dot_result; 1576 | dot_start <= 1; 1577 | fsm_state <= 240; 1578 | end 1579 | end 1580 | 1581 | // - [M-1]*lightdir 1582 | 240: begin 1583 | dot_start <= 0; 1584 | if (dot_done) begin 1585 | lightM_x <= dot_result; 1586 | dot_start <= 1; 1587 | fsm_state <= 241; 1588 | end 1589 | end 1590 | 241: begin 1591 | dot_start <= 0; 1592 | if (dot_done) begin 1593 | lightM_y <= dot_result; 1594 | dot_start <= 1; 1595 | fsm_state <= 242; 1596 | end 1597 | end 1598 | 242: begin 1599 | dot_start <= 0; 1600 | if (dot_done) begin 1601 | lightM_z <= dot_result; 1602 | fsm_state <= 250; 1603 | end 1604 | end 1605 | 1606 | 1607 | // wait for start_vsfs, after clear z on the 1st subframe 1608 | 250: begin 1609 | if (start_vsfs) begin 1610 | tri_idx <= 0; 1611 | vsfs_running <= 1; 1612 | fsm_state <= 31; 1613 | end 1614 | end 1615 | /////////////////////////////// 1616 | 1617 | // 1. for each tri 1618 | // - READ tri 1619 | 31: begin 1620 | if(tri_idx == numtri)begin 1621 | // wait a few clk before eof to send do_swap 1622 | if ((y == 524) && (x == 770)) begin 1623 | do_swap <= 1; 1624 | vsfs_running <= 0; 1625 | fsm_state <= 0; 1626 | end 1627 | end else begin 1628 | if (ram_notbusy) begin 1629 | vsfs_stop_txn <= 0; 1630 | vsfs_start_read <= 1; 1631 | // 1632 | vsfs_addr <= (has_tex)? 
24'd153600 + {8'b0,tri_idx_addr_tex} : 1633 | 24'd153600 + {8'b0,tri_idx_addr}; 1634 | numread <= 0; 1635 | read_delay <= 0; 1636 | fsm_state <= 32; 1637 | end 1638 | end 1639 | end 1640 | // -- wait for the first flash data to be ready 1641 | 32: begin 1642 | vsfs_start_read <= 0; 1643 | if(read_delay == 16) begin 1644 | read_delay <= 0; 1645 | tri_xyz[numread[5:2]][{~numread[1:0],2'b00} +: 4] <= spi_data; 1646 | numread <= 1; 1647 | fsm_state <= 33; 1648 | end 1649 | else begin 1650 | read_delay <= read_delay + 1; 1651 | end 1652 | end 1653 | // -- read 43 more 4bit OR 55 more (tex) 1654 | 33: begin 1655 | tri_xyz[numread[5:2]][{~numread[1:0],2'b00} +: 4] <= spi_data; 1656 | numread <= numread + 1; 1657 | // 1658 | if(numread == ((has_tex)? 55:43)) begin 1659 | numread <= 0; 1660 | vsfs_stop_txn <= 1; 1661 | fsm_state <= 34; 1662 | end 1663 | end 1664 | // -- chk normal -/+ 1665 | 34: begin 1666 | vsfs_stop_txn <= 0; 1667 | x_model_v0 <= tri_xyz[0]; 1668 | y_model_v0 <= tri_xyz[1]; 1669 | z_model_v0 <= tri_xyz[2]; 1670 | x_model_v1 <= tri_xyz[3]; 1671 | y_model_v1 <= tri_xyz[4]; 1672 | z_model_v1 <= tri_xyz[5]; 1673 | x_model_v2 <= tri_xyz[6]; 1674 | y_model_v2 <= tri_xyz[7]; 1675 | z_model_v2 <= tri_xyz[8]; 1676 | tri_color <= tri_xyz[10][15:14]; 1677 | nz <= (tri_xyz[10][13] == 1'b1)? {6'b1111_11,tri_xyz[10][13:4]} : {6'b0,tri_xyz[10][13:4]}; 1678 | ny <= (tri_xyz[10][3] == 1'b1)? {6'b1111_11,tri_xyz[10][3:0],tri_xyz[9][15:10]} : {6'b0,tri_xyz[10][3:0],tri_xyz[9][15:10]}; 1679 | nx <= (tri_xyz[9][9] == 1'b1)? 
{6'b1111_11,tri_xyz[9][9:0]} : {6'b0,tri_xyz[9][9:0]}; 1680 | // UVs (textured triangles only): u = low byte of words 11-13, v = 255 - high byte of words 11-13; all zero when has_tex is 0 1681 | if(has_tex)begin 1682 | v0_u <= tri_xyz[11][7:0]; 1683 | //v0_v <= tri_xyz[11][15:8]; 1684 | v1_u <= tri_xyz[12][7:0]; 1685 | //v1_v <= tri_xyz[12][15:8]; 1686 | v2_u <= tri_xyz[13][7:0]; 1687 | //v2_v <= tri_xyz[13][15:8]; 1688 | 1689 | // v0_u <= 8'd255 - tri_xyz[11][7:0]; 1690 | v0_v <= 8'd255 - tri_xyz[11][15:8]; 1691 | // v1_u <= 8'd255 - tri_xyz[12][7:0]; 1692 | v1_v <= 8'd255 - tri_xyz[12][15:8]; 1693 | // v2_u <= 8'd255 - tri_xyz[13][7:0]; 1694 | v2_v <= 8'd255 - tri_xyz[13][15:8]; 1695 | end else begin 1696 | v0_u <= 0; 1697 | v0_v <= 0; 1698 | v1_u <= 0; 1699 | v1_v <= 0; 1700 | v2_u <= 0; 1701 | v2_v <= 0; 1702 | end 1703 | 1704 | fsm_state <= 35; 1705 | end 1706 | 1707 | /////////////////////////////// 1708 | // 2. VS, 40 states 1709 | // 2.1 backface culling: viewdir = campos - v0, dot(viewdir,n) 1710 | // 2.2 [MVP]*v, clip->NDC (div w), NDC->screen 1711 | // 2.3 dot(light,n) 1712 | 1713 | // 2.1 backface culling 1714 | // - viewdir = campos - v0 (the code subtracts x/y/z_model_v0) 1715 | 35:begin 1716 | viewdir_x <= campos_x - x_model_v0; 1717 | viewdir_y <= campos_y - y_model_v0; 1718 | viewdir_z <= campos_z - z_model_v0; 1719 | 1720 | dot_start <= 1; 1721 | fsm_state <= 36; 1722 | 1723 | //debug, print out model data 1724 | // tri_idx <= tri_idx + 1; 1725 | // fsm_state <= 31; 1726 | end 1727 | // - dot(viewdir,n) 1728 | 36:begin 1729 | dot_start <= 0; 1730 | if (dot_done) begin 1731 | if (dot_result[15] == 1'b1) begin // backfacing (sign bit set): skip to the next triangle 1732 | tri_idx <= tri_idx + 1; 1733 | fsm_state <= 31; 1734 | 1735 | // debug 1736 | //fsm_state <= 37; 1737 | end else begin 1738 | fsm_state <= 37; 1739 | end 1740 | end 1741 | end 1742 | 37: begin 1743 | dot_start <= 1; 1744 | fsm_state <= 38; 1745 | end 1746 | 1747 | // 2.2 [MVP]*v, clip->NDC (div w), NDC->screen 1748 | // - clip = [MVP] * v 1749 | 38:begin 1750 | dot_start <= 0; 1751 | if (dot_done) begin 1752 | x_clip_v0 <= dot_result; 1753 | dot_start <= 1; 1754 | fsm_state <= 
39; 1755 | end 1756 | end 1757 | 39:begin 1758 | dot_start <= 0; 1759 | if (dot_done) begin 1760 | y_clip_v0 <= dot_result; 1761 | dot_start <= 1; 1762 | fsm_state <= 40; 1763 | end 1764 | end 1765 | 40:begin 1766 | dot_start <= 0; 1767 | if (dot_done) begin 1768 | z_clip_v0 <= dot_result; 1769 | dot_start <= 1; 1770 | fsm_state <= 41; 1771 | end 1772 | end 1773 | 41:begin 1774 | dot_start <= 0; 1775 | if (dot_done) begin 1776 | w_clip_v0 <= dot_result; 1777 | dot_start <= 1; 1778 | fsm_state <= 42; 1779 | end 1780 | end 1781 | 42:begin 1782 | dot_start <= 0; 1783 | if (dot_done) begin 1784 | x_clip_v1 <= dot_result; 1785 | dot_start <= 1; 1786 | fsm_state <= 43; 1787 | end 1788 | end 1789 | 43:begin 1790 | dot_start <= 0; 1791 | if (dot_done) begin 1792 | y_clip_v1 <= dot_result; 1793 | dot_start <= 1; 1794 | fsm_state <= 44; 1795 | end 1796 | end 1797 | 44:begin 1798 | dot_start <= 0; 1799 | if (dot_done) begin 1800 | z_clip_v1 <= dot_result; 1801 | dot_start <= 1; 1802 | fsm_state <= 45; 1803 | end 1804 | end 1805 | 45:begin 1806 | dot_start <= 0; 1807 | if (dot_done) begin 1808 | w_clip_v1 <= dot_result; 1809 | dot_start <= 1; 1810 | fsm_state <= 46; 1811 | end 1812 | end 1813 | 46:begin 1814 | dot_start <= 0; 1815 | if (dot_done) begin 1816 | x_clip_v2 <= dot_result; 1817 | dot_start <= 1; 1818 | fsm_state <= 47; 1819 | end 1820 | end 1821 | 47:begin 1822 | dot_start <= 0; 1823 | if (dot_done) begin 1824 | y_clip_v2 <= dot_result; 1825 | dot_start <= 1; 1826 | fsm_state <= 48; 1827 | end 1828 | end 1829 | 48:begin 1830 | dot_start <= 0; 1831 | if (dot_done) begin 1832 | z_clip_v2 <= dot_result; 1833 | dot_start <= 1; 1834 | fsm_state <= 49; 1835 | end 1836 | end 1837 | // - clip->NDC (div w), clip.xyz / clip.w 1838 | 49:begin 1839 | dot_start <= 0; 1840 | if (dot_done) begin 1841 | w_clip_v2 <= dot_result; 1842 | // ndc = clip.xy / clip.w 1843 | // Q8.8->Q16.16 -> Q16.16 = Q16.16/Q16.16 -> Q16.16->Q2.14 1844 | // signed extended[15:0] <= { {8{extend[7]}}, 
extend[7:0] }; 1845 | div_a <= { {8{x_clip_v0[15]}}, x_clip_v0, 8'b0000_0000}; 1846 | div_b <= { {8{w_clip_v0[15]}}, w_clip_v0, 8'b0000_0000}; 1847 | div_start <= 1; 1848 | fsm_state <= 50; 1849 | end 1850 | end 1851 | 50:begin 1852 | div_start <= 0; 1853 | if (div_done) begin 1854 | x_model_v0 <= div_result[17:2]; 1855 | div_a <= { {8{y_clip_v0[15]}}, y_clip_v0, 8'b0000_0000}; 1856 | div_b <= { {8{w_clip_v0[15]}}, w_clip_v0, 8'b0000_0000}; 1857 | div_start <= 1; 1858 | fsm_state <= 51; 1859 | end 1860 | end 1861 | 51: begin 1862 | div_start <= 0; 1863 | if (div_done) begin 1864 | y_model_v0 <= div_result[17:2]; 1865 | div_a <= { {8{z_clip_v0[15]}}, z_clip_v0, 8'b0000_0000}; 1866 | div_b <= { {8{w_clip_v0[15]}}, w_clip_v0, 8'b0000_0000}; 1867 | div_start <= 1; 1868 | fsm_state <= 52; 1869 | end 1870 | end 1871 | 52: begin 1872 | div_start <= 0; 1873 | if (div_done) begin 1874 | z_model_v0 <= div_result[17:2]; 1875 | div_a <= { {8{x_clip_v1[15]}}, x_clip_v1, 8'b0000_0000}; 1876 | div_b <= { {8{w_clip_v1[15]}}, w_clip_v1, 8'b0000_0000}; 1877 | div_start <= 1; 1878 | fsm_state <= 53; 1879 | end 1880 | end 1881 | 53: begin 1882 | div_start <= 0; 1883 | if (div_done) begin 1884 | x_model_v1 <= div_result[17:2]; 1885 | div_a <= { {8{y_clip_v1[15]}}, y_clip_v1, 8'b0000_0000}; 1886 | div_b <= { {8{w_clip_v1[15]}}, w_clip_v1, 8'b0000_0000}; 1887 | div_start <= 1; 1888 | fsm_state <= 54; 1889 | end 1890 | end 1891 | 54: begin 1892 | div_start <= 0; 1893 | if (div_done) begin 1894 | y_model_v1 <= div_result[17:2]; 1895 | div_a <= { {8{z_clip_v1[15]}}, z_clip_v1, 8'b0000_0000}; 1896 | div_b <= { {8{w_clip_v1[15]}}, w_clip_v1, 8'b0000_0000}; 1897 | div_start <= 1; 1898 | fsm_state <= 55; 1899 | end 1900 | end 1901 | 55: begin 1902 | div_start <= 0; 1903 | if (div_done) begin 1904 | z_model_v1 <= div_result[17:2]; 1905 | div_a <= { {8{x_clip_v2[15]}}, x_clip_v2, 8'b0000_0000}; 1906 | div_b <= { {8{w_clip_v2[15]}}, w_clip_v2, 8'b0000_0000}; 1907 | div_start <= 1; 1908 | fsm_state 
<= 56; 1909 | end 1910 | end 1911 | 56: begin 1912 | div_start <= 0; 1913 | if (div_done) begin 1914 | x_model_v2 <= div_result[17:2]; 1915 | div_a <= { {8{y_clip_v2[15]}}, y_clip_v2, 8'b0000_0000}; 1916 | div_b <= { {8{w_clip_v2[15]}}, w_clip_v2, 8'b0000_0000}; 1917 | div_start <= 1; 1918 | fsm_state <= 57; 1919 | end 1920 | end 1921 | 57: begin 1922 | div_start <= 0; 1923 | if (div_done) begin 1924 | y_model_v2 <= div_result[17:2]; 1925 | div_a <= { {8{z_clip_v2[15]}}, z_clip_v2, 8'b0000_0000}; 1926 | div_b <= { {8{w_clip_v2[15]}}, w_clip_v2, 8'b0000_0000}; 1927 | div_start <= 1; 1928 | fsm_state <= 58; 1929 | end 1930 | end 1931 | 58: begin 1932 | div_start <= 0; 1933 | if (div_done) begin 1934 | z_model_v2 <= div_result[17:2]; 1935 | fsm_state <= 59; 1936 | end 1937 | end 1938 | // - NDC->screen, screen = [S] * ndc 1939 | 59: begin 1940 | // screen = [S] * ndc 1941 | // x_ndc * 160 + 160 = x_ndc << 7 + x_ndc << 5 + 160 1942 | // Q2.14 (x_ndc) -> Q9.7 (x_ndc << 7) -> Q11.5 1943 | // Q2.14 (x_ndc) -> Q7.9 (x_ndc << 5) -> Q11.5 1944 | x_model_v0 <= {{2{x_model_v0[15]}}, x_model_v0[15:2]} + 1945 | {{4{x_model_v0[15]}}, x_model_v0[15:4]} 1946 | + 16'sb000_1010_0000_00000; // Q11.5 (160) 1947 | x_model_v1 <= {{2{x_model_v1[15]}}, x_model_v1[15:2]} + 1948 | {{4{x_model_v1[15]}}, x_model_v1[15:4]} 1949 | + 16'sb000_1010_0000_00000; 1950 | x_model_v2 <= {{2{x_model_v2[15]}}, x_model_v2[15:2]} + 1951 | {{4{x_model_v2[15]}}, x_model_v2[15:4]} 1952 | + 16'sb000_1010_0000_00000; 1953 | // 120 - y * 120. 
(128-8) 1954 | // Q2.14 (y_ndc) -> Q9.7 (y_ndc << 7) -> Q11.5 1955 | // Q2.14 (y_ndc) -> Q5.11 (y_ndc << 3) -> Q11.5; this state only forms y_ndc*120 = y_ndc*128 - y_ndc*8, the subtraction from 120 is done in state 60 1956 | y_model_v0 <= {{2{y_model_v0[15]}}, y_model_v0[15:2]} - 1957 | {{6{y_model_v0[15]}}, y_model_v0[15:6]}; 1958 | y_model_v1 <= {{2{y_model_v1[15]}}, y_model_v1[15:2]} - 1959 | {{6{y_model_v1[15]}}, y_model_v1[15:6]}; 1960 | y_model_v2 <= {{2{y_model_v2[15]}}, y_model_v2[15:2]} - 1961 | {{6{y_model_v2[15]}}, y_model_v2[15:6]}; 1962 | // z/2 + 0.5, 1963 | // Q2.14 (z_ndc) -> Q(z_ndc >> 1) 1964 | // [-1,1] -> [-0.5,0.5] -> [0,1] 1965 | // 01.xxxx -> 1.999 00.1111 (0.999) 1966 | // 01.0000 -> 1. 00.1000 (0.5) 1967 | // 00.xxxx -> 0,0.99. 00.0111 (0.499) 1968 | // // 1969 | // 11.xxxx -> -0.1,-0.99. 11.1001 (-0.1) 1970 | // 11.0000 -> -1 11.1000 (-0.5) 1971 | // 10.xxxx 11.0xxx (-0.5 - -1) 1972 | // 10.0000 -> -1.999. 11.0000 (-1) 1973 | z_model_v0 <= {z_model_v0[15], z_model_v0[15:1]} + 1974 | + 16'sb00_1000_0000_0000_00; // Q2.14 (0.5); the doubled '+' is a binary plus followed by a unary plus 1975 | z_model_v1 <= {z_model_v1[15], z_model_v1[15:1]} + 1976 | + 16'sb00_1000_0000_0000_00; 1977 | z_model_v2 <= {z_model_v2[15], z_model_v2[15:1]} + 1978 | + 16'sb00_1000_0000_0000_00; 1979 | fsm_state <= 60; 1980 | end 1981 | 60: begin 1982 | // y = 120 - y, flip y (y=0 at the top, the inverse of OpenGL) 1983 | y_model_v0 <= 16'sb000_0111_1000_00000 - y_model_v0; // Q11.5 (120) 1984 | y_model_v1 <= 16'sb000_0111_1000_00000 - y_model_v1; 1985 | y_model_v2 <= 16'sb000_0111_1000_00000 - y_model_v2; 1986 | fsm_state <= 61; 1987 | end 1988 | 61: begin 1989 | // z_ndc. 
Q2.14 -> zscreen Q2.20 1990 | 1991 | 1992 | x_screen_v0 <= {9'b0000_0000_0,x_model_v0[15:5]}; // Q20.0 (screen), always positive 1993 | x_screen_v1 <= {9'b0000_0000_0,x_model_v1[15:5]}; 1994 | x_screen_v2 <= {9'b0000_0000_0,x_model_v2[15:5]}; 1995 | y_screen_v0 <= {9'b0000_0000_0,y_model_v0[15:5]}; 1996 | y_screen_v1 <= {9'b0000_0000_0,y_model_v1[15:5]}; 1997 | y_screen_v2 <= {9'b0000_0000_0,y_model_v2[15:5]}; 1998 | z_screen_v0 <= {z_model_v0,6'b0}; // Q2.14 -> Q2.20 by appending 6 zero fraction bits; the only z_screen_v0 assignment in this state 1999 | z_screen_v1 <= {z_model_v1,6'b0}; 2000 | z_screen_v2 <= {z_model_v2,6'b0}; 2001 | fsm_state <= 62; 2002 | end 2003 | 2004 | // 2.3 dot(light,n) 2005 | 62:begin 2006 | // if((z_screen_v0[21:20] != 0) || (z_screen_v1[21:20] != 0) || (z_screen_v2[21:20] != 0)) begin 2007 | // fsm_state <= 1; 2008 | // end else begin 2009 | dot_start <= 1; 2010 | fsm_state <= 63; 2011 | //end 2012 | end 2013 | 63: begin 2014 | dot_start <= 0; 2015 | if (dot_done) begin 2016 | if (dot_result[9] == 1'b1) begin // negative dot(light,n), 1x.xxx: shade_color is forced to 0 2017 | // if (dot_result[8:6] == 3'b100) begin // 11.000 -> -1 2018 | // shade_color[2:1] <= 2'b11; 2019 | // end 2020 | // else if (dot_result[9:8] == 2'b10) begin // 10.xxx -> -1.xxx 2021 | // shade_color[2:1] <= 2'b11; 2022 | // end 2023 | // else begin 2024 | // shade_color[2:1] <= ~dot_result[7:6]; // 11.xxx -> -0.xxx 2025 | // end 2026 | shade_color <= 4'b0000; 2027 | end 2028 | else begin 2029 | // dot x tri_color: shade_color = {tri_color, 2 bits taken from the dot product} 2030 | if (dot_result[8:5] == 4'b1000) begin // 01.000 -> 1, use the maximum 2'b11 2031 | //shade_color <= 4'b0110; 2032 | shade_color <= {tri_color,2'b11}; 2033 | end 2034 | else begin 2035 | //shade_color <= {1'b0,dot_result[7:6],1'b0}; // 0.000 - 0.111 2036 | shade_color <= {tri_color,dot_result[7:6]}; 2037 | end 2038 | end 2039 | fsm_state <= 74; 2040 | end 2041 | end 2042 | 2043 | /////////////////////////////// 2044 | 2045 | // 3. bbox 2046 | // x mul of 4, floor-Min, floor-Max (will do 3 more pixels) 2047 | 74: begin 2048 | bboxMin_X <= (x_screen_v0 < x_screen_v1)? 
x_screen_v0 : x_screen_v1; 2049 | bboxMin_Y <= (y_screen_v0 < y_screen_v1)? y_screen_v0 : y_screen_v1; 2050 | bboxMax_X <= (x_screen_v0 > x_screen_v1)? x_screen_v0 : x_screen_v1; 2051 | bboxMax_Y <= (y_screen_v0 > y_screen_v1)? y_screen_v0 : y_screen_v1; 2052 | fsm_state <= 75; 2053 | end 2054 | 75: begin 2055 | bboxMin_X <= (bboxMin_X < x_screen_v2)? bboxMin_X : x_screen_v2; 2056 | bboxMin_Y <= (bboxMin_Y < y_screen_v2)? bboxMin_Y : y_screen_v2; 2057 | bboxMax_X <= (bboxMax_X > x_screen_v2)? bboxMax_X : x_screen_v2; 2058 | bboxMax_Y <= (bboxMax_Y > y_screen_v2)? bboxMax_Y : y_screen_v2; 2059 | fsm_state <= 76; 2060 | end 2061 | 76: begin 2062 | bboxMin_X[1:0] <= 2'b00; 2063 | bboxMax_X[1:0] <= 2'b00; 2064 | fsm_state <= 77; 2065 | end 2066 | 2067 | // 4.1 e0_init, e1_init, e2_init 2068 | // e0_init = (bboxmin.x - pts[0].x)*(pts[1].y-pts[0].y) + (pts[0].y - bboxmin.y ) * (pts[1].x-pts[0].x); 2069 | // e1_init = (bboxmin.x - pts[1].x)*(pts[2].y-pts[1].y) + (pts[1].y - bboxmin.y ) * (pts[2].x-pts[1].x); 2070 | // e2_init = (bboxmin.x - pts[2].x)*(pts[0].y-pts[2].y) + (pts[2].y - bboxmin.y ) * (pts[0].x-pts[2].x); 2071 | // Q20.2 x Q20.2 = Q40.4->Q20.0 2072 | 2073 | // - e0_init 2074 | 77: begin 2075 | mul_a <= {bboxMin_X - x_screen_v0,2'b00}; // bboxmin.x - pts[0].x 2076 | mul_b <= {y_screen_v1 - y_screen_v0,2'b00}; // pts[1].y-pts[0].y (a0) 2077 | mul_start <= 1; 2078 | fsm_state <= 78; 2079 | end 2080 | 78: begin 2081 | mul_start <= 0; 2082 | if (mul_done) begin 2083 | tmp_ei_mul1 <= mul_result[23:4]; // ready in 23clk for 20bit mul 2084 | mul_a <= {y_screen_v0 - bboxMin_Y,2'b00}; // pts[0].y - bboxmin.y 2085 | mul_b <= {x_screen_v1 - x_screen_v0,2'b00}; // pts[1].x-pts[0].x (b0) 2086 | mul_start <= 1; 2087 | fsm_state <= 79; 2088 | end 2089 | end 2090 | 79: begin 2091 | mul_start <= 0; 2092 | if (mul_done) begin 2093 | tmp_ei_mul2 <= mul_result[23:4]; // ready in 23clk for 20bit mul 2094 | fsm_state <= 80; 2095 | end 2096 | end 2097 | // - e1_init 2098 | 80: begin 
2099 | e0_init <= tmp_ei_mul2 + tmp_ei_mul1; // fin e0_init 2100 | mul_a <= {bboxMin_X - x_screen_v1,2'b00}; 2101 | mul_b <= {y_screen_v2 - y_screen_v1,2'b00}; 2102 | mul_start <= 1; 2103 | fsm_state <= 81; 2104 | end 2105 | 81: begin 2106 | mul_start <= 0; 2107 | if (mul_done) begin 2108 | tmp_ei_mul1 <= mul_result[23:4]; 2109 | mul_a <= {y_screen_v1 - bboxMin_Y,2'b00}; 2110 | mul_b <= {x_screen_v2 - x_screen_v1,2'b00}; 2111 | mul_start <= 1; 2112 | fsm_state <= 82; 2113 | end 2114 | end 2115 | 82: begin 2116 | mul_start <= 0; 2117 | if (mul_done) begin 2118 | tmp_ei_mul2 <= mul_result[23:4]; // ready in 23clk for 20bit mul 2119 | fsm_state <= 83; 2120 | end 2121 | end 2122 | // - e2_init 2123 | 83: begin 2124 | e1_init <= tmp_ei_mul2 + tmp_ei_mul1; // fin e1_init 2125 | mul_a <= {bboxMin_X - x_screen_v2,2'b00}; 2126 | mul_b <= {y_screen_v0 - y_screen_v2,2'b00}; 2127 | mul_start <= 1; 2128 | fsm_state <= 84; 2129 | end 2130 | 84: begin 2131 | mul_start <= 0; 2132 | if (mul_done) begin 2133 | tmp_ei_mul1 <= mul_result[23:4]; 2134 | mul_a <= {y_screen_v2 - bboxMin_Y,2'b00}; 2135 | mul_b <= {x_screen_v0 - x_screen_v2,2'b00}; 2136 | mul_start <= 1; 2137 | fsm_state <= 85; 2138 | end 2139 | end 2140 | 85: begin 2141 | mul_start <= 0; 2142 | if (mul_done) begin 2143 | tmp_ei_mul2 <= mul_result[23:4]; 2144 | fsm_state <= 86; 2145 | end 2146 | end 2147 | 86: begin 2148 | e2_init <= tmp_ei_mul2 + tmp_ei_mul1; // fin e2_init 2149 | fsm_state <= 87; 2150 | end 2151 | 2152 | // 4.2 denom 2153 | // Q20.0 denom_i = (y1-y2)(x0-x2)+(x2-x1)(y0-y2) 2154 | // Q2.20 denom = float2fix14(1.0f/denom_i); 2155 | // 640x640 x2 = 819,200 2^20 2156 | // 1/819,200 = 0.00000122, 1/2^20 2157 | 87: begin 2158 | mul_a <= {y_screen_v1 - y_screen_v2,2'b00}; 2159 | mul_b <= {x_screen_v0 - x_screen_v2,2'b00}; 2160 | mul_start <= 1; 2161 | fsm_state <= 88; 2162 | end 2163 | 88: begin 2164 | mul_start <= 0; 2165 | if (mul_done) begin 2166 | tmp_ei_mul1 <= mul_result[23:4]; // ready in 23clk for 20bit 
mul 2167 | mul_a <= {y_screen_v0 - y_screen_v2,2'b00}; 2168 | mul_b <= {x_screen_v2 - x_screen_v1,2'b00}; 2169 | mul_start <= 1; 2170 | fsm_state <= 89; 2171 | end 2172 | end 2173 | 89: begin 2174 | mul_start <= 0; 2175 | if (mul_done) begin 2176 | tmp_ei_mul2 <= tmp_ei_mul1 + mul_result[23:4]; // denom_i 2177 | fsm_state <= 90; 2178 | end 2179 | end 2180 | 90: begin 2181 | // Q20.0->Q20.20 -> Q20.20/Q20.20 2182 | div2_a <= { 20'b0000_0000_0000_0000_0001, 20'b0000_0000_0000_0000_0000}; // 1.0f/denom_i 2183 | div2_b <= { tmp_ei_mul2, 20'b0000_0000_0000_0000_0000}; 2184 | div2_start <= 1; 2185 | fsm_state <= 91; 2186 | end 2187 | 91: begin 2188 | div2_start <= 0; 2189 | if (div2_done) begin 2190 | // Q20.20->Q2.20 2191 | denom <= {div2_result[21:0]}; 2192 | fsm_state <= 92; 2193 | end 2194 | end 2195 | 2196 | //4.3 bar_ix, bar_iy, bar_iz 2197 | // Q2.20 = (Q20.0->)Q20.2 * Q2.20 2198 | // = 500,000 * 0.000002 = 1 2199 | // = 10,000 * 0.000002 = 0.02 2200 | // Q2.20 = 500 * 0.000002 = 0.001, 2^10 2201 | // = 100 * 0.000002 = 0.0002, 2^16 = 0.000015 2202 | // 2203 | // bar_iy <= {(y2-y0)(bboxMin_X-x2)+(x0-x2)(bboxMin_Y-y2)} * denom; // 3 mul 2204 | // bar_iy_dy <= x0x2 * denom; // 1 mul 2205 | // bar_iy_dx <= y2y0 * denom; // 1 mul 2206 | // bar_iz <= {(y0-y1)(bboxMin_X-x0)+(x1-x0)(bboxMin_Y-y0)} * denom; // 3 mul 2207 | // bar_iz_dy <= x1x0 * denom; // 1 mul 2208 | // bar_iz_dx <= y0y1 * denom; // 1 mul 2209 | // bar_ix <= 1 - (bar_iy + bar_iz) 2210 | // bar_ix <= {(y1-y2)(bboxMin_X-x2)+(x2-x1)(bboxMin_Y-y2)} * denom; 2211 | // bar_ix_dy <= x2x1 * denom; 2212 | // bar_ix_dx <= y1y2 * denom; 2213 | 2214 | // denom Q2.20 -> Q6.16 2215 | // x_screen, bbox Q20.0 2216 | // Q20.2 x Q2.20 = Q22.22 -> Q6.16 for bar 2217 | 2218 | 92: begin 2219 | mul_a <= {y_screen_v2 - y_screen_v0,2'b00}; 2220 | mul_b <= {bboxMin_X - x_screen_v2,2'b00}; 2221 | mul_start <= 1; 2222 | fsm_state <= 93; 2223 | end 2224 | 93: begin 2225 | mul_start <= 0; 2226 | if (mul_done) begin 2227 | 
tmp_ei_mul1 <= mul_result[23:4]; // ready in 23clk for 20bit mul 2228 | mul_a <= {x_screen_v0 - x_screen_v2,2'b00}; 2229 | mul_b <= {bboxMin_Y - y_screen_v2,2'b00}; 2230 | mul_start <= 1; 2231 | fsm_state <= 94; 2232 | end 2233 | end 2234 | 94: begin 2235 | mul_start <= 0; 2236 | if (mul_done) begin 2237 | mul_a <= {tmp_ei_mul1 + mul_result[23:4],2'b00}; // ready in 23clk for 20bit mul 2238 | mul_b <= denom; 2239 | mul_start <= 1; 2240 | fsm_state <= 95; 2241 | end 2242 | end 2243 | 95: begin 2244 | mul_start <= 0; 2245 | if (mul_done) begin 2246 | bar_iy <= mul_result[27:6]; // ready in 23clk for 20bit mul 2247 | // 2248 | mul_a <= {x_screen_v0 - x_screen_v2,2'b00}; 2249 | mul_b <= denom; 2250 | mul_start <= 1; 2251 | fsm_state <= 96; 2252 | end 2253 | end 2254 | 96: begin 2255 | mul_start <= 0; 2256 | if (mul_done) begin 2257 | bar_iy_dy <= mul_result[27:6]; // ready in 23clk for 20bit mul 2258 | // 2259 | mul_a <= {y_screen_v2 - y_screen_v0,2'b00}; 2260 | mul_b <= denom; 2261 | mul_start <= 1; 2262 | fsm_state <= 97; 2263 | end 2264 | end 2265 | 97: begin 2266 | mul_start <= 0; 2267 | if (mul_done) begin 2268 | bar_iy_dx <= mul_result[27:6]; // Q6.16, Q2.20 is [23:2] 2269 | // 2270 | mul_a <= {y_screen_v0 - y_screen_v1,2'b00}; 2271 | mul_b <= {bboxMin_X - x_screen_v0,2'b00}; 2272 | mul_start <= 1; 2273 | fsm_state <= 98; 2274 | end 2275 | end 2276 | 98: begin 2277 | mul_start <= 0; 2278 | if (mul_done) begin 2279 | tmp_ei_mul1 <= mul_result[23:4]; // ready in 23clk for 20bit mul 2280 | mul_a <= {x_screen_v1 - x_screen_v0,2'b00}; 2281 | mul_b <= {bboxMin_Y - y_screen_v0,2'b00}; 2282 | mul_start <= 1; 2283 | fsm_state <= 99; 2284 | end 2285 | end 2286 | 99: begin 2287 | mul_start <= 0; 2288 | if (mul_done) begin 2289 | mul_a <= {tmp_ei_mul1 + mul_result[23:4],2'b00}; // ready in 23clk for 20bit mul 2290 | mul_b <= denom; 2291 | mul_start <= 1; 2292 | fsm_state <= 100; 2293 | end 2294 | end 2295 | 100: begin 2296 | mul_start <= 0; 2297 | if (mul_done) begin 2298 | 
bar_iz <= mul_result[27:6]; // ready in 23clk for 20bit mul 2299 | // 2300 | mul_a <= {x_screen_v1 - x_screen_v0,2'b00}; 2301 | mul_b <= denom; 2302 | mul_start <= 1; 2303 | fsm_state <= 101; 2304 | end 2305 | end 2306 | 101: begin 2307 | mul_start <= 0; 2308 | if (mul_done) begin 2309 | bar_iz_dy <= mul_result[27:6]; // ready in 23clk for 20bit mul 2310 | // 2311 | bar_ix <= bar_iy + bar_iz; 2312 | // 2313 | mul_a <= {y_screen_v0 - y_screen_v1,2'b00}; 2314 | mul_b <= denom; 2315 | mul_start <= 1; 2316 | fsm_state <= 102; 2317 | end 2318 | end 2319 | 102: begin 2320 | mul_start <= 0; 2321 | if (mul_done) begin 2322 | bar_iz_dx <= mul_result[27:6]; // ready in 23clk for 20bit mul 2323 | // 2324 | bar_ix <= 22'b00_0001_0000_0000_0000_0000 - bar_ix; // 1 in Q6.16 2325 | // 2326 | mul_a <= {x_screen_v2 - x_screen_v1,2'b00}; 2327 | mul_b <= denom; 2328 | mul_start <= 1; 2329 | fsm_state <= 103; 2330 | end 2331 | end 2332 | 103: begin 2333 | mul_start <= 0; 2334 | if (mul_done) begin 2335 | bar_ix_dy <= mul_result[27:6]; // ready in 23clk for 20bit mul 2336 | // 2337 | mul_a <= {y_screen_v1 - y_screen_v2,2'b00}; 2338 | mul_b <= denom; 2339 | mul_start <= 1; 2340 | fsm_state <= 104; 2341 | end 2342 | end 2343 | 104: begin 2344 | mul_start <= 0; 2345 | if (mul_done) begin 2346 | bar_ix_dx <= mul_result[27:6]; // ready in 23clk for 20bit mul 2347 | // 2348 | fsm_state <= 105; 2349 | end 2350 | end 2351 | 2352 | //4.4 Z_bar: state 105-114 2353 | // z_bar = z_screen_v0*bar_ix + z_screen_v1*bar_iy + z_screen_v2*bar_iz 2354 | // z_bar_dx = z_screen_v0*bar_ix_dx + z_screen_v1*bar_iy_dx + z_screen_v2*bar_iz_dx 2355 | // z_bar_dy = z_screen_v0*bar_ix_dy + z_screen_v1*bar_iy_dy + z_screen_v2*bar_iz_dy 2356 | // 2357 | // Q2.20 * Q6.16 = Q8.36 -> Q6.16 [41:20] same bit select 2358 | 105: begin 2359 | mul_a <= z_screen_v0; 2360 | mul_b <= bar_ix; 2361 | mul_start <= 1; 2362 | fsm_state <= 106; 2363 | end 2364 | 106: begin 2365 | mul_start <= 0; 2366 | if (mul_done) begin 2367 | z_bar 
<= mul_result[41:20]; 2368 | mul_a <= z_screen_v1; 2369 | mul_b <= bar_iy; 2370 | mul_start <= 1; 2371 | fsm_state <= 107; 2372 | end 2373 | end 2374 | 107: begin 2375 | mul_start <= 0; 2376 | if (mul_done) begin 2377 | z_bar <= z_bar + mul_result[41:20]; // ready in 23clk for 20bit mul 2378 | mul_a <= z_screen_v2; 2379 | mul_b <= bar_iz; 2380 | mul_start <= 1; 2381 | fsm_state <= 108; 2382 | end 2383 | end 2384 | 108: begin 2385 | mul_start <= 0; 2386 | if (mul_done) begin 2387 | z_bar <= z_bar + mul_result[41:20]; // ready in 23clk for 20bit mul 2388 | // 2389 | mul_a <= z_screen_v0; 2390 | mul_b <= bar_ix_dx; 2391 | mul_start <= 1; 2392 | fsm_state <= 109; 2393 | end 2394 | end 2395 | 109: begin 2396 | mul_start <= 0; 2397 | if (mul_done) begin 2398 | z_bar_dx <= mul_result[41:20]; // ready in 23clk for 20bit mul 2399 | mul_a <= z_screen_v1; 2400 | mul_b <= bar_iy_dx; 2401 | mul_start <= 1; 2402 | fsm_state <= 110; 2403 | end 2404 | end 2405 | 110: begin 2406 | mul_start <= 0; 2407 | if (mul_done) begin 2408 | z_bar_dx <= z_bar_dx + mul_result[41:20]; // ready in 23clk for 20bit mul 2409 | mul_a <= z_screen_v2; 2410 | mul_b <= bar_iz_dx; 2411 | mul_start <= 1; 2412 | fsm_state <= 111; 2413 | end 2414 | end 2415 | 111: begin 2416 | mul_start <= 0; 2417 | if (mul_done) begin 2418 | z_bar_dx <= z_bar_dx + mul_result[41:20]; // ready in 23clk for 20bit mul 2419 | // 2420 | mul_a <= z_screen_v0; 2421 | mul_b <= bar_ix_dy; 2422 | mul_start <= 1; 2423 | fsm_state <= 112; 2424 | end 2425 | end 2426 | 112: begin 2427 | mul_start <= 0; 2428 | if (mul_done) begin 2429 | z_bar_dy <= mul_result[41:20]; // ready in 23clk for 20bit mul 2430 | mul_a <= z_screen_v1; 2431 | mul_b <= bar_iy_dy; 2432 | mul_start <= 1; 2433 | fsm_state <= 113; 2434 | end 2435 | end 2436 | 113: begin 2437 | mul_start <= 0; 2438 | if (mul_done) begin 2439 | z_bar_dy <= z_bar_dy + mul_result[41:20]; // ready in 23clk for 20bit mul 2440 | mul_a <= z_screen_v2; 2441 | mul_b <= bar_iz_dy; 2442 | mul_start 
<= 1; 2443 | fsm_state <= 114; 2444 | end 2445 | end 2446 | 114: begin 2447 | mul_start <= 0; 2448 | if (mul_done) begin 2449 | z_bar_dy <= z_bar_dy + mul_result[41:20]; // ready in 23clk for 20bit mul 2450 | // 2451 | pixel_y <= bboxMin_Y[9:0]; 2452 | 2453 | if(has_tex)begin 2454 | // do uv*bar (states 2-21) 2455 | fsm_state <= 2; 2456 | end else begin 2457 | // skip uv * bar and go straight to state 115 2458 | fsm_state <= 115; 2459 | end 2460 | 2461 | end 2462 | end 2463 | 2464 | 2465 | //4.5 uv_bar: state 2-21 2466 | // u_bar = v0_u * bar_ix + v1_u * bar_iy + v2_u * bar_iz 2467 | // u_bar_dx = v0_u * bar_ix_dx + v1_u * bar_iy_dx + v2_u * bar_iz_dx 2468 | // u_bar_dy = v0_u * bar_ix_dy + v1_u * bar_iy_dy + v2_u * bar_iz_dy 2469 | // 2470 | // Q8.0 -> Q8.14 * Q6.16 = Q14.30 -> Q14.8 [43:22] 2471 | // mul will treat uv as signed, so a 0 bit is placed in front: {1'b0,uv,13'b0} 2472 | // Q8.0 -> Q9.13 * Q6.16 = Q15.29 -> Q15.7 [43:22] 2473 | // precision for dx,dy: the variant on the next line (result bits [38:17]) is the one the states below use 2474 | // Q8.0 -> Q9.13 * Q6.16 = Q15.29 -> Q10.12 [38:17] 2475 | 2: begin 2476 | mul_a <= {1'b0,v0_u,13'b0}; 2477 | mul_b <= bar_ix; 2478 | mul_start <= 1; 2479 | fsm_state <= 3; 2480 | end 2481 | 3: begin 2482 | mul_start <= 0; 2483 | if (mul_done) begin 2484 | u_bar <= mul_result[38:17] ; 2485 | mul_a <= {1'b0,v1_u,13'b0}; 2486 | mul_b <= bar_iy; 2487 | mul_start <= 1; 2488 | fsm_state <= 4; 2489 | end 2490 | end 2491 | 4: begin 2492 | mul_start <= 0; 2493 | if (mul_done) begin 2494 | u_bar <= u_bar + mul_result[38:17]; // ready in 23clk for 20bit mul 2495 | mul_a <= {1'b0,v2_u,13'b0}; 2496 | mul_b <= bar_iz; 2497 | mul_start <= 1; 2498 | fsm_state <= 5; 2499 | end 2500 | end 2501 | 5: begin 2502 | mul_start <= 0; 2503 | if (mul_done) begin 2504 | u_bar <= u_bar + mul_result[38:17]; // ready in 23clk for 20bit mul 2505 | // 2506 | mul_a <= {1'b0,v0_u,13'b0}; 2507 | mul_b <= bar_ix_dx; 2508 | mul_start <= 1; 2509 | fsm_state <= 6; 2510 | end 2511 | end 2512 | 6: begin 2513 | mul_start <= 0; 2514 | if (mul_done) begin 2515 | u_bar_dx <= mul_result[38:17]; // ready in 23clk for 20bit mul 2516 | 
mul_a <= {1'b0,v1_u,13'b0}; 2517 | mul_b <= bar_iy_dx; 2518 | mul_start <= 1; 2519 | fsm_state <= 7; 2520 | end 2521 | end 2522 | 7: begin 2523 | mul_start <= 0; 2524 | if (mul_done) begin 2525 | u_bar_dx <= u_bar_dx + mul_result[38:17]; // ready in 23clk for 20bit mul 2526 | mul_a <= {1'b0,v2_u,13'b0}; 2527 | mul_b <= bar_iz_dx; 2528 | mul_start <= 1; 2529 | fsm_state <= 8; 2530 | end 2531 | end 2532 | 8: begin 2533 | mul_start <= 0; 2534 | if (mul_done) begin 2535 | u_bar_dx <= u_bar_dx + mul_result[38:17]; // ready in 23clk for 20bit mul 2536 | // 2537 | mul_a <= {1'b0,v0_u,13'b0}; 2538 | mul_b <= bar_ix_dy; 2539 | mul_start <= 1; 2540 | fsm_state <= 9; 2541 | end 2542 | end 2543 | 9: begin 2544 | mul_start <= 0; 2545 | if (mul_done) begin 2546 | u_bar_dy <= mul_result[38:17]; // ready in 23clk for 20bit mul 2547 | mul_a <= {1'b0,v1_u,13'b0}; 2548 | mul_b <= bar_iy_dy; 2549 | mul_start <= 1; 2550 | fsm_state <= 10; 2551 | end 2552 | end 2553 | 10: begin 2554 | mul_start <= 0; 2555 | if (mul_done) begin 2556 | u_bar_dy <= u_bar_dy + mul_result[38:17]; // ready in 23clk for 20bit mul 2557 | mul_a <= {1'b0,v2_u,13'b0}; 2558 | mul_b <= bar_iz_dy; 2559 | mul_start <= 1; 2560 | fsm_state <= 11; 2561 | end 2562 | end 2563 | 11: begin 2564 | mul_start <= 0; 2565 | if (mul_done) begin 2566 | u_bar_dy <= u_bar_dy + mul_result[38:17]; // ready in 23clk for 20bit mul 2567 | // 2568 | fsm_state <= 12; 2569 | end 2570 | end 2571 | 12: begin 2572 | mul_a <= {1'b0,v0_v,13'b0}; 2573 | mul_b <= bar_ix; 2574 | mul_start <= 1; 2575 | fsm_state <= 13; 2576 | end 2577 | 13: begin 2578 | mul_start <= 0; 2579 | if (mul_done) begin 2580 | v_bar <= mul_result[38:17]; 2581 | mul_a <= {1'b0,v1_v,13'b0}; 2582 | mul_b <= bar_iy; 2583 | mul_start <= 1; 2584 | fsm_state <= 14; 2585 | end 2586 | end 2587 | 14: begin 2588 | mul_start <= 0; 2589 | if (mul_done) begin 2590 | v_bar <= v_bar + mul_result[38:17]; // ready in 23clk for 20bit mul 2591 | mul_a <= {1'b0,v2_v,13'b0}; 2592 | mul_b <= 
bar_iz; 2593 | mul_start <= 1; 2594 | fsm_state <= 15; 2595 | end 2596 | end 2597 | 15: begin 2598 | mul_start <= 0; 2599 | if (mul_done) begin 2600 | v_bar <= v_bar + mul_result[38:17]; // ready in 23clk for 20bit mul 2601 | // 2602 | mul_a <= {1'b0,v0_v,13'b0}; 2603 | mul_b <= bar_ix_dx; 2604 | mul_start <= 1; 2605 | fsm_state <= 16; 2606 | end 2607 | end 2608 | 16: begin 2609 | mul_start <= 0; 2610 | if (mul_done) begin 2611 | v_bar_dx <= mul_result[38:17]; // ready in 23clk for 20bit mul 2612 | mul_a <= {1'b0,v1_v,13'b0}; 2613 | mul_b <= bar_iy_dx; 2614 | mul_start <= 1; 2615 | fsm_state <= 17; 2616 | end 2617 | end 2618 | 17: begin 2619 | mul_start <= 0; 2620 | if (mul_done) begin 2621 | v_bar_dx <= v_bar_dx + mul_result[38:17]; // ready in 23clk for 20bit mul 2622 | mul_a <= {1'b0,v2_v,13'b0}; 2623 | mul_b <= bar_iz_dx; 2624 | mul_start <= 1; 2625 | fsm_state <= 18; 2626 | end 2627 | end 2628 | 18: begin 2629 | mul_start <= 0; 2630 | if (mul_done) begin 2631 | v_bar_dx <= v_bar_dx + mul_result[38:17]; // ready in 23clk for 20bit mul 2632 | // 2633 | mul_a <= {1'b0,v0_v,13'b0}; 2634 | mul_b <= bar_ix_dy; 2635 | mul_start <= 1; 2636 | fsm_state <= 19; 2637 | end 2638 | end 2639 | 19: begin 2640 | mul_start <= 0; 2641 | if (mul_done) begin 2642 | v_bar_dy <= mul_result[38:17]; // ready in 23clk for 20bit mul 2643 | mul_a <= {1'b0,v1_v,13'b0}; 2644 | mul_b <= bar_iy_dy; 2645 | mul_start <= 1; 2646 | fsm_state <= 20; 2647 | end 2648 | end 2649 | 20: begin 2650 | mul_start <= 0; 2651 | if (mul_done) begin 2652 | v_bar_dy <= v_bar_dy + mul_result[38:17]; // ready in 23clk for 20bit mul 2653 | mul_a <= {1'b0,v2_v,13'b0}; 2654 | mul_b <= bar_iz_dy; 2655 | mul_start <= 1; 2656 | fsm_state <= 21; 2657 | end 2658 | end 2659 | 21: begin 2660 | mul_start <= 0; 2661 | if (mul_done) begin 2662 | v_bar_dy <= v_bar_dy + mul_result[38:17]; // ready in 23clk for 20bit mul 2663 | // 2664 | fsm_state <= 115; 2665 | end 2666 | end 2667 | 2668 | 2669 | //5. 
for pixel y in bbox 2670 | // - e0 = e0_init, z = z_bar / e0_init += dy, z_bar += z_bar_dy; when pixel_y > bboxMax_Y the triangle is finished and the FSM returns to state 31 with tri_idx + 1 2671 | 115: begin 2672 | if (pixel_y > bboxMax_Y[9:0]) begin 2673 | tri_idx <= tri_idx + 1; 2674 | fsm_state <= 31; 2675 | end else begin 2676 | e0 <= e0_init; 2677 | e1 <= e1_init; 2678 | e2 <= e2_init; 2679 | pixel_z <= z_bar; 2680 | pixel_u <= u_bar; 2681 | pixel_v <= v_bar; 2682 | // step the row-start values for the next row 2683 | e0_init <= e0_init - (x_screen_v1 - x_screen_v0); 2684 | e1_init <= e1_init - (x_screen_v2 - x_screen_v1); 2685 | e2_init <= e2_init - (x_screen_v0 - x_screen_v2); 2686 | z_bar <= z_bar + z_bar_dy; 2687 | u_bar <= u_bar + u_bar_dy; 2688 | v_bar <= v_bar + v_bar_dy; 2689 | 2690 | //shade_color <= tri_idx[3:0]; 2691 | // 2692 | pixel_x <= bboxMin_X[9:0]; 2693 | fsm_state <= 116; 2694 | end 2695 | end 2696 | // - for x in bbox (x <= bboxMax_X; goes to state 255 once pixel_x > bboxMax_X) 2697 | // - READ x4 Z (23 clk) 2698 | 116: begin 2699 | if (pixel_x > bboxMax_X[9:0]) begin 2700 | fsm_state <= 255; 2701 | end else begin 2702 | if (ram_notbusy) begin 2703 | vsfs_stop_txn <= 0; 2704 | vsfs_start_read <= 1; 2705 | // y*320 + x + z_start (76800); y*320 is built as y*256 + y*64 2706 | vsfs_addr <= {6'b0,pixel_y,8'b0} + {8'b0,pixel_y,6'b0} + {14'b0,pixel_x} + 76800; 2707 | numread <= 0; 2708 | read_delay <= 0; 2709 | fsm_state <= 117; 2710 | end 2711 | end 2712 | end 2713 | // -- wait for the first flash data to be ready 2714 | 117: begin 2715 | vsfs_start_read <= 0; 2716 | if(read_delay == 16) begin 2717 | read_delay <= 0; 2718 | Z_buffer[numread[2:1]][{~numread[0],2'b00} +: 4] <= spi_data; 2719 | numread <= 1; 2720 | fsm_state <= 118; 2721 | end 2722 | else begin 2723 | read_delay <= read_delay + 1; 2724 | end 2725 | end 2726 | // -- read 7 more 4bit (two nibbles per Z_buffer entry, high nibble first), then raise vsfs_stop_txn 2727 | 118: begin 2728 | Z_buffer[numread[2:1]][{~numread[0],2'b00} +: 4] <= spi_data; 2729 | numread <= numread + 1; 2730 | if(numread == 7)begin 2731 | numread <= 0; 2732 | vsfs_stop_txn <= 1; 2733 | fsm_state <= 119; 2734 | end 2735 | end 2736 | // - READ x4 B (19 clk); NOTE(review): unlike state 116, state 119 raises vsfs_start_read without checking ram_notbusy - confirm the memory controller accepts this 2737 | 119: begin 2738 | vsfs_stop_txn <= 0; 2739 | 
vsfs_start_read <= 1; 2740 | // y * 160 + x/2 + (offset back[0]) 2741 | vsfs_addr <= (evenframe)?{7'b0,pixel_y,7'b0} + {9'b0,pixel_y,5'b0} + {15'b0,pixel_x[9:1]} + 38400: 2742 | {7'b0,pixel_y,7'b0} + {9'b0,pixel_y,5'b0} + {15'b0,pixel_x[9:1]}; 2743 | numread <= 0; 2744 | read_delay <= 0; 2745 | fsm_state <= 120; 2746 | end 2747 | // -- wait for the first flash data to be ready 2748 | 120: begin 2749 | vsfs_start_read <= 0; 2750 | if(read_delay == 16) begin 2751 | read_delay <= 0; 2752 | C_buffer[numread[1:0]] <= spi_data; 2753 | numread <= 1; 2754 | fsm_state <= 121; 2755 | end 2756 | else begin 2757 | read_delay <= read_delay + 1; 2758 | end 2759 | end 2760 | // -- read 3 more 4bit 2761 | 121: begin 2762 | C_buffer[numread[1:0]] <= spi_data; 2763 | numread <= numread + 1; 2764 | if(numread == 3)begin 2765 | numread <= 0; 2766 | vsfs_stop_txn <= 1; 2767 | 2768 | if(has_tex)begin 2769 | // read texel x4 2770 | fsm_state <= 211; 2771 | end else begin 2772 | // skip read texel 2773 | fsm_state <= 122; 2774 | end 2775 | end 2776 | end 2777 | 2778 | // uv Q10.12 -> Q8.0 [19:12] 2779 | // - READ x4 texel (35 clk x2), state 211-225 2780 | 211: begin 2781 | vsfs_stop_txn <= 0; 2782 | if (ram_notbusy) begin 2783 | vsfs_stop_txn <= 0; 2784 | vsfs_start_read <= 1; 2785 | // y * 160 + x/2 + z_start 2786 | // vsfs_addr <= {7'b0,pixel_y,7'b0} + {9'b0,pixel_y,5'b0} + {15'b0,pixel_x[9:1]}; 2787 | // v * 128 + u/2 + 153600 2788 | vsfs_addr <= {9'b0,pixel_v[19:12],7'b0} + {17'b0,pixel_u[19:13]} + 153600; 2789 | pixel_u8 <= pixel_u[12]; 2790 | pixel_u <= pixel_u + u_bar_dx; 2791 | pixel_v <= pixel_v + v_bar_dx; 2792 | numread <= 0; 2793 | read_delay <= 0; 2794 | fsm_state <= 212; 2795 | end 2796 | end 2797 | // -- wait for the first flash data to be ready, read one 4-bit 2798 | 212: begin 2799 | vsfs_start_read <= 0; 2800 | if(read_delay == 16) begin 2801 | read_delay <= 0; 2802 | db_texel[7:4] <= spi_data; 2803 | numread <= 1; 2804 | fsm_state <= 213; 2805 | end 2806 | else begin 
2807 | read_delay <= read_delay + 1; 2808 | end 2809 | end 2810 | // -- read 1 more 4bit 2811 | 213: begin 2812 | db_texel[3:0] <= spi_data; 2813 | numread <= 0; 2814 | vsfs_stop_txn <= 1; 2815 | fsm_state <= 214; 2816 | end 2817 | 2818 | 214: begin 2819 | texel[0] <= (pixel_u8 == 1)? db_texel[7:4] : db_texel[3:0]; 2820 | //texel[0] <= db_texel[3:0]; 2821 | 2822 | 2823 | vsfs_stop_txn <= 0; 2824 | vsfs_start_read <= 1; 2825 | // v * 128 + u/2 + 153600 2826 | vsfs_addr <= {9'b0,pixel_v[19:12],7'b0} + {17'b0,pixel_u[19:13]} + 153600; 2827 | pixel_u8 <= pixel_u[12]; 2828 | pixel_u <= pixel_u + u_bar_dx; 2829 | pixel_v <= pixel_v + v_bar_dx; 2830 | numread <= 0; 2831 | read_delay <= 0; 2832 | fsm_state <= 215; 2833 | end 2834 | 215: begin 2835 | vsfs_start_read <= 0; 2836 | if(read_delay == 16) begin 2837 | read_delay <= 0; 2838 | db_texel[7:4] <= spi_data; 2839 | numread <= 1; 2840 | fsm_state <= 216; 2841 | end 2842 | else begin 2843 | read_delay <= read_delay + 1; 2844 | end 2845 | end 2846 | // -- read 1 more 4bit 2847 | 216: begin 2848 | db_texel[3:0] <= spi_data; 2849 | numread <= 0; 2850 | vsfs_stop_txn <= 1; 2851 | fsm_state <= 217; 2852 | end 2853 | 217: begin 2854 | texel[1] <= (pixel_u8 == 1)? 
db_texel[7:4] : db_texel[3:0]; 2855 | //texel[1] <= db_texel[3:0]; 2856 | 2857 | vsfs_stop_txn <= 0; 2858 | if (ram_notbusy) begin 2859 | vsfs_stop_txn <= 0; 2860 | vsfs_start_read <= 1; 2861 | 2862 | vsfs_addr <= {9'b0,pixel_v[19:12],7'b0} + {17'b0,pixel_u[19:13]} + 153600; 2863 | pixel_u8 <= pixel_u[12]; 2864 | pixel_u <= pixel_u + u_bar_dx; 2865 | pixel_v <= pixel_v + v_bar_dx; 2866 | numread <= 0; 2867 | read_delay <= 0; 2868 | fsm_state <= 218; 2869 | end 2870 | end 2871 | // -- wait for the first flash data to be ready, read one 4-bit 2872 | 218: begin 2873 | vsfs_start_read <= 0; 2874 | if(read_delay == 16) begin 2875 | read_delay <= 0; 2876 | db_texel[7:4] <= spi_data; 2877 | numread <= 1; 2878 | fsm_state <= 219; 2879 | end 2880 | else begin 2881 | read_delay <= read_delay + 1; 2882 | end 2883 | end 2884 | // -- read 1 more 4bit 2885 | 219: begin 2886 | db_texel[3:0] <= spi_data; 2887 | numread <= 0; 2888 | vsfs_stop_txn <= 1; 2889 | fsm_state <= 220; 2890 | end 2891 | 2892 | 220: begin 2893 | texel[2] <= (pixel_u8 == 1)? db_texel[7:4] : db_texel[3:0]; 2894 | //texel[2] <= db_texel[3:0]; 2895 | 2896 | vsfs_stop_txn <= 0; 2897 | vsfs_start_read <= 1; 2898 | // v * 128 + u/2 + 153600 2899 | vsfs_addr <= {9'b0,pixel_v[19:12],7'b0} + {17'b0,pixel_u[19:13]} + 153600; 2900 | pixel_u8 <= pixel_u[12]; 2901 | pixel_u <= pixel_u + u_bar_dx; 2902 | pixel_v <= pixel_v + v_bar_dx; 2903 | numread <= 0; 2904 | read_delay <= 0; 2905 | fsm_state <= 221; 2906 | end 2907 | 221: begin 2908 | vsfs_start_read <= 0; 2909 | if(read_delay == 16) begin 2910 | read_delay <= 0; 2911 | db_texel[7:4] <= spi_data; 2912 | numread <= 1; 2913 | fsm_state <= 222; 2914 | end 2915 | else begin 2916 | read_delay <= read_delay + 1; 2917 | end 2918 | end 2919 | // -- read 1 more 4bit 2920 | 222: begin 2921 | db_texel[3:0] <= spi_data; 2922 | numread <= 0; 2923 | vsfs_stop_txn <= 1; 2924 | fsm_state <= 223; 2925 | end 2926 | 223: begin 2927 | texel[3] <= (pixel_u8 == 1)? 
db_texel[7:4] : db_texel[3:0]; 2928 | //texel[3] <= db_texel[3:0]; 2929 | 2930 | vsfs_stop_txn <= 0; 2931 | fsm_state <= 122; 2932 | end 2933 | 2934 | 2935 | 2936 | // x4 pixel 2937 | // - if ((e0 > 0) && (e1 > 0) && (e2 > 0)) / e0 += dx, z += z_bar_dx 2938 | // - pixel[0-4].cz = (Z < Zbuffer)? cz: pixel[0-4].cz 2939 | // screen y, 0 at the top (reverse opengl), use e0 > 0 2940 | 122: begin 2941 | fsm_state <= 123; 2942 | 2943 | vsfs_stop_txn <= 0; 2944 | if ((e0 >= 0) && (e1 >= 0) && (e2 >= 0)) begin 2945 | //if (((e0 >= 0) && (e1 >= 0) && (e2 >= 0)) || ((e0 < 0) && (e1 < 0) && (e2 < 0))) begin 2946 | //C_buffer[0] <= (pixel_z[19:12] < Z_buffer[0])? tri_idx[3:0]+1 : C_buffer[0]; 2947 | 2948 | C_buffer[0] <= ((pixel_z[15:8] < Z_buffer[0]) && (pixel_z[21:16] == 0))? 2949 | ((has_tex)? texel[0] : shade_color) : C_buffer[0]; 2950 | Z_buffer[0] <= ((pixel_z[15:8] < Z_buffer[0]) && (pixel_z[21:16] == 0))? 2951 | pixel_z[15:8] : Z_buffer[0]; 2952 | end 2953 | e0 <= e0 + (y_screen_v1 - y_screen_v0); 2954 | e1 <= e1 + (y_screen_v2 - y_screen_v1); 2955 | e2 <= e2 + (y_screen_v0 - y_screen_v2); 2956 | pixel_z <= pixel_z + z_bar_dx; 2957 | end 2958 | 123: begin 2959 | fsm_state <= 124; 2960 | 2961 | if ((e0 >= 0) && (e1 >= 0) && (e2 >= 0)) begin 2962 | C_buffer[1] <= ((pixel_z[15:8] < Z_buffer[1]) && (pixel_z[21:16] == 0))? 2963 | ((has_tex)? texel[1] : shade_color ) : C_buffer[1]; 2964 | Z_buffer[1] <= ((pixel_z[15:8] < Z_buffer[1]) && (pixel_z[21:16] == 0))? 2965 | pixel_z[15:8] : Z_buffer[1]; 2966 | end 2967 | e0 <= e0 + (y_screen_v1 - y_screen_v0); 2968 | e1 <= e1 + (y_screen_v2 - y_screen_v1); 2969 | e2 <= e2 + (y_screen_v0 - y_screen_v2); 2970 | pixel_z <= pixel_z + z_bar_dx; 2971 | end 2972 | 124: begin 2973 | fsm_state <= 125; 2974 | 2975 | if ((e0 >= 0) && (e1 >= 0) && (e2 >= 0)) begin 2976 | C_buffer[2] <= ((pixel_z[15:8] < Z_buffer[2]) && (pixel_z[21:16] == 0))? 2977 | ((has_tex)? 
texel[2] : shade_color ) : C_buffer[2]; 2978 | Z_buffer[2] <= ((pixel_z[15:8] < Z_buffer[2]) && (pixel_z[21:16] == 0))? 2979 | pixel_z[15:8] : Z_buffer[2]; 2980 | end 2981 | e0 <= e0 + (y_screen_v1 - y_screen_v0); 2982 | e1 <= e1 + (y_screen_v2 - y_screen_v1); 2983 | e2 <= e2 + (y_screen_v0 - y_screen_v2); 2984 | pixel_z <= pixel_z + z_bar_dx; 2985 | end 2986 | 125: begin 2987 | fsm_state <= 126; 2988 | 2989 | if ((e0 >= 0) && (e1 >= 0) && (e2 >= 0)) begin 2990 | C_buffer[3] <= ((pixel_z[15:8] < Z_buffer[3]) && (pixel_z[21:16] == 0))? 2991 | ((has_tex)? texel[3] : shade_color ) : C_buffer[3]; 2992 | Z_buffer[3] <= ((pixel_z[15:8] < Z_buffer[3]) && (pixel_z[21:16] == 0))? 2993 | pixel_z[15:8] : Z_buffer[3]; 2994 | end 2995 | e0 <= e0 + (y_screen_v1 - y_screen_v0); 2996 | e1 <= e1 + (y_screen_v2 - y_screen_v1); 2997 | e2 <= e2 + (y_screen_v0 - y_screen_v2); 2998 | pixel_z <= pixel_z + z_bar_dx; 2999 | end 3000 | // - WRITE x4 Z (16 clk) 3001 | 126: begin 3002 | if (ram_notbusy) begin 3003 | vsfs_stop_txn <= 0; 3004 | vsfs_start_write <= 1; 3005 | // y*320 + x + z_start 3006 | vsfs_addr <= {6'b0,pixel_y,8'b0} + {8'b0,pixel_y,6'b0} + {14'b0,pixel_x} + 76800; 3007 | vsfs_data_in <= Z_buffer[numread[2:1]][{~numread[0],2'b00} +: 4]; 3008 | numread <= numread + 1; 3009 | fsm_state <= 127; 3010 | end 3011 | end 3012 | // -- wait and write 4 Z 3013 | 127: begin 3014 | vsfs_start_write <= 0; 3015 | if(spi_data_req)begin 3016 | vsfs_data_in <= Z_buffer[numread[2:1]][{~numread[0],2'b00} +: 4]; 3017 | numread <= numread + 1; 3018 | if(numread == 7)begin 3019 | numread <= 0; 3020 | vsfs_stop_txn <= 1; 3021 | fsm_state <= 128; 3022 | end 3023 | end 3024 | end 3025 | // - WRITE x4 B (12 clk) 3026 | 128: begin 3027 | vsfs_stop_txn <= 0; 3028 | vsfs_start_write <= 1; 3029 | // y * 160 + x/2 + (offset back[0]) 3030 | vsfs_addr <= (evenframe)?{7'b0,pixel_y,7'b0} + {9'b0,pixel_y,5'b0} + {15'b0,pixel_x[9:1]} + 38400: 3031 | {7'b0,pixel_y,7'b0} + {9'b0,pixel_y,5'b0} + 
{15'b0,pixel_x[9:1]}; 3032 | vsfs_data_in <= C_buffer[numread[1:0]]; 3033 | numread <= numread + 1; 3034 | fsm_state <= 129; 3035 | end 3036 | // -- wait wait and write 4 color 3037 | 129: begin 3038 | vsfs_start_write <= 0; 3039 | if(spi_data_req)begin 3040 | vsfs_data_in <= C_buffer[numread[1:0]]; 3041 | numread <= numread + 1; 3042 | if(numread == 3)begin 3043 | numread <= 0; 3044 | vsfs_stop_txn <= 1; 3045 | fsm_state <= 254; 3046 | end 3047 | end 3048 | end 3049 | 3050 | 3051 | 3052 | // - last line for pixel x 3053 | 254: begin 3054 | vsfs_stop_txn <= 0; 3055 | pixel_x <= pixel_x + 4; 3056 | fsm_state <= 116; 3057 | end 3058 | // - last line for pixel y, max 255 state 3059 | 255: begin 3060 | pixel_y <= pixel_y + 1; 3061 | fsm_state <= 115; 3062 | end 3063 | 3064 | default: begin 3065 | fsm_state <= 0; 3066 | end 3067 | endcase 3068 | end 3069 | end 3070 | 3071 | 3072 | 3073 | // debug 3074 | // assign debug_vsfs_fsm_state = fsm_state; 3075 | // assign debug_x_model_v0 = {8'b0,v0_u}; 3076 | // assign debug_x_model_v1 = {8'b0,v0_v}; 3077 | // assign debug_x_model_v2 = {8'b0,v1_u}; 3078 | // assign debug_y_model_v0 = {8'b0,v1_v}; 3079 | // assign debug_y_model_v1 = {8'b0,v2_u}; 3080 | // assign debug_y_model_v2 = {8'b0,v2_v}; 3081 | // assign debug_z_model_v0 = z_model_v0; 3082 | // assign debug_z_model_v1 = z_model_v1; 3083 | // assign debug_z_model_v2 = z_model_v2; 3084 | // assign debug_nx = nx; 3085 | // assign debug_ny = ny; 3086 | // assign debug_nz = nz; 3087 | // assign debug_tri_color = tri_color; 3088 | 3089 | 3090 | 3091 | endmodule --------------------------------------------------------------------------------