├── .travis.yml ├── CMakeLists.txt ├── Readme.md ├── docs ├── instruction_set.md └── nihcode_spec.md ├── examples ├── assembler │ ├── cube │ │ ├── Makefile │ │ ├── README.md │ │ ├── data │ │ │ ├── test.vsh │ │ │ └── texture.bin │ │ └── source │ │ │ ├── _gs.s │ │ │ ├── gs.c │ │ │ ├── gs.h │ │ │ ├── main.c │ │ │ ├── math.c │ │ │ └── math.h │ └── cube_lighting │ │ ├── Makefile │ │ ├── README.md │ │ ├── data │ │ ├── test.vsh │ │ └── texture.bin │ │ └── source │ │ ├── _gs.s │ │ ├── gs.c │ │ ├── gs.h │ │ ├── main.c │ │ ├── math.c │ │ └── math.h └── inline_assembler │ └── simple │ ├── CMakeLists.txt │ └── simple.cpp ├── include └── nihstro │ ├── bit_field.h │ ├── float24.h │ ├── inline_assembly.h │ ├── parser_assembly.h │ ├── parser_assembly_private.h │ ├── parser_shbin.h │ ├── preprocessor.h │ ├── shader_binary.h │ ├── shader_bytecode.h │ └── source_tree.h ├── license.txt └── src ├── assembler.cpp ├── disassembler.cpp ├── parser_assembly.cpp ├── parser_assembly ├── common.cpp ├── compare.cpp ├── declaration.cpp ├── floatop.cpp └── flowcontrol.cpp ├── parser_shbin.cpp ├── preprocessor.cpp └── tests ├── parser.cpp └── source_tree_iterator.cpp /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - linux 3 | 4 | language: cpp 5 | sudo: false 6 | 7 | addons: 8 | apt: 9 | sources: 10 | - ubuntu-toolchain-r-test 11 | - kalakris-cmake 12 | - boost-latest 13 | packages: 14 | - gcc-4.9 15 | - g++-4.9 16 | - cmake 17 | - libboost1.55-all-dev 18 | 19 | script: 20 | - export CC=gcc-4.9 21 | - export CXX=g++-4.9 22 | - mkdir build 23 | - cd build 24 | - cmake .. 
25 | - make 26 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | project(nihstro) 4 | 5 | find_package(Boost COMPONENTS program_options unit_test_framework) 6 | 7 | add_definitions(-std=c++11) 8 | 9 | include_directories(include) 10 | 11 | add_executable(nihstro-disassemble src/disassembler.cpp src/parser_shbin.cpp) 12 | install(TARGETS nihstro-disassemble DESTINATION bin) 13 | 14 | # TODO: Re-enable 15 | # add_subdirectory(examples/inline_assembler/simple) 16 | 17 | # TODO: What if program_options was found but not unit_test_framework? 18 | if(Boost_FOUND) 19 | set(PARSER_SRCS src/parser_assembly.cpp 20 | src/preprocessor.cpp 21 | src/parser_assembly/common.cpp 22 | src/parser_assembly/compare.cpp 23 | src/parser_assembly/declaration.cpp 24 | src/parser_assembly/flowcontrol.cpp 25 | src/parser_assembly/floatop.cpp) 26 | 27 | include_directories(${Boost_INCLUDE_DIRS}) 28 | add_executable(nihstro-assemble src/assembler.cpp ${PARSER_SRCS}) 29 | target_link_libraries(nihstro-assemble ${Boost_PROGRAM_OPTIONS_LIBRARY}) 30 | install(TARGETS nihstro-assemble DESTINATION bin) 31 | 32 | # tests 33 | if(Boost_UNIT_TEST_FRAMEWORK_FOUND) 34 | add_executable(test-parser src/tests/parser.cpp ${PARSER_SRCS}) 35 | target_compile_definitions(test-parser PUBLIC -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN) 36 | target_link_libraries(test-parser ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) 37 | enable_testing() 38 | add_test(ParserTests test-parser) 39 | 40 | add_executable(test-source-tree-iterator src/tests/source_tree_iterator.cpp) 41 | target_compile_definitions(test-source-tree-iterator PUBLIC -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN) 42 | target_link_libraries(test-source-tree-iterator ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY}) 43 | add_test(ParserTests test-source-tree-iterator) 44 | else() 45 | message(WARNING "Boost 
testing framework not found => not building assembler tests") 46 | endif() 47 | else() 48 | message(WARNING "Boost not found => not building assembler") 49 | endif() 50 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # nihstro - 3DS shader tools 2 | 3 | [![Travis CI Build Status](https://travis-ci.org/neobrain/nihstro.svg)](https://travis-ci.org/neobrain/nihstro) 4 | 5 | nihstro is a collection of tools for 3DS shaders targeted at homebrew development and/or reverse engineering. Currently, it supports assembling 3DS shader binaries from assembly source code and disassembling shaders from `shbin` files. It also provides C++ interfaces for analyzing and runtime-compiling shaders. 6 | 7 | This project is released under a three-clause BSD license. For details see license.txt. 8 | 9 | ## Components 10 | 11 | nihstro is well-modularized to minimize dependencies for any particular use case. 12 | 13 | ### Shader assembler 14 | nihstro includes a standalone shader assembler for generating [SHBIN](http://3dbrew.org/wiki/SHBIN) files from human-readable shader source code (the syntax of which is called "nihcode"). It is perfectly suitable for homebrew development. Shader source needs to follow the [nihcode specification](docs/nihcode_spec.md). 15 | 16 | Usage: 17 | `nihstro-assemble -o ` 18 | 19 | Reads vertex shader source code from the input file and generates a shader binary from it. 
20 | 21 | Further command line options: 22 | * `-h, --help`: Show command line usage 23 | * `-i, --input`: Explicit switch for specifying the input shader source filename (if omitted, the first switch-less argument is interpreted as the filename) 24 | * `-o, --output`: Output shbin filename (required) 25 | * `-e, --entrypoint`: label name in the input source at which shader execution should start (defaults to "main") 26 | * `-g, --geo_shader`: Compile shader source as a geometry shader 27 | 28 | ### Shader disassembler 29 | 30 | nihstro includes a standalone shader disassembler for disassembling SHBIN files and inspecting meta data (symbol information, constant values, etc). 31 | 32 | Usage: 33 | `nihstro-disassemble ` 34 | 35 | Parses the shader binary header and outputs basic information on the DVLE sections. 36 | 37 | `nihstro-disassemble ` 38 | 39 | Parses the shader binary header and outputs basic information, but also disassembles the shader code using the information in the indexed DVLE (main offset, symbols, etc). 40 | 41 | ### Shader bytecode and SHBIN C++ headers 42 | The header `shader_bytecode.h` defines C++ data structures which describe raw shader binary code, while `shader_binary.h` defines the layout of SHBIN files. This allows for convenient inspection of data in C++ code. Note that these headers are currently not API stable. 43 | 44 | ### Inline assembler (experimental) 45 | The header `inline_assembly.h` provides an experimental mean for runtime generation of PICA200 shaders and SHBIN files within C++ code, so that homebrew authors don't need to ship shaders as precompiled files. While you could use nihstro's actual assembler to allow for runtime shader compilation, the inline assembler may be more convenient and is lighter on dependencies (since it doesn't require Boost to function). However, for obvious reasons it incurs a performance penalty and an increased memory usage compared to offline shader compilation. 
46 | 47 | A simple [example program](examples/inline_assembler/simple) is included to illustrate how to use the inline assembler. 48 | 49 | Note that the inline assembler is highly experimental. It may or may not work for you yet, and its API will change a lot in the future. 50 | 51 | ## Building 52 | 53 | All nihstro components require compiler support for C++11 to work. 54 | 55 | The C++ headers `shader_bytecode.h` and `shader_binary.h` can easily be included in other projects and hence are easy to integrate into any build system (as long as nihstro's directory structure is preserved). 56 | 57 | For the standalone assembler and disassembler, you will also need CMake to generate build files (however it is simple to set up a different build system from scratch if need be), and at least parts of the [Boost libraries](http://www.boost.org/) installed (including Spirit, Fusion, and others). 58 | 59 | ### Installing dependencies on Windows 60 | 61 | You will need to download [CMake](https://cmake.org/download/) and [Boost](http://www.boost.org/users/download/) from their respective download pages. Both projects provide prebuilt binaries. Note that the Boost binaries only work with MSVC, so MinGW users will need to obtain prebuilt binaries from an unofficial source (not recommended) or build Boost from source. 62 | 63 | ### Installing dependencies on Linux 64 | 65 | Chances are your Linux distribution already has CMake and Boost installed. Use your package manager to verify this is the case and to install them if need be. Note that most distributions provide program binaries and development libraries in separate packages; for building nihstro, both are needed. 66 | 67 | ### Installing dependencies on OS X 68 | 69 | On OS X, it is recommended that you use [Homebrew](http://brew.sh/) to install dependencies. 
You'll need to run the following to build nihstro: 70 | 71 | ``` 72 | brew install cmake boost 73 | ``` 74 | 75 | ### Compiling on Linux, OS X, and other Unix-like systems 76 | 77 | To compile the standalone assembler and disassembler, run the following commands from within the nihstro root directory: 78 | 79 | ``` 80 | mkdir -p build 81 | cd build 82 | cmake .. 83 | make 84 | ``` 85 | 86 | This will build the `nihstro-assemble` and `nihstro-disassemble` standalone executables inside the `build` directory. 87 | 88 | ### Compiling on Windows 89 | 90 | Start the [CMake GUI](https://cmake.org/runningcmake/). You will have to provide two paths: The source code location and the build directory. Point the former to the nihstro root directory, and the latter to a subdirectory called `build`. You may need to create this directory manually if it doesn't exist. 91 | 92 | To make sure CMake finds your Boost installation, press the "Add Entry" button and create a new PATH variable with the name `BOOST_ROOT`. Point it towards the root directory of your boost installation. The correct folder should contain a subdirectory called `boost` with lots of further child directories. 93 | 94 | Once you're done, hit the "Configure" button and adjust the compiler settings appropriately (usually, the default settings should be fine). If an error occurs, CMake might have trouble locating your Boost installation, and you should double-check that you installed the correct set of Boost libraries and that you set up the `BOOST_ROOT` variable correctly. 95 | 96 | If all went fine, click "Generate" and use the generated build files in the `build` subdirectory to build nihstro. In particular if you're using MSVC, open the file `build/nihstro.sln` in Visual Studio. 97 | 98 | ## Contributing 99 | I welcome any contributions! Just create a GitHub fork and submit your changes back via pull requests. 100 | 101 | ## Kudos 102 | A big "thank you!" 
to everyone who contributed to the information on 3dbrew, which has proven amazingly useful for my 3DS related projects. Another shout-out goes to smealum's aemstro, which served as a great reference when debugging nihstro. 103 | -------------------------------------------------------------------------------- /docs/instruction_set.md: -------------------------------------------------------------------------------- 1 | # Shader Instruction Set 2 | 3 | This page gives an overview over the instruction set supported by nihstro. Note that there is a similar reference list on [3dbrew](http://3dbrew.org/wiki/Shader_Instruction_Set), which documents the actual implementation on hardware though. nihstro seeks to abstract away annoying details like the fact that there are 3 different CALL instructions, and instead provides convenience shortcuts where possible without giving up flexibility. 4 | 5 | # Table of Contents 6 | 7 | - [Shader Instruction Set](#shader-instruction-set) 8 | - [Arithmetic Instructions](#arithmetic-instructions) 9 | - [Flow Control Instructions](#flow-control-instructions) 10 | - [Special Purpose Instructions](#special-purpose-instructions) 11 | 12 | ## Arithmetic Instructions 13 | Most arithmetic instructions take a destination operand and one or more source operands. Source operands may use any kind of swizzle mask, while destination operands may not use reordering or duplicating swizzle masks. Below you will find a short operation description for each instruction, e.g. `dest[i] = src[i]`, which means that the `i`-th source component (as specified by the swizzle mask) will be assigned to the `i`-th destination component (as specified by the swizzle mask), with `i` ranging from 1 to the number of swizzle mask components. Components not listed in the destination swizzle mask hence will not be written. 14 | 15 | Static indexing (i.e. indexing with a constant, not to be confused with the above notation) may be done for both operand types. 
Source operands additionally support *dynamic indexing*, where the index depends on one of the address registers `a0`/`a1` or on the loop counter `lcnt`. Examples: 16 | * static indexing: `c0[20]` 17 | * dynamic indexing: `c0[2+a0]` 18 | 19 | #### mov: Copy floating point value 20 | Syntax: `mov dest_operand, src_operand` 21 | 22 | Operation: `dest[i] = src[i]` 23 | 24 | Restrictions: 25 | * `src` and `dest` must have the same number of components 26 | 27 | #### add: Per-component floating point sum 28 | Syntax: `add dest_operand, src1_operand, src2_operand` 29 | 30 | Operation: `dest[i] = src1[i] + src2[i]` 31 | 32 | Restrictions: 33 | * `src1`, `src2`, and `dest` must have the same number of components 34 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 35 | 36 | Notes: 37 | * subtraction can be performed using negation: `add r0, c0, -c1` 38 | * when chaining an addition and a multiplication, consider using `mad` instead 39 | 40 | #### mul: Per-component floating point multiplication 41 | Syntax: `mul dest_operand, src1_operand, src2_operand` 42 | 43 | Operation: `dest[i] = src1[i] * src2[i]` 44 | 45 | Restrictions: 46 | * `src1`, `src2`, and `dest` must have the same number of components 47 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 48 | 49 | Notes: 50 | * division can be performed by computing the reciprocal of src2 and multiplying the result: `rcp r0, c1; mul r0, c0, r0` 51 | * when chaining an addition and a multiplication, consider using `mad` instead 52 | 53 | #### mad: Fused multiply-add of three floating point numbers 54 | Syntax: `mad dest_operand, src1_operand, src2_operand, src3_operand` 55 | 56 | Operation: `dest[i] = src1[i] * src2[i] + src3[i]` 57 | 58 | Restrictions: 59 | * `src1`, `src2`, `src3`, and `dest` must have the same number of components 60 | * not more than two source operands may be float uniform registers 61 | * no 
dynamic indexing may be performed on any of the source operands. 62 | 63 | Notes: 64 | * when dynamic indexing is not avoidable, use `add` and `mul` instead 65 | * not supported currently 66 | 67 | #### max: Copy the greater of two floating point numbers 68 | Syntax: `max dest_operand, src1_operand, src2_operand` 69 | 70 | Operation: `dest[i] = max(src1[i], src2[i])` 71 | 72 | Restrictions: 73 | * `src1`, `src2`, and `dest` must have the same number of components 74 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 75 | 76 | #### min: Copy the smaller of two floating point numbers 77 | Syntax: `min dest_operand, src1_operand, src2_operand` 78 | 79 | Operation: `dest[i] = min(src1[i], src2[i])` 80 | 81 | Restrictions: 82 | * `src1`, `src2`, and `dest` must have the same number of components 83 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 84 | 85 | #### flr: Floating point floor 86 | Syntax: `flr dest_operand, src_operand` 87 | 88 | Operation: `dest[i] = floor(src[i])` 89 | 90 | Restrictions: 91 | * `src` and `dest` must have the same number of components 92 | 93 | #### rcp: Floating point reciprocal 94 | Syntax: `rcp dest_operand, src_operand` 95 | 96 | Operation: `dest[i] = 1 / src[i]` 97 | 98 | Restrictions: 99 | * `src` and `dest` must have the same number of components 100 | 101 | #### rsq: Floating point reciprocal square root 102 | Syntax: `rsq dest_operand, src_operand` 103 | 104 | Operation: `dest[i] = 1 / sqrt(src[i])` 105 | 106 | Restrictions: 107 | * `src` and `dest` must have the same number of components 108 | 109 | #### exp: Floating point base-2 exponential 110 | Syntax: `exp dest_operand, src_operand` 111 | 112 | Operation: `dest[i] = exp2(src[i])` 113 | 114 | Restrictions: 115 | * `src` and `dest` must have the same number of components 116 | 117 | #### log: Floating point base-2 logarithm 118 | Syntax: `log dest_operand, src_operand` 
119 | 120 | Operation: `dest[i] = log2(src[i])` 121 | 122 | Restrictions: 123 | * `src` and `dest` must have the same number of components 124 | 125 | #### dp3: Floating point 3-component dot-product 126 | Syntax: `dp3 dest_operand, src1_operand, src2_operand` 127 | 128 | Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]` 129 | 130 | Restrictions: 131 | * `src1`, `src2`, and `dest` must have the same number of components 132 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 133 | 134 | #### dp4: Floating point 4-component dot-product 135 | Syntax: `dp4 dest_operand, src1_operand, src2_operand` 136 | 137 | Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]+src1[3]*src2[3]` 138 | 139 | Restrictions: 140 | * `src1`, `src2`, and `dest` must have the same number of components 141 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 142 | 143 | #### dph: Floating point homogeneous dot-product 144 | Syntax: `dph dest_operand, src1_operand, src2_operand` 145 | 146 | Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]+src2[3]` 147 | 148 | Restrictions: 149 | * `src1`, `src2`, and `dest` must have the same number of components 150 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing. 151 | 152 | #### sge: Set to one if greater or equal 153 | Syntax: `sge dest_operand, src1_operand, src2_operand` 154 | 155 | Operation: `dest[i] = (src1[i] >= src2[i]) ? 1.0 : 0.0` 156 | 157 | Restrictions: 158 | * `src1`, `src2`, and `dest` must have the same number of components 159 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 160 | 161 | #### slt: Set to one if (strictly) less 162 | Syntax: `slt dest_operand, src1_operand, src2_operand` 163 | 164 | Operation: `dest[i] = (src1[i] < src2[i]) ? 
1.0 : 0.0` 165 | 166 | Restrictions: 167 | * `src1`, `src2`, and `dest` must have the same number of components 168 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 169 | 170 | #### mova: Move to address register 171 | Syntax: `mova src_operand` 172 | 173 | Operation: 174 | 175 | a0 = src.x 176 | a1 = src.y 177 | 178 | Restrictions: 179 | * src_operand must be a two-component vector. 180 | 181 | Notes: 182 | * not supported currently 183 | 184 | ## Flow Control Instructions 185 | These allow for non-linear code execution, e.g. by conditionally or repeatedly running code. 186 | 187 | Some flow control instruction take a "condition" parameter. A condition is either 188 | * a boolean uniform or 189 | * an expression consisting of one or two conditional code components, combined via `&&` ("and") or `||` ("or"), and optionally negated. Examples: `cc.x`, `cc.y && !cc.x` 190 | 191 | #### cmp: Compare two floating point numbers 192 | 193 | Syntax: `cmp src1_operand, src2_operand, op1, op2` 194 | 195 | `op1` and `op2` may be any of the strings `==` (equal), `!=` (not equal), `<` (less than), `<=` (less than or equal to), `>` (greater than), and `>=` (greater than or equal to). 196 | 197 | Operation: 198 | 199 | cc.x = (src1[0] op1 src2[0]) 200 | cc.y = (src1[1] op2 src2[1]) 201 | 202 | Restrictions: 203 | * `src1` and `src2` must be two-component vectors 204 | * it is not possible to set `cc.x` without also setting `cc.y` 205 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing 206 | 207 | Notes: 208 | * this instruction is used to set conditional codes, which can be used as conditions for `if`/`jmp`/`call`/`break`. 209 | 210 | #### if: Conditional code execution 211 | Syntax: `if condition` 212 | 213 | Operation: 214 | If `condition` is true, conditionally executes the code between itself and the corresponding `else` or `endif` pseudo-instruction. 
Otherwise, executes the code in the `else` branch, if one is given (otherwise, skips the branch body and continues after the `endif` statement). 215 | 216 | Restrictions: 217 | * not more than one `else` branch may be specified (`else if` syntax is not supported) 218 | 219 | Notes: 220 | * all `if` branches must be closed explicitly using `endif` 221 | * jumping out of a branch body may result in undefined behavior 222 | 223 | Example: 224 | 225 | if cc.x && !cc.y 226 | // do stuff 227 | else 228 | if b0 229 | // do other stuff 230 | endif 231 | endif 232 | 233 | #### loop: Repeat code execution 234 | Syntax: `loop int_uniform` 235 | 236 | Operation: 237 | Initialize `lcnt` to `int_uniform.y`, then process code between `loop` and `endloop` for `int_uniform.x+1` iterations in total. After each iteration, `lcnt` is incremented by `int_uniform.z`. 238 | 239 | Restrictions: 240 | * no swizzle mask may be applied on the given uniform 241 | * there is no direct way of looping zero times (the easiest way is to use `break` with an extra boolean uniform) 242 | 243 | Notes: 244 | * `lcnt` can be used to dynamically index arrays, e.g. to implement vertex lighting with multiple light sources 245 | 246 | #### break: Break out of current loop 247 | Syntax: `break condition` 248 | 249 | Operation: 250 | If `condition` is true, break out of the current loop. 
251 | 252 | Restrictions: 253 | * jumping out of a branch body may result in undefined behavior 254 | 255 | #### jmp: Jump to code address 256 | Syntax: `jmp target_label if condition` 257 | 258 | Restrictions: 259 | * jumping out of or into branch bodies or loops may result in undefined behavior 260 | * there is no way to force a jump without specifying a condition 261 | 262 | Notes: 263 | * if you need to automatically return from a function, use `call` instead 264 | 265 | Example: 266 | 267 | main: 268 | jmp my_helper_code if b0 269 | // if not b0, do other stuff here 270 | nop 271 | end 272 | 273 | my_helper_code: 274 | // do stuff 275 | nop 276 | end 277 | 278 | #### call: Jump to code address and return to caller 279 | Possible syntaxes: 280 | `call target_label until return_label if condition` 281 | `call target_label until return_label` 282 | 283 | Operation: 284 | If `condition` is true (or none is given), jumps to `target_label` and processes shader code there until `return_label` is hit, at which point code execution jumps back to the caller. 285 | 286 | Restrictions: 287 | * jumping out of or into branch bodies or loops may result in undefined behavior 288 | 289 | Notes: 290 | * if you don't need to automatically return from a function, use `jmp` instead 291 | 292 | Example: 293 | 294 | main: 295 | call my_helper_code until end_helper_code 296 | nop 297 | end 298 | 299 | my_helper_code: 300 | // do stuff here 301 | nop 302 | end_helper_code: 303 | 304 | ## Special Purpose Instructions 305 | #### nop: No operation 306 | Syntax: `nop` 307 | 308 | Notes: 309 | * This may be necessary before using `end` to make sure all pending write operations have been completed 310 | 311 | #### end: Finish shader execution 312 | Syntax: `end` 313 | 314 | Operation: 315 | Stops shader execution. 
316 | -------------------------------------------------------------------------------- /docs/nihcode_spec.md: -------------------------------------------------------------------------------- 1 | # nihcode Specification 2 | 3 | Version 0.1. 4 | 5 | This page seeks to be a formal-ish specification of the input assembly language *nihcode* used by the nihstro shader assembler. 6 | 7 | ## Version information 8 | This document is intended to give developers an idea of how things are expected to work. Please file issue reports for any deviations in nihstro's behavior from this specification that you find. Similarly, any ambiguities in the specification will be corrected if reported, too. 9 | 10 | ## Structure 11 | nihcode is a sequence of statements, each of which must be put onto a separate line. There are five types of statements: 12 | * version information statements, 13 | * include statements, 14 | * alias declaration statements, 15 | * label declaration statements, and 16 | * instruction statements, 17 | each of which is described in its own section below. Additionally, C++-like comments may be inserted at any point and are started using the character sequences `//`, `#`, or `;`. Comments span the rest of the line after any of these characters. Any statement must be written on its own line. 
18 | 19 | A pseudo-code example of nihcode looks like this: 20 | 21 | // First example shader 22 | .version 0.1 // version information 23 | 24 | .alias inpos v0 // alias declaration 25 | .alias intex v1 // alias declaration 26 | .alias pos o0 as position // alias declaration 27 | .alias pos o1.xy as texcoord0 // alias declaration 28 | 29 | .include "utils.h" // include utility functionality 30 | 31 | main: // label declaration 32 | mov o0, v0 // instruction 33 | mov o1.xy, v1.xy // instruction 34 | nop // instruction 35 | end // instruction 36 | 37 | 38 | ## Shader Registers, builtin Identifiers, Swizzle Masks 39 | A shader can access a number of different registers with different purposes. *Input registers* expose the raw input vertex attribute data, while the output vertex attributes used for rendering is written to *output registers*. External programs can pass parameters to the shader by setting *uniforms*. Additionally, a number of *temporary registers* are free for any use. There are also special-purpose registers, namely the *address registers* and the *conditional code register*. 40 | 41 | Registers are being referred to by using *identifiers*. There is a number of builtin identifiers, each of which refers to one register. Note that most registers are vectors, i.e. they comprise multiple components, which are accessed using swizzle masks. 
42 | * `v0`-`v15`: Input registers (read-only), four-component vectors 43 | * `r0`-`r15`: Temporary registers (read-write), four-component vectors 44 | * `c0`-`c95`: Float uniforms (read-only), four-component vectors 45 | * `i0`-`i3`: Integer uniforms (read-only), four-component vectors 46 | * `b0`-`b15`: Boolean uniforms (read-only), scalar 47 | * `o0`-`o15`: Output registers (write-only), four-component vectors 48 | * `a0, a1, aL`: Address registers (used with MOVA and dynamic indexing), scalar 49 | * `cc`: Conditional code register (used with CMP and flow-control instructions), two-component vector 50 | 51 | For better readability, one can also define new identifiers, as explained below. Identifiers may only use a restricted set of names including lower- or uppercase letters (a-z, A-Z), underscores, and decimal digits (the latter two of which may not be used as the first character of the name). Additionally, an identifier may be followed by a swizzle mask, separated by the character `.` (e.g. `texcoord.zyx`). Swizzle masks allow for reordering, duplicating, and removing of one or more vector components of the identified register (without actually modifying that register). 52 | 53 | When used with certain instructions, identifiers may be mentioned along with a sign, an array index, and/or a swizzle mask. Constructs like this are called *expressions*. 54 | 55 | The following names are reserved identifiers, and may not be used during declarations: 56 | * Any names starting with a `gl_` prefix 57 | * Any names starting with a `dmp_` prefix 58 | * Any names starting with an underscore prefix 59 | * Any of the instruction opcodes mentioned below may not be used for the identifier name 60 | 61 | ## Aliases 62 | ### Plain Aliases (any register) 63 | `.alias <new_identifier> <existing_identifier>{.<swizzle_mask>}` 64 | 65 | Declares a new identifier called `new_identifier` which will refer to the same register that `existing_identifier` refers to, applying a swizzle_mask if specified. 
All subsequent uses of `new_identifier` are equivalent to using `existing_identifier.swizzle_mask`. Aliases of any register type may be created, however it should be noted that using output registers requires explicit assignment of an output semantic (see below). 66 | 67 | E.g. `.alias input_texture v2.xy` 68 | 69 | ### Alias with Assignment of a Semantic (output registers) 70 | `.alias <new_identifier> <existing_identifier>{.swizzle_mask} as <semantic>` 71 | 72 | Declares an alias of `existing_identifier` with the name `new_identifier` and assigns the given semantic to the corresponding output register. An output semantic needs to be given to describe how the output vertex attribute is intended to be used after shader execution. `semantic` may be any of the strings `position`, `quaternion`, `color`, `texcoord0`, `texcoord1`, `texcoord2`, and `view`. If not all output register components are being written to, a swizzle mask should be used to denote the "active" components. Note that this swizzle mask may not reorder any components. 73 | 74 | E.g. `.alias output_texcoord o1.xy as texcoord0` 75 | 76 | ### Constant Declarations (uniform registers) 77 | scalar constants: `.alias <new_identifier> <existing_identifier> as <value>` 78 | 79 | vector constants: `.alias <new_identifier> <existing_identifier> as (<x>, {<y>, {<z>, <w>}})` 80 | 81 | Declares an alias of `existing_identifier` with the name `new_identifier` and assigns the given default value to it. Default values are parsed by the ctrulib API and automatically applied when enabling a shader. The number of components in the given constant must match the number of components in the specified register. 82 | 83 | E.g. `.alias my_const c4 as (0.1, 3.2, -3.14, 0.0)` 84 | 85 | ## Label Declarations 86 | `<labelname>:` 87 | 88 | Declares a new label with the name `labelname` at the given source line, which can be used in flow control operations. Label names follow the same conventions as identifiers and may not share the same name with an existing identifier. 
89 | 90 | ## Instruction Statements 91 | Writes the given opcode according to the given arguments to the shader binary. There are a lot of instructions, and each of them uses one of the following formats: 92 | 93 | Trivial operations: 94 | `<opcode>` 95 | Used by `else`, `emit`, `end`, `endif`, `endloop`, and `nop`. 96 | 97 | Arithmetic operations: 98 | `<opcode> <expression1>{, <expression2>{, <expression3>{, <expression4>}}}` 99 | Used by `add`, `dp3`, `dp4`, `dph`, `ex2`, `flr`, `lg2`, `mad`, `max`, `min`, `mov`, `mova`, `mul`, `rcp`, `rsq`, `sge` and `slt`. The number of required expressions as well as their meaning depends on the opcode. 100 | 101 | E.g. `mul o3.xyz c4.xyz v0.xyz` 102 | 103 | Compare operation: 104 | `cmp <expression1>, <expression2>, <op_x>, <op_y>` 105 | Used exclusively by `cmp`. `expression1` and `expression2` must evaluate to two-component float vectors. `op_x` and `op_y` specify comparison operations for the x and y components of the given expressions, respectively. They may be `==`, `!=`, `<`, `<=`, `>` or `>=`. 106 | 107 | E.g. `cmp c0.xy, i2.xy, <=, ==` 108 | 109 | Flow control operations: 110 | `<opcode> <condition>` 111 | Used by `break`, `if` and `loop`. 112 | 113 | `<opcode> {<target_label>} {until <return_label>} {if <condition>}` 114 | Used by `jmp` and `call`. 115 | 116 | `condition` may either be an identifier of a boolean uniform or a conditional expression. Examples for conditional expressions are `cc.x`, `!cc.x`, `!cc.xy`, `cc.x && !cc.y`, and `cc.x || cc.y`, where `{!}cc.xy` is equivalent to `{!}cc.x && {!}cc.y`. `target_label` and `return_label` must be label identifiers. Their meaning depends on the given opcode. 117 | 118 | For a full instruction set reference, go to [instruction set reference](instruction_set.md). You may also want to refer to [3dbrew](http://3dbrew.org/wiki/Shader_Instruction_Set) for low-level documentation on each opcode. It is suggested that you take a look at the nihstro examples to get a better picture of how to apply that information. 
119 | 120 | ## Include Statements 121 | `.include "filename"` 122 | 123 | Replaces the `.include` line with the contents of the given file. The filename is taken to be relative to the file it was included from. 124 | 125 | ## Version Information 126 | `.version number` 127 | 128 | This statement is a hint for the compiler to see which language specification the shader was written against. It may be used to toggle a compatibility assembling mode. 129 | 130 | E.g. `.version 0.1` 131 | -------------------------------------------------------------------------------- /examples/assembler/cube/Makefile: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------- 2 | .SUFFIXES: 3 | #--------------------------------------------------------------------------------- 4 | 5 | ifeq ($(strip $(DEVKITARM)),) 6 | $(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") 7 | endif 8 | 9 | ifeq ($(strip $(NIHSTRO)),) 10 | $(error "Please set NIHSTRO in your environment. export NIHSTRO=nihstro-assemble") 11 | endif 12 | 13 | TOPDIR ?= $(CURDIR) 14 | include $(DEVKITARM)/3ds_rules 15 | 16 | #--------------------------------------------------------------------------------- 17 | # TARGET is the name of the output 18 | # BUILD is the directory where object files & intermediate files will be placed 19 | # SOURCES is a list of directories containing source code 20 | # DATA is a list of directories containing data files 21 | # INCLUDES is a list of directories containing header files 22 | # 23 | # NO_SMDH: if set to anything, no SMDH file is generated. 
24 | # APP_TITLE is the name of the app stored in the SMDH file (Optional) 25 | # APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) 26 | # APP_AUTHOR is the author of the app stored in the SMDH file (Optional) 27 | # ICON is the filename of the icon (.png), relative to the project folder. 28 | # If not set, it attempts to use one of the following (in this order): 29 | # - .png 30 | # - icon.png 31 | # - /default_icon.png 32 | #--------------------------------------------------------------------------------- 33 | TARGET := $(notdir $(CURDIR)) 34 | BUILD := build 35 | SOURCES := source 36 | DATA := data 37 | INCLUDES := include 38 | 39 | #--------------------------------------------------------------------------------- 40 | # options for code generation 41 | #--------------------------------------------------------------------------------- 42 | ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard 43 | 44 | CFLAGS := -g -Wall -O2 -mword-relocations \ 45 | -fomit-frame-pointer -ffast-math \ 46 | $(ARCH) 47 | 48 | CFLAGS += $(INCLUDE) -DARM11 -D_3DS 49 | 50 | CXXFLAGS := $(CFLAGS) -fno-rtti -std=gnu++11 51 | 52 | ASFLAGS := -g $(ARCH) 53 | LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) 54 | 55 | LIBS := -lctru -lm 56 | 57 | #--------------------------------------------------------------------------------- 58 | # list of directories containing libraries, this must be the top level containing 59 | # include and lib 60 | #--------------------------------------------------------------------------------- 61 | LIBDIRS := $(CTRULIB) 62 | 63 | 64 | #--------------------------------------------------------------------------------- 65 | # no real need to edit anything past this point unless you need to add additional 66 | # rules for different file extensions 67 | #--------------------------------------------------------------------------------- 68 | ifneq ($(BUILD),$(notdir $(CURDIR))) 69 | 
#--------------------------------------------------------------------------------- 70 | 71 | export OUTPUT := $(CURDIR)/$(TARGET) 72 | export TOPDIR := $(CURDIR) 73 | 74 | export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ 75 | $(foreach dir,$(DATA),$(CURDIR)/$(dir)) 76 | 77 | export DEPSDIR := $(CURDIR)/$(BUILD) 78 | 79 | CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) 80 | CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) 81 | SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) 82 | BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) 83 | 84 | #--------------------------------------------------------------------------------- 85 | # use CXX for linking C++ projects, CC for standard C 86 | #--------------------------------------------------------------------------------- 87 | ifeq ($(strip $(CPPFILES)),) 88 | #--------------------------------------------------------------------------------- 89 | export LD := $(CC) 90 | #--------------------------------------------------------------------------------- 91 | else 92 | #--------------------------------------------------------------------------------- 93 | export LD := $(CXX) 94 | #--------------------------------------------------------------------------------- 95 | endif 96 | #--------------------------------------------------------------------------------- 97 | 98 | export OFILES := $(addsuffix .o,$(BINFILES)) \ 99 | $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) 100 | 101 | export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ 102 | $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ 103 | -I$(CURDIR)/$(BUILD) 104 | 105 | export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) 106 | 107 | ifeq ($(strip $(ICON)),) 108 | icons := $(wildcard *.png) 109 | ifneq (,$(findstring $(TARGET).png,$(icons))) 110 | export APP_ICON := $(TOPDIR)/$(TARGET).png 111 | else 112 | ifneq (,$(findstring icon.png,$(icons))) 113 | export 
APP_ICON := $(TOPDIR)/icon.png 114 | endif 115 | endif 116 | else 117 | export APP_ICON := $(TOPDIR)/$(ICON) 118 | endif 119 | 120 | .PHONY: $(BUILD) clean all 121 | 122 | #--------------------------------------------------------------------------------- 123 | all: $(BUILD) 124 | 125 | $(BUILD): 126 | @[ -d $@ ] || mkdir -p $@ 127 | @make --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile 128 | 129 | #--------------------------------------------------------------------------------- 130 | clean: 131 | @echo clean ... 132 | @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf test.vsh.shbin 133 | 134 | 135 | #--------------------------------------------------------------------------------- 136 | else 137 | 138 | DEPENDS := $(OFILES:.o=.d) 139 | 140 | #--------------------------------------------------------------------------------- 141 | # main targets 142 | #--------------------------------------------------------------------------------- 143 | ifeq ($(strip $(NO_SMDH)),) 144 | .PHONY: all 145 | all : $(OUTPUT).3dsx $(OUTPUT).smdh 146 | endif 147 | $(OUTPUT).3dsx : $(OUTPUT).elf 148 | $(OUTPUT).elf : $(OFILES) 149 | 150 | #--------------------------------------------------------------------------------- 151 | # you need a rule like this for each extension you use as binary data 152 | #--------------------------------------------------------------------------------- 153 | %.bin.o : %.bin 154 | #--------------------------------------------------------------------------------- 155 | @echo $(notdir $<) 156 | @$(bin2o) 157 | 158 | # WARNING: This is not the right way to do this! TODO: Do it right! 
159 | #--------------------------------------------------------------------------------- 160 | %.vsh.o : %.vsh 161 | #--------------------------------------------------------------------------------- 162 | @echo $(notdir $<) 163 | @$(NIHSTRO)/nihstro-assemble --output ../$(notdir $<).shbin $< 164 | @bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@ 165 | @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h 166 | @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . _)`.h 167 | @echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h 168 | 169 | -include $(DEPENDS) 170 | 171 | #--------------------------------------------------------------------------------------- 172 | endif 173 | #--------------------------------------------------------------------------------------- 174 | -------------------------------------------------------------------------------- /examples/assembler/cube/README.md: -------------------------------------------------------------------------------- 1 | cube example 2 | ============ 3 | 4 | Simple port of ctrulib's gpu example to nihstro shaders. The C program code is mostly unchanged from the original, however the example shader in the data subdirectory should give you a good idea of the basic nihcode shader syntax. 5 | 6 | Before trying to compile, make sure your NIHSTRO environment variable points to the directory nihstro-assemble resides in. Additionally, ctrulib in revision 1f52ac344d or similar is required, plus some patches to implement proper uniform setters. 
7 | -------------------------------------------------------------------------------- /examples/assembler/cube/data/test.vsh: -------------------------------------------------------------------------------- 1 | // Vertex shader of the cube example: transforms the position by modelview and projection, forwards the texcoord and applies a simple directional light. Setup constants: myconst = (1.0, 0.0, 0.5, 1.0), so .y supplies 0.0 and .w supplies 1.0 below 2 | .alias myconst c32 as (1.0, 0.0, 0.5, 1.0) 3 | 4 | // setup output map 5 | .alias outpos o0 as position 6 | .alias outcol o1 as color 7 | .alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently 8 | .alias outtex1 o3.xyzw as texcoord1 9 | .alias outtex2 o4.xyzw as texcoord2 10 | 11 | // setup uniform map; the C side looks these names up via shaderInstanceGetUniformLocation (see gs.c and main.c) 12 | .alias projection c0 // c0-c3, rows indexed as projection[0..3] 13 | .alias modelview c4 // c4-c7, rows indexed as modelview[0..3] 14 | .alias lightDirection c8 15 | .alias lightAmbient c9 16 | 17 | main: // r1 = (v0.xyz, 1.0); main.c binds v0 = position, v1 = texcoord, v2 = normal 18 | mov r1.xyz, v0.xyz 19 | mov r1.w, myconst.w 20 | 21 | mdvl: // tempreg (r0) = modelview * in.pos (r1); only rows 0-2 are evaluated, r0.w is set to 1.0 from myconst.w 22 | dp4 r0.x, modelview[0], r1 23 | dp4 r0.y, modelview[1], r1 24 | dp4 r0.z, modelview[2], r1 25 | mov r0.w, myconst.w 26 | 27 | proj: // result.pos (outpos) = projection * tempreg (r0), one dp4 per matrix row 28 | dp4 outpos.x, projection[0], r0 29 | dp4 outpos.y, projection[1], r0 30 | dp4 outpos.z, projection[2], r0 31 | dp4 outpos.w, projection[3], r0 32 | 33 | tex: // result.texcoord0 = in.texcoord (v1); outtex1 and outtex2 receive the constant myconst.yyyw = (0.0, 0.0, 0.0, 1.0) 34 | mov outtex0, v1 35 | mov outtex1, myconst.yyyw 36 | mov outtex2, myconst.yyyw 37 | 38 | col: // Hacky lighting: color.xyz = ambient.xyz + max(dot(lightDirection, normal), 0.0) * ambient.www; color.w = 1.0 (0.0 and 1.0 come from myconst.y and myconst.w) 39 | dp3 r0.xyz, lightDirection.xyz, v2.xyz 40 | max r0.xyz, myconst.yyy, r0.xyz 41 | mul r0.xyz, lightAmbient.www, r0.xyz 42 | add outcol.xyz, lightAmbient.xyz, r0.xyz 43 | mov outcol.w, myconst.w 44 | 45 | nop 46 | end 47 | 48 | endmain: 49 | -------------------------------------------------------------------------------- /examples/assembler/cube/data/texture.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neobrain/nihstro/f4d8659decbfe5d234f04134b5002b82dc515a44/examples/assembler/cube/data/texture.bin 
-------------------------------------------------------------------------------- /examples/assembler/cube/source/_gs.s: -------------------------------------------------------------------------------- 1 | .section ".text" 2 | .arm 3 | .align 4 4 | .global _vboMemcpy50 5 | 6 | # r0 : dst 7 | # r1 : src 8 | # fixed size 0x50 9 | _vboMemcpy50: 10 | push {r4-r11} 11 | ldmia r1!, {r2-r12} 12 | stmia r0!, {r2-r12} 13 | ldmia r1!, {r2-r12} 14 | stmia r0!, {r2-r12} 15 | pop {r4-r11} 16 | bx lr 17 | -------------------------------------------------------------------------------- /examples/assembler/cube/source/gs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include <3ds.h> 5 | 6 | #include "gs.h" 7 | #include "math.h" 8 | 9 | #define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4) 10 | 11 | static void gsInitMatrixStack(); 12 | 13 | Handle linearAllocMutex; 14 | 15 | static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]; 16 | 17 | typedef struct 18 | { 19 | u32 offset; 20 | mtx44 data; 21 | }bufferMatrix_s; 22 | 23 | bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE]; 24 | int bufferMatrixListLength; 25 | 26 | //---------------------- 27 | // GS SYSTEM STUFF 28 | //---------------------- 29 | 30 | void initBufferMatrixList() 31 | { 32 | bufferMatrixListLength=0; 33 | } 34 | 35 | void gsInit(shaderProgram_s* shader) 36 | { 37 | gsInitMatrixStack(); 38 | initBufferMatrixList(); 39 | svcCreateMutex(&linearAllocMutex, false); 40 | if(shader) 41 | { 42 | gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection"); 43 | gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview"); 44 | shaderProgramUse(shader); 45 | } 46 | } 47 | 48 | void gsExit(void) 49 | { 50 | svcCloseHandle(linearAllocMutex); 51 | } 52 | 53 | void gsStartFrame(void) 54 | { 55 | GPUCMD_SetBufferOffset(0); 56 | initBufferMatrixList(); 57 | } 58 | 59 | void* 
gsLinearAlloc(size_t size) 60 | { 61 | void* ret=NULL; 62 | 63 | svcWaitSynchronization(linearAllocMutex, U64_MAX); 64 | ret=linearAlloc(size); 65 | svcReleaseMutex(linearAllocMutex); 66 | 67 | return ret; 68 | } 69 | 70 | void gsLinearFree(void* mem) 71 | { 72 | svcWaitSynchronization(linearAllocMutex, U64_MAX); 73 | linearFree(mem); 74 | svcReleaseMutex(linearAllocMutex); 75 | } 76 | 77 | //---------------------- 78 | // MATRIX STACK STUFF 79 | //---------------------- 80 | 81 | static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE]; 82 | static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04}; 83 | static u8 gsMatrixStackOffsets[GS_MATRIXTYPES]; 84 | static bool gsMatrixStackUpdated[GS_MATRIXTYPES]; 85 | static GS_MATRIX gsCurrentMatrixType; 86 | 87 | static void gsInitMatrixStack() 88 | { 89 | int i; 90 | for(i=0; i=GS_MATRIXTYPES)return NULL; 102 | 103 | return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]]; 104 | } 105 | 106 | int gsLoadMatrix(GS_MATRIX m, float* data) 107 | { 108 | if(m<0 || m>=GS_MATRIXTYPES || !data)return -1; 109 | 110 | memcpy(gsGetMatrix(m), data, sizeof(mtx44)); 111 | 112 | gsMatrixStackUpdated[m]=true; 113 | 114 | return 0; 115 | } 116 | 117 | int gsPushMatrix() 118 | { 119 | const GS_MATRIX m=gsCurrentMatrixType; 120 | if(m<0 || m>=GS_MATRIXTYPES)return -1; 121 | if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1; 122 | 123 | float* cur=gsGetMatrix(m); 124 | gsMatrixStackOffsets[m]++; 125 | memcpy(gsGetMatrix(m), cur, sizeof(mtx44)); 126 | 127 | return 0; 128 | } 129 | 130 | int gsPopMatrix() 131 | { 132 | const GS_MATRIX m=gsCurrentMatrixType; 133 | if(m<0 || m>=GS_MATRIXTYPES)return -1; 134 | if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1; 135 | 136 | gsMatrixStackOffsets[m]--; 137 | 138 | gsMatrixStackUpdated[m]=true; 139 | 140 | return 0; 141 | } 142 | 143 | int gsMatrixMode(GS_MATRIX m) 144 | { 145 | if(m<0 || 
m>=GS_MATRIXTYPES)return -1; 146 | 147 | gsCurrentMatrixType=m; 148 | 149 | return 0; 150 | } 151 | 152 | //------------------------ 153 | // MATRIX TRANSFORM STUFF 154 | //------------------------ 155 | 156 | int gsMultMatrix(float* data) 157 | { 158 | if(!data)return -1; 159 | 160 | mtx44 tmp; 161 | multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp); 162 | memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44)); 163 | 164 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 165 | 166 | return 0; 167 | } 168 | 169 | void gsLoadIdentity() 170 | { 171 | loadIdentity44(gsGetMatrix(gsCurrentMatrixType)); 172 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 173 | } 174 | 175 | void gsProjectionMatrix(float fovy, float aspect, float near, float far) 176 | { 177 | initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far); 178 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 179 | } 180 | 181 | void gsRotateX(float x) 182 | { 183 | rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false); 184 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 185 | } 186 | 187 | void gsRotateY(float y) 188 | { 189 | rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false); 190 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 191 | } 192 | 193 | void gsRotateZ(float z) 194 | { 195 | rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false); 196 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 197 | } 198 | 199 | void gsScale(float x, float y, float z) 200 | { 201 | scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); 202 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 203 | } 204 | 205 | void gsTranslate(float x, float y, float z) 206 | { 207 | translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); 208 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 209 | } 210 | 211 | //---------------------- 212 | // MATRIX RENDER STUFF 213 | //---------------------- 214 | 215 | static void gsSetUniformMatrix(u32 startreg, float* m) 216 | { 
217 | float param[16]; 218 | 219 | param[0x0]=m[3]; //w 220 | param[0x1]=m[2]; //z 221 | param[0x2]=m[1]; //y 222 | param[0x3]=m[0]; //x 223 | 224 | param[0x4]=m[7]; 225 | param[0x5]=m[6]; 226 | param[0x6]=m[5]; 227 | param[0x7]=m[4]; 228 | 229 | param[0x8]=m[11]; 230 | param[0x9]=m[10]; 231 | param[0xa]=m[9]; 232 | param[0xb]=m[8]; 233 | 234 | param[0xc]=m[15]; 235 | param[0xd]=m[14]; 236 | param[0xe]=m[13]; 237 | param[0xf]=m[12]; 238 | 239 | GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4); 240 | } 241 | 242 | static int gsUpdateTransformation() 243 | { 244 | GS_MATRIX m; 245 | for(m=0; mdata=NULL; 291 | vbo->currentSize=0; 292 | vbo->maxSize=0; 293 | vbo->commands=NULL; 294 | vbo->commandsSize=0; 295 | 296 | return 0; 297 | } 298 | 299 | int gsVboCreate(gsVbo_s* vbo, u32 size) 300 | { 301 | if(!vbo)return -1; 302 | 303 | vbo->data=gsLinearAlloc(size); 304 | vbo->numVertices=0; 305 | vbo->currentSize=0; 306 | vbo->maxSize=size; 307 | 308 | return 0; 309 | } 310 | 311 | void* gsVboGetOffset(gsVbo_s* vbo) 312 | { 313 | if(!vbo)return NULL; 314 | 315 | return (void*)(&((u8*)vbo->data)[vbo->currentSize]); 316 | } 317 | 318 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units) 319 | { 320 | if(!vbo || !data || !size)return -1; 321 | if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1; 322 | 323 | memcpy(gsVboGetOffset(vbo), data, size); 324 | vbo->currentSize+=size; 325 | vbo->numVertices+=units; 326 | 327 | return 0; 328 | } 329 | 330 | int gsVboFlushData(gsVbo_s* vbo) 331 | { 332 | if(!vbo)return -1; 333 | 334 | //unnecessary if we use flushAndRun 335 | // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize); 336 | 337 | return 0; 338 | } 339 | 340 | int gsVboDestroy(gsVbo_s* vbo) 341 | { 342 | if(!vbo)return -1; 343 | 344 | if(vbo->commands)free(vbo->commands); 345 | if(vbo->data)gsLinearFree(vbo->data); 346 | gsVboInit(vbo); 347 | 348 | return 0; 349 | } 350 | 351 | extern u32 debugValue[]; 352 | 353 | void 
GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n) 354 | { 355 | //set attribute buffer address 356 | GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3); 357 | //set primitive type 358 | GPUCMD_AddSingleParam(0x0002025E, primitive); 359 | GPUCMD_AddSingleParam(0x0002025F, 0x00000001); 360 | //index buffer not used for drawArrays but 0x000F0227 still required 361 | GPUCMD_AddSingleParam(0x000F0227, 0x80000000); 362 | //pass number of vertices 363 | GPUCMD_AddSingleParam(0x000F0228, n); 364 | 365 | GPUCMD_AddSingleParam(0x00010253, 0x00000001); 366 | 367 | GPUCMD_AddSingleParam(0x00010245, 0x00000000); 368 | GPUCMD_AddSingleParam(0x000F022E, 0x00000001); 369 | GPUCMD_AddSingleParam(0x00010245, 0x00000001); 370 | GPUCMD_AddSingleParam(0x000F0231, 0x00000001); 371 | 372 | // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff 373 | } 374 | 375 | //not thread safe 376 | int gsVboPrecomputeCommands(gsVbo_s* vbo) 377 | { 378 | if(!vbo || vbo->commands)return -1; 379 | 380 | static u32 tmpBuffer[128]; 381 | 382 | u32* savedAdr; u32 savedSize, savedOffset; 383 | GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset); 384 | GPUCMD_SetBuffer(tmpBuffer, 128, 0); 385 | 386 | GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); 387 | 388 | GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize); 389 | vbo->commands=memalign(0x4, vbo->commandsSize*4); 390 | if(!vbo->commands)return -1; 391 | memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4); 392 | 393 | GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset); 394 | 395 | return 0; 396 | } 397 | 398 | extern u32* gpuCmdBuf; 399 | extern u32 gpuCmdBufSize; 400 | extern u32 gpuCmdBufOffset; 401 | 402 | void _vboMemcpy50(u32* dst, u32* src); 403 | 404 | void _GPUCMD_AddRawCommands(u32* cmd, u32 size) 405 | { 406 | if(!cmd || !size)return; 407 | 408 | if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd); 409 | else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4); 410 | 
gpuCmdBufOffset+=size; 411 | } 412 | 413 | int gsVboDraw(gsVbo_s* vbo) 414 | { 415 | if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1; 416 | 417 | gsUpdateTransformation(); 418 | 419 | gsVboPrecomputeCommands(vbo); 420 | 421 | // u64 val=svcGetSystemTick(); 422 | if(vbo->commands) 423 | { 424 | _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize); 425 | }else{ 426 | GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); 427 | } 428 | // debugValue[5]+=(u32)(svcGetSystemTick()-val); 429 | // debugValue[6]++; 430 | 431 | return 0; 432 | } 433 | -------------------------------------------------------------------------------- /examples/assembler/cube/source/gs.h: -------------------------------------------------------------------------------- 1 | #ifndef GS_H 2 | #define GS_H 3 | 4 | #include <3ds.h> 5 | #include "math.h" 6 | 7 | #define GS_MATRIXSTACK_SIZE (8) 8 | 9 | typedef enum 10 | { 11 | GS_PROJECTION = 0, 12 | GS_MODELVIEW = 1, 13 | GS_MATRIXTYPES 14 | }GS_MATRIX; 15 | 16 | typedef struct 17 | { 18 | u8* data; 19 | u32 currentSize; // in bytes 20 | u32 maxSize; // in bytes 21 | u32 numVertices; 22 | u32* commands; 23 | u32 commandsSize; 24 | }gsVbo_s; 25 | 26 | 27 | void gsInit(shaderProgram_s* shader); 28 | void gsExit(void); 29 | 30 | void gsStartFrame(void); 31 | void gsAdjustBufferMatrices(mtx44 transformation); 32 | 33 | void* gsLinearAlloc(size_t size); 34 | void gsLinearFree(void* mem); 35 | 36 | float* gsGetMatrix(GS_MATRIX m); 37 | int gsLoadMatrix(GS_MATRIX m, float* data); 38 | int gsPushMatrix(); 39 | int gsPopMatrix(); 40 | int gsMatrixMode(GS_MATRIX m); 41 | 42 | void gsLoadIdentity(); 43 | void gsProjectionMatrix(float fovy, float aspect, float near, float far); 44 | void gsRotateX(float x); 45 | void gsRotateY(float y); 46 | void gsRotateZ(float z); 47 | void gsScale(float x, float y, float z); 48 | void gsTranslate(float x, float y, float z); 49 | int gsMultMatrix(float* data); 50 | 51 | int 
gsVboInit(gsVbo_s* vbo); 52 | int gsVboCreate(gsVbo_s* vbo, u32 size); 53 | int gsVboFlushData(gsVbo_s* vbo); 54 | int gsVboDestroy(gsVbo_s* vbo); 55 | int gsVboDraw(gsVbo_s* vbo); 56 | void* gsVboGetOffset(gsVbo_s* vbo); 57 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units); 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /examples/assembler/cube/source/main.c: -------------------------------------------------------------------------------- 1 | /////////////////////////////////////// 2 | // GPU example // 3 | /////////////////////////////////////// 4 | 5 | //this example is meant to show how to use the GPU to render a 3D object 6 | //it also shows how to do stereoscopic 3D 7 | //it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft 8 | //keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited. 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include <3ds.h> 15 | 16 | #include "math.h" 17 | #include "gs.h" 18 | 19 | #include "test_vsh_shbin.h" 20 | #include "texture_bin.h" 21 | 22 | //will be moved into ctrulib at some point 23 | #define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080) 24 | 25 | #define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0)) 26 | 27 | //transfer from GPU output buffer to actual framebuffer flags 28 | #define DISPLAY_TRANSFER_FLAGS \ 29 | (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \ 30 | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \ 31 | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X)) 32 | 33 | //shader structure 34 | DVLB_s* dvlb; 35 | shaderProgram_s shader; 36 | //texture data pointer 37 | u32* texData; 38 | //vbo structure 39 | gsVbo_s vbo; 40 | 41 | //GPU framebuffer address 42 | u32* gpuOut=(u32*)0x1F119400; 43 | //GPU depth buffer address 44 | u32* 
gpuDOut=(u32*)0x1F370800; 45 | 46 | //angle for the vertex lighting (cf test.vsh) 47 | float lightAngle; 48 | //object position and rotation angle 49 | vect3Df_s position, angle; 50 | 51 | //vertex structure 52 | typedef struct 53 | { 54 | vect3Df_s position; 55 | float texcoord[2]; 56 | vect3Df_s normal; 57 | }vertex_s; 58 | 59 | //object data (cube) 60 | //obviously this doesn't have to be defined manually, but we will here for the purposes of the example 61 | //each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z} 62 | //we're drawing triangles so three lines = one triangle 63 | const vertex_s modelVboData[]= 64 | { 65 | //first face (PZ) 66 | //first triangle 67 | {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, 68 | {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, 69 | {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, 70 | //second triangle 71 | {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, 72 | {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, 73 | {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}}, 74 | //second face (MZ) 75 | //first triangle 76 | {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, 77 | {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, 78 | {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, 79 | //second triangle 80 | {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, 81 | {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, 82 | {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}}, 83 | //third face 
(PX) 84 | //first triangle 85 | {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, 86 | {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, 87 | {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, 88 | //second triangle 89 | {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, 90 | {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, 91 | {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}}, 92 | //fourth face (MX) 93 | //first triangle 94 | {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, 95 | {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, 96 | {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, 97 | //second triangle 98 | {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, 99 | {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, 100 | {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}}, 101 | //fifth face (PY) 102 | //first triangle 103 | {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, 104 | {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, 105 | {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, 106 | //second triangle 107 | {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, 108 | {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, 109 | {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}}, 110 | //sixth face (MY) 111 | //first triangle 112 | {(vect3Df_s){-0.5f, 
-0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, 113 | {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, 114 | {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, 115 | //second triangle 116 | {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, 117 | {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, 118 | {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}}, 119 | }; 120 | 121 | //stolen from staplebutt 122 | void GPU_SetDummyTexEnv(u8 num) 123 | { 124 | GPU_SetTexEnv(num, 125 | GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), 126 | GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), 127 | GPU_TEVOPERANDS(0,0,0), 128 | GPU_TEVOPERANDS(0,0,0), 129 | GPU_REPLACE, 130 | GPU_REPLACE, 131 | 0xFFFFFFFF); 132 | } 133 | 134 | // topscreen 135 | void renderFrame() 136 | { 137 | GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400); 138 | 139 | GPU_DepthMap(-1.0f, 0.0f); 140 | GPU_SetFaceCulling(GPU_CULL_BACK_CCW); 141 | GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00); 142 | GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP); 143 | GPU_SetBlendingColor(0,0,0,0); 144 | GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL); 145 | 146 | GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); 147 | GPUCMD_AddWrite(GPUREG_0118, 0); 148 | 149 | GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); 150 | GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00); 151 | 152 | GPU_SetTextureEnable(GPU_TEXUNIT0); 153 | 154 | GPU_SetTexEnv(0, 155 | GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), 156 | GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), 157 | GPU_TEVOPERANDS(0,0,0), 158 | GPU_TEVOPERANDS(0,0,0), 159 | 
GPU_MODULATE, GPU_MODULATE, 160 | 0xFFFFFFFF); 161 | GPU_SetDummyTexEnv(1); 162 | GPU_SetDummyTexEnv(2); 163 | GPU_SetDummyTexEnv(3); 164 | GPU_SetDummyTexEnv(4); 165 | GPU_SetDummyTexEnv(5); 166 | 167 | //texturing stuff 168 | GPU_SetTexture( 169 | GPU_TEXUNIT0, //texture unit 170 | (u32*)osConvertVirtToPhys((u32)texData), //data buffer 171 | 128, //texture width 172 | 128, //texture height 173 | GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params 174 | GPU_RGBA8 //texture pixel format 175 | ); 176 | 177 | GPU_SetAttributeBuffers( 178 | 3, //3 attributes: vertices, texcoords, and normals 179 | (u32*)osConvertVirtToPhys((u32)texData), //mesh buffer 180 | GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position) 181 | GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord) 182 | GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal) 183 | 0xFFC, 184 | 0x210, 185 | 1, 186 | (u32[]){0x00000000}, 187 | (u64[]){0x210}, 188 | (u8[]){3} 189 | ); 190 | 191 | //setup lighting (this is specific to our shader) 192 | vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle))); 193 | GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1); 194 | GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1); 195 | 196 | //initialize projection matrix to standard perspective stuff 197 | gsMatrixMode(GS_PROJECTION); 198 | gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f); 199 | gsRotateZ(M_PI/2); //because framebuffer is sideways... 
200 | 201 | //draw object 202 | gsMatrixMode(GS_MODELVIEW); 203 | gsPushMatrix(); 204 | gsTranslate(position.x, position.y, position.z); 205 | gsRotateX(angle.x); 206 | gsRotateY(angle.y); 207 | gsVboDraw(&vbo); 208 | gsPopMatrix(); 209 | GPU_FinishDrawing(); 210 | } 211 | 212 | int main(int argc, char** argv) 213 | { 214 | 215 | gfxInitDefault(); 216 | 217 | //initialize GPU 218 | GPU_Init(NULL); 219 | 220 | //let GFX know we're ok with doing stereoscopic 3D rendering 221 | gfxSet3D(true); 222 | 223 | //allocate our GPU command buffers 224 | //they *have* to be on the linear heap 225 | u32 gpuCmdSize=0x40000; 226 | u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4); 227 | u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4); 228 | 229 | //actually reset the GPU 230 | GPU_Reset(NULL, gpuCmd, gpuCmdSize); 231 | 232 | //load our vertex shader binary 233 | dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size); 234 | shaderProgramInit(&shader); 235 | shaderProgramSetVsh(&shader, &dvlb->DVLE[0]); 236 | 237 | //initialize GS 238 | gsInit(&shader); 239 | 240 | // Flush the command buffer so that the shader upload gets executed 241 | GPUCMD_Finalize(); 242 | GPUCMD_FlushAndRun(NULL); 243 | gspWaitForP3D(); 244 | 245 | //create texture 246 | texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned 247 | memcpy(texData, texture_bin, texture_bin_size); 248 | 249 | //create VBO 250 | gsVboInit(&vbo); 251 | gsVboCreate(&vbo, sizeof(modelVboData)); 252 | gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s)); 253 | gsVboFlushData(&vbo); 254 | 255 | //initialize object position and angle 256 | position=vect3Df(0.0f, 0.0f, -2.0f); 257 | angle=vect3Df(M_PI/4, M_PI/4, 0.0f); 258 | 259 | //background color (blue) 260 | u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF); 261 | 262 | while(aptMainLoop()) 263 | { 264 | //get current 3D slider state 265 | float slider=CONFIG_3D_SLIDERSTATE; 266 | 267 | 
//controls 268 | hidScanInput(); 269 | //START to exit to hbmenu 270 | if(keysDown()&KEY_START)break; 271 | 272 | //A/B to change vertex lighting angle 273 | if(keysHeld()&KEY_A)lightAngle+=0.1f; 274 | if(keysHeld()&KEY_B)lightAngle-=0.1f; 275 | 276 | //D-PAD to rotate object 277 | if(keysHeld()&KEY_DOWN)angle.x+=0.05f; 278 | if(keysHeld()&KEY_UP)angle.x-=0.05f; 279 | if(keysHeld()&KEY_LEFT)angle.y+=0.05f; 280 | if(keysHeld()&KEY_RIGHT)angle.y-=0.05f; 281 | 282 | //R/L to bring object closer to or move it further from the camera 283 | if(keysHeld()&KEY_R)position.z+=0.1f; 284 | if(keysHeld()&KEY_L)position.z-=0.1f; 285 | 286 | //generate our GPU command buffer for this frame 287 | gsStartFrame(); 288 | renderFrame(); 289 | GPUCMD_Finalize(); 290 | 291 | if(slider>0.0f) 292 | { 293 | //new and exciting 3D ! 294 | //make a copy of left gpu buffer 295 | u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset); 296 | memcpy(gpuCmdRight, gpuCmd, offset*4); 297 | 298 | //setup interaxial 299 | float interaxial=slider*0.12f; 300 | 301 | //adjust left gpu buffer for 3D ! 302 | {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} 303 | 304 | //draw left framebuffer 305 | GPUCMD_FlushAndRun(NULL); 306 | 307 | //while GPU starts drawing the left buffer, adjust right one for 3D ! 
308 | GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset); 309 | {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);} 310 | 311 | //we wait for the left buffer to finish drawing 312 | gspWaitForP3D(); 313 | GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); 314 | gspWaitForPPF(); 315 | 316 | //we draw the right buffer, wait for it to finish and then switch back to left one 317 | //clear the screen 318 | GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); 319 | gspWaitForPSC0(); 320 | 321 | //draw the right framebuffer 322 | GPUCMD_FlushAndRun(NULL); 323 | gspWaitForP3D(); 324 | 325 | //transfer from GPU output buffer to actual framebuffer 326 | GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); 327 | gspWaitForPPF(); 328 | GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0); 329 | }else{ 330 | //boring old 2D ! 
331 | 332 | //draw the frame 333 | GPUCMD_FlushAndRun(NULL); 334 | gspWaitForP3D(); 335 | 336 | //transfer from GPU output buffer to actual framebuffer 337 | GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS); 338 | gspWaitForPPF(); 339 | } 340 | 341 | //clear the screen 342 | GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH); 343 | gspWaitForPSC0(); 344 | gfxSwapBuffersGpu(); 345 | 346 | gspWaitForEvent(GSPEVENT_VBlank0, true); 347 | } 348 | 349 | gsExit(); 350 | shaderProgramFree(&shader); 351 | DVLB_Free(dvlb); 352 | gfxExit(); 353 | return 0; 354 | } 355 | -------------------------------------------------------------------------------- /examples/assembler/cube/source/math.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "math.h" 5 | 6 | void loadIdentity44(float* m) 7 | { 8 | if(!m)return; 9 | 10 | memset(m, 0x00, 16*4); 11 | m[0]=m[5]=m[10]=m[15]=1.0f; 12 | } 13 | 14 | void multMatrix44(float* m1, float* m2, float* m) //4x4 15 | { 16 | int i, j; 17 | for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]); 18 | 19 | } 20 | 21 | void translateMatrix(float* tm, float x, float y, float z) 22 | { 23 | float rm[16], m[16]; 24 | 25 | loadIdentity44(rm); 26 | rm[3]=x; 27 | rm[7]=y; 28 | rm[11]=z; 29 | 30 | multMatrix44(tm,rm,m); 31 | memcpy(tm,m,16*sizeof(float)); 32 | } 33 | 34 | // 00 01 02 03 35 | // 04 05 06 07 36 | // 08 09 10 11 37 | // 12 13 14 15 38 | 39 | void rotateMatrixX(float* tm, float x, bool r) 40 | { 41 | float rm[16], m[16]; 42 | memset(rm, 0x00, 16*4); 43 | rm[0]=1.0f; 44 | rm[5]=cos(x); 45 | rm[6]=sin(x); 46 | rm[9]=-sin(x); 47 | rm[10]=cos(x); 48 | 
rm[15]=1.0f; 49 | if(!r)multMatrix44(tm,rm,m); 50 | else multMatrix44(rm,tm,m); 51 | memcpy(tm,m,16*sizeof(float)); 52 | } 53 | 54 | void rotateMatrixY(float* tm, float x, bool r) 55 | { 56 | float rm[16], m[16]; 57 | memset(rm, 0x00, 16*4); 58 | rm[0]=cos(x); 59 | rm[2]=sin(x); 60 | rm[5]=1.0f; 61 | rm[8]=-sin(x); 62 | rm[10]=cos(x); 63 | rm[15]=1.0f; 64 | if(!r)multMatrix44(tm,rm,m); 65 | else multMatrix44(rm,tm,m); 66 | memcpy(tm,m,16*sizeof(float)); 67 | } 68 | 69 | void rotateMatrixZ(float* tm, float x, bool r) 70 | { 71 | float rm[16], m[16]; 72 | memset(rm, 0x00, 16*4); 73 | rm[0]=cos(x); 74 | rm[1]=sin(x); 75 | rm[4]=-sin(x); 76 | rm[5]=cos(x); 77 | rm[10]=1.0f; 78 | rm[15]=1.0f; 79 | if(!r)multMatrix44(tm,rm,m); 80 | else multMatrix44(rm,tm,m); 81 | memcpy(tm,m,16*sizeof(float)); 82 | } 83 | 84 | void scaleMatrix(float* tm, float x, float y, float z) 85 | { 86 | tm[0]*=x; tm[4]*=x; tm[8]*=x; tm[12]*=x; 87 | tm[1]*=y; tm[5]*=y; tm[9]*=y; tm[13]*=y; 88 | tm[2]*=z; tm[6]*=z; tm[10]*=z; tm[14]*=z; 89 | } 90 | 91 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far) 92 | { 93 | float top = near*tan(fovy/2); 94 | float right = (top*aspect); 95 | 96 | float mp[4*4]; 97 | 98 | mp[0x0] = near/right; 99 | mp[0x1] = 0.0f; 100 | mp[0x2] = 0.0f; 101 | mp[0x3] = 0.0f; 102 | 103 | mp[0x4] = 0.0f; 104 | mp[0x5] = near/top; 105 | mp[0x6] = 0.0f; 106 | mp[0x7] = 0.0f; 107 | 108 | mp[0x8] = 0.0f; 109 | mp[0x9] = 0.0f; 110 | mp[0xA] = -(far+near)/(far-near); 111 | mp[0xB] = -2.0f*(far*near)/(far-near); 112 | 113 | mp[0xC] = 0.0f; 114 | mp[0xD] = 0.0f; 115 | mp[0xE] = -1.0f; 116 | mp[0xF] = 0.0f; 117 | 118 | float mp2[4*4]; 119 | loadIdentity44(mp2); 120 | mp2[0xA]=0.5; 121 | mp2[0xB]=-0.5; 122 | 123 | multMatrix44(mp2, mp, m); 124 | } 125 | 126 | vect3Df_s getMatrixColumn(float* m, u8 i) 127 | { 128 | if(!m || i>=4)return vect3Df(0,0,0); 129 | return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]); 130 | } 131 | 132 | vect3Df_s getMatrixRow(float* m, u8 
i) 133 | { 134 | if(!m || i>=4)return vect3Df(0,0,0); 135 | return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]); 136 | } 137 | 138 | vect4Df_s getMatrixColumn4(float* m, u8 i) 139 | { 140 | if(!m || i>=4)return vect4Df(0,0,0,0); 141 | return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]); 142 | } 143 | 144 | vect4Df_s getMatrixRow4(float* m, u8 i) 145 | { 146 | if(!m || i>=4)return vect4Df(0,0,0,0); 147 | return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]); 148 | } 149 | -------------------------------------------------------------------------------- /examples/assembler/cube/source/math.h: -------------------------------------------------------------------------------- 1 | #ifndef MATH_H 2 | #define MATH_H 3 | 4 | #include <3ds/types.h> 5 | #include 6 | 7 | typedef float mtx44[4][4]; 8 | typedef float mtx33[3][3]; 9 | 10 | typedef struct 11 | { 12 | s32 x, y, z; 13 | }vect3Di_s; 14 | 15 | static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z) 16 | { 17 | return (vect3Di_s){x,y,z}; 18 | } 19 | 20 | static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v) 21 | { 22 | return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z}; 23 | } 24 | 25 | static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v) 26 | { 27 | return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z}; 28 | } 29 | 30 | static inline vect3Di_s vmuli(vect3Di_s v, s32 f) 31 | { 32 | return (vect3Di_s){v.x*f,v.y*f,v.z*f}; 33 | } 34 | 35 | typedef struct 36 | { 37 | float x, y, z; 38 | }vect3Df_s; 39 | 40 | static inline vect3Df_s vect3Df(float x, float y, float z) 41 | { 42 | return (vect3Df_s){x,y,z}; 43 | } 44 | 45 | static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v) 46 | { 47 | return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z}; 48 | } 49 | 50 | static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v) 51 | { 52 | return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z}; 53 | } 54 | 55 | static inline vect3Df_s vmulf(vect3Df_s v, float f) 56 | { 57 | return (vect3Df_s){v.x*f,v.y*f,v.z*f}; 58 | } 59 | 60 | static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s 
v2) 61 | { 62 | return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z}; 63 | } 64 | 65 | static inline float vmagf(vect3Df_s v) 66 | { 67 | return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); 68 | } 69 | 70 | static inline float vdistf(vect3Df_s v1, vect3Df_s v2) 71 | { 72 | return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z)); 73 | } 74 | 75 | static inline vect3Df_s vnormf(vect3Df_s v) 76 | { 77 | const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); 78 | return (vect3Df_s){v.x/l,v.y/l,v.z/l}; 79 | } 80 | 81 | typedef struct 82 | { 83 | float x, y, z, w; 84 | }vect4Df_s; 85 | 86 | static inline vect4Df_s vect4Df(float x, float y, float z, float w) 87 | { 88 | return (vect4Df_s){x,y,z,w}; 89 | } 90 | 91 | static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v) 92 | { 93 | return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w}; 94 | } 95 | 96 | static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v) 97 | { 98 | return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w}; 99 | } 100 | 101 | static inline vect4Df_s vmulf4(vect4Df_s v, float f) 102 | { 103 | return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f}; 104 | } 105 | 106 | static inline float vdotf4(vect4Df_s v1, vect4Df_s v2) 107 | { 108 | return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w; 109 | } 110 | 111 | static inline vect4Df_s vnormf4(vect4Df_s v) 112 | { 113 | const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w); 114 | return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l}; 115 | } 116 | 117 | //interstuff 118 | static inline vect3Di_s vf2i(vect3Df_s v) 119 | { 120 | return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)}; 121 | } 122 | 123 | static inline vect3Df_s vi2f(vect3Di_s v) 124 | { 125 | return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z}; 126 | } 127 | 128 | void loadIdentity44(float* m); 129 | void multMatrix44(float* m1, float* m2, float* m); 130 | 131 | void translateMatrix(float* tm, float x, float y, float z); 132 | void rotateMatrixX(float* tm, float x, bool r); 133 | void rotateMatrixY(float* tm, float x, 
bool r); 134 | void rotateMatrixZ(float* tm, float x, bool r); 135 | void scaleMatrix(float* tm, float x, float y, float z); 136 | 137 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far); 138 | 139 | vect3Df_s getMatrixColumn(float* m, u8 i); 140 | vect3Df_s getMatrixRow(float* m, u8 i); 141 | vect4Df_s getMatrixColumn4(float* m, u8 i); 142 | vect4Df_s getMatrixRow4(float* m, u8 i); 143 | 144 | #endif 145 | -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/Makefile: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------- 2 | .SUFFIXES: 3 | #--------------------------------------------------------------------------------- 4 | 5 | ifeq ($(strip $(DEVKITARM)),) 6 | $(error "Please set DEVKITARM in your environment. export DEVKITARM=devkitARM") 7 | endif 8 | 9 | ifeq ($(strip $(NIHSTRO)),) 10 | $(error "Please set NIHSTRO in your environment. export NIHSTRO=nihstro-assemble") 11 | endif 12 | 13 | TOPDIR ?= $(CURDIR) 14 | include $(DEVKITARM)/3ds_rules 15 | 16 | #--------------------------------------------------------------------------------- 17 | # TARGET is the name of the output 18 | # BUILD is the directory where object files & intermediate files will be placed 19 | # SOURCES is a list of directories containing source code 20 | # DATA is a list of directories containing data files 21 | # INCLUDES is a list of directories containing header files 22 | # 23 | # NO_SMDH: if set to anything, no SMDH file is generated. 24 | # APP_TITLE is the name of the app stored in the SMDH file (Optional) 25 | # APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional) 26 | # APP_AUTHOR is the author of the app stored in the SMDH file (Optional) 27 | # ICON is the filename of the icon (.png), relative to the project folder. 
28 | # If not set, it attempts to use one of the following (in this order): 29 | # - <Project name>.png 30 | # - icon.png 31 | # - <libctru folder>/default_icon.png 32 | #--------------------------------------------------------------------------------- 33 | TARGET := $(notdir $(CURDIR)) 34 | BUILD := build 35 | SOURCES := source 36 | DATA := data 37 | INCLUDES := include 38 | 39 | #--------------------------------------------------------------------------------- 40 | # options for code generation 41 | #--------------------------------------------------------------------------------- 42 | ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard 43 | 44 | CFLAGS := -g -Wall -O2 -mword-relocations \ 45 | -fomit-frame-pointer -ffast-math \ 46 | $(ARCH) 47 | 48 | CFLAGS += $(INCLUDE) -DARM11 -D_3DS 49 | 50 | CXXFLAGS := $(CFLAGS) -fno-rtti -std=gnu++11 51 | 52 | ASFLAGS := -g $(ARCH) 53 | LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map) 54 | 55 | LIBS := -lctru -lm 56 | 57 | #--------------------------------------------------------------------------------- 58 | # list of directories containing libraries, this must be the top level containing 59 | # include and lib 60 | #--------------------------------------------------------------------------------- 61 | LIBDIRS := $(CTRULIB) 62 | 63 | 64 | #--------------------------------------------------------------------------------- 65 | # no real need to edit anything past this point unless you need to add additional 66 | # rules for different file extensions 67 | #--------------------------------------------------------------------------------- 68 | ifneq ($(BUILD),$(notdir $(CURDIR))) 69 | #--------------------------------------------------------------------------------- 70 | 71 | export OUTPUT := $(CURDIR)/$(TARGET) 72 | export TOPDIR := $(CURDIR) 73 | 74 | export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \ 75 | $(foreach dir,$(DATA),$(CURDIR)/$(dir)) 76 | 77 | export DEPSDIR := $(CURDIR)/$(BUILD) 78 | 79 | CFILES := $(foreach 
dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) 80 | CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) 81 | SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s))) 82 | BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*))) 83 | 84 | #--------------------------------------------------------------------------------- 85 | # use CXX for linking C++ projects, CC for standard C 86 | #--------------------------------------------------------------------------------- 87 | ifeq ($(strip $(CPPFILES)),) 88 | #--------------------------------------------------------------------------------- 89 | export LD := $(CC) 90 | #--------------------------------------------------------------------------------- 91 | else 92 | #--------------------------------------------------------------------------------- 93 | export LD := $(CXX) 94 | #--------------------------------------------------------------------------------- 95 | endif 96 | #--------------------------------------------------------------------------------- 97 | 98 | export OFILES := $(addsuffix .o,$(BINFILES)) \ 99 | $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o) 100 | 101 | export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \ 102 | $(foreach dir,$(LIBDIRS),-I$(dir)/include) \ 103 | -I$(CURDIR)/$(BUILD) 104 | 105 | export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib) 106 | 107 | ifeq ($(strip $(ICON)),) 108 | icons := $(wildcard *.png) 109 | ifneq (,$(findstring $(TARGET).png,$(icons))) 110 | export APP_ICON := $(TOPDIR)/$(TARGET).png 111 | else 112 | ifneq (,$(findstring icon.png,$(icons))) 113 | export APP_ICON := $(TOPDIR)/icon.png 114 | endif 115 | endif 116 | else 117 | export APP_ICON := $(TOPDIR)/$(ICON) 118 | endif 119 | 120 | .PHONY: $(BUILD) clean all 121 | 122 | #--------------------------------------------------------------------------------- 123 | all: $(BUILD) 124 | 125 | $(BUILD): 126 | @[ -d $@ ] || mkdir -p $@ 127 | @make 
--no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile 128 | 129 | #--------------------------------------------------------------------------------- 130 | clean: 131 | @echo clean ... 132 | @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf test.vsh.shbin 133 | 134 | 135 | #--------------------------------------------------------------------------------- 136 | else 137 | 138 | DEPENDS := $(OFILES:.o=.d) 139 | 140 | #--------------------------------------------------------------------------------- 141 | # main targets 142 | #--------------------------------------------------------------------------------- 143 | ifeq ($(strip $(NO_SMDH)),) 144 | .PHONY: all 145 | all : $(OUTPUT).3dsx $(OUTPUT).smdh 146 | endif 147 | $(OUTPUT).3dsx : $(OUTPUT).elf 148 | $(OUTPUT).elf : $(OFILES) 149 | 150 | #--------------------------------------------------------------------------------- 151 | # you need a rule like this for each extension you use as binary data 152 | #--------------------------------------------------------------------------------- 153 | %.bin.o : %.bin 154 | #--------------------------------------------------------------------------------- 155 | @echo $(notdir $<) 156 | @$(bin2o) 157 | 158 | # WARNING: This is not the right way to do this! TODO: Do it right! 159 | #--------------------------------------------------------------------------------- 160 | %.vsh.o : %.vsh 161 | #--------------------------------------------------------------------------------- 162 | @echo $(notdir $<) 163 | @$(NIHSTRO)/nihstro-assemble --output ../$(notdir $<).shbin $< 164 | @bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@ 165 | @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h 166 | @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . 
_)`.h 167 | @echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h 168 | 169 | -include $(DEPENDS) 170 | 171 | #--------------------------------------------------------------------------------------- 172 | endif 173 | #--------------------------------------------------------------------------------------- 174 | -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/README.md: -------------------------------------------------------------------------------- 1 | cube lighting example 2 | ===================== 3 | 4 | An example similar to cube, but with some rudimentary vertex lighting effects. The shader used is somewhat more complex and involves a LOOP to implement multiple light sources. 5 | 6 | Before trying to compile, make sure your NIHSTRO environment variable points to the directory nihstro-assemble resides in. Additionally, ctrulib in revision 1f52ac344d or similar is required, plus some patches to implement proper uniform setters. 
7 | -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/data/test.vsh: -------------------------------------------------------------------------------- 1 | // setup constants 2 | .alias myconst c32 as (1.0, 0.0, 0.5, 1.0) 3 | 4 | // setup output map 5 | .alias outpos o0 as position 6 | .alias outcol o1 as color 7 | .alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently 8 | .alias outtex1 o3.xyzw as texcoord1 9 | .alias outtex2 o4.xyzw as texcoord2 10 | 11 | // setup uniform map, for use with SHDR_GetUniformRegister 12 | .alias projection c0-c3 13 | .alias modelview c4-c7 14 | 15 | .alias num_lights i1 16 | 17 | .alias light_dir c8 18 | .alias light_diffuse c9 19 | .alias light_ambient c10 20 | .alias light_dir2 c11 21 | .alias light_diffuse2 c12 22 | .alias light_ambient2 c13 23 | 24 | main: 25 | mov r1.xyz, v0.xyz 26 | mov r1.w, myconst.w 27 | 28 | mdvl: // tempreg = mdlvMtx * in.pos 29 | dp4 r0.x, modelview[0], r1 30 | dp4 r0.y, modelview[1], r1 31 | dp4 r0.z, modelview[2], r1 32 | mov r0.w, myconst.w 33 | 34 | proj: // result.pos = projMtx * tempreg 35 | dp4 outpos.x, projection[0], r0 36 | dp4 outpos.y, projection[1], r0 37 | dp4 outpos.z, projection[2], r0 38 | dp4 outpos.w, projection[3], r0 39 | 40 | tex: // result.texcoord = in.texcoord 41 | mov outtex0, v1.xyzw 42 | mov outtex1, myconst.yyyw 43 | mov outtex2, myconst.yyyw 44 | 45 | lighting: // color = sum over all lights(diffuse * clamp(dot(L,N),0) + ambient) 46 | mov r0, myconst.yyyw 47 | 48 | loop num_lights 49 | mov r1.xyz, myconst.yyy 50 | dp3 r1.xyz, light_dir[lcnt].xyz, v2.xyz 51 | max r1.xyz, r1.xyz, myconst.yyy 52 | mul r1.xyz, r1.xyz, light_diffuse[lcnt].xyz 53 | add r1.xyz, r1.xyz, light_ambient[lcnt].xyz 54 | add r0.xyz, r1.xyz, r0.xyz 55 | nop 56 | endloop 57 | min r0.xyz, r0.xyz, myconst.xxx 58 | 59 | mov outcol, r0 60 | 61 | 62 | 63 | nop 64 | end 65 | 66 | endmain: 67 
| -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/data/texture.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/neobrain/nihstro/f4d8659decbfe5d234f04134b5002b82dc515a44/examples/assembler/cube_lighting/data/texture.bin -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/source/_gs.s: -------------------------------------------------------------------------------- 1 | .section ".text" 2 | .arm 3 | .align 4 4 | .global _vboMemcpy50 5 | 6 | # r0 : dst 7 | # r1 : src 8 | # fixed size 0x50: two 10-register (r2-r11) transfers of 0x28 bytes each; r4-r11 are saved and restored around the copy 9 | _vboMemcpy50: 10 | push {r4-r11} 11 | ldmia r1!, {r2-r11} 12 | stmia r0!, {r2-r11} 13 | ldmia r1!, {r2-r11} 14 | stmia r0!, {r2-r11} 15 | pop {r4-r11} 16 | bx lr 17 | -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/source/gs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include <3ds.h> 5 | 6 | #include "gs.h" 7 | #include "math.h" 8 | 9 | #define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4) 10 | 11 | static void gsInitMatrixStack(); 12 | 13 | Handle linearAllocMutex; 14 | 15 | static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]; 16 | 17 | typedef struct 18 | { 19 | u32 offset; 20 | mtx44 data; 21 | }bufferMatrix_s; 22 | 23 | bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE]; 24 | int bufferMatrixListLength; 25 | 26 | //---------------------- 27 | // GS SYSTEM STUFF 28 | //---------------------- 29 | 30 | void initBufferMatrixList() 31 | { 32 | bufferMatrixListLength=0; 33 | } 34 | 35 | void gsInit(shaderProgram_s* shader) 36 | { 37 | gsInitMatrixStack(); 38 | initBufferMatrixList(); 39 | svcCreateMutex(&linearAllocMutex, false); 40 | if(shader) 41 | { 42 | 
gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection"); 43 | gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview"); 44 | shaderProgramUse(shader); 45 | } 46 | } 47 | 48 | void gsExit(void) 49 | { 50 | svcCloseHandle(linearAllocMutex); 51 | } 52 | 53 | void gsStartFrame(void) 54 | { 55 | GPUCMD_SetBufferOffset(0); 56 | initBufferMatrixList(); 57 | } 58 | 59 | void* gsLinearAlloc(size_t size) 60 | { 61 | void* ret=NULL; 62 | 63 | svcWaitSynchronization(linearAllocMutex, U64_MAX); 64 | ret=linearAlloc(size); 65 | svcReleaseMutex(linearAllocMutex); 66 | 67 | return ret; 68 | } 69 | 70 | void gsLinearFree(void* mem) 71 | { 72 | svcWaitSynchronization(linearAllocMutex, U64_MAX); 73 | linearFree(mem); 74 | svcReleaseMutex(linearAllocMutex); 75 | } 76 | 77 | //---------------------- 78 | // MATRIX STACK STUFF 79 | //---------------------- 80 | 81 | static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE]; 82 | static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04}; 83 | static u8 gsMatrixStackOffsets[GS_MATRIXTYPES]; 84 | static bool gsMatrixStackUpdated[GS_MATRIXTYPES]; 85 | static GS_MATRIX gsCurrentMatrixType; 86 | 87 | static void gsInitMatrixStack() 88 | { 89 | int i; 90 | for(i=0; i=GS_MATRIXTYPES)return NULL; 102 | 103 | return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]]; 104 | } 105 | 106 | int gsLoadMatrix(GS_MATRIX m, float* data) 107 | { 108 | if(m<0 || m>=GS_MATRIXTYPES || !data)return -1; 109 | 110 | memcpy(gsGetMatrix(m), data, sizeof(mtx44)); 111 | 112 | gsMatrixStackUpdated[m]=true; 113 | 114 | return 0; 115 | } 116 | 117 | int gsPushMatrix() 118 | { 119 | const GS_MATRIX m=gsCurrentMatrixType; 120 | if(m<0 || m>=GS_MATRIXTYPES)return -1; 121 | if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1; 122 | 123 | float* cur=gsGetMatrix(m); 124 | gsMatrixStackOffsets[m]++; 125 | memcpy(gsGetMatrix(m), cur, sizeof(mtx44)); 126 
| 127 | return 0; 128 | } 129 | 130 | int gsPopMatrix() 131 | { 132 | const GS_MATRIX m=gsCurrentMatrixType; 133 | if(m<0 || m>=GS_MATRIXTYPES)return -1; 134 | if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1; 135 | 136 | gsMatrixStackOffsets[m]--; 137 | 138 | gsMatrixStackUpdated[m]=true; 139 | 140 | return 0; 141 | } 142 | 143 | int gsMatrixMode(GS_MATRIX m) 144 | { 145 | if(m<0 || m>=GS_MATRIXTYPES)return -1; 146 | 147 | gsCurrentMatrixType=m; 148 | 149 | return 0; 150 | } 151 | 152 | //------------------------ 153 | // MATRIX TRANSFORM STUFF 154 | //------------------------ 155 | 156 | int gsMultMatrix(float* data) 157 | { 158 | if(!data)return -1; 159 | 160 | mtx44 tmp; 161 | multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp); 162 | memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44)); 163 | 164 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 165 | 166 | return 0; 167 | } 168 | 169 | void gsLoadIdentity() 170 | { 171 | loadIdentity44(gsGetMatrix(gsCurrentMatrixType)); 172 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 173 | } 174 | 175 | void gsProjectionMatrix(float fovy, float aspect, float near, float far) 176 | { 177 | initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far); 178 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 179 | } 180 | 181 | void gsRotateX(float x) 182 | { 183 | rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false); 184 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 185 | } 186 | 187 | void gsRotateY(float y) 188 | { 189 | rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false); 190 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 191 | } 192 | 193 | void gsRotateZ(float z) 194 | { 195 | rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false); 196 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 197 | } 198 | 199 | void gsScale(float x, float y, float z) 200 | { 201 | scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); 
202 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 203 | } 204 | 205 | void gsTranslate(float x, float y, float z) 206 | { 207 | translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z); 208 | gsMatrixStackUpdated[gsCurrentMatrixType]=true; 209 | } 210 | 211 | //---------------------- 212 | // MATRIX RENDER STUFF 213 | //---------------------- 214 | 215 | static void gsSetUniformMatrix(u32 startreg, float* m) 216 | { 217 | float param[16]; 218 | 219 | param[0x0]=m[3]; //w 220 | param[0x1]=m[2]; //z 221 | param[0x2]=m[1]; //y 222 | param[0x3]=m[0]; //x 223 | 224 | param[0x4]=m[7]; 225 | param[0x5]=m[6]; 226 | param[0x6]=m[5]; 227 | param[0x7]=m[4]; 228 | 229 | param[0x8]=m[11]; 230 | param[0x9]=m[10]; 231 | param[0xa]=m[9]; 232 | param[0xb]=m[8]; 233 | 234 | param[0xc]=m[15]; 235 | param[0xd]=m[14]; 236 | param[0xe]=m[13]; 237 | param[0xf]=m[12]; 238 | 239 | GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4); 240 | } 241 | 242 | static int gsUpdateTransformation() 243 | { 244 | GS_MATRIX m; 245 | for(m=0; mdata=NULL; 291 | vbo->currentSize=0; 292 | vbo->maxSize=0; 293 | vbo->commands=NULL; 294 | vbo->commandsSize=0; 295 | 296 | return 0; 297 | } 298 | 299 | int gsVboCreate(gsVbo_s* vbo, u32 size) 300 | { 301 | if(!vbo)return -1; 302 | 303 | vbo->data=gsLinearAlloc(size); 304 | vbo->numVertices=0; 305 | vbo->currentSize=0; 306 | vbo->maxSize=size; 307 | 308 | return 0; 309 | } 310 | 311 | void* gsVboGetOffset(gsVbo_s* vbo) 312 | { 313 | if(!vbo)return NULL; 314 | 315 | return (void*)(&((u8*)vbo->data)[vbo->currentSize]); 316 | } 317 | 318 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units) 319 | { 320 | if(!vbo || !data || !size)return -1; 321 | if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1; 322 | 323 | memcpy(gsVboGetOffset(vbo), data, size); 324 | vbo->currentSize+=size; 325 | vbo->numVertices+=units; 326 | 327 | return 0; 328 | } 329 | 330 | int gsVboFlushData(gsVbo_s* vbo) 331 | { 332 | if(!vbo)return -1; 333 | 
334 | //unnecessary if we use flushAndRun 335 | // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize); 336 | 337 | return 0; 338 | } 339 | 340 | int gsVboDestroy(gsVbo_s* vbo) 341 | { 342 | if(!vbo)return -1; 343 | 344 | if(vbo->commands)free(vbo->commands); 345 | if(vbo->data)gsLinearFree(vbo->data); 346 | gsVboInit(vbo); 347 | 348 | return 0; 349 | } 350 | 351 | extern u32 debugValue[]; 352 | 353 | void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n) 354 | { 355 | //set attribute buffer address 356 | GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3); 357 | //set primitive type 358 | GPUCMD_AddSingleParam(0x0002025E, primitive); 359 | GPUCMD_AddSingleParam(0x0002025F, 0x00000001); 360 | //index buffer not used for drawArrays but 0x000F0227 still required 361 | GPUCMD_AddSingleParam(0x000F0227, 0x80000000); 362 | //pass number of vertices 363 | GPUCMD_AddSingleParam(0x000F0228, n); 364 | 365 | GPUCMD_AddSingleParam(0x00010253, 0x00000001); 366 | 367 | GPUCMD_AddSingleParam(0x00010245, 0x00000000); 368 | GPUCMD_AddSingleParam(0x000F022E, 0x00000001); 369 | GPUCMD_AddSingleParam(0x00010245, 0x00000001); 370 | GPUCMD_AddSingleParam(0x000F0231, 0x00000001); 371 | 372 | // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff 373 | } 374 | 375 | //records the draw commands for vbo into a static scratch buffer and copies them to vbo->commands; returns -1 if vbo is NULL, commands already exist or the allocation fails, 0 otherwise; not thread safe 376 | int gsVboPrecomputeCommands(gsVbo_s* vbo) 377 | { 378 | if(!vbo || vbo->commands)return -1; 379 | 380 | static u32 tmpBuffer[128]; 381 | 382 | u32* savedAdr; u32 savedSize, savedOffset; 383 | GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset); 384 | GPUCMD_SetBuffer(tmpBuffer, 128, 0); 385 | 386 | GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); 387 | 388 | GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize); 389 | //restore the caller's command buffer before anything can fail, so an early return never leaves tmpBuffer installed 390 | GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset); 391 | 392 | vbo->commands=memalign(0x4, vbo->commandsSize*4); 393 | if(!vbo->commands)return -1; 394 | memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4); 395 | 
return 0; 396 | } 397 | 398 | extern u32* gpuCmdBuf; 399 | extern u32 gpuCmdBufSize; 400 | extern u32 gpuCmdBufOffset; 401 | 402 | void _vboMemcpy50(u32* dst, u32* src); 403 | 404 | void _GPUCMD_AddRawCommands(u32* cmd, u32 size) 405 | { 406 | if(!cmd || !size)return; 407 | 408 | if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd); 409 | else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4); 410 | gpuCmdBufOffset+=size; 411 | } 412 | 413 | int gsVboDraw(gsVbo_s* vbo) 414 | { 415 | if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1; 416 | 417 | gsUpdateTransformation(); 418 | 419 | gsVboPrecomputeCommands(vbo); 420 | 421 | // u64 val=svcGetSystemTick(); 422 | if(vbo->commands) 423 | { 424 | _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize); 425 | }else{ 426 | GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices); 427 | } 428 | // debugValue[5]+=(u32)(svcGetSystemTick()-val); 429 | // debugValue[6]++; 430 | 431 | return 0; 432 | } 433 | -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/source/gs.h: -------------------------------------------------------------------------------- 1 | #ifndef GS_H 2 | #define GS_H 3 | 4 | #include <3ds.h> 5 | #include "math.h" 6 | 7 | #define GS_MATRIXSTACK_SIZE (8) 8 | 9 | typedef enum 10 | { 11 | GS_PROJECTION = 0, 12 | GS_MODELVIEW = 1, 13 | GS_MATRIXTYPES 14 | }GS_MATRIX; 15 | 16 | typedef struct 17 | { 18 | u8* data; 19 | u32 currentSize; // in bytes 20 | u32 maxSize; // in bytes 21 | u32 numVertices; 22 | u32* commands; 23 | u32 commandsSize; 24 | }gsVbo_s; 25 | 26 | 27 | void gsInit(shaderProgram_s* shader); 28 | void gsExit(void); 29 | 30 | void gsStartFrame(void); 31 | void gsAdjustBufferMatrices(mtx44 transformation); 32 | 33 | void* gsLinearAlloc(size_t size); 34 | void gsLinearFree(void* mem); 35 | 36 | float* gsGetMatrix(GS_MATRIX m); 37 | int gsLoadMatrix(GS_MATRIX m, float* data); 38 | int gsPushMatrix(); 39 | 
int gsPopMatrix(); 40 | int gsMatrixMode(GS_MATRIX m); 41 | 42 | void gsLoadIdentity(); 43 | void gsProjectionMatrix(float fovy, float aspect, float near, float far); 44 | void gsRotateX(float x); 45 | void gsRotateY(float y); 46 | void gsRotateZ(float z); 47 | void gsScale(float x, float y, float z); 48 | void gsTranslate(float x, float y, float z); 49 | int gsMultMatrix(float* data); 50 | 51 | int gsVboInit(gsVbo_s* vbo); 52 | int gsVboCreate(gsVbo_s* vbo, u32 size); 53 | int gsVboFlushData(gsVbo_s* vbo); 54 | int gsVboDestroy(gsVbo_s* vbo); 55 | int gsVboDraw(gsVbo_s* vbo); 56 | void* gsVboGetOffset(gsVbo_s* vbo); 57 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units); 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/source/math.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "math.h" 5 | 6 | void loadIdentity44(float* m) 7 | { 8 | if(!m)return; 9 | 10 | memset(m, 0x00, 16*4); 11 | m[0]=m[5]=m[10]=m[15]=1.0f; 12 | } 13 | 14 | void multMatrix44(float* m1, float* m2, float* m) //4x4 15 | { 16 | int i, j; 17 | for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]); 18 | 19 | } 20 | 21 | void translateMatrix(float* tm, float x, float y, float z) 22 | { 23 | float rm[16], m[16]; 24 | 25 | loadIdentity44(rm); 26 | rm[3]=x; 27 | rm[7]=y; 28 | rm[11]=z; 29 | 30 | multMatrix44(tm,rm,m); 31 | memcpy(tm,m,16*sizeof(float)); 32 | } 33 | 34 | // 00 01 02 03 35 | // 04 05 06 07 36 | // 08 09 10 11 37 | // 12 13 14 15 38 | 39 | void rotateMatrixX(float* tm, float x, bool r) 40 | { 41 | float rm[16], m[16]; 42 | memset(rm, 0x00, 16*4); 43 | rm[0]=1.0f; 44 | rm[5]=cos(x); 45 | rm[6]=sin(x); 46 | rm[9]=-sin(x); 47 | rm[10]=cos(x); 48 | rm[15]=1.0f; 49 | if(!r)multMatrix44(tm,rm,m); 50 | else multMatrix44(rm,tm,m); 51 | 
memcpy(tm,m,16*sizeof(float)); 52 | } 53 | 54 | void rotateMatrixY(float* tm, float x, bool r) 55 | { 56 | float rm[16], m[16]; 57 | memset(rm, 0x00, 16*4); 58 | rm[0]=cos(x); 59 | rm[2]=sin(x); 60 | rm[5]=1.0f; 61 | rm[8]=-sin(x); 62 | rm[10]=cos(x); 63 | rm[15]=1.0f; 64 | if(!r)multMatrix44(tm,rm,m); 65 | else multMatrix44(rm,tm,m); 66 | memcpy(tm,m,16*sizeof(float)); 67 | } 68 | 69 | void rotateMatrixZ(float* tm, float x, bool r) 70 | { 71 | float rm[16], m[16]; 72 | memset(rm, 0x00, 16*4); 73 | rm[0]=cos(x); 74 | rm[1]=sin(x); 75 | rm[4]=-sin(x); 76 | rm[5]=cos(x); 77 | rm[10]=1.0f; 78 | rm[15]=1.0f; 79 | if(!r)multMatrix44(tm,rm,m); 80 | else multMatrix44(rm,tm,m); 81 | memcpy(tm,m,16*sizeof(float)); 82 | } 83 | 84 | void scaleMatrix(float* tm, float x, float y, float z) 85 | { 86 | tm[0]*=x; tm[4]*=x; tm[8]*=x; tm[12]*=x; 87 | tm[1]*=y; tm[5]*=y; tm[9]*=y; tm[13]*=y; 88 | tm[2]*=z; tm[6]*=z; tm[10]*=z; tm[14]*=z; 89 | } 90 | 91 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far) 92 | { 93 | float top = near*tan(fovy/2); 94 | float right = (top*aspect); 95 | 96 | float mp[4*4]; 97 | 98 | mp[0x0] = near/right; 99 | mp[0x1] = 0.0f; 100 | mp[0x2] = 0.0f; 101 | mp[0x3] = 0.0f; 102 | 103 | mp[0x4] = 0.0f; 104 | mp[0x5] = near/top; 105 | mp[0x6] = 0.0f; 106 | mp[0x7] = 0.0f; 107 | 108 | mp[0x8] = 0.0f; 109 | mp[0x9] = 0.0f; 110 | mp[0xA] = -(far+near)/(far-near); 111 | mp[0xB] = -2.0f*(far*near)/(far-near); 112 | 113 | mp[0xC] = 0.0f; 114 | mp[0xD] = 0.0f; 115 | mp[0xE] = -1.0f; 116 | mp[0xF] = 0.0f; 117 | 118 | float mp2[4*4]; 119 | loadIdentity44(mp2); 120 | mp2[0xA]=0.5; 121 | mp2[0xB]=-0.5; 122 | 123 | multMatrix44(mp2, mp, m); 124 | } 125 | 126 | vect3Df_s getMatrixColumn(float* m, u8 i) 127 | { 128 | if(!m || i>=4)return vect3Df(0,0,0); 129 | return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]); 130 | } 131 | 132 | vect3Df_s getMatrixRow(float* m, u8 i) 133 | { 134 | if(!m || i>=4)return vect3Df(0,0,0); 135 | return 
vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]); 136 | } 137 | 138 | vect4Df_s getMatrixColumn4(float* m, u8 i) 139 | { 140 | if(!m || i>=4)return vect4Df(0,0,0,0); 141 | return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]); 142 | } 143 | 144 | vect4Df_s getMatrixRow4(float* m, u8 i) 145 | { 146 | if(!m || i>=4)return vect4Df(0,0,0,0); 147 | return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]); 148 | } 149 | -------------------------------------------------------------------------------- /examples/assembler/cube_lighting/source/math.h: -------------------------------------------------------------------------------- 1 | #ifndef MATH_H 2 | #define MATH_H 3 | 4 | #include <3ds/types.h> 5 | #include 6 | 7 | typedef float mtx44[4][4]; 8 | typedef float mtx33[3][3]; 9 | 10 | typedef struct 11 | { 12 | s32 x, y, z; 13 | }vect3Di_s; 14 | 15 | static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z) 16 | { 17 | return (vect3Di_s){x,y,z}; 18 | } 19 | 20 | static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v) 21 | { 22 | return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z}; 23 | } 24 | 25 | static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v) 26 | { 27 | return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z}; 28 | } 29 | 30 | static inline vect3Di_s vmuli(vect3Di_s v, s32 f) 31 | { 32 | return (vect3Di_s){v.x*f,v.y*f,v.z*f}; 33 | } 34 | 35 | typedef struct 36 | { 37 | float x, y, z; 38 | }vect3Df_s; 39 | 40 | static inline vect3Df_s vect3Df(float x, float y, float z) 41 | { 42 | return (vect3Df_s){x,y,z}; 43 | } 44 | 45 | static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v) 46 | { 47 | return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z}; 48 | } 49 | 50 | static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v) 51 | { 52 | return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z}; 53 | } 54 | 55 | static inline vect3Df_s vmulf(vect3Df_s v, float f) 56 | { 57 | return (vect3Df_s){v.x*f,v.y*f,v.z*f}; 58 | } 59 | 60 | static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2) 61 | { 62 | return 
(vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z}; 63 | } 64 | 65 | static inline float vmagf(vect3Df_s v) 66 | { 67 | return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); 68 | } 69 | 70 | static inline float vdistf(vect3Df_s v1, vect3Df_s v2) 71 | { 72 | return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z)); 73 | } 74 | 75 | static inline vect3Df_s vnormf(vect3Df_s v) 76 | { 77 | const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z); 78 | return (vect3Df_s){v.x/l,v.y/l,v.z/l}; 79 | } 80 | 81 | typedef struct 82 | { 83 | float x, y, z, w; 84 | }vect4Df_s; 85 | 86 | static inline vect4Df_s vect4Df(float x, float y, float z, float w) 87 | { 88 | return (vect4Df_s){x,y,z,w}; 89 | } 90 | 91 | static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v) 92 | { 93 | return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w}; 94 | } 95 | 96 | static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v) 97 | { 98 | return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w}; 99 | } 100 | 101 | static inline vect4Df_s vmulf4(vect4Df_s v, float f) 102 | { 103 | return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f}; 104 | } 105 | 106 | static inline float vdotf4(vect4Df_s v1, vect4Df_s v2) 107 | { 108 | return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w; 109 | } 110 | 111 | static inline vect4Df_s vnormf4(vect4Df_s v) 112 | { 113 | const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w); 114 | return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l}; 115 | } 116 | 117 | //interstuff 118 | static inline vect3Di_s vf2i(vect3Df_s v) 119 | { 120 | return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)}; 121 | } 122 | 123 | static inline vect3Df_s vi2f(vect3Di_s v) 124 | { 125 | return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z}; 126 | } 127 | 128 | void loadIdentity44(float* m); 129 | void multMatrix44(float* m1, float* m2, float* m); 130 | 131 | void translateMatrix(float* tm, float x, float y, float z); 132 | void rotateMatrixX(float* tm, float x, bool r); 133 | void rotateMatrixY(float* tm, float x, bool r); 134 | void 
rotateMatrixZ(float* tm, float x, bool r); 135 | void scaleMatrix(float* tm, float x, float y, float z); 136 | 137 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far); 138 | 139 | vect3Df_s getMatrixColumn(float* m, u8 i); 140 | vect3Df_s getMatrixRow(float* m, u8 i); 141 | vect4Df_s getMatrixColumn4(float* m, u8 i); 142 | vect4Df_s getMatrixRow4(float* m, u8 i); 143 | 144 | #endif 145 | -------------------------------------------------------------------------------- /examples/inline_assembler/simple/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(simple simple.cpp) 2 | -------------------------------------------------------------------------------- /examples/inline_assembler/simple/simple.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include "nihstro/inline_assembly.h" 33 | 34 | using namespace nihstro; 35 | 36 | static const auto in_pos = SourceRegister::MakeInput(0); 37 | static const auto in_tex = SourceRegister::MakeInput(1); 38 | static const auto in_norm = SourceRegister::MakeInput(2); 39 | static const auto backup_pos = SourceRegister::MakeTemporary(1); 40 | static const auto temp_pos = SourceRegister::MakeTemporary(0); 41 | 42 | static const auto constant = SourceRegister::MakeFloat(20); 43 | 44 | static const SourceRegister projection[4] = { SourceRegister::MakeFloat(0), SourceRegister::MakeFloat(1), SourceRegister::MakeFloat(2), SourceRegister::MakeFloat(3) }; 45 | static const SourceRegister modelview[4] = { SourceRegister::MakeFloat(4), SourceRegister::MakeFloat(5), SourceRegister::MakeFloat(6), SourceRegister::MakeFloat(7) }; 46 | static const auto light_direction = SourceRegister::MakeFloat(8); 47 | static const auto light_ambient = SourceRegister::MakeFloat(9); 48 | 49 | static const DestRegister out_pos = DestRegister::MakeOutput(0); 50 | static const DestRegister out_col = DestRegister::MakeOutput(1); 51 | static const DestRegister out_tex0 = DestRegister::MakeOutput(2); 52 | static const DestRegister out_tex1 = DestRegister::MakeOutput(3); 53 | static const DestRegister out_tex2 = DestRegister::MakeOutput(4); 54 | 55 | const auto shbin = 
InlineAsm::CompileToShbin({ 56 | // TODO: Declare output names 57 | // TODO: Declare constant 58 | // TODO: Declare uniform names 59 | // TODO: Explicitly set entry point 60 | InlineAsm::DeclareOutput(out_pos, OutputRegisterInfo::POSITION), 61 | InlineAsm::DeclareOutput(out_col, OutputRegisterInfo::COLOR), 62 | InlineAsm::DeclareOutput(out_tex0, OutputRegisterInfo::TEXCOORD0), 63 | InlineAsm::DeclareOutput(out_tex1, OutputRegisterInfo::TEXCOORD1), 64 | InlineAsm::DeclareOutput(out_tex2, OutputRegisterInfo::TEXCOORD2), 65 | 66 | InlineAsm::DeclareConstant(constant, 1.0, 0.0, 0.5, 1.0), 67 | 68 | InlineAsm::DeclareUniform(projection[0], projection[3], "projection"), 69 | InlineAsm::DeclareUniform(modelview[0], modelview[3], "modelview"), 70 | InlineAsm::DeclareUniform(light_direction, light_direction, "lightDirection"), 71 | InlineAsm::DeclareUniform(light_ambient, light_ambient, "lightAmbient"), 72 | 73 | { OpCode::Id::MOV, backup_pos, "xyz", in_pos, "xyz" }, 74 | { OpCode::Id::MOV, backup_pos, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here! // TODO: Somehow, c4 gets written instead... 75 | 76 | { OpCode::Id::DP4, temp_pos, "x", modelview[0], backup_pos }, 77 | { OpCode::Id::DP4, temp_pos, "y", modelview[1], backup_pos }, 78 | { OpCode::Id::DP4, temp_pos, "z", modelview[2], backup_pos }, 79 | { OpCode::Id::MOV, temp_pos, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here! 
80 | 81 | { OpCode::Id::DP4, out_pos, "x", projection[0], temp_pos }, 82 | { OpCode::Id::DP4, out_pos, "y", projection[1], temp_pos }, 83 | { OpCode::Id::DP4, out_pos, "z", projection[2], temp_pos }, 84 | { OpCode::Id::DP4, out_pos, "w", projection[3], temp_pos }, 85 | 86 | { OpCode::Id::MOV, out_tex0, in_tex }, 87 | { OpCode::Id::MOV, out_tex1, constant, "yyyw" }, 88 | { OpCode::Id::MOV, out_tex2, constant, "yyyw" }, 89 | 90 | { OpCode::Id::DP3, temp_pos, "xyz", light_direction, in_norm }, 91 | { OpCode::Id::MAX, temp_pos, "xyz", constant, "yyy", temp_pos }, 92 | { OpCode::Id::MUL, temp_pos, "xyz", light_ambient, "www", temp_pos }, 93 | { OpCode::Id::ADD, out_col, "xyz", light_ambient, temp_pos }, 94 | { OpCode::Id::MOV, out_col, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here! 95 | 96 | { OpCode::Id::NOP }, 97 | { OpCode::Id::END } 98 | }); 99 | 100 | int main(int argc, char* argv[]) 101 | { 102 | if (argc < 2) { 103 | std::cout << "Error: No filename given" << std::endl; 104 | return 0; 105 | } 106 | 107 | std::ofstream file(argv[1], std::ios::binary); 108 | std::copy(shbin.begin(), shbin.end(), std::ostream_iterator(file)); 109 | 110 | std::cout << "Successfully compiled shader to " << argv[1] << "!" << std::endl; 111 | 112 | return 0; 113 | } 114 | -------------------------------------------------------------------------------- /include/nihstro/bit_field.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 
9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | 29 | #pragma once 30 | 31 | #include 32 | #include 33 | 34 | #ifndef __forceinline 35 | #ifndef _WIN32 36 | #define __forceinline inline __attribute__((always_inline)) 37 | #endif 38 | #endif 39 | 40 | namespace nihstro { 41 | 42 | /* 43 | * Abstract bitfield class 44 | * 45 | * Allows endianness-independent access to individual bitfields within some raw 46 | * integer value. The assembly generated by this class is identical to the 47 | * usage of raw bitfields, so it's a perfectly fine replacement. 48 | * 49 | * For BitField, X is the distance of the bitfield to the LSB of the 50 | * raw value, Y is the length in bits of the bitfield. 
Z is an integer type 51 | * which determines the sign of the bitfield. Z must have the same size as the 52 | * raw integer. 53 | * 54 | * 55 | * General usage: 56 | * 57 | * Create a new union with the raw integer value as a member. 58 | * Then for each bitfield you want to expose, add a BitField member 59 | * in the union. The template parameters are the bit offset and the number 60 | * of desired bits. 61 | * 62 | * Changes in the bitfield members will then get reflected in the raw integer 63 | * value and vice-versa. 64 | * 65 | * 66 | * Sample usage: 67 | * 68 | * union SomeRegister 69 | * { 70 | * u32 hex; 71 | * 72 | * BitField<0,7,u32> first_seven_bits; // unsigned 73 | * BitField<7,8,u32> next_eight_bits; // unsigned 74 | * BitField<3,15,s32> some_signed_fields; // signed 75 | * }; 76 | * 77 | * This is equivalent to the little-endian specific code: 78 | * 79 | * union SomeRegister 80 | * { 81 | * u32 hex; 82 | * 83 | * struct 84 | * { 85 | * u32 first_seven_bits : 7; 86 | * u32 next_eight_bits : 8; 87 | * }; 88 | * struct 89 | * { 90 | * u32 : 3; // padding 91 | * s32 some_signed_fields : 15; 92 | * }; 93 | * }; 94 | * 95 | * 96 | * Caveats: 97 | * 98 | * 1) 99 | * BitField provides automatic casting from and to the storage type where 100 | * appropriate. However, when using non-typesafe functions like printf, an 101 | * explicit cast must be performed on the BitField object to make sure it gets 102 | * passed correctly, e.g.: 103 | * printf("Value: %d", (s32)some_register.some_signed_fields); 104 | * 105 | * 2) 106 | * Not really a caveat, but potentially irritating: This class is used in some 107 | * packed structures that do not guarantee proper alignment. Therefore we have 108 | * to use #pragma pack here not to pack the members of the class, but instead 109 | * to break GCC's assumption that the members of the class are aligned on 110 | * sizeof(StorageType). 
111 | * TODO(neobrain): Confirm that this is a proper fix and not just masking 112 | * symptoms. 113 | */ 114 | #pragma pack(1) 115 | template 116 | struct BitField 117 | { 118 | private: 119 | // This constructor might be considered ambiguous: 120 | // Would it initialize the storage or just the bitfield? 121 | // Hence, delete it. Use the assignment operator to set bitfield values! 122 | BitField(T val) = delete; 123 | 124 | public: 125 | // Force default constructor to be created 126 | // so that we can use this within unions 127 | BitField() = default; 128 | 129 | #ifndef _WIN32 130 | // We explicitly delete the copy assigment operator here, because the 131 | // default copy assignment would copy the full storage value, rather than 132 | // just the bits relevant to this particular bit field. 133 | // Ideally, we would just implement the copy assignment to copy only the 134 | // relevant bits, but this requires compiler support for unrestricted 135 | // unions. 136 | // MSVC 2013 has no support for this, hence we disable this code on 137 | // Windows (so that the default copy assignment operator will be used). 138 | // For any C++11 conformant compiler we delete the operator to make sure 139 | // we never use this inappropriate operator to begin with. 140 | // TODO: Implement this operator properly once all target compilers 141 | // support unrestricted unions. 142 | // TODO: Actually, deleting and overriding this operator both cause more 143 | // harm than anything. Instead, it's suggested to never use the copy 144 | // constructor directly but instead invoke Assign() explicitly. 
145 | // BitField& operator=(const BitField&) = delete; 146 | #endif 147 | 148 | __forceinline BitField& operator=(T val) 149 | { 150 | Assign(val); 151 | return *this; 152 | } 153 | 154 | __forceinline operator typename std::add_const::type() const 155 | { 156 | return Value(); 157 | } 158 | 159 | __forceinline void Assign(const T& value) { 160 | storage = (storage & ~GetMask()) | ((((StorageType)value) << position) & GetMask()); 161 | } 162 | 163 | __forceinline typename std::add_const::type Value() const 164 | { 165 | if (std::numeric_limits::is_signed) 166 | { 167 | std::size_t shift = 8 * sizeof(T)-bits; 168 | return (T)(((storage & GetMask()) << (shift - position)) >> shift); 169 | } 170 | else 171 | { 172 | return (T)((storage & GetMask()) >> position); 173 | } 174 | } 175 | 176 | static size_t NumBits() { 177 | return bits; 178 | } 179 | 180 | private: 181 | // StorageType is T for non-enum types and the underlying type of T if 182 | // T is an enumeration. Note that T is wrapped within an enable_if in the 183 | // former case to workaround compile errors which arise when using 184 | // std::underlying_type::type directly. 
185 | typedef typename std::conditional < std::is_enum::value, 186 | std::underlying_type, 187 | std::enable_if < true, T >> ::type::type StorageType; 188 | 189 | // Unsigned version of StorageType 190 | typedef typename std::make_unsigned::type StorageTypeU; 191 | 192 | __forceinline StorageType GetMask() const 193 | { 194 | return ((~(StorageTypeU)0) >> (8 * sizeof(T)-bits)) << position; 195 | } 196 | 197 | StorageType storage; 198 | 199 | static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range"); 200 | 201 | // And, you know, just in case people specify something stupid like bits=position=0x80000000 202 | static_assert(position < 8 * sizeof(T), "Invalid position"); 203 | static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); 204 | static_assert(bits > 0, "Invalid number of bits"); 205 | static_assert(std::is_standard_layout::value, "Invalid base type"); 206 | }; 207 | 208 | /** 209 | * Abstract bit flag class. This is basically a specialization of BitField for single-bit fields. 210 | * Instead of being cast to the underlying type, it acts like a boolean. 
211 | */ 212 | template 213 | struct BitFlag : protected BitField 214 | { 215 | private: 216 | BitFlag(T val) = delete; 217 | 218 | typedef BitField ParentType; 219 | 220 | public: 221 | BitFlag() = default; 222 | 223 | #ifndef _WIN32 224 | BitFlag& operator=(const BitFlag&) = delete; 225 | #endif 226 | 227 | __forceinline BitFlag& operator=(bool val) 228 | { 229 | Assign(val); 230 | return *this; 231 | } 232 | 233 | __forceinline operator bool() const 234 | { 235 | return Value(); 236 | } 237 | 238 | __forceinline void Assign(bool value) { 239 | ParentType::Assign(value); 240 | } 241 | 242 | __forceinline bool Value() const 243 | { 244 | return ParentType::Value() != 0; 245 | } 246 | }; 247 | #pragma pack() 248 | 249 | } // namespace 250 | -------------------------------------------------------------------------------- /include/nihstro/float24.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "bit_field.h" 7 | 8 | namespace nihstro { 9 | 10 | inline uint32_t to_float24(float val) { 11 | static_assert(std::numeric_limits::is_iec559, "Compiler does not adhere to IEEE 754"); 12 | 13 | union Float32 { 14 | BitField< 0, 23, uint32_t> mant; 15 | BitField<23, 8, uint32_t> biased_exp; 16 | BitField<31, 1, uint32_t> sign; 17 | 18 | static int ExponentBias() { 19 | return 127; 20 | } 21 | } f32 = reinterpret_cast(val); 22 | 23 | union Float24 { 24 | uint32_t hex; 25 | 26 | BitField< 0, 16, uint32_t> mant; 27 | BitField<16, 7, uint32_t> biased_exp; 28 | BitField<23, 1, uint32_t> sign; 29 | 30 | static int ExponentBias() { 31 | return 63; 32 | } 33 | } f24 = { 0 }; 34 | 35 | int biased_exp = (int)f32.biased_exp - Float32::ExponentBias() + Float24::ExponentBias(); 36 | unsigned mant = (biased_exp >= 0) ? 
(f32.mant >> (f32.mant.NumBits() - f24.mant.NumBits())) : 0; 37 | if (biased_exp >= (1 << f24.biased_exp.NumBits())) { 38 | // TODO: Return +inf or -inf 39 | } 40 | 41 | f24.biased_exp = std::max(0, biased_exp); 42 | f24.mant = mant; 43 | f24.sign = f32.sign.Value(); 44 | 45 | return f24.hex; 46 | } 47 | 48 | } // namespace 49 | -------------------------------------------------------------------------------- /include/nihstro/parser_assembly.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | #include "source_tree.h" 40 | 41 | #include "shader_binary.h" 42 | #include "shader_bytecode.h" 43 | 44 | namespace nihstro { 45 | 46 | struct InputSwizzlerMask { 47 | int num_components; 48 | 49 | enum Component : uint8_t { 50 | x = 0, 51 | y = 1, 52 | z = 2, 53 | w = 3, 54 | }; 55 | std::array components; 56 | 57 | static InputSwizzlerMask FullMask() { 58 | return { 4, {x,y,z,w} }; 59 | } 60 | 61 | bool operator == (const InputSwizzlerMask& oth) const { 62 | return this->num_components == oth.num_components && this->components == oth.components; 63 | } 64 | 65 | // TODO: Move to implementation? 
66 | friend std::ostream& operator<<(std::ostream& os, const Component& v) { 67 | switch(v) { 68 | case x: return os << "x"; 69 | case y: return os << "y"; 70 | case z: return os << "z"; 71 | case w: return os << "w"; 72 | default: return os << "?"; 73 | } 74 | } 75 | friend std::ostream& operator<<(std::ostream& os, const InputSwizzlerMask& v) { 76 | if (!v.num_components) 77 | return os << "(empty_mask)"; 78 | 79 | for (int i = 0; i < v.num_components; ++i) 80 | os << v.components[i]; 81 | 82 | return os; 83 | } 84 | 85 | friend std::string to_string(const Component& v) { 86 | std::stringstream ss; 87 | ss << v; 88 | return ss.str(); 89 | } 90 | 91 | friend std::string to_string(const InputSwizzlerMask& v) { 92 | std::stringstream ss; 93 | ss << v; 94 | return ss.str(); 95 | } 96 | }; 97 | 98 | using Identifier = std::string; 99 | 100 | // A sign, i.e. +1 or -1 101 | using Sign = int; 102 | 103 | struct IntegerWithSign { 104 | int sign; 105 | unsigned value; 106 | 107 | int GetValue() const { 108 | return sign * value; 109 | } 110 | }; 111 | 112 | // Raw index + address register index 113 | struct IndexExpression : std::vector> { 114 | int GetCount() const { 115 | return this->size(); 116 | } 117 | 118 | bool IsRawIndex(int arg) const { 119 | return (*this)[arg].which() == 0; 120 | } 121 | 122 | int GetRawIndex(int arg) const { 123 | return boost::get((*this)[arg]).GetValue(); 124 | } 125 | 126 | bool IsAddressRegisterIdentifier(int arg) const { 127 | return (*this)[arg].which() == 1; 128 | } 129 | 130 | Identifier GetAddressRegisterIdentifier(int arg) const { 131 | return boost::get((*this)[arg]); 132 | } 133 | }; 134 | 135 | 136 | struct Expression { 137 | struct SignedIdentifier { 138 | boost::optional sign; 139 | Identifier identifier; 140 | } signed_identifier; 141 | 142 | boost::optional index; 143 | std::vector swizzle_masks; 144 | 145 | int GetSign() const { 146 | if (!RawSign()) 147 | return +1; 148 | else 149 | return *RawSign(); 150 | } 151 | 152 | 
const Identifier& GetIdentifier() const { 153 | return RawIdentifier(); 154 | } 155 | 156 | bool HasIndexExpression() const { 157 | return static_cast(RawIndex()); 158 | } 159 | 160 | const IndexExpression& GetIndexExpression() const { 161 | return *RawIndex(); 162 | } 163 | 164 | const std::vector& GetSwizzleMasks() const { 165 | return RawSwizzleMasks(); 166 | } 167 | 168 | private: 169 | const boost::optional& RawSign() const { 170 | return signed_identifier.sign; 171 | } 172 | 173 | const Identifier& RawIdentifier() const { 174 | return signed_identifier.identifier; 175 | } 176 | 177 | const boost::optional& RawIndex() const { 178 | return index; 179 | } 180 | 181 | const std::vector& RawSwizzleMasks() const { 182 | return swizzle_masks; 183 | } 184 | }; 185 | 186 | struct ConditionInput { 187 | bool invert; 188 | Identifier identifier; 189 | boost::optional swizzler_mask; 190 | 191 | bool GetInvertFlag() const { 192 | return invert; 193 | } 194 | 195 | const Identifier& GetIdentifier() const { 196 | return identifier; 197 | } 198 | 199 | bool HasSwizzleMask() const { 200 | return static_cast(swizzler_mask); 201 | } 202 | 203 | const InputSwizzlerMask& GetSwizzleMask() const { 204 | return *swizzler_mask; 205 | } 206 | }; 207 | 208 | struct Condition { 209 | ConditionInput input1; 210 | Instruction::FlowControlType::Op op; 211 | ConditionInput input2; 212 | 213 | const ConditionInput& GetFirstInput() const { 214 | return input1; 215 | } 216 | 217 | Instruction::FlowControlType::Op GetConditionOp() const { 218 | return op; 219 | } 220 | 221 | const ConditionInput& GetSecondInput() const { 222 | return input2; 223 | } 224 | }; 225 | 226 | using StatementLabel = std::string; 227 | 228 | struct StatementInstruction { 229 | OpCode opcode; 230 | std::vector expressions; 231 | 232 | StatementInstruction() = default; 233 | 234 | // TODO: Obsolete constructor? 
235 | StatementInstruction(const OpCode& opcode) : opcode(opcode) { 236 | } 237 | // The argument list is taken by reference and copied once, into the member. 238 | StatementInstruction(const OpCode& opcode, const std::vector& expressions) : opcode(opcode), expressions(expressions) { 239 | } 240 | 241 | const OpCode& GetOpCode() const { 242 | return opcode; 243 | } 244 | 245 | const std::vector& GetArguments() const { 246 | return expressions; 247 | } 248 | }; 249 | using FloatOpInstruction = StatementInstruction; 250 | 251 | struct CompareInstruction { 252 | OpCode opcode; 253 | std::vector arguments; 254 | std::vector ops; 255 | 256 | const OpCode& GetOpCode() const { 257 | return opcode; 258 | } 259 | 260 | const Expression& GetSrc1() const { 261 | return arguments[0]; 262 | } 263 | 264 | const Expression& GetSrc2() const { 265 | return arguments[1]; 266 | } 267 | 268 | Instruction::Common::CompareOpType::Op GetOp1() const { 269 | return ops[0]; 270 | } 271 | 272 | Instruction::Common::CompareOpType::Op GetOp2() const { 273 | return ops[1]; 274 | } 275 | }; 276 | 277 | struct FlowControlInstruction { 278 | OpCode opcode; 279 | std::string target_label; 280 | boost::optional return_label; 281 | boost::optional condition; 282 | 283 | const OpCode& GetOpCode() const { 284 | return opcode; 285 | } 286 | 287 | const std::string& GetTargetLabel() const { 288 | return target_label; 289 | } 290 | 291 | bool HasReturnLabel() const { 292 | return static_cast(return_label); 293 | } 294 | 295 | const std::string& GetReturnLabel() const { 296 | return *return_label; 297 | } 298 | 299 | bool HasCondition() const { 300 | return static_cast(condition); 301 | } 302 | 303 | const Condition& GetCondition() const { 304 | return *condition; 305 | } 306 | }; 307 | 308 | struct SetEmitInstruction { 309 | OpCode opcode; 310 | unsigned vertex_id; 311 | 312 | struct Flags { 313 | boost::optional primitive_flag; 314 | boost::optional invert_flag; 315 | } flags; 316 | 317 | bool PrimitiveFlag() const { 318 | return flags.primitive_flag && *flags.primitive_flag; 319
| } 320 | 321 | bool InvertFlag() const { 322 | return flags.invert_flag && *flags.invert_flag; 323 | } 324 | }; 325 | 326 | struct StatementDeclaration { 327 | std::string alias_name; 328 | Identifier identifier_start; /* aliased identifier (start register) */ 329 | boost::optional identifier_end; /* aliased identifier (end register) */ 330 | boost::optional swizzle_mask; // referring to the aliased identifier 331 | 332 | struct Extra { 333 | std::vector constant_value; 334 | boost::optional output_semantic; 335 | } extra; 336 | }; 337 | 338 | struct ParserContext { 339 | // There currently is no context 340 | }; 341 | 342 | 343 | struct Parser { 344 | using Iterator = SourceTreeIterator; 345 | 346 | Parser(const ParserContext& context); 347 | ~Parser(); 348 | 349 | // Skip whitespace, blank lines, and comments; returns number of line breaks skipped. 350 | unsigned Skip(Iterator& begin, Iterator end); 351 | 352 | // Skip to the next line 353 | void SkipSingleLine(Iterator& begin, Iterator end); 354 | 355 | // Parse alias declaration including line ending 356 | bool ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* declaration); 357 | 358 | // Parse label declaration including line ending 359 | bool ParseLabel(Iterator& begin, Iterator end, StatementLabel* label); 360 | 361 | // Parse nothing but a single opcode 362 | bool ParseOpCode(Iterator& begin, Iterator end, OpCode* opcode); 363 | 364 | // Parse trivial instruction including line ending 365 | bool ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* opcode); 366 | 367 | // Parse float instruction including line ending 368 | bool ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* content); 369 | 370 | // Parse compare instruction including line ending 371 | bool ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content); 372 | 373 | // Parse flow control instruction including line ending 374 | bool ParseFlowControl(Iterator& begin, Iterator end,
FlowControlInstruction* content); 375 | 376 | // Parse SetEmit instruction including line ending 377 | bool ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content); 378 | 379 | private: 380 | struct ParserImpl; 381 | std::unique_ptr impl; 382 | }; 383 | 384 | } // namespace 385 | -------------------------------------------------------------------------------- /include/nihstro/parser_shbin.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "nihstro/shader_binary.h" 37 | 38 | namespace nihstro { 39 | 40 | struct ShaderInfo { 41 | std::vector code; 42 | std::vector swizzle_info; 43 | 44 | std::vector constant_table; 45 | std::vector label_table; 46 | std::map labels; 47 | std::vector output_register_info; 48 | std::vector uniform_table; 49 | 50 | void Clear() { 51 | code.clear(); 52 | swizzle_info.clear(); 53 | constant_table.clear(); 54 | label_table.clear(); 55 | labels.clear(); 56 | output_register_info.clear(); 57 | uniform_table.clear(); 58 | } 59 | 60 | bool HasLabel(uint32_t offset) const { 61 | return labels.find(offset) != labels.end(); 62 | } 63 | 64 | std::string GetLabel (uint32_t offset) const { 65 | auto it = labels.find(offset); 66 | if (it != labels.end()) 67 | return it->second; 68 | return ""; 69 | } 70 | 71 | template 72 | std::string LookupDestName(const T& dest, const SwizzlePattern& swizzle) const { 73 | if (dest < 0x8) { 74 | // TODO: This one still needs some prettification in case 75 | // multiple output_infos describing this output register 76 | // are found. 
77 | std::string ret; 78 | for (const auto& output_info : output_register_info) { 79 | if (dest != output_info.id) 80 | continue; 81 | 82 | // Only display output register name if the output components it's mapped to are 83 | // actually written to. 84 | // swizzle.dest_mask and output_info.component_mask use different bit order, 85 | // so we can't simply AND them bitwise to check this. 86 | int matching_mask = 0; 87 | for (int i = 0; i < 4; ++i) 88 | matching_mask |= output_info.component_mask & (swizzle.DestComponentEnabled(i) << i); 89 | 90 | if (!matching_mask) 91 | continue; 92 | 93 | // Add a vertical bar so that we have at least *some* 94 | // indication that we hit multiple matches. 95 | if (!ret.empty()) 96 | ret += "|"; 97 | 98 | ret += output_info.GetSemanticName(); 99 | } 100 | if (!ret.empty()) 101 | return ret; 102 | } else if (dest.GetRegisterType() == RegisterType::Temporary) { 103 | // TODO: Not sure if uniform_info can assign names to temporary registers. 104 | // If that is the case, we should check the table for better names here. 105 | std::stringstream stream; 106 | stream << "temp_" << std::hex << dest.GetIndex(); 107 | return stream.str(); 108 | } 109 | return "(?)"; 110 | } 111 | 112 | template 113 | std::string LookupSourceName(const T& source, unsigned addr_reg_index) const { 114 | if (source.GetRegisterType() != RegisterType::Temporary) { 115 | for (const auto& uniform_info : uniform_table) { 116 | // Magic numbers are needed because uniform info registers use the 117 | // range 0..0x10 for input registers and 0x10...0x70 for uniform registers, 118 | // i.e. there is a "gap" at the temporary registers, for which no 119 | // name can be assigned (?). 120 | int off = (source.GetRegisterType() == RegisterType::Input) ?
0 : 0x10; 121 | if (source - off >= uniform_info.basic.reg_start && 122 | source - off <= uniform_info.basic.reg_end) { 123 | std::string name = uniform_info.name; 124 | 125 | std::string index; 126 | bool is_array = uniform_info.basic.reg_end != uniform_info.basic.reg_start; 127 | if (is_array) { 128 | index += std::to_string(source - off - uniform_info.basic.reg_start); 129 | } 130 | if (addr_reg_index != 0) { 131 | index += (is_array) ? " + " : ""; 132 | index += "a" + std::to_string(addr_reg_index - 1); 133 | } 134 | 135 | if (!index.empty()) 136 | name += "[" + index + "]"; 137 | 138 | return name; 139 | } 140 | } 141 | } 142 | 143 | // Constants and uniforms really are the same internally 144 | for (const auto& constant_info : constant_table) { 145 | if (source - 0x20 == constant_info.regid) { 146 | return "const_" + std::to_string(constant_info.regid.Value()); 147 | } 148 | } 149 | 150 | // For temporary registers, we at least print "temp_X" if no better name could be found. 151 | if (source.GetRegisterType() == RegisterType::Temporary) { 152 | std::stringstream stream; 153 | stream << "temp_" << std::hex << source.GetIndex(); 154 | return stream.str(); 155 | } 156 | 157 | return "(?)"; 158 | } 159 | }; 160 | 161 | class ShbinParser { 162 | public: 163 | void ReadHeaders(const std::string& filename); 164 | 165 | void ReadDVLE(int dvle_index); 166 | 167 | const DVLBHeader& GetDVLBHeader() const { 168 | return dvlb_header; 169 | } 170 | 171 | const DVLPHeader& GetDVLPHeader() const { 172 | return dvlp_header; 173 | } 174 | 175 | const DVLEHeader& GetDVLEHeader(int index) const { 176 | return dvle_headers[index]; 177 | } 178 | 179 | const std::string& GetFilename(int dvle_index) const { 180 | return dvle_filenames[dvle_index]; 181 | } 182 | 183 | private: 184 | 185 | // Reads a null-terminated string from the given offset 186 | std::string ReadSymbol(uint32_t offset); 187 | 188 | std::fstream file; 189 | 190 | 191 | DVLBHeader dvlb_header; 192 | DVLPHeader 
dvlp_header; 193 | 194 | uint32_t dvlp_offset; 195 | 196 | public: 197 | std::vector dvle_offsets; 198 | std::vector dvle_headers; 199 | std::vector dvle_filenames; 200 | 201 | ShaderInfo shader_info; 202 | 203 | uint32_t main_offset; 204 | }; 205 | 206 | 207 | } // namespace 208 | -------------------------------------------------------------------------------- /include/nihstro/preprocessor.h: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #pragma once 29 | 30 | #include <string> // std::string is used by the declaration in this header 31 | 32 | namespace nihstro { 33 | 34 | struct SourceTree; 35 | // NOTE(review): presumably returns the preprocessed contents of the assembly file `filename` as a SourceTree - confirm against src/preprocessor.cpp 36 | SourceTree PreprocessAssemblyFile(const std::string& filename); 37 | 38 | } // namespace 39 | -------------------------------------------------------------------------------- /include/nihstro/shader_binary.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission.
15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #pragma once 29 | 30 | #include 31 | 32 | #include "shader_bytecode.h" 33 | 34 | namespace nihstro { 35 | 36 | #pragma pack(1) 37 | struct DVLBHeader { 38 | enum : uint32_t { 39 | MAGIC_WORD = 0x424C5644, // "DVLB" 40 | }; 41 | 42 | uint32_t magic_word; 43 | uint32_t num_programs; 44 | 45 | // DVLE offset table with num_programs entries follows 46 | }; 47 | static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); 48 | 49 | struct DVLPHeader { 50 | enum : uint32_t { 51 | MAGIC_WORD = 0x504C5644, // "DVLP" 52 | }; 53 | 54 | uint32_t magic_word; 55 | uint32_t version; 56 | uint32_t binary_offset; // relative to DVLP start 57 | uint32_t binary_size_words; 58 | uint32_t swizzle_info_offset; 59 | uint32_t swizzle_info_num_entries; 60 | uint32_t filename_symbol_offset; 61 | }; 62 | static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); 63 | 64 | struct DVLEHeader { 65 | enum : uint32_t { 66 | MAGIC_WORD = 0x454c5644, // "DVLE" 67 | }; 68 | 69 | enum class ShaderType : uint8_t { 70 | VERTEX = 0, 71 | GEOMETRY = 1, 72 | }; 73 | 74 | uint32_t magic_word; 75 | uint16_t pad1; 76 | ShaderType type; 77 | 
uint8_t pad2; 78 | 79 | // Offset within binary blob to program entry point 80 | uint32_t main_offset_words; 81 | uint32_t endmain_offset_words; 82 | 83 | uint32_t pad3; 84 | uint32_t pad4; 85 | 86 | // Table of constant values for single registers 87 | uint32_t constant_table_offset; 88 | uint32_t constant_table_size; // number of entries 89 | 90 | // Table of program code labels 91 | uint32_t label_table_offset; 92 | uint32_t label_table_size; 93 | 94 | // Table of output registers and their semantics 95 | uint32_t output_register_table_offset; 96 | uint32_t output_register_table_size; 97 | 98 | // Table of uniforms (which may span multiple registers) and their values 99 | uint32_t uniform_table_offset; 100 | uint32_t uniform_table_size; 101 | 102 | // Table of null-terminated strings referenced by the tables above 103 | uint32_t symbol_table_offset; 104 | uint32_t symbol_table_size; 105 | 106 | }; 107 | static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); 108 | 109 | 110 | struct SwizzleInfo { 111 | SwizzlePattern pattern; 112 | uint32_t unknown; 113 | }; 114 | 115 | struct ConstantInfo { 116 | enum Type : uint32_t { 117 | Bool = 0, 118 | Int = 1, 119 | Float = 2 120 | }; 121 | 122 | union { 123 | uint32_t full_first_word; 124 | 125 | BitField<0, 2, Type> type; 126 | 127 | BitField<16, 8, uint32_t> regid; 128 | }; 129 | 130 | union { 131 | uint32_t value_hex[4]; 132 | 133 | BitField<0, 1, uint32_t> b; 134 | 135 | struct { 136 | uint8_t x; 137 | uint8_t y; 138 | uint8_t z; 139 | uint8_t w; 140 | } i; 141 | 142 | struct { 143 | // All of these are float24 values! 
144 | uint32_t x; 145 | uint32_t y; 146 | uint32_t z; 147 | uint32_t w; 148 | } f; 149 | }; 150 | }; 151 | 152 | struct LabelInfo { 153 | BitField<0, 8, uint32_t> id; 154 | uint32_t program_offset; 155 | uint32_t unk; 156 | uint32_t name_offset; 157 | }; 158 | 159 | union OutputRegisterInfo { 160 | enum Type : uint64_t { 161 | POSITION = 0, 162 | QUATERNION = 1, 163 | COLOR = 2, 164 | TEXCOORD0 = 3, 165 | 166 | TEXCOORD1 = 5, 167 | TEXCOORD2 = 6, 168 | 169 | VIEW = 8, 170 | }; 171 | 172 | OutputRegisterInfo& operator =(const OutputRegisterInfo& oth) { 173 | hex.Assign(oth.hex); 174 | return *this; 175 | } 176 | 177 | BitField< 0, 64, uint64_t> hex; 178 | 179 | BitField< 0, 16, Type> type; 180 | BitField<16, 16, uint64_t> id; 181 | BitField<32, 4, uint64_t> component_mask; 182 | BitField<32, 32, uint64_t> descriptor; 183 | 184 | const std::string GetMask() const { 185 | std::string ret; 186 | if (component_mask & 1) ret += "x"; 187 | if (component_mask & 2) ret += "y"; 188 | if (component_mask & 4) ret += "z"; 189 | if (component_mask & 8) ret += "w"; 190 | return ret; 191 | } 192 | 193 | const std::string GetSemanticName() const { 194 | static const std::map map = { 195 | { POSITION, "out.pos" }, 196 | { QUATERNION, "out.quat" }, 197 | { COLOR, "out.col" }, 198 | { TEXCOORD0, "out.tex0" }, 199 | { TEXCOORD1, "out.tex1" }, 200 | { TEXCOORD2, "out.tex2" }, 201 | { VIEW, "out.view" } 202 | }; 203 | auto it = map.find(type); 204 | if (it != map.end()) 205 | return it->second; 206 | else 207 | return "out.unk"; 208 | } 209 | }; 210 | 211 | struct UniformInfo { 212 | struct { 213 | static RegisterType GetType(uint32_t reg) { 214 | if (reg < 0x10) return RegisterType::Input; 215 | else if (reg < 0x70) return RegisterType::FloatUniform; 216 | else if (reg < 0x74) return RegisterType::IntUniform; 217 | else if (reg >= 0x78 && reg < 0x88) return RegisterType::BoolUniform; 218 | else return RegisterType::Unknown; 219 | } 220 | 221 | static int GetIndex(uint32_t reg) { 222 | 
switch (GetType(reg)) { 223 | case RegisterType::Input: return reg; 224 | case RegisterType::FloatUniform: return reg - 0x10; 225 | case RegisterType::IntUniform: return reg - 0x70; 226 | case RegisterType::BoolUniform: return reg - 0x78; 227 | default: return -1; 228 | } 229 | } 230 | 231 | RegisterType GetStartType() const { 232 | return GetType(reg_start); 233 | } 234 | 235 | RegisterType GetEndType() const { 236 | return GetType(reg_end); 237 | } 238 | 239 | int GetStartIndex() const { 240 | return GetIndex(reg_start); 241 | } 242 | 243 | int GetEndIndex() const { 244 | return GetIndex(reg_end); 245 | } 246 | 247 | uint32_t symbol_offset; 248 | union { 249 | BitField< 0, 16, uint32_t> reg_start; 250 | BitField<16, 16, uint32_t> reg_end; // inclusive 251 | }; 252 | } basic; 253 | std::string name; 254 | }; 255 | 256 | #pragma pack() 257 | 258 | } // namespace 259 | -------------------------------------------------------------------------------- /include/nihstro/source_tree.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 
15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #pragma once 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #include 35 | #include 36 | 37 | namespace nihstro { 38 | 39 | struct SourceTreeIterator; 40 | 41 | 42 | struct Node; 43 | // SequenceContainer 44 | struct SourceTree { 45 | SourceTree() = default; 46 | SourceTree(const SourceTree& oth); 47 | 48 | std::string code; 49 | 50 | struct { 51 | std::string filename; 52 | } file_info; 53 | 54 | SourceTree* parent = nullptr; 55 | 56 | // ordered with respect to "position" 57 | std::list children; 58 | 59 | SourceTreeIterator begin(); 60 | SourceTreeIterator end(); 61 | 62 | // Attach the given tree, changing the child's owner to *this. 
63 | SourceTree& Attach(SourceTree tree, std::string::difference_type offset); 64 | }; 65 | 66 | struct Node { 67 | SourceTree tree; 68 | 69 | std::string::difference_type offset_within_parent; // within "code" 70 | }; 71 | 72 | inline SourceTree::SourceTree(const SourceTree& oth) : code(oth.code), file_info(oth.file_info), parent(oth.parent), children(oth.children) { 73 | for (auto& child : children) 74 | child.tree.parent = this; 75 | } 76 | 77 | inline SourceTree& SourceTree::Attach(SourceTree tree, std::string::difference_type offset) { 78 | tree.parent = this; 79 | children.push_back(Node{tree, offset}); 80 | return *this; 81 | } 82 | 83 | // RandomAccessIterator 84 | struct SourceTreeIterator { 85 | using difference_type = std::string::iterator::difference_type; 86 | using reference = std::string::iterator::reference; 87 | using value_type = std::string::iterator::value_type; 88 | using pointer = std::string::iterator::pointer; 89 | using iterator_category = std::random_access_iterator_tag; 90 | 91 | SourceTreeIterator() { 92 | } 93 | 94 | SourceTreeIterator(SourceTree& tree) : tree(&tree), position(tree.code.begin()), node_iterator(tree.children.begin()) { 95 | UpdateChildIterator(); 96 | } 97 | 98 | SourceTreeIterator(const SourceTreeIterator&) = default; 99 | 100 | SourceTreeIterator& operator += (difference_type n) { 101 | if (n > 0) { 102 | while (n) { 103 | if (child_iterator) { 104 | auto remaining_to_child = node_iterator->offset_within_parent - (position - tree->code.begin()); 105 | if (remaining_to_child >= n) { 106 | // If the next child is more than n steps away, increase position by n and return 107 | // TODO: Should we make sure that we don't end up out-of-bounds here? 
108 | position += n; 109 | UpdateNodeIterator(); 110 | break; 111 | } else { 112 | // Otherwise, move current position to the child if it isn't there already 113 | position += remaining_to_child; 114 | n -= remaining_to_child; 115 | UpdateNodeIterator(); 116 | } 117 | 118 | if (child_iterator->get().StepsRemaining() > n) { 119 | // If child is larger than n, advance child by n and return 120 | child_iterator->get() += n; 121 | break; 122 | } else { 123 | // else step out of the child and increment next child iterator by one 124 | n -= child_iterator->get().StepsRemaining(); 125 | if (node_iterator != tree->children.end()) 126 | node_iterator++; 127 | UpdateChildIterator(); 128 | } 129 | } else { 130 | // TODO: Should we make sure that we don't end up out-of-bounds here? 131 | position += n; 132 | UpdateNodeIterator(); 133 | break; 134 | } 135 | } 136 | } else if (n < 0) { 137 | // Reduce to n>0 case by starting from begin() 138 | n = (*this - tree->begin()) + n; 139 | *this = tree->begin() + n; 140 | } 141 | return *this; 142 | } 143 | 144 | SourceTreeIterator& operator -= (difference_type n) { 145 | *this += -n; 146 | return *this; 147 | } 148 | 149 | difference_type operator -(SourceTreeIterator it) const { 150 | return this->StepsGone() - it.StepsGone(); 151 | } 152 | 153 | bool operator < (const SourceTreeIterator& it) const { 154 | return std::distance(*this, it) > 0; 155 | } 156 | 157 | bool operator <= (const SourceTreeIterator& it) const { 158 | return std::distance(*this, it) >= 0; 159 | } 160 | 161 | bool operator > (const SourceTreeIterator& it) const { 162 | return !(*this <= it); 163 | } 164 | 165 | bool operator >= (const SourceTreeIterator& it) const { 166 | return !(*this < it); 167 | } 168 | 169 | bool operator == (const SourceTreeIterator& it) const { 170 | return (*this <= it) && !(*this < it); 171 | } 172 | 173 | bool operator != (const SourceTreeIterator& it) const { 174 | return !(*this == it); 175 | } 176 | 177 | reference operator* () { 178 
| return (*this)[0]; 179 | } 180 | // Pre-increment: advances by one step and returns *this by reference instead of a copy. 181 | SourceTreeIterator& operator++ () { 182 | *this += 1; 183 | return *this; 184 | } 185 | 186 | SourceTreeIterator operator++ (int) { 187 | auto it = *this; 188 | *this += 1; 189 | return it; 190 | } 191 | 192 | SourceTreeIterator operator +(difference_type n) const { 193 | SourceTreeIterator it2 = *this; 194 | it2 += n; 195 | return it2; 196 | } 197 | 198 | SourceTreeIterator operator -(SourceTreeIterator::difference_type n) const { 199 | return *this + (-n); 200 | } 201 | 202 | reference operator [] (difference_type n) { 203 | auto it = (*this + n); 204 | if (it.WithinChild()) 205 | return it.child_iterator->get()[0]; 206 | else return *it.position; 207 | } 208 | 209 | // Get line number (one-based) within "tree" 210 | unsigned GetLineNumber() const { 211 | // Adding one for natural (i.e. one-based) line numbers 212 | return std::count(tree->code.begin(), position, '\n') + 1; 213 | } 214 | 215 | // Get line number (one-based) within the tree of the current child 216 | unsigned GetCurrentLineNumber() const { 217 | if (WithinChild()) 218 | return child_iterator->get().GetCurrentLineNumber(); 219 | 220 | return GetLineNumber(); 221 | } 222 | 223 | const std::string GetCurrentFilename() const { 224 | if (WithinChild()) 225 | return child_iterator->get().GetCurrentFilename(); 226 | 227 | return tree->file_info.filename; 228 | } 229 | 230 | SourceTreeIterator GetParentIterator(const SourceTree* reference_tree) const { 231 | if (tree == reference_tree) { 232 | return *this; 233 | } else { 234 | return child_iterator->get().GetParentIterator(reference_tree); 235 | } 236 | } 237 | 238 | SourceTree* GetCurrentTree() { 239 | if (WithinChild()) 240 | return child_iterator->get().GetCurrentTree(); 241 | else 242 | return tree; 243 | } 244 | 245 | private: 246 | difference_type StepsRemaining() const { 247 | return std::distance(*this, tree->end()); 248 | } 249 | 250 | difference_type StepsGone() const { 251 | auto it = tree->begin(); 252
| 253 | difference_type diff = 0; 254 | 255 | // Advance reference iterator starting from the beginning until we reach *this, 256 | // making sure that both the main position and the child iterator match. 257 | while (it.position != position || 258 | ((bool)it.child_iterator ^ (bool)child_iterator) || 259 | (it.child_iterator && child_iterator && it.child_iterator->get() != child_iterator->get())) { 260 | // Move to next child (if there is one), or abort if we reach the reference position 261 | if (it.child_iterator) { 262 | auto distance_to_child = std::min(it.node_iterator->offset_within_parent - (it.position -it.tree->code.begin() ), position - it.position); 263 | 264 | // Move to child or this->position 265 | diff += distance_to_child; 266 | it.position += distance_to_child; 267 | 268 | if (it.position - it.tree->code.begin() == it.node_iterator->offset_within_parent) { 269 | if (node_iterator != tree->children.end() && it.node_iterator == node_iterator) { 270 | return diff + (child_iterator->get() - it.child_iterator->get()); 271 | } else { 272 | // Move out of child 273 | diff += it.child_iterator->get().StepsRemaining(); 274 | } 275 | } else { 276 | // We moved to this->position => done 277 | return diff; 278 | } 279 | 280 | // Move to next child 281 | if (it.node_iterator != it.tree->children.end()) { 282 | it.node_iterator++; 283 | it.UpdateChildIterator(); 284 | } 285 | } else { 286 | // no child remaining, hence just move to the given position 287 | return diff + (position - it.position); 288 | } 289 | } 290 | 291 | return diff; 292 | } 293 | 294 | bool WithinChild() const { 295 | return child_iterator && position - tree->code.begin() == node_iterator->offset_within_parent; 296 | } 297 | 298 | void UpdateChildIterator() { 299 | if (node_iterator != tree->children.end()) 300 | child_iterator = boost::recursive_wrapper(node_iterator->tree); 301 | else 302 | child_iterator = boost::none; 303 | } 304 | 305 | void UpdateNodeIterator() { 306 | // Move to the 
first node which is at the cursor or behind it 307 | while (node_iterator != tree->children.end() && node_iterator->offset_within_parent < std::distance(tree->code.begin(), position)) { 308 | node_iterator++; 309 | UpdateChildIterator(); 310 | } 311 | } 312 | 313 | SourceTree* tree; 314 | std::string::iterator position; 315 | 316 | boost::optional> child_iterator; // points to current or next child 317 | std::list::iterator node_iterator; // points to current or next node 318 | 319 | friend struct SourceTree; 320 | }; 321 | 322 | inline SourceTreeIterator operator +(SourceTreeIterator::difference_type n, const SourceTreeIterator& it) { 323 | return it + n; 324 | } 325 | 326 | inline SourceTreeIterator operator -(SourceTreeIterator::difference_type n, const SourceTreeIterator& it) { 327 | return it - n; 328 | } 329 | 330 | inline SourceTreeIterator SourceTree::begin() { 331 | return SourceTreeIterator(*this); 332 | } 333 | 334 | inline SourceTreeIterator SourceTree::end() { 335 | auto it = SourceTreeIterator(*this); 336 | it.position = code.end(); 337 | it.node_iterator = children.end(); 338 | it.child_iterator = boost::none; 339 | return it; 340 | } 341 | 342 | } // namespace 343 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Copyright 2014 Tony Wasserka 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in the 11 | documentation and/or other materials provided with the distribution. 
12 | * Neither the name of the owner nor the names of its contributors may 13 | be used to endorse or promote products derived from this software 14 | without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/parser_assembly.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 
12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | 29 | // Enable this for detailed XML overview of parser results 30 | // #define BOOST_SPIRIT_DEBUG 31 | 32 | #include 33 | #include 34 | #include 35 | 36 | #include "nihstro/parser_assembly.h" 37 | #include "nihstro/parser_assembly_private.h" 38 | 39 | #include "nihstro/shader_binary.h" 40 | #include "nihstro/shader_bytecode.h" 41 | 42 | namespace spirit = boost::spirit; 43 | namespace qi = boost::spirit::qi; 44 | namespace ascii = boost::spirit::qi::ascii; 45 | namespace phoenix = boost::phoenix; 46 | 47 | using spirit::_1; 48 | using spirit::_2; 49 | using spirit::_3; 50 | using spirit::_4; 51 | 52 | using namespace nihstro; 53 | 54 | // Adapt parser data structures for use with boost::spirit 55 | 56 | BOOST_FUSION_ADAPT_STRUCT( 57 | SetEmitInstruction::Flags, 58 | (boost::optional, primitive_flag) 59 | (boost::optional, invert_flag) 60 | ) 61 | 62 | BOOST_FUSION_ADAPT_STRUCT( 63 | SetEmitInstruction, 64 | (OpCode, opcode) 65 | (unsigned, vertex_id) 66 | (SetEmitInstruction::Flags, flags) 67 | ) 68 | 69 | phoenix::function error_handler; 70 | 71 | template 72 | TrivialOpParser::TrivialOpParser(const ParserContext& context) 73 | : TrivialOpParser::base_type(trivial_instruction), 74 | common(context), 75 | opcodes_trivial(common.opcodes_trivial), 76 | opcodes_compare(common.opcodes_compare), 77 | opcodes_float(common.opcodes_float), 78 | opcodes_flowcontrol(common.opcodes_flowcontrol), 79 | end_of_statement(common.end_of_statement), 80 | diagnostics(common.diagnostics) { 81 | 82 | // Setup rules 83 | if (require_end_of_line) { 84 | opcode = qi::no_case[qi::lexeme[opcodes_trivial >> &ascii::space]]; 85 | trivial_instruction = opcode > end_of_statement; 86 | } else { 87 | opcode = qi::no_case[qi::lexeme[opcodes_trivial | opcodes_compare | opcodes_float[0] 88 | | opcodes_float[1] | opcodes_float[2] | opcodes_float[3] 89 | | opcodes_flowcontrol[0] | opcodes_flowcontrol[1] >> &ascii::space]]; 90 | trivial_instruction = opcode; 91 | } 92 | 93 | // Error 
handling 94 | BOOST_SPIRIT_DEBUG_NODE(opcode); 95 | BOOST_SPIRIT_DEBUG_NODE(trivial_instruction); 96 | 97 | qi::on_error(trivial_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4)); 98 | } 99 | 100 | template 101 | SetEmitParser::SetEmitParser(const ParserContext& context) 102 | : SetEmitParser::base_type(setemit_instruction), 103 | common(context), 104 | opcodes_setemit(common.opcodes_setemit), 105 | end_of_statement(common.end_of_statement), 106 | diagnostics(common.diagnostics) { 107 | 108 | // Setup rules 109 | 110 | auto comma_rule = qi::lit(','); 111 | 112 | opcode = qi::lexeme[qi::no_case[opcodes_setemit] >> &ascii::space]; 113 | 114 | vertex_id = qi::uint_; 115 | prim_flag = qi::lit("prim") >> &(!ascii::alnum) >> qi::attr(true); 116 | inv_flag = qi::lit("inv") >> &(!ascii::alnum) >> qi::attr(true); 117 | flags = ((comma_rule >> prim_flag) ^ (comma_rule >> inv_flag)); 118 | 119 | setemit_instruction = ((opcode >> vertex_id) >> (flags | qi::attr(SetEmitInstruction::Flags{}))) > end_of_statement; 120 | 121 | // Error handling 122 | BOOST_SPIRIT_DEBUG_NODE(opcode); 123 | BOOST_SPIRIT_DEBUG_NODE(vertex_id); 124 | BOOST_SPIRIT_DEBUG_NODE(prim_flag); 125 | BOOST_SPIRIT_DEBUG_NODE(inv_flag); 126 | BOOST_SPIRIT_DEBUG_NODE(flags); 127 | BOOST_SPIRIT_DEBUG_NODE(setemit_instruction); 128 | 129 | qi::on_error(setemit_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4)); 130 | } 131 | 132 | template 133 | LabelParser::LabelParser(const ParserContext& context) 134 | : LabelParser::base_type(label), common(context), 135 | end_of_statement(common.end_of_statement), 136 | identifier(common.identifier), 137 | diagnostics(common.diagnostics) { 138 | 139 | label = identifier >> qi::lit(':') > end_of_statement; 140 | 141 | BOOST_SPIRIT_DEBUG_NODE(label); 142 | 143 | qi::on_error(label, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4)); 144 | } 145 | template struct LabelParser; 146 | 147 | 148 | struct Parser::ParserImpl { 149 | using 
Iterator = SourceTreeIterator; 150 | 151 | ParserImpl(const ParserContext& context) : label(context), plain_instruction(context), 152 | simple_instruction(context), instruction(context), 153 | compare(context), flow_control(context), 154 | setemit(context), declaration(context) { 155 | } 156 | 157 | unsigned Skip(Iterator& begin, Iterator end) { 158 | unsigned lines_skipped = 0; 159 | do { 160 | parse(begin, end, skipper); 161 | lines_skipped++; 162 | } while (boost::spirit::qi::parse(begin, end, boost::spirit::qi::eol)); 163 | 164 | return --lines_skipped; 165 | } 166 | 167 | void SkipSingleLine(Iterator& begin, Iterator end) { 168 | qi::parse(begin, end, *(qi::char_ - (qi::eol | qi::eoi)) >> (qi::eol | qi::eoi)); 169 | } 170 | 171 | bool ParseLabel(Iterator& begin, Iterator end, StatementLabel* content) { 172 | assert(content != nullptr); 173 | 174 | return phrase_parse(begin, end, label, skipper, *content); 175 | } 176 | 177 | bool ParseOpCode(Iterator& begin, Iterator end, OpCode* content) { 178 | assert(content != nullptr); 179 | 180 | return phrase_parse(begin, end, plain_instruction, skipper, *content); 181 | } 182 | 183 | bool ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* content) { 184 | assert(content != nullptr); 185 | 186 | return phrase_parse(begin, end, simple_instruction, skipper, *content); 187 | } 188 | 189 | bool ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* content) { 190 | assert(content != nullptr); 191 | 192 | return phrase_parse(begin, end, instruction, skipper, *content); 193 | } 194 | 195 | bool ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content) { 196 | assert(content != nullptr); 197 | 198 | return phrase_parse(begin, end, compare, skipper, *content); 199 | } 200 | 201 | bool ParseFlowControl(Iterator& begin, Iterator end, FlowControlInstruction* content) { 202 | assert(content != nullptr); 203 | 204 | return phrase_parse(begin, end, flow_control, skipper, *content); 205 | } 206 | 207 
| bool ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content) { 208 | assert(content != nullptr); 209 | 210 | return phrase_parse(begin, end, setemit, skipper, *content); 211 | } 212 | 213 | bool ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* content) { 214 | assert(content != nullptr); 215 | 216 | return phrase_parse(begin, end, declaration, skipper, *content); 217 | } 218 | 219 | private: 220 | AssemblySkipper skipper; 221 | 222 | LabelParser label; 223 | TrivialOpParser plain_instruction; 224 | TrivialOpParser simple_instruction; 225 | FloatOpParser instruction; 226 | CompareParser compare; 227 | FlowControlParser flow_control; 228 | SetEmitParser setemit; 229 | DeclarationParser declaration; 230 | }; 231 | 232 | 233 | 234 | Parser::Parser(const ParserContext& context) : impl(new ParserImpl(context)) { 235 | }; 236 | 237 | Parser::~Parser() { 238 | } 239 | 240 | unsigned Parser::Skip(Iterator& begin, Iterator end) { 241 | return impl->Skip(begin, end); 242 | } 243 | 244 | void Parser::SkipSingleLine(Iterator& begin, Iterator end) { 245 | impl->SkipSingleLine(begin, end); 246 | } 247 | 248 | bool Parser::ParseLabel(Iterator& begin, Iterator end, StatementLabel* label) { 249 | return impl->ParseLabel(begin, end, label); 250 | } 251 | 252 | bool Parser::ParseOpCode(Iterator& begin, Iterator end, OpCode* opcode) { 253 | return impl->ParseOpCode(begin, end, opcode); 254 | } 255 | 256 | bool Parser::ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* opcode) { 257 | return impl->ParseSimpleInstruction(begin, end, opcode); 258 | } 259 | 260 | bool Parser::ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* instruction) { 261 | return impl->ParseFloatOp(begin, end, instruction); 262 | } 263 | 264 | bool Parser::ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content) { 265 | return impl->ParseCompare(begin, end, content); 266 | } 267 | 268 | bool Parser::ParseFlowControl(Iterator& begin, 
Iterator end, FlowControlInstruction* content) { 269 | return impl->ParseFlowControl(begin, end, content); 270 | } 271 | 272 | bool Parser::ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content) { 273 | return impl->ParseSetEmit(begin, end, content); 274 | } 275 | 276 | bool Parser::ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* declaration) { 277 | return impl->ParseDeclaration(begin, end, declaration); 278 | } 279 | -------------------------------------------------------------------------------- /src/parser_assembly/common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG

// NOTE(review): the #include directives below carry no target in this listing, and several
// template argument lists further down are missing as well; restore both from the repository.
#include
#include

#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"

#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"

namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;

using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;

using namespace nihstro;

// Adapt parser data structures for use with boost::spirit

BOOST_FUSION_ADAPT_STRUCT(
    IntegerWithSign,
    (int, sign)
    (unsigned, value)
)

/**
 * Implementation of transform_attribute from std::vector to InputSwizzlerMask.
 * This eases swizzle mask parsing a lot.
 *
 * pre() converts the exposed mask into the vector the parser works on, post() copies
 * the parsed vector back into the mask, and fail() leaves the mask untouched.
 */
namespace boost { namespace spirit { namespace traits {
template<>
struct transform_attribute, qi::domain>
{
    using Exposed = InputSwizzlerMask;

    using type = std::vector;

    // Copies the parsed components and their count into the exposed mask.
    // NOTE(review): attr.size() is not checked against the capacity of val.components.
    // The swizzle_mask rule below uses an unbounded Kleene star, so an overlong mask
    // would presumably write past the end of the array - confirm against the
    // definition of InputSwizzlerMask.
    static void post(Exposed& val, const type& attr) {
        val.num_components = attr.size();
        for (size_t i = 0; i < attr.size(); ++i)
            val.components[i] = attr[i];
    }

    // Builds a vector holding the first num_components entries of the exposed mask.
    static type pre(Exposed& val) {
        type vec;
        for (int i = 0; i < val.num_components; ++i)
            vec.push_back(val.components[i]);
        return vec;
    }

    static void fail(Exposed&) { }
};
}}} // namespaces

// Fills the symbol tables (mnemonics, signs, swizzle components) and defines the rules
// that are shared by the other grammars: identifiers, index expressions, expressions
// and the end-of-statement marker.
template<>
CommonRules::CommonRules(const ParserContext& context) {
    // Setup symbol table
    opcodes_trivial.add
                   ( "nop",     OpCode::Id::NOP     )
                   ( "end",     OpCode::Id::END     )
                   ( "emit",    OpCode::Id::EMIT    )
                   ( "else",    OpCode::Id::ELSE    )
                   ( "endif",   OpCode::Id::ENDIF   )
                   ( "endloop", OpCode::Id::ENDLOOP );

    // The float opcodes are split into four tables, indexed 0..3.
    opcodes_float[0].add
                   ( "mova",    OpCode::Id::MOVA    );

    opcodes_float[1].add
                   ( "exp",     OpCode::Id::EX2     )
                   ( "log",     OpCode::Id::LG2     )
                   ( "lit",     OpCode::Id::LIT     )
                   ( "flr",     OpCode::Id::FLR     )
                   ( "rcp",     OpCode::Id::RCP     )
                   ( "rsq",     OpCode::Id::RSQ     )
                   ( "mov",     OpCode::Id::MOV     );
    opcodes_float[2].add
                   ( "add",     OpCode::Id::ADD     )
                   ( "dp3",     OpCode::Id::DP3     )
                   ( "dp4",     OpCode::Id::DP4     )
                   ( "dph",     OpCode::Id::DPH     )
                   ( "dst",     OpCode::Id::DST     )
                   ( "mul",     OpCode::Id::MUL     )
                   ( "sge",     OpCode::Id::SGE     )
                   ( "slt",     OpCode::Id::SLT     )
                   ( "max",     OpCode::Id::MAX     )
                   ( "min",     OpCode::Id::MIN     );
    opcodes_float[3].add
                   ( "mad",     OpCode::Id::MAD     );

    opcodes_compare.add
                   ( "cmp",     OpCode::Id::CMP     );

    opcodes_flowcontrol[0].add
                   ( "break",   OpCode::Id::BREAK   )
                   ( "breakc",  OpCode::Id::BREAKC  )
                   ( "if",      OpCode::Id::GEN_IF  )
                   ( "loop",    OpCode::Id::LOOP    );
    opcodes_flowcontrol[1].add
                   ( "jmp",     OpCode::Id::GEN_JMP )
                   ( "call",    OpCode::Id::GEN_CALL );

    opcodes_setemit.add
                   ( "setemitraw", OpCode::Id::SETEMIT );

    signs.add( "+", +1)
             ( "-", -1);

    // TODO: Add rgba/stq masks
    swizzlers.add
             ( "x",    InputSwizzlerMask::x )
             ( "y",    InputSwizzlerMask::y )
             ( "z",    InputSwizzlerMask::z )
             ( "w",    InputSwizzlerMask::w );

    // TODO: Make sure this is followed by a space or *some* separator
    // TODO: Use qi::repeat(1,4)(swizzlers) instead of Kleene [failed to work when I tried, so make this work!]
    // TODO: Use qi::lexeme[swizzlers] [crashed when I tried, so make this work!]
    swizzle_mask = qi::attr_cast>(*swizzlers);

    // A letter or underscore followed by any number of letters, digits or underscores
    identifier = qi::lexeme[qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_")];
    // Look-ahead only: checks that an identifier follows without consuming it
    peek_identifier = &identifier;

    uint_after_sign = qi::uint_; // TODO: NOT dot (or alphanum) after this to prevent floats..., TODO: overflows?
    sign_with_uint = signs > uint_after_sign;
    // First term: an unsigned number (with an implicit sign of +1) or an identifier
    index_expression_first_term = (qi::attr(+1) >> qi::uint_) | (peek_identifier > identifier);
    // Following terms: "+identifier" or a signed number
    index_expression_following_terms = (qi::lit('+') >> peek_identifier > identifier) | sign_with_uint;
    index_expression = (-index_expression_first_term)           // the first element has an optional sign
                        >> (*index_expression_following_terms); // following elements have a mandatory sign

    // [sign] identifier ['[' index_expression ']'] {'.' swizzle_mask}
    expression = ((-signs) > peek_identifier > identifier) >> (-(qi::lit('[') > index_expression > qi::lit(']'))) >> *(qi::lit('.') > swizzle_mask);

    // A statement ends at a line break or at the end of the input
    end_of_statement = qi::omit[qi::eol | qi::eoi];

    // Error handling
    BOOST_SPIRIT_DEBUG_NODE(identifier);
    BOOST_SPIRIT_DEBUG_NODE(uint_after_sign);
    BOOST_SPIRIT_DEBUG_NODE(index_expression);
    BOOST_SPIRIT_DEBUG_NODE(peek_identifier);
    BOOST_SPIRIT_DEBUG_NODE(expression);
    BOOST_SPIRIT_DEBUG_NODE(swizzle_mask);
    BOOST_SPIRIT_DEBUG_NODE(end_of_statement);

    // Register a message for each rule name
    diagnostics.Add(swizzle_mask.name(), "Expected swizzle mask after period");
    diagnostics.Add(peek_identifier.name(), "Expected identifier");
    diagnostics.Add(uint_after_sign.name(), "Expected integer number after sign");
    diagnostics.Add(index_expression.name(), "Expected index expression between '[' and ']'");
    diagnostics.Add(expression.name(), "Expected expression of a known identifier");
    diagnostics.Add(end_of_statement.name(), "Expected end of statement");
}

--------------------------------------------------------------------------------
/src/parser_assembly/compare.cpp:
--------------------------------------------------------------------------------
// Copyright 2014 Tony Wasserka
// All rights reserved.
3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 


// Enable this for detailed XML overview of parser results
// #define BOOST_SPIRIT_DEBUG

// NOTE(review): the #include directives below carry no target in this listing, and some
// template argument lists further down are missing as well; restore both from the repository.
#include
#include
#include

#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"

#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"

namespace spirit = boost::spirit;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::qi::ascii;
namespace phoenix = boost::phoenix;

using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;

using namespace nihstro;

// Adapt parser data structures for use with boost::spirit

/*BOOST_FUSION_ADAPT_STRUCT(
    IntegerWithSign,
    (int, sign)
    (unsigned, value)
)
*/
BOOST_FUSION_ADAPT_STRUCT(
    CompareInstruction,
    (OpCode, opcode)
    (std::vector, arguments)
    (std::vector, ops)
)

// Grammar for the compare instruction:
//     cmp src1, src2, op1, op2
// i.e. the mnemonic, two expressions and two comparison operators, all comma-separated,
// followed by the end of the statement.
template<>
CompareParser::CompareParser(const ParserContext& context)
    : CompareParser::base_type(instruction),
      common(context),
      opcodes_compare(common.opcodes_compare),
      expression(common.expression),
      end_of_statement(common.end_of_statement),
      diagnostics(common.diagnostics) {

    // TODO: Will this properly match >= ?
    // NOTE(review): qi::symbols is expected to return the longest matching entry, in which
    // case ">=" and "<=" win over ">" and "<" - confirm with a parser test.
    compare_ops.add
                   ( "==", CompareOp::Equal )
                   ( "!=", CompareOp::NotEqual )
                   ( "<", CompareOp::LessThan )
                   ( "<=", CompareOp::LessEqual )
                   ( ">", CompareOp::GreaterThan )
                   ( ">=", CompareOp::GreaterEqual );

    // Setup rules

    auto comma_rule = qi::lit(',');

    // The mnemonic must be followed by whitespace (look-ahead only, not consumed)
    opcode = qi::no_case[qi::lexeme[opcodes_compare >> &ascii::space]];
    compare_op = qi::lexeme[compare_ops];

    // cmp src1, src2, op1, op2
    // TODO: Also allow "cmp src1 op1 src2, src1 op2 src2"
    // Everything after the mnemonic is joined with the expectation operator '>', so a
    // mismatch there is reported through the on_error handler below.
    two_ops = compare_op > comma_rule > compare_op;
    two_expressions = expression > comma_rule > expression;
    instr[0] = opcode > two_expressions > comma_rule > two_ops;

    instruction = instr[0] > end_of_statement;

    // Error handling
    BOOST_SPIRIT_DEBUG_NODE(instr[0]);
    BOOST_SPIRIT_DEBUG_NODE(instruction);

    qi::on_error(instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}

--------------------------------------------------------------------------------
/src/parser_assembly/declaration.cpp:
--------------------------------------------------------------------------------
// Copyright 2014 Tony Wasserka
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in the
//       documentation and/or other materials provided with the distribution.
//     * Neither the name of the owner nor the names of its contributors may
//       be used to endorse or promote products derived from this software
//       without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


// NOTE(review): the #include directives below carry no target in this listing, and several
// template argument lists further down are missing as well; restore both from the repository.
// The spirit/qi/ascii/phoenix namespace aliases are not declared in this file either, so
// they presumably come from one of the included headers - confirm.
#include
#include
#include

#include "nihstro/parser_assembly.h"
#include "nihstro/parser_assembly_private.h"

#include "nihstro/shader_binary.h"
#include "nihstro/shader_bytecode.h"

using spirit::_1;
using spirit::_2;
using spirit::_3;
using spirit::_4;

using namespace nihstro;

// Adapt parser data structures for use with boost::spirit

BOOST_FUSION_ADAPT_STRUCT(
    ConditionInput,
    (bool, invert)
    (Identifier, identifier)
    (boost::optional, swizzler_mask)
)

BOOST_FUSION_ADAPT_STRUCT(
    StatementDeclaration::Extra,
    (std::vector, constant_value)
    (boost::optional, output_semantic)
)

BOOST_FUSION_ADAPT_STRUCT(
    StatementDeclaration,
    (std::string, alias_name)
    (Identifier, identifier_start)
    (boost::optional, identifier_end)
    (boost::optional, swizzle_mask)
    (StatementDeclaration::Extra, extra)
)

// Manually define a swap() overload for qi::hold to work.
/*namespace boost {
namespace spirit {
void swap(nihstro::Condition& a, nihstro::Condition& b) {
    boost::fusion::swap(a, b);
}
}
}*/

// Grammar for alias declarations of the form
//     .alias <name> <identifier>[-<identifier>][.<swizzle mask>] [as <constant or output semantic>]
// followed by the end of the statement.
template<>
DeclarationParser::DeclarationParser(const ParserContext& context)
    : DeclarationParser::base_type(declaration),
      common(context),
      identifier(common.identifier), swizzle_mask(common.swizzle_mask),
      end_of_statement(common.end_of_statement),
      diagnostics(common.diagnostics) {

    // Setup symbol table
    output_semantics.add("position", OutputRegisterInfo::POSITION);
    output_semantics.add("quaternion", OutputRegisterInfo::QUATERNION);
    output_semantics.add("color", OutputRegisterInfo::COLOR);
    output_semantics.add("texcoord0", OutputRegisterInfo::TEXCOORD0);
    output_semantics.add("texcoord1", OutputRegisterInfo::TEXCOORD1);
    output_semantics.add("texcoord2", OutputRegisterInfo::TEXCOORD2);
    output_semantics.add("view", OutputRegisterInfo::VIEW);
    output_semantics_rule = qi::lexeme[output_semantics];

    // Setup rules

    // The keyword "alias" followed by a blank is matched but not stored; the identifier
    // that has to follow it is what the rule produces
    alias_identifier = qi::omit[qi::lexeme["alias" >> ascii::blank]] > identifier;

    // e.g. 5.4 or (1.1, 2, 3)
    constant = (qi::repeat(1)[qi::float_]
                | (qi::lit('(') > (qi::float_ % qi::lit(',')) > qi::lit(')')));

    // Placeholders that consume no input and only supply an empty attribute
    dummy_const = qi::attr(std::vector());
    dummy_semantic = qi::attr(boost::optional());

    // match a constant or a semantic, and fill the respective other one with a dummy
    const_or_semantic = (dummy_const >> output_semantics_rule) | (constant >> dummy_semantic);

    // TODO: Would like to use +ascii::blank instead, but somehow that fails to parse lines like ".alias name o2.xy texcoord0" correctly
    // Matches the keyword "as" with optional blanks before and mandatory blanks after it;
    // skipping is disabled here and the match is not stored
    string_as = qi::omit[qi::no_skip[*/*+*/ascii::blank >> qi::lit("as") >> +ascii::blank]];

    // The "as ..." part is optional; without it both extra fields are filled with dummies
    declaration = ((qi::lit('.') > alias_identifier) >> identifier >> -(qi::lit('-') > identifier) >> -(qi::lit('.') > swizzle_mask))
                  >> (
                      (string_as > const_or_semantic)
                      | (dummy_const >> dummy_semantic)
                  )
                  > end_of_statement;

    // Error handling
    output_semantics_rule.name("output semantic after \"as\"");
    alias_identifier.name("known preprocessor directive (i.e. alias).");
    const_or_semantic.name("constant or semantic after \"as\"");

    BOOST_SPIRIT_DEBUG_NODE(output_semantics_rule);
    BOOST_SPIRIT_DEBUG_NODE(constant);
    BOOST_SPIRIT_DEBUG_NODE(alias_identifier);
    BOOST_SPIRIT_DEBUG_NODE(const_or_semantic);
    BOOST_SPIRIT_DEBUG_NODE(declaration);

    qi::on_error(declaration, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}

--------------------------------------------------------------------------------
/src/parser_assembly/floatop.cpp:
--------------------------------------------------------------------------------
// Copyright 2014 Tony Wasserka
// All rights reserved.
3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | 29 | // Enable this for detailed XML overview of parser results 30 | // #define BOOST_SPIRIT_DEBUG 31 | 32 | #include 33 | #include 34 | #include 35 | 36 | #include "nihstro/parser_assembly.h" 37 | #include "nihstro/parser_assembly_private.h" 38 | 39 | #include "nihstro/shader_binary.h" 40 | #include "nihstro/shader_bytecode.h" 41 | 42 | namespace spirit = boost::spirit; 43 | namespace qi = boost::spirit::qi; 44 | namespace ascii = boost::spirit::qi::ascii; 45 | namespace phoenix = boost::phoenix; 46 | 47 | using spirit::_1; 48 | using spirit::_2; 49 | using spirit::_3; 50 | using spirit::_4; 51 | 52 | using namespace nihstro; 53 | 54 | // Adapt parser data structures for use with boost::spirit 55 | 56 | BOOST_FUSION_ADAPT_STRUCT( 57 | StatementInstruction, 58 | (OpCode, opcode) 59 | (std::vector, expressions) 60 | ) 61 | /* FloatOpParser constructor: wires up the grammar whose start rule is float_instruction. The opcode tables, the expression rule, the end_of_statement rule and the diagnostics object are all taken from `common`, which is constructed from the given ParserContext. NOTE(review): the #include targets and the template arguments look stripped in this copy of the file - confirm against the upstream source before relying on the exact types. */ 62 | template<> 63 | FloatOpParser::FloatOpParser(const ParserContext& context) 64 | : FloatOpParser::base_type(float_instruction), 65 | common(context), 66 | opcodes_float(common.opcodes_float), 67 | expression(common.expression), 68 | end_of_statement(common.end_of_statement), 69 | diagnostics(common.diagnostics) { 70 | 71 | // Setup rules 72 | 73 | auto comma_rule = qi::lit(','); 74 | 75 | for (int i = 0; i < 4; ++i) { 76 | // Make sure that a mnemonic is always followed by a space (such that e.g. "addbla" fails to match) 77 | opcode[i] = qi::no_case[qi::lexeme[opcodes_float[i] >> &ascii::space]]; 78 | } 79 | /* expression_chain[i] matches i + 1 comma-separated expressions: entry 0 is a single expression and every later entry extends the previous one by a comma and one more expression. */ 80 | // chain of arguments for each group of opcodes 81 | expression_chain[0] = expression; 82 | for (int i = 1; i < 4; ++i) { 83 | expression_chain[i] = expression_chain[i - 1] >> comma_rule > expression; 84 | } 85 | 86 | // e.g.
"add o1, t2, t5" 87 | float_instr[0] = opcode[0] > expression_chain[0]; 88 | float_instr[1] = opcode[1] > expression_chain[1]; 89 | float_instr[2] = opcode[2] > expression_chain[2]; 90 | float_instr[3] = opcode[3] > expression_chain[3]; 91 | /* A float instruction is any one of the four opcode groups followed by the argument chain of the same index, and then end_of_statement. */ 92 | float_instruction %= (float_instr[0] | float_instr[1] | float_instr[2] | float_instr[3]) > end_of_statement; 93 | 94 | // Error handling 95 | BOOST_SPIRIT_DEBUG_NODE(opcode[0]); 96 | BOOST_SPIRIT_DEBUG_NODE(opcode[1]); 97 | BOOST_SPIRIT_DEBUG_NODE(opcode[2]); 98 | BOOST_SPIRIT_DEBUG_NODE(opcode[3]); 99 | 100 | BOOST_SPIRIT_DEBUG_NODE(expression_chain[0]); 101 | BOOST_SPIRIT_DEBUG_NODE(expression_chain[1]); 102 | BOOST_SPIRIT_DEBUG_NODE(expression_chain[2]); 103 | BOOST_SPIRIT_DEBUG_NODE(expression_chain[3]); 104 | 105 | BOOST_SPIRIT_DEBUG_NODE(float_instr[0]); 106 | BOOST_SPIRIT_DEBUG_NODE(float_instr[1]); 107 | BOOST_SPIRIT_DEBUG_NODE(float_instr[2]); 108 | BOOST_SPIRIT_DEBUG_NODE(float_instr[3]); 109 | BOOST_SPIRIT_DEBUG_NODE(float_instruction); 110 | /* Pass each chain's rule name to diagnostics.Add together with a human-readable argument count; presumably this text is used when reporting a parse error - confirm in the diagnostics implementation. */ 111 | diagnostics.Add(expression_chain[0].name(), "one argument"); 112 | diagnostics.Add(expression_chain[1].name(), "two arguments"); 113 | diagnostics.Add(expression_chain[2].name(), "three arguments"); 114 | diagnostics.Add(expression_chain[3].name(), "four arguments"); 115 | 116 | qi::on_error(float_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4)); 117 | } 118 | -------------------------------------------------------------------------------- /src/parser_assembly/flowcontrol.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer.
9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | 28 | 29 | // Enable this for detailed XML overview of parser results 30 | // #define BOOST_SPIRIT_DEBUG 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include "nihstro/parser_assembly.h" 38 | #include "nihstro/parser_assembly_private.h" 39 | 40 | #include "nihstro/shader_binary.h" 41 | #include "nihstro/shader_bytecode.h" 42 | 43 | namespace spirit = boost::spirit; 44 | namespace qi = boost::spirit::qi; 45 | namespace ascii = boost::spirit::qi::ascii; 46 | namespace phoenix = boost::phoenix; 47 | 48 | using spirit::_1; 49 | using spirit::_2; 50 | using spirit::_3; 51 | using spirit::_4; 52 | 53 | using namespace nihstro; 54 | 55 | // Adapt parser data structures for use with boost::spirit 56 | /* The member order listed in each adaptation matches the order in which the corresponding rule in the constructor produces its attributes (condition_input, condition and instr respectively). */ 57 | BOOST_FUSION_ADAPT_STRUCT( 58 | ConditionInput, 59 | (bool, invert) 60 | (Identifier, identifier) 61 | (boost::optional, swizzler_mask) 62 | ) 63 | 64 | BOOST_FUSION_ADAPT_STRUCT( 65 | Condition, 66 | (ConditionInput, input1) 67 | (Instruction::FlowControlType::Op, op) 68 | (ConditionInput, input2) 69 | ) 70 | 71 | BOOST_FUSION_ADAPT_STRUCT( 72 | FlowControlInstruction, 73 | (OpCode, opcode) 74 | (std::string, target_label) 75 | (boost::optional, return_label) 76 | (boost::optional, condition) 77 | ) 78 | 79 | // Manually define a swap() overload for qi::hold to work.
80 | namespace boost { 81 | namespace spirit { 82 | void swap(nihstro::Condition& a, nihstro::Condition& b) { 83 | boost::fusion::swap(a, b); 84 | } 85 | } 86 | } 87 | /* FlowControlParser constructor: wires up the grammar whose start rule is flow_control_instruction. Shared rules and the diagnostics object are taken from `common`, which is constructed from the given ParserContext. NOTE(review): template arguments look stripped in this copy of the file - confirm against the upstream source. */ 88 | template<> 89 | FlowControlParser::FlowControlParser(const ParserContext& context) 90 | : FlowControlParser::base_type(flow_control_instruction), 91 | common(context), 92 | opcodes_flowcontrol(common.opcodes_flowcontrol), 93 | expression(common.expression), 94 | identifier(common.identifier), 95 | swizzle_mask(common.swizzle_mask), 96 | end_of_statement(common.end_of_statement), 97 | diagnostics(common.diagnostics) { 98 | /* Operator spellings accepted between two condition inputs. */ 99 | condition_ops.add 100 | ( "&&", ConditionOp::And ) 101 | ( "||", ConditionOp::Or ); 102 | 103 | // Setup rules 104 | 105 | auto blank_rule = qi::omit[ascii::blank]; 106 | auto label_rule = identifier.alias(); 107 | /* Each mnemonic is matched case-insensitively and must be followed by whitespace; the whitespace is only looked ahead at, not consumed. */ 108 | opcode[0] = qi::lexeme[qi::no_case[opcodes_flowcontrol[0]] >> &ascii::space]; 109 | opcode[1] = qi::lexeme[qi::no_case[opcodes_flowcontrol[1]] >> &ascii::space]; 110 | 111 | condition_op = qi::lexeme[condition_ops]; 112 | /* negation yields true when a leading "!" is present and false otherwise. */ 113 | negation = qi::matches[qi::lit("!")]; 114 | 115 | condition_input = negation >> identifier >> -(qi::lit('.') > swizzle_mask); 116 | /* Two accepted forms: "input op input", or a single input, in which case the operator attribute is set to ConditionOp::JustX and the second input is a default-constructed ConditionInput. */ 117 | // May be a condition involving the conditional codes, or a reference to a uniform 118 | // TODO: Make sure we use qi::hold wherever necessary 119 | condition = qi::hold[condition_input >> condition_op >> condition_input] 120 | | (condition_input >> qi::attr(ConditionOp::JustX) >> qi::attr(ConditionInput{})); 121 | 122 | // if condition 123 | instr[0] = opcode[0] 124 | >> qi::attr("__dummy") // Dummy label (set indirectly using else,endif, or endloop pseudo-instructions) 125 | >> qi::attr(boost::optional()) // Dummy return label 126 | >> condition; 127 | 128 | // call target_label until return_label if condition 129 | instr[1] = opcode[1] 130 | >> label_rule 131 | >> -(qi::no_skip[(blank_rule >> qi::lit("until")) > blank_rule] >> label_rule) 132 | >> -(qi::no_skip[(blank_rule >> qi::lit("if")) >
blank_rule] >> condition); 133 | 134 | flow_control_instruction %= (instr[0] | instr[1]) > end_of_statement; 135 | 136 | // Error handling 137 | BOOST_SPIRIT_DEBUG_NODE(opcode[0]); 138 | BOOST_SPIRIT_DEBUG_NODE(opcode[1]); 139 | BOOST_SPIRIT_DEBUG_NODE(negation); 140 | BOOST_SPIRIT_DEBUG_NODE(condition_op); 141 | BOOST_SPIRIT_DEBUG_NODE(condition_input); 142 | BOOST_SPIRIT_DEBUG_NODE(condition); 143 | 144 | BOOST_SPIRIT_DEBUG_NODE(instr[0]); 145 | BOOST_SPIRIT_DEBUG_NODE(instr[1]); 146 | BOOST_SPIRIT_DEBUG_NODE(flow_control_instruction); 147 | 148 | qi::on_error(flow_control_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4)); 149 | } 150 | -------------------------------------------------------------------------------- /src/parser_shbin.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #include "nihstro/parser_shbin.h" 29 | 30 | using namespace nihstro; 31 | /* Opens `filename` in binary mode with stream exceptions enabled for badbit, failbit and eofbit, then reads in order: the DVLB header, the table of DVLE offsets, the DVLP header, every DVLE header, one filename symbol per program, the shader code and the operand descriptor (swizzle) table. A magic-word mismatch in any header is reported by throwing the formatted message. NOTE(review): the message is thrown as a std::string, so a handler for std::exception will not catch it; the stream itself may additionally throw because of the exceptions() mask. */ 32 | void ShbinParser::ReadHeaders(const std::string& filename) { 33 | file.exceptions(std::fstream::badbit | std::fstream::failbit | std::fstream::eofbit); 34 | file.open(filename, std::fstream::in | std::fstream::binary); 35 | 36 | file.seekg(0); 37 | file.read((char*)&dvlb_header, sizeof(dvlb_header)); 38 | if (dvlb_header.magic_word != DVLBHeader::MAGIC_WORD) { 39 | std::stringstream stream; 40 | stream << "Wrong DVLB magic word: Got 0x" << std::hex << dvlb_header.magic_word; 41 | throw stream.str(); 42 | } 43 | 44 | dvle_offsets.resize(dvlb_header.num_programs); 45 | dvle_headers.resize(dvlb_header.num_programs); 46 | for (auto& offset : dvle_offsets) { 47 | file.read((char*)&offset, sizeof(offset)); 48 | } 49 | /* The read position after the offset table is recorded as dvlp_offset; the seekg that follows targets that same position. */ 50 | // DVLP comes directly after the DVLE offset table 51 | dvlp_offset = file.tellg(); 52 | file.seekg(dvlp_offset); 53 | file.read((char*)&dvlp_header, sizeof(dvlp_header)); 54 | if (dvlp_header.magic_word != DVLPHeader::MAGIC_WORD) { 55 | std::stringstream stream; 56 | stream << "Wrong DVLP magic word at offset " << std::hex << dvlp_offset << ": Got " << std::hex << dvlp_header.magic_word; 57 | throw stream.str(); 58 | } 59 | 60 | for (int i = 0; i < dvlb_header.num_programs; ++i) { 61 | auto& dvle_header = dvle_headers[i]; 62 | file.seekg(dvle_offsets[i]); 63 | file.read((char*)&dvle_header,
sizeof(dvle_header)); 64 | if (dvle_header.magic_word != DVLEHeader::MAGIC_WORD) { 65 | std::stringstream stream; 66 | stream << "Wrong DVLE header in DVLE #" << i << ": " << std::hex << dvle_header.magic_word; 67 | throw stream.str(); 68 | } 69 | } 70 | /* Filenames are read as consecutive NUL-terminated strings starting at dvlp_offset + filename_symbol_offset; each next string starts right after the previous terminator. */ 71 | // TODO: Is there indeed exactly one filename per DVLE? 72 | dvle_filenames.resize(dvlb_header.num_programs); 73 | uint32_t offset = dvlp_offset + dvlp_header.filename_symbol_offset; 74 | for (int i = 0; i < dvlb_header.num_programs; ++i) { 75 | auto& filename = dvle_filenames[i]; 76 | filename = ReadSymbol(offset); 77 | offset += filename.length() + 1; 78 | } 79 | 80 | // Read shader binary code 81 | shader_info.code.resize(dvlp_header.binary_size_words); 82 | file.seekg(dvlp_offset + dvlp_header.binary_offset); 83 | file.read((char*)shader_info.code.data(), dvlp_header.binary_size_words * sizeof(Instruction)); 84 | 85 | // Read operand descriptor table 86 | shader_info.swizzle_info.resize(dvlp_header.swizzle_info_num_entries); 87 | file.seekg(dvlp_offset + dvlp_header.swizzle_info_offset); 88 | file.read((char*)shader_info.swizzle_info.data(), dvlp_header.swizzle_info_num_entries * sizeof(SwizzleInfo)); 89 | } 90 | /* Loads the tables of the DVLE with the given index into shader_info: constants, labels (with their names resolved through the DVLE symbol table), output register info and uniforms (with their names), and finally sets main_offset to the start of the shader binary. Relies on the headers and offsets filled in by ReadHeaders(). NOTE(review): only the upper bound of dvle_index is checked; whether a negative value is rejected depends on the type of num_programs - confirm. NOTE(review): the error text has no space before "given". */ 91 | void ShbinParser::ReadDVLE(int dvle_index) { 92 | // TODO: Check if we have called ReadHeaders() before!
93 | 94 | if (dvle_index >= dvlb_header.num_programs) { 95 | std::stringstream stream; 96 | stream << "Invalid DVLE index " << dvle_index << "given"; 97 | throw stream.str(); 98 | } 99 | 100 | auto& dvle_header = dvle_headers[dvle_index]; 101 | auto& dvle_offset = dvle_offsets[dvle_index]; 102 | /* All table offsets stored in the DVLE header are relative to the start of that DVLE, hence the dvle_offset added to each of them below. */ 103 | uint32_t symbol_table_offset = dvle_offset + dvle_header.symbol_table_offset; 104 | 105 | shader_info.constant_table.resize(dvle_header.constant_table_size); 106 | uint32_t constant_table_offset = dvle_offset + dvle_header.constant_table_offset; 107 | file.seekg(constant_table_offset); 108 | for (int i = 0; i < dvle_header.constant_table_size; ++i) 109 | file.read((char*)&shader_info.constant_table[i], sizeof(ConstantInfo)); 110 | /* The label table is read in full first; names are looked up afterwards because ReadSymbol() moves the file position. */ 111 | shader_info.label_table.resize(dvle_header.label_table_size); 112 | uint32_t label_table_offset = dvle_offset + dvle_header.label_table_offset; 113 | file.seekg(label_table_offset); 114 | for (int i = 0; i < dvle_header.label_table_size; ++i) 115 | file.read((char*)&shader_info.label_table[i], sizeof(LabelInfo)); 116 | for (const auto& label_info : shader_info.label_table) 117 | shader_info.labels.insert({label_info.program_offset, ReadSymbol(symbol_table_offset + label_info.name_offset)}); 118 | 119 | shader_info.output_register_info.resize(dvle_header.output_register_table_size); 120 | file.seekg(dvle_offset + dvle_header.output_register_table_offset); 121 | for (auto& info : shader_info.output_register_info) 122 | file.read((char*)&info, sizeof(OutputRegisterInfo)); 123 | /* Same two-pass scheme as for labels: read the fixed-size `basic` part of every uniform, then resolve the names. */ 124 | shader_info.uniform_table.resize(dvle_header.uniform_table_size); 125 | uint32_t uniform_table_offset = dvle_offset + dvle_header.uniform_table_offset; 126 | file.seekg(uniform_table_offset); 127 | for (int i = 0; i < dvle_header.uniform_table_size; ++i) 128 | file.read((char*)&shader_info.uniform_table[i].basic, sizeof(shader_info.uniform_table[i].basic)); 129 | for (auto& uniform_info : shader_info.uniform_table) 130 | uniform_info.name =
ReadSymbol(symbol_table_offset + uniform_info.basic.symbol_offset); 131 | 132 | main_offset = dvlp_offset + dvlp_header.binary_offset; 133 | } 134 | /* Seeks to the absolute file position `offset` and returns the characters up to, but not including, the next NUL byte. The file position is left after the terminator. NOTE(review): the semicolon after the closing brace is redundant. */ 135 | std::string ShbinParser::ReadSymbol(uint32_t offset) { 136 | std::string name; 137 | file.seekg(offset); 138 | std::getline(file, name, '\0'); 139 | return name; 140 | }; 141 | -------------------------------------------------------------------------------- /src/preprocessor.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | #include 33 | 34 | #include 35 | 36 | namespace nihstro { 37 | /* Grammar for a single include directive: the keyword ".include" followed by whitespace, then a double-quoted name made only of the characters a-z, A-Z, 0-9, '.', '/', '_', backslash and '-', then end of line or end of input. The caller below receives the quoted name as a std::string. NOTE(review): the #include targets and template arguments look stripped in this copy of the file - confirm against the upstream source. */ 38 | template 39 | struct IncludeParser : qi::grammar> { 40 | using Skipper = AssemblySkipper; 41 | 42 | IncludeParser() : IncludeParser::base_type(include) { 43 | include = qi::lexeme[qi::lit(".include") >> &qi::ascii::space] 44 | > qi::lexeme[qi::lit("\"") > +qi::char_("a-zA-Z0-9./_\\-") > qi::lit("\"")] 45 | > qi::omit[qi::eol | qi::eoi]; 46 | } 47 | 48 | qi::rule include; 49 | }; 50 | /* Loads `filename` into a SourceTree and expands its .include directives recursively. Included names are resolved relative to the directory of the including file, i.e. the part of `filename` up to and including its last '/'. Throws std::runtime_error if the file cannot be opened or if an included name starts with '/'. NOTE(review): only '/' is treated as a path separator here. */ 51 | 52 | SourceTree PreprocessAssemblyFile(const std::string& filename) { 53 | SourceTree tree; 54 | tree.file_info.filename = filename; 55 | 56 | std::ifstream input_file(filename); 57 | if (!input_file) { 58 | throw std::runtime_error("Could not open input file " + filename); 59 | } 60 | 61 | std::string prefix; 62 | { 63 | auto last_slash = filename.find_last_of("/"); 64 | if (last_slash != std::string::npos) 65 | prefix = filename.substr(0, last_slash + 1); 66 | } 67 | /* Size the buffer from the end-of-file position, then read the whole file into tree.code in one go. */ 68 | input_file.seekg(0, std::ios::end); 69 | tree.code.resize(input_file.tellg()); 70 | 71 | input_file.seekg(0, std::ios::beg); 72 | input_file.read(&tree.code[0], tree.code.size()); 73 | input_file.close(); 74 | 75 | auto cursor = tree.code.begin(); 76 | 77 | IncludeParser include_parser; 78 | AssemblySkipper skipper; 79 | /* Walk the text line by line: a line that parses as an include directive is replaced, any other line is skipped over. */ 80 | while (cursor != tree.code.end()) { 81 | std::string parsed_filename; 82 | auto cursor_prev = cursor; 83
| if (qi::phrase_parse(cursor, tree.code.end(), include_parser, skipper, parsed_filename)) { 84 | if (parsed_filename[0] == '/') 85 | throw std::runtime_error("Given filename must be relative to the path of the including file"); 86 | /* Preprocess the included file, attach it at the offset where the directive started, then replace the directive text by a single newline and continue from there. */ 87 | // TODO: Protect against circular inclusions 88 | auto newtree = PreprocessAssemblyFile(prefix + parsed_filename); 89 | tree.Attach(newtree, cursor_prev - tree.code.begin()); 90 | cursor = tree.code.erase(cursor_prev, cursor); 91 | cursor = tree.code.insert(cursor, '\n'); 92 | } else { 93 | // Skip this line 94 | qi::parse(cursor, tree.code.end(), *(qi::char_ - (qi::eol | qi::eoi)) >> (qi::eol | qi::eoi)); 95 | } 96 | } 97 | return tree; 98 | } 99 | 100 | } // namespace 101 | -------------------------------------------------------------------------------- /src/tests/source_tree_iterator.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Tony Wasserka 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above copyright 10 | // notice, this list of conditions and the following disclaimer in the 11 | // documentation and/or other materials provided with the distribution. 12 | // * Neither the name of the owner nor the names of its contributors may 13 | // be used to endorse or promote products derived from this software 14 | // without specific prior written permission.
15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | #include 29 | #include "nihstro/source_tree.h" 30 | 31 | #define BOOST_TEST_MODULE SourceTreeIterator 32 | #include 33 | 34 | #include 35 | 36 | namespace std { 37 | /* Stream output for a SourceTree, used when a check prints a tree: each stretch of the tree's own code is written in double quotes and each child is written recursively between " { " and " } " at the position where it is attached. NOTE(review): this overload is declared inside namespace std, presumably so that the test framework finds it; the C++ standard does not allow programs to add such declarations to std - consider declaring it in namespace nihstro instead. */ 38 | std::ostream& operator << (std::ostream& os, const nihstro::SourceTree& tree) { 39 | std::string::const_iterator it = tree.code.cbegin(); 40 | for (auto& child : tree.children) { 41 | os << "\""; 42 | os << std::string(it, tree.code.cbegin() + child.offset_within_parent); 43 | os << "\""; 44 | os << " { "; 45 | os << child.tree; 46 | os << " } "; 47 | it = tree.code.cbegin() + child.offset_within_parent; 48 | } 49 | os << "\"" << std::string(it, tree.code.end()) << "\""; 50 | return os; 51 | } 52 | 53 | } 54 | /* Reference implementation: the parent's code up to each child's offset, then the flattened child, then the parent's remaining code. The loop assumes the children are visited in increasing offset order - confirm against SourceTree::Attach. */ 55 | // Utility function to manually flatten the given tree into a string 56 | static std::string FlattenTree(const nihstro::SourceTree& tree) { 57 | std::string ret; 58 | std::string::const_iterator it = tree.code.cbegin(); 59 | for (auto& child : tree.children) { 60 | ret += std::string(it, tree.code.cbegin() + child.offset_within_parent); 61 | ret += FlattenTree(child.tree); 62 | it = tree.code.cbegin() + child.offset_within_parent; 63 | } 64 |
ret += std::string(it, tree.code.end()); 65 | return ret; 66 | } 67 | /* Reference size: the length of the tree's own code plus the sizes of all children, computed recursively. */ 68 | // Utility function to manually determine the size of the given tree 69 | static std::string::size_type TreeSize(const nihstro::SourceTree& tree) { 70 | std::string::size_type ret = 0; 71 | for (auto& child : tree.children) { 72 | ret += TreeSize(child.tree); 73 | } 74 | ret += tree.code.length(); 75 | return ret; 76 | } 77 | /* CHECK_TREE(tree) compares the tree's iterators against the two reference helpers above: the iterator distance must equal TreeSize(tree), forward iteration must reproduce FlattenTree(tree), and stepping backwards from end() - 1 down to begin() must reproduce the reversed reference string. The backward loop dereferences end() - 1 unconditionally, so the macro expects a non-empty tree. */ 78 | #define CHECK_TREE(tree) do { \ 79 | /* Check length */ \ 80 | BOOST_CHECK_EQUAL(tree.end() - tree.begin(), TreeSize(tree)); \ 81 | BOOST_CHECK_EQUAL(std::distance(tree.begin(), tree.end()), TreeSize(tree)); \ 82 | /* Check forward iteration */ \ 83 | std::string flattened_tree; \ 84 | for (auto& val : tree) \ 85 | flattened_tree += val; \ 86 | auto reference_flattened_tree = FlattenTree(tree); \ 87 | BOOST_CHECK_EQUAL(flattened_tree, reference_flattened_tree); \ 88 | BOOST_CHECK_EQUAL_COLLECTIONS(flattened_tree.begin(), flattened_tree.end(), \ 89 | reference_flattened_tree.begin(), reference_flattened_tree.end()); \ 90 | \ 91 | /* Check reverse iteration */ \ 92 | flattened_tree.clear(); \ 93 | for (auto it = tree.end() - 1;; it -= 1) { \ 94 | flattened_tree += *it; \ 95 | if (it == tree.begin()) \ 96 | break; \ 97 | } \ 98 | std::reverse(reference_flattened_tree.begin(), reference_flattened_tree.end()); \ 99 | BOOST_CHECK_EQUAL(flattened_tree, reference_flattened_tree); \ 100 | BOOST_CHECK_EQUAL_COLLECTIONS(flattened_tree.begin(), flattened_tree.end(), \ 101 | reference_flattened_tree.begin(), reference_flattened_tree.end()); \ 102 | \ 103 | } while (false) 104 | /* A tree without any children. */ 105 | BOOST_AUTO_TEST_CASE(simple_tree) { 106 | nihstro::SourceTree tree; 107 | 108 | tree.code = "a b c"; 109 | 110 | CHECK_TREE(tree); 111 | } 112 | /* A tree with two direct children attached at offsets 1 and 3. */ 113 | BOOST_AUTO_TEST_CASE(nested_tree) { 114 | nihstro::SourceTree tree; 115 | nihstro::SourceTree child1; 116 | nihstro::SourceTree child2; 117 | 118 | tree.code = "aXbXc"; 119 | child1.code = "child1"; 120 | child2.code = "child2"; 121 |
tree.Attach(child1, 1).Attach(child2, 3); 122 | 123 | CHECK_TREE(tree); 124 | } 125 | /* Several levels of nesting: four direct children, two of which have children of their own, one of them nested a level further. */ 126 | BOOST_AUTO_TEST_CASE(deep_tree) { 127 | nihstro::SourceTree tree; 128 | nihstro::SourceTree child1; 129 | nihstro::SourceTree child1_child1; 130 | nihstro::SourceTree child1_child2; 131 | nihstro::SourceTree child1_child2_child1; 132 | nihstro::SourceTree child1_child3; 133 | nihstro::SourceTree child2; 134 | nihstro::SourceTree child3; 135 | nihstro::SourceTree child3_child1; 136 | nihstro::SourceTree child4; 137 | 138 | tree.code = "aaaXaaaXaaaXaaaXaaa"; 139 | child1.code = "FirstChild:bbbXbbbXbbbXbbb\n"; 140 | child1_child1.code = "FirstSubchildOfChild1:ccc"; 141 | child1_child2.code = "SecondSubchildOfChild1:dddXddd"; 142 | child1_child2_child1.code = "FirstSubsubchildOfSubchild2OfChild1:eee"; 143 | child1_child3.code = "ThirdSubchildOfChild1:fff"; 144 | child2.code = "SecondChild:ggg\n"; 145 | child3.code = "ThirdChild:hhhXhhh\n"; 146 | child3_child1.code = "FirstSubchildOfChild3:iii"; 147 | child4.code = "FourthChild:jjj\n"; 148 | /* Subtrees are attached to their parents before those parents are attached further up. */ 149 | child1_child2.Attach(child1_child2_child1, 26); 150 | child1.Attach(child1_child1, 14).Attach(child1_child2, 18).Attach(child1_child3, 22); 151 | child3.Attach(child3_child1, 14); 152 | tree.Attach(child1, 3).Attach(child2, 7).Attach(child3, 11).Attach(child4, 15); 153 | 154 | CHECK_TREE(tree); 155 | } 156 | /* Boundary offsets: a child attached at offset 0 and, after clearing the children, at offset code.length(). */ 157 | BOOST_AUTO_TEST_CASE(subtree_at_begin_and_end) { 158 | nihstro::SourceTree tree; 159 | nihstro::SourceTree child1; 160 | tree.code = "aaa"; 161 | child1.code = "bbb"; 162 | 163 | tree.Attach(child1, 0); 164 | CHECK_TREE(tree); 165 | 166 | tree.children.clear(); 167 | tree.Attach(child1, tree.code.length()); 168 | CHECK_TREE(tree); 169 | } 170 | --------------------------------------------------------------------------------