├── .travis.yml
├── CMakeLists.txt
├── Readme.md
├── docs
    ├── instruction_set.md
    └── nihcode_spec.md
├── examples
    ├── assembler
    │   ├── cube
    │   │   ├── Makefile
    │   │   ├── README.md
    │   │   ├── data
    │   │   │   ├── test.vsh
    │   │   │   └── texture.bin
    │   │   └── source
    │   │   │   ├── _gs.s
    │   │   │   ├── gs.c
    │   │   │   ├── gs.h
    │   │   │   ├── main.c
    │   │   │   ├── math.c
    │   │   │   └── math.h
    │   └── cube_lighting
    │   │   ├── Makefile
    │   │   ├── README.md
    │   │   ├── data
    │   │       ├── test.vsh
    │   │       └── texture.bin
    │   │   └── source
    │   │       ├── _gs.s
    │   │       ├── gs.c
    │   │       ├── gs.h
    │   │       ├── main.c
    │   │       ├── math.c
    │   │       └── math.h
    └── inline_assembler
    │   └── simple
    │       ├── CMakeLists.txt
    │       └── simple.cpp
├── include
    └── nihstro
    │   ├── bit_field.h
    │   ├── float24.h
    │   ├── inline_assembly.h
    │   ├── parser_assembly.h
    │   ├── parser_assembly_private.h
    │   ├── parser_shbin.h
    │   ├── preprocessor.h
    │   ├── shader_binary.h
    │   ├── shader_bytecode.h
    │   └── source_tree.h
├── license.txt
└── src
    ├── assembler.cpp
    ├── disassembler.cpp
    ├── parser_assembly.cpp
    ├── parser_assembly
        ├── common.cpp
        ├── compare.cpp
        ├── declaration.cpp
        ├── floatop.cpp
        └── flowcontrol.cpp
    ├── parser_shbin.cpp
    ├── preprocessor.cpp
    └── tests
        ├── parser.cpp
        └── source_tree_iterator.cpp


/.travis.yml:
--------------------------------------------------------------------------------
 1 | os:
 2 |   - linux
 3 | # C++ project; the toolchain packages are installed through the apt addon below.
 4 | language: cpp
 5 | sudo: false
 6 | # Additional apt sources and the packages to install (GCC 4.9, CMake, Boost 1.55).
 7 | addons:
 8 |   apt:
 9 |     sources:
10 |       - ubuntu-toolchain-r-test
11 |       - kalakris-cmake
12 |       - boost-latest
13 |     packages:
14 |       - gcc-4.9
15 |       - g++-4.9
16 |       - cmake
17 |       - libboost1.55-all-dev
18 | # Out-of-source build in ./build using gcc-4.9/g++-4.9; only compiles, no test step is run.
19 | script:
20 |  - export CC=gcc-4.9
21 |  - export CXX=g++-4.9
22 |  - mkdir build
23 |  - cd build
24 |  - cmake ..
25 |  - make
26 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds the nihstro command line tools: the disassembler always, and the
 2 | # assembler plus its unit tests when the required Boost components are found.
 3 | #
 4 | # 2.8.11 is the first CMake release that provides target_compile_definitions(),
 5 | # which the test targets below rely on.
 6 | cmake_minimum_required(VERSION 2.8.11)
 7 | 
 8 | project(nihstro)
 9 | 
10 | # Deliberately not REQUIRED: the disassembler target is still configured when
11 | # Boost is missing; only the assembler and the tests are skipped (see below).
12 | find_package(Boost COMPONENTS program_options unit_test_framework)
13 | 
14 | add_definitions(-std=c++11)
15 | 
16 | include_directories(include)
17 | 
18 | add_executable(nihstro-disassemble src/disassembler.cpp src/parser_shbin.cpp)
19 | install(TARGETS nihstro-disassemble DESTINATION bin)
20 | 
21 | # TODO: Re-enable
22 | # add_subdirectory(examples/inline_assembler/simple)
23 | 
24 | # TODO: What if program_options was found but not unit_test_framework?
25 | if(Boost_FOUND)
26 |     # Parser sources shared by the assembler executable and the parser test.
27 |     set(PARSER_SRCS src/parser_assembly.cpp
28 |                     src/preprocessor.cpp
29 |                     src/parser_assembly/common.cpp
30 |                     src/parser_assembly/compare.cpp
31 |                     src/parser_assembly/declaration.cpp
32 |                     src/parser_assembly/flowcontrol.cpp
33 |                     src/parser_assembly/floatop.cpp)
34 | 
35 |     include_directories(${Boost_INCLUDE_DIRS})
36 |     add_executable(nihstro-assemble src/assembler.cpp ${PARSER_SRCS})
37 |     target_link_libraries(nihstro-assemble ${Boost_PROGRAM_OPTIONS_LIBRARY})
38 |     install(TARGETS nihstro-assemble DESTINATION bin)
39 | 
40 |     # tests
41 |     if(Boost_UNIT_TEST_FRAMEWORK_FOUND)
42 |         enable_testing()
43 | 
44 |         add_executable(test-parser src/tests/parser.cpp ${PARSER_SRCS})
45 |         target_compile_definitions(test-parser PUBLIC -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN)
46 |         target_link_libraries(test-parser ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
47 |         add_test(ParserTests test-parser)
48 | 
49 |         add_executable(test-source-tree-iterator src/tests/source_tree_iterator.cpp)
50 |         target_compile_definitions(test-source-tree-iterator PUBLIC -DBOOST_TEST_DYN_LINK -DBOOST_TEST_MAIN)
51 |         target_link_libraries(test-source-tree-iterator ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
52 |         # Every test needs its own name: registering this one as "ParserTests"
53 |         # as well would clash with the test added above.
54 |         add_test(SourceTreeIteratorTests test-source-tree-iterator)
55 |     else()
56 |         message(WARNING "Boost testing framework not found => not building assembler tests")
57 |     endif()
58 | else()
59 |     message(WARNING "Boost not found => not building assembler")
60 | endif()


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
  1 | # nihstro - 3DS shader tools
  2 | 
  3 | [![Travis CI Build Status](https://travis-ci.org/neobrain/nihstro.svg)](https://travis-ci.org/neobrain/nihstro)
  4 | 
  5 | nihstro is a collection of tools for 3DS shaders targeted at homebrew development and/or reverse engineering. Currently, it supports assembling 3DS shader binaries from assembly source code and disassembling shaders from `shbin` files. It also provides C++ interfaces for analyzing and runtime-compiling shaders.
  6 | 
  7 | This project is released under a three-clause BSD license. For details see license.txt.
  8 | 
  9 | ## Components
 10 | 
 11 | nihstro is well-modularized to minimize dependencies for any particular use case.
 12 | 
 13 | ### Shader assembler
 14 | nihstro includes a standalone shader assembler for generating [SHBIN](http://3dbrew.org/wiki/SHBIN) files from human-readable shader source code (the syntax of which is called "nihcode"). It is perfectly suitable for homebrew development. Shader source needs to follow the [nihcode specification](docs/nihcode_spec.md).
 15 | 
 16 | Usage:
 17 | `nihstro-assemble <input_filename.vsh> -o <output_filename.shbin>`
 18 | 
 19 | Reads vertex shader source code from the input file and generates a shader binary from it.
 20 | 
 21 | Further command line options:
 22 | * `-h, --help`: Show command line usage
 23 | * `-i, --input`: Explicit switch for specifying the input shader source filename (if omitted, the first switch-less argument is interpreted as the filename)
 24 | * `-o, --output`: Output shbin filename (required)
 25 | * `-e, --entrypoint`: label name in the input source at which shader execution should start (defaults to "main")
 26 | * `-g, --geo_shader`: Compile shader source as a geometry shader
 27 | 
 28 | ### Shader disassembler
 29 | 
 30 | nihstro includes a standalone shader disassembler for disassembling SHBIN files and inspecting meta data (symbol information, constant values, etc).
 31 | 
 32 | Usage:
 33 | `nihstro-disassemble <filename.shbin>`
 34 | 
 35 | Parses the shader binary header and outputs basic information on the DVLE sections.
 36 | 
 37 | `nihstro-disassemble <filename.shbin> <DVLE index>`
 38 | 
 39 | Parses the shader binary header and outputs basic information, but also disassembles the shader code using the information in the indexed DVLE (main offset, symbols, etc).
 40 | 
 41 | ### Shader bytecode and SHBIN C++ headers
 42 | The header `shader_bytecode.h` defines C++ data structures which describe raw shader binary code, while `shader_binary.h` defines the layout of SHBIN files. This allows for convenient inspection of data in C++ code. Note that these headers are currently not API stable.
 43 | 
 44 | ### Inline assembler (experimental)
 45 | The header `inline_assembly.h` provides an experimental means for runtime generation of PICA200 shaders and SHBIN files within C++ code, so that homebrew authors don't need to ship shaders as precompiled files. While you could use nihstro's actual assembler to allow for runtime shader compilation, the inline assembler may be more convenient and is lighter on dependencies (since it doesn't require Boost to function). However, for obvious reasons it incurs a performance penalty and an increased memory usage compared to offline shader compilation.
 46 | 
 47 | A simple [example program](examples/inline_assembler/simple) is included to illustrate how to use the inline assembler.
 48 | 
 49 | Note that the inline assembler is highly experimental. It may or may not work for you yet, and its API will change a lot in the future.
 50 | 
 51 | ## Building
 52 | 
 53 | All nihstro components require compiler support for C++11 to work.
 54 | 
 55 | The C++ headers `shader_bytecode.h` and `shader_binary.h` can easily be included in other projects and hence are easy to integrate into any build system (as long as nihstro's directory structure is preserved).
 56 | 
 57 | For the standalone assembler and disassembler, you will also need CMake to generate build files (however, it is simple to set up a different build system from scratch if need be), and at least parts of the [Boost libraries](http://www.boost.org/) installed (including Spirit, Fusion, and others).
 58 | 
 59 | ### Installing dependencies on Windows
 60 | 
 61 | You will need to download [CMake](https://cmake.org/download/) and [Boost](http://www.boost.org/users/download/) from their respective download pages. Both projects provide prebuilt binaries. Note that the Boost binaries only work with MSVC, so MinGW users will need to obtain prebuilt binaries from an unofficial source (not recommended) or build Boost from source.
 62 | 
 63 | ### Installing dependencies on Linux
 64 | 
 65 | Chances are your Linux distribution already has CMake and Boost installed. Use your package manager to verify this is the case and to install them if need be. Note that most distributions provide program binaries and development libraries in separate packages; for building nihstro, both are needed.
 66 | 
 67 | ### Installing dependencies on OS X
 68 | 
 69 | On OS X, it is recommended that you use [Homebrew](http://brew.sh/) to install dependencies. You'll need to run the following to build nihstro:
 70 | 
 71 | ```
 72 | brew install cmake boost
 73 | ```
 74 | 
 75 | ### Compiling on Linux, OS X, and other Unix-like systems
 76 | 
 77 | To compile the standalone assembler and disassembler, run the following commands from within the nihstro root directory:
 78 | 
 79 | ```
 80 | mkdir -p build
 81 | cd build
 82 | cmake ..
 83 | make
 84 | ```
 85 | 
 86 | This will build the `nihstro-assemble` and `nihstro-disassemble` standalone executables inside the `build` directory. 
 87 | 
 88 | ### Compiling on Windows
 89 | 
 90 | Start the [CMake GUI](https://cmake.org/runningcmake/). You will have to provide two paths: The source code location and the build directory. Point the former to the nihstro root directory, and the latter to a subdirectory called `build`. You may need to create this directory manually if it doesn't exist.
 91 | 
 92 | To make sure CMake finds your Boost installation, press the "Add Entry" button and create a new PATH variable with the name `BOOST_ROOT`. Point it towards the root directory of your boost installation. The correct folder should contain a subdirectory called `boost` with lots of further child directories.
 93 | 
 94 | Once you're done, hit the "Configure" button and adjust the compiler settings appropriately (usually, the default settings should be fine). If an error occurs, CMake might have trouble locating your Boost installation, and you should double-check that you installed the correct set of Boost libraries and that you set up the `BOOST_ROOT` variable correctly.
 95 | 
 96 | If all went fine, click "Generate" and use the generated build files in the `build` subdirectory to build nihstro. In particular if you're using MSVC, open the file `build/nihstro.sln` in Visual Studio.
 97 | 
 98 | ## Contributing
 99 | I welcome any contributions! Just create a GitHub fork and submit your changes back via pull requests.
100 | 
101 | ## Kudos
102 | A big "thank you!" to everyone who contributed to the information on 3dbrew, which has proven amazingly useful for my 3DS related projects. Another shout-out goes to smealum's aemstro, which served as a great reference when debugging nihstro.
103 | 


--------------------------------------------------------------------------------
/docs/instruction_set.md:
--------------------------------------------------------------------------------
  1 | # Shader Instruction Set
  2 | 
  3 | This page gives an overview of the instruction set supported by nihstro. Note that there is a similar reference list on [3dbrew](http://3dbrew.org/wiki/Shader_Instruction_Set), which documents the actual implementation on hardware though. nihstro seeks to abstract away annoying details like the fact that there are 3 different CALL instructions, and instead provides convenience shortcuts where possible without giving up flexibility.
  4 | 
  5 | # Table of Contents
  6 | 
  7 | - [Shader Instruction Set](#shader-instruction-set)
  8 |   - [Arithmetic Instructions](#arithmetic-instructions)
  9 |   - [Flow Control Instructions](#flow-control-instructions)
 10 |   - [Special Purpose Instructions](#special-purpose-instructions)
 11 | 
 12 | ## Arithmetic Instructions
 13 | Most arithmetic instructions take a destination operand and one or more source operands. Source operands may use any kind of swizzle mask, while destination operands may not use reordering or duplicating swizzle masks. Below you will find a short operation description for each instruction, e.g. `dest[i] = src[i]`, which means that the `i`-th source component (as specified by the swizzle mask) will be assigned to the `i`-th destination component (as specified by the swizzle mask), with `i` ranging from 1 to the number of swizzle mask components. Components not listed in the destination swizzle mask hence will not be written.
 14 | 
 15 | Static indexing (i.e. indexing with a constant, not to be confused with the above notation) may be done for both operand types. Source operands additionally support *dynamic indexing*, where the index depends on one of the address registers `a0`/`a1` or on the loop counter `lcnt`. Examples:
 16 | * static indexing: `c0[20]`
 17 | * dynamic indexing: `c0[2+a0]`
 18 | 
 19 | #### mov: Copy floating point value
 20 | Syntax: `mov dest_operand, src_operand`
 21 | 
 22 | Operation: `dest[i] = src[i]`
 23 | 
 24 | Restrictions:
 25 | * `src` and `dest` must have the same number of components
 26 | 
 27 | #### add: Per-component floating point sum
 28 | Syntax: `add dest_operand, src1_operand, src2_operand`
 29 | 
 30 | Operation: `dest[i] = src1[i] + src2[i]`
 31 | 
 32 | Restrictions:
 33 | * `src1`, `src2`, and `dest` must have the same number of components
 34 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
 35 | 
 36 | Notes:
 37 | * subtraction can be performed using negation: `add r0, c0, -c1`
 38 | * when chaining an addition and a multiplication, consider using `mad` instead
 39 | 
 40 | #### mul: Per-component floating point multiplication
 41 | Syntax: `mul dest_operand, src1_operand, src2_operand`
 42 | 
 43 | Operation: `dest[i] = src1[i] * src2[i]`
 44 | 
 45 | Restrictions:
 46 | * `src1`, `src2`, and `dest` must have the same number of components
 47 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
 48 | 
 49 | Notes:
 50 | * division can be performed by computing the reciprocal of src2 and multiplying the result: `rcp r0, c1; mul r0, c0, r0`
 51 | * when chaining an addition and a multiplication, consider using `mad` instead
 52 | 
 53 | #### mad: Fused multiply-add of three floating point numbers
 54 | Syntax: `mad dest_operand, src1_operand, src2_operand, src3_operand`
 55 | 
 56 | Operation: `dest[i] = src1[i] * src2[i] + src3[i]`
 57 | 
 58 | Restrictions:
 59 | * `src1`, `src2`, `src3`, and `dest` must have the same number of components
 60 | * not more than two source operands may be float uniform registers
 61 | * no dynamic indexing may be performed on any of the source operands.
 62 | 
 63 | Notes:
 64 | * when dynamic indexing is not avoidable, use `add` and `mul` instead
 65 | * not supported currently
 66 | 
 67 | #### max: Copy the greater of two floating point numbers
 68 | Syntax: `max dest_operand, src1_operand, src2_operand`
 69 | 
 70 | Operation: `dest[i] = max(src1[i], src2[i])`
 71 | 
 72 | Restrictions:
 73 | * `src1`, `src2`, and `dest` must have the same number of components
 74 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
 75 | 
 76 | #### min: Copy the smaller of two floating point numbers
 77 | Syntax: `min dest_operand, src1_operand, src2_operand`
 78 | 
 79 | Operation: `dest[i] = min(src1[i], src2[i])`
 80 | 
 81 | Restrictions:
 82 | * `src1`, `src2`, and `dest` must have the same number of components
 83 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
 84 | 
 85 | #### flr: Floating point floor
 86 | Syntax: `flr dest_operand, src_operand`
 87 | 
 88 | Operation: `dest[i] = floor(src[i])`
 89 | 
 90 | Restrictions:
 91 | * `src` and `dest` must have the same number of components
 92 | 
 93 | #### rcp: Floating point reciprocal
 94 | Syntax: `rcp dest_operand, src_operand`
 95 | 
 96 | Operation: `dest[i] = 1 / src[i]`
 97 | 
 98 | Restrictions:
 99 | * `src` and `dest` must have the same number of components
100 | 
101 | #### rsq: Floating point reciprocal square root
102 | Syntax: `rsq dest_operand, src_operand`
103 | 
104 | Operation: `dest[i] = 1 / sqrt(src[i])`
105 | 
106 | Restrictions:
107 | * `src` and `dest` must have the same number of components
108 | 
109 | #### exp: Floating point base-2 exponential
110 | Syntax: `exp dest_operand, src_operand`
111 | 
112 | Operation: `dest[i] = exp2(src[i])`
113 | 
114 | Restrictions:
115 | * `src` and `dest` must have the same number of components
116 | 
117 | #### log: Floating point base-2 logarithm
118 | Syntax: `log dest_operand, src_operand`
119 | 
120 | Operation: `dest[i] = log2(src[i])`
121 | 
122 | Restrictions:
123 | * `src` and `dest` must have the same number of components
124 | 
125 | #### dp3: Floating point 3-component dot-product
126 | Syntax: `dp3 dest_operand, src1_operand, src2_operand`
127 | 
128 | Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]`
129 | 
130 | Restrictions:
131 | * `src1`, `src2`, and `dest` must have the same number of components
132 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
133 | 
134 | #### dp4: Floating point 4-component dot-product
135 | Syntax: `dp4 dest_operand, src1_operand, src2_operand`
136 | 
137 | Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]+src1[3]*src2[3]`
138 | 
139 | Restrictions:
140 | * `src1`, `src2`, and `dest` must have the same number of components
141 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
142 | 
143 | #### dph: Floating point homogeneous dot-product
144 | Syntax: `dph dest_operand, src1_operand, src2_operand`
145 | 
146 | Operation: `dest[i] = src1[0]*src2[0]+src1[1]*src2[1]+src1[2]*src2[2]+src2[3]`
147 | 
148 | Restrictions:
149 | * `src1`, `src2`, and `dest` must have the same number of components
150 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing.
151 | 
152 | #### sge: Set to one if greater or equal
153 | Syntax: `sge dest_operand, src1_operand, src2_operand`
154 | 
155 | Operation: `dest[i] = (src1[i] >= src2[i]) ? 1.0 : 0.0`
156 | 
157 | Restrictions:
158 | * `src1`, `src2`, and `dest` must have the same number of components
159 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
160 | 
161 | #### slt: Set to one if (strictly) less
162 | Syntax: `slt dest_operand, src1_operand, src2_operand`
163 | 
164 | Operation: `dest[i] = (src1[i] < src2[i]) ? 1.0 : 0.0`
165 | 
166 | Restrictions:
167 | * `src1`, `src2`, and `dest` must have the same number of components
168 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
169 | 
170 | #### mova: Move to address register
171 | Syntax: `mova src_operand`
172 | 
173 | Operation:
174 | 
175 |     a0 = src.x
176 |     a1 = src.y
177 | 
178 | Restrictions:
179 | * src_operand must be a two-component vector.
180 | 
181 | Notes:
182 | * not supported currently
183 | 
184 | ## Flow Control Instructions
185 | These allow for non-linear code execution, e.g. by conditionally or repeatedly running code.
186 | 
187 | Some flow control instructions take a "condition" parameter. A condition is either
188 | * a boolean uniform or
189 | * an expression consisting of one or two conditional code components, combined via `&&` ("and") or `||` ("or"), and optionally negated. Examples: `cc.x`, `cc.y && !cc.x`
190 | 
191 | #### cmp: Compare two floating point numbers
192 | 
193 | Syntax: `cmp src1_operand, src2_operand, op1, op2`
194 | 
195 | `op1` and `op2` may be any of the strings `==` (equal), `!=` (not equal), `<` (less than), `<=` (less than or equal to), `>` (greater than), and `>=` (greater than or equal to).
196 | 
197 | Operation:
198 | 
199 |     cc.x = (src1[0] op1 src2[0])
200 |     cc.y = (src1[1] op2 src2[1])
201 | 
202 | Restrictions:
203 | * `src1` and `src2` must be two-component vectors
204 | * it is not possible to set `cc.x` without also setting `cc.y`
205 | * not more than one of the source operands may be a float uniform register and/or use dynamic indexing
206 | 
207 | Notes:
208 | * this instruction is used to set conditional codes, which can be used as conditions for `if`/`jmp`/`call`/`break`.
209 | 
210 | #### if: Conditional code execution
211 | Syntax: `if condition`
212 | 
213 | Operation:
214 | If `condition` is true, conditionally executes the code between itself and the corresponding `else` or `endif` pseudo-instruction. Otherwise, executes the code in the `else` branch, if one is given (otherwise, skips the branch body and continues after the `endif` statement).
215 | 
216 | Restrictions:
217 | * not more than one `else` branch may be specified (`else if` syntax is not supported)
218 | 
219 | Notes:
220 | * all `if` branches must be closed explicitly using `endif`
221 | * jumping out of a branch body may result in undefined behavior
222 | 
223 | Example:
224 | 
225 |     if cc.x && !cc.y
226 |         // do stuff
227 |     else
228 |         if b0
229 |             // do other stuff
230 |         endif
231 |     endif
232 | 
233 | #### loop: Repeat code execution
234 | Syntax: `loop int_uniform`
235 | 
236 | Operation:
237 | Initialize `lcnt` to `int_uniform.y`, then process code between `loop` and `endloop` for `int_uniform.x+1` iterations in total. After each iteration, `lcnt` is incremented by `int_uniform.z`.
238 | 
239 | Restrictions:
240 | * no swizzle mask may be applied on the given uniform
241 | * there is no direct way of looping zero times (the easiest way is to use `break` with an extra boolean uniform)
242 | 
243 | Notes:
244 | * `lcnt` can be used to dynamically index arrays, e.g. to implement vertex lighting with multiple light sources
245 | 
246 | #### break: Break out of current loop
247 | Syntax: `break condition`
248 | 
249 | Operation:
250 | If `condition` is true, break out of the current loop.
251 | 
252 | Restrictions:
253 | * jumping out of a branch body may result in undefined behavior
254 | 
255 | #### jmp: Jump to code address
256 | Syntax: `jmp target_label if condition`
257 | 
258 | Restrictions:
259 | * jumping out of or into branch bodies or loops may result in undefined behavior
260 | * there is no way to force a jump without specifying a condition
261 | 
262 | Notes:
263 | * if you need to automatically return from a function, use `call` instead
264 | 
265 | Example:
266 | 
267 |     main:
268 |         jmp my_helper_code if b0
269 |         // if not b0, do other stuff here
270 |         nop
271 |         end
272 | 
273 |     my_helper_code:
274 |         // do stuff
275 |         nop
276 |         end
277 | 
278 | #### call: Jump to code address and return to caller
279 | Possible syntaxes:
280 | `call target_label until return_label if condition`
281 | `call target_label until return_label`
282 | 
283 | Operation:
284 | If `condition` is true (or none is given), jumps to `target_label` and processes shader code there until `return_label` is hit, at which point code execution jumps back to the caller.
285 | 
286 | Restrictions:
287 | * jumping out of or into branch bodies or loops may result in undefined behavior
288 | 
289 | Notes:
290 | * if you don't need to automatically return from a function, use `jmp` instead
291 | 
292 | Example:
293 | 
294 |     main:
295 |         call my_helper_code until end_helper_code
296 |         nop
297 |         end
298 | 
299 |     my_helper_code:
300 |         // do stuff here
301 |         nop
302 |     end_helper_code:
303 | 
304 | ## Special Purpose Instructions
305 | #### nop: No operation
306 | Syntax: `nop`
307 | 
308 | Notes:
309 | * This may be necessary before using `end` to make sure all pending write operations have been completed
310 | 
311 | #### end: Finish shader execution
312 | Syntax: `end`
313 | 
314 | Operation:
315 | Stops shader execution.
316 | 


--------------------------------------------------------------------------------
/docs/nihcode_spec.md:
--------------------------------------------------------------------------------
  1 | # nihcode Specification
  2 | 
  3 | Version 0.1.
  4 | 
  5 | This page seeks to be a formal-ish specification of the input assembly language *nihcode* used by the nihstro shader assembler.
  6 | 
  7 | ## Version information
  8 | This document is intended to give developers an idea of how things are expected to work. Please file issue reports for any deviations in nihstro's behavior from this specification that you find. Similarly, any ambiguities in the specification will be corrected if reported, too.
  9 | 
 10 | ## Structure
 11 | nihcode is a sequence of statements, each of which must be put onto a separate line. There are five types of statements:
 12 | * version information statements,
 13 | * include statements,
 14 | * alias declaration statements,
 15 | * label declaration statements, and
 16 | * instruction statements,
 17 | each of which is described in its own section below. Additionally, C++-like comments may be inserted at any point and are started using the character sequences `//`, `#`, or `;`. Comments span the rest of the line after any of these characters. Any statement must be written on its own line.
 18 | 
 19 | A pseudo-code example of nihcode looks like this:
 20 | 
 21 |     // First example shader
 22 |     .version 0.1                  // version information
 23 |     
 24 |     .alias inpos v0               // alias declaration
 25 |     .alias intex v1               // alias declaration
 26 |     .alias pos o0    as position  // alias declaration
 27 |     .alias tex o1.xy as texcoord0 // alias declaration
 28 | 
 29 |     .include "utils.h"            // include utility functionality
 30 | 
 31 |     main:                         // label declaration
 32 |         mov o0, v0                // instruction
 33 |         mov o1.xy, v1.xy          // instruction
 34 |         nop                       // instruction
 35 |         end                       // instruction
 36 | 
 37 | 
 38 | ## Shader Registers, builtin Identifiers, Swizzle Masks
 39 | A shader can access a number of different registers with different purposes. *Input registers* expose the raw input vertex attribute data, while the output vertex attributes used for rendering are written to *output registers*. External programs can pass parameters to the shader by setting *uniforms*. Additionally, a number of *temporary registers* are free for any use. There are also special-purpose registers, namely the *address registers* and the *conditional code register*.
 40 | 
 41 | Registers are being referred to by using *identifiers*. There is a number of builtin identifiers, each of which refers to one register. Note that most registers are vectors, i.e. they comprise multiple components, which are accessed using swizzle masks.
 42 | * `v0`-`v15`: Input registers (read-only), four-component vectors
 43 | * `r0`-`r15`: Temporary registers (read-write), four-component vectors
 44 | * `c0`-`c95`: Float uniforms (read-only), four-component vectors
 45 | * `i0`-`i3`:  Integer uniforms (read-only), four-component vectors
 46 | * `b0`-`b15`: Boolean uniforms (read-only), scalar
 47 | * `o0`-`o15`: Output registers (write-only), four-component vectors
 48 | * `a0, a1, aL`: Address registers (used with MOVA and dynamic indexing), scalar
 49 | * `cc`: Conditional code register (used with CMP and flow-control instructions), two-component vector
 50 | 
 51 | For better readability, one can also define new identifiers, as explained below. Identifiers may only use a restricted set of names including lower- or uppercase letters (a-z, A-Z), underscores, and decimal digits (the latter two of which may not be used as the first character of the name). Additionally, an identifier may be followed by a swizzle mask, separated by the character `.` (e.g. `texcoord.zyx`). Swizzle masks allow for reordering, duplicating, and removing of one or more vector components of the identified register (without actually modifying that register).
 52 | 
 53 | When used with certain instructions, identifiers may be mentioned along with a sign, an array index, and/or a swizzle mask. Constructs like this are called *expressions*.
 54 | 
 55 | The following names are reserved identifiers, and may not be used during declarations:
 56 | * Any names starting with a `gl_` prefix
 57 | * Any names starting with a `dmp_` prefix
 58 | * Any names starting with an underscore prefix
 59 | * Any of the instruction opcodes mentioned below may not be used for the identifier name
 60 | 
 61 | ## Aliases
 62 | ### Plain Aliases (any register)
 63 | `.alias <new_identifier> <existing_identifier>{.<swizzle_mask>}`
 64 | 
 65 | Declares a new identifier called `new_identifier` which will refer to the same register that `existing_identifier` refers to, applying a swizzle_mask if specified. All subsequent uses of `new_identifier` are equivalent to using `existing_identifier.swizzle_mask`. Aliases of any register type may be created, however it should be noted that using output registers requires explicit assignment of an output semantic (see below).
 66 | 
 67 | E.g. `.alias input_texture v2.xy`
 68 | 
 69 | ### Alias with Assignment of a Semantic (output registers)
 70 | `.alias <new_identifier> <existing_identifier>{.swizzle_mask} as <semantic>`
 71 | 
 72 | Declares an alias of `existing_identifier` with the name `new_identifier` and assigns the given semantic to the corresponding output register. An output semantic needs to be given to describe how the output vertex attribute is intended to be used after shader execution. `semantic` may be any of the strings `position`, `quaternion`, `color`, `texcoord0`, `texcoord1`, `texcoord2`, and `view`. If not all output register components are being written to, a swizzle mask should be used to denote the "active" components. Note that this swizzle mask may not reorder any components.
 73 | 
 74 | E.g. `.alias output_texcoord o1.xy as texcoord0`
 75 | 
 76 | ### Constant Declarations (uniform registers)
 77 | scalar constants: `.alias <new_identifier> <existing_identifier> as <value>`
 78 | 
 79 | vector constants: `.alias <new_identifier> <existing_identifier> as (<x>, <y>{, <z>{, <w>}})`
 80 | 
 81 | Declares an alias of `existing_identifier` with the name `new_identifier` and assigns the given default value to it. Default values are parsed by the ctrulib API and automatically applied when enabling a shader. The number of components in the given constant must match the number of components in the specified register.
 82 | 
 83 | E.g. `.alias my_const c4 as (0.1, 3.2, -3.14, 0.0)`
 84 | 
 85 | ## Label Declarations
 86 | `<labelname>:`
 87 | 
 88 | Declares a new label with the name `labelname` at the given source line, which can be used in flow control operations. Label names follow the same conventions as identifiers and may not share the same name with an existing identifier.
 89 | 
 90 | ## Instruction Statements
 91 | Writes the given opcode according to the given arguments to the shader binary. There are a lot of instructions, and each of them uses one of the following formats:
 92 | 
 93 | Trivial operations:
 94 | `<opcode>`
 95 | Used by `else`, `emit`, `end`, `endif`, `endloop`, and `nop`.
 96 | 
 97 | Arithmetic operations:
 98 | `<opcode> <expression1>{, <expression2>{, <expression3>{, <expression4>}}}`
 99 | Used by `add`, `dp3`, `dp4`, `dph`, `ex2`, `flr`, `lg2`, `mad`, `max`, `min`, `mov`, `mova`, `mul`, `rcp`, `rsq`, `sge` and `slt`. The number of required expressions as well as their meaning depends on the opcode.
100 | 
101 | E.g. `mul o3.xyz, c4.xyz, v0.xyz`
102 | 
103 | Compare operation:
104 | `cmp <expression1>, <expression2>, <op_x>, <op_y>`
105 | Used exclusively by `cmp`. `expression1` and `expression2` must evaluate to two-component float vectors. `op_x` and `op_y` specify comparison operations for the x and y components of the given expressions, respectively. They may be `==`, `!=`, `<`, `<=`, `>` or `>=`.
106 | 
107 | E.g. `cmp c0.xy, i2.xy, <=, ==`
108 | 
109 | Flow control operations:
110 | `<opcode> <condition>`
111 | Used by `break`, `if` and `loop`.
112 | 
113 | `<opcode> {<target_label>} {until <return_label>} {if <condition>}`
114 | Used by `jmp` and `call`.
115 | 
116 | `condition` may be either the identifier of a boolean uniform or a conditional expression. Examples of conditional expressions are `cc.x`, `!cc.x`, `!cc.xy`, `cc.x && !cc.y`, and `cc.x || cc.y`, where `{!}cc.xy` is equivalent to `{!}cc.x && {!}cc.y`. `target_label` and `return_label` must be label identifiers. Their meaning depends on the given opcode.
117 | 
118 | For a full instruction set reference, see the [instruction set reference](instruction_set.md). You may also want to refer to [3dbrew](http://3dbrew.org/wiki/Shader_Instruction_Set) for low-level documentation on each opcode. It is suggested that you take a look at the nihstro examples to get a better picture of how to apply that information.
119 | 
120 | ## Include Statements
121 | `.include "filename"`
122 | 
123 | Replaces the `.include` line with the contents of the given file. The filename is taken to be relative to the file it was included from.
124 | 
125 | ## Version Information
126 | `.version number`
127 | 
128 | This statement is a hint for the compiler to see which language specification the shader was written against. It may be used to toggle a compatibility assembling mode.
129 | 
130 | E.g. `.version 0.1`
131 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/Makefile:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------
  2 | .SUFFIXES:
  3 | #---------------------------------------------------------------------------------
  4 | 
  5 | ifeq ($(strip $(DEVKITARM)),)
  6 | $(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
  7 | endif
  8 | # NIHSTRO must name the directory that holds nihstro-assemble; the %.vsh.o rule runs $(NIHSTRO)/nihstro-assemble.
  9 | ifeq ($(strip $(NIHSTRO)),)
 10 | $(error "Please set NIHSTRO in your environment. export NIHSTRO=<path to the directory containing nihstro-assemble>")
 11 | endif
 12 | 
 13 | TOPDIR ?= $(CURDIR)
 14 | include $(DEVKITARM)/3ds_rules
 15 | 
 16 | #---------------------------------------------------------------------------------
 17 | # TARGET is the name of the output
 18 | # BUILD is the directory where object files & intermediate files will be placed
 19 | # SOURCES is a list of directories containing source code
 20 | # DATA is a list of directories containing data files
 21 | # INCLUDES is a list of directories containing header files
 22 | #
 23 | # NO_SMDH: if set to anything, no SMDH file is generated.
 24 | # APP_TITLE is the name of the app stored in the SMDH file (Optional)
 25 | # APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
 26 | # APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
 27 | # ICON is the filename of the icon (.png), relative to the project folder.
 28 | #   If not set, it attempts to use one of the following (in this order):
 29 | #     - <Project name>.png
 30 | #     - icon.png
 31 | #     - <libctru folder>/default_icon.png
 32 | #---------------------------------------------------------------------------------
 33 | TARGET		:=	$(notdir $(CURDIR))
 34 | BUILD		:=	build
 35 | SOURCES		:=	source
 36 | DATA		:=	data
 37 | INCLUDES	:=	include
 38 | 
 39 | #---------------------------------------------------------------------------------
 40 | # options for code generation
 41 | #---------------------------------------------------------------------------------
 42 | ARCH	:=	-march=armv6k -mtune=mpcore -mfloat-abi=hard
 43 | 
 44 | CFLAGS	:=	-g -Wall -O2 -mword-relocations \
 45 | 			-fomit-frame-pointer -ffast-math \
 46 | 			$(ARCH)
 47 | 
 48 | CFLAGS	+=	$(INCLUDE) -DARM11 -D_3DS
 49 | 
 50 | CXXFLAGS	:= $(CFLAGS) -fno-rtti -std=gnu++11
 51 | 
 52 | ASFLAGS	:=	-g $(ARCH)
 53 | LDFLAGS	=	-specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
 54 | 
 55 | LIBS	:= -lctru -lm
 56 | 
 57 | #---------------------------------------------------------------------------------
 58 | # list of directories containing libraries, this must be the top level containing
 59 | # include and lib
 60 | #---------------------------------------------------------------------------------
 61 | LIBDIRS	:= $(CTRULIB)
 62 | 
 63 | 
 64 | #---------------------------------------------------------------------------------
 65 | # no real need to edit anything past this point unless you need to add additional
 66 | # rules for different file extensions
 67 | #---------------------------------------------------------------------------------
 68 | ifneq ($(BUILD),$(notdir $(CURDIR)))
 69 | #---------------------------------------------------------------------------------
 70 | 
 71 | export OUTPUT	:=	$(CURDIR)/$(TARGET)
 72 | export TOPDIR	:=	$(CURDIR)
 73 | 
 74 | export VPATH	:=	$(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
 75 | 			$(foreach dir,$(DATA),$(CURDIR)/$(dir))
 76 | 
 77 | export DEPSDIR	:=	$(CURDIR)/$(BUILD)
 78 | 
 79 | CFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
 80 | CPPFILES	:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
 81 | SFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
 82 | BINFILES	:=	$(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
 83 | 
 84 | #---------------------------------------------------------------------------------
 85 | # use CXX for linking C++ projects, CC for standard C
 86 | #---------------------------------------------------------------------------------
 87 | ifeq ($(strip $(CPPFILES)),)
 88 | #---------------------------------------------------------------------------------
 89 | 	export LD	:=	$(CC)
 90 | #---------------------------------------------------------------------------------
 91 | else
 92 | #---------------------------------------------------------------------------------
 93 | 	export LD	:=	$(CXX)
 94 | #---------------------------------------------------------------------------------
 95 | endif
 96 | #---------------------------------------------------------------------------------
 97 | 
 98 | export OFILES	:=	$(addsuffix .o,$(BINFILES)) \
 99 | 			$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
100 | 
101 | export INCLUDE	:=	$(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
102 | 			$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
103 | 			-I$(CURDIR)/$(BUILD)
104 | 
105 | export LIBPATHS	:=	$(foreach dir,$(LIBDIRS),-L$(dir)/lib)
106 | 
107 | ifeq ($(strip $(ICON)),)
108 | 	icons := $(wildcard *.png)
109 | 	ifneq (,$(findstring $(TARGET).png,$(icons)))
110 | 		export APP_ICON := $(TOPDIR)/$(TARGET).png
111 | 	else
112 | 		ifneq (,$(findstring icon.png,$(icons)))
113 | 			export APP_ICON := $(TOPDIR)/icon.png
114 | 		endif
115 | 	endif
116 | else
117 | 	export APP_ICON := $(TOPDIR)/$(ICON)
118 | endif
119 | 
120 | .PHONY: $(BUILD) clean all
121 | 
122 | #---------------------------------------------------------------------------------
123 | all: $(BUILD)
124 | # Create the build directory, then re-run this Makefile inside it; $(MAKE) (not a literal make) keeps -j/-n and the jobserver working.
125 | $(BUILD):
126 | 	@[ -d $@ ] || mkdir -p $@
127 | 	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
128 | 
129 | #---------------------------------------------------------------------------------
130 | clean:
131 | 	@echo clean ...
132 | 	@rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf test.vsh.shbin
133 | 
134 | 
135 | #---------------------------------------------------------------------------------
136 | else
137 | 
138 | DEPENDS	:=	$(OFILES:.o=.d)
139 | 
140 | #---------------------------------------------------------------------------------
141 | # main targets
142 | #---------------------------------------------------------------------------------
143 | ifeq ($(strip $(NO_SMDH)),)
144 | .PHONY: all
145 | all	:	$(OUTPUT).3dsx $(OUTPUT).smdh
146 | endif
147 | $(OUTPUT).3dsx	:	$(OUTPUT).elf
148 | $(OUTPUT).elf	:	$(OFILES)
149 | 
150 | #---------------------------------------------------------------------------------
151 | # you need a rule like this for each extension you use as binary data
152 | #---------------------------------------------------------------------------------
153 | %.bin.o	:	%.bin
154 | #---------------------------------------------------------------------------------
155 | 	@echo $(notdir $<)
156 | 	@$(bin2o)
157 | 
158 | # WARNING: This is not the right way to do this! TODO: Do it right!
159 | #---------------------------------------------------------------------------------
160 | %.vsh.o	:	%.vsh
161 | #---------------------------------------------------------------------------------
162 | 	@echo $(notdir $<)
163 | 	@$(NIHSTRO)/nihstro-assemble --output ../$(notdir $<).shbin $<
164 | 	@bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@
165 | 	@echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h
166 | 	@echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . _)`.h
167 | 	@echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h
168 | 
169 | -include $(DEPENDS)
170 | 
171 | #---------------------------------------------------------------------------------------
172 | endif
173 | #---------------------------------------------------------------------------------------
174 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/README.md:
--------------------------------------------------------------------------------
1 | cube example
2 | ============
3 | 
4 | Simple port of ctrulib's gpu example to nihstro shaders. The C program code is mostly unchanged from the original; however, the example shader in the data subdirectory should give you a good idea of the basic nihcode shader syntax.
5 | 
6 | Before trying to compile, make sure your NIHSTRO environment variable points to the directory nihstro-assemble resides in. Additionally, ctrulib in revision 1f52ac344d or similar is required, plus some patches to implement proper uniform setters.
7 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/data/test.vsh:
--------------------------------------------------------------------------------
 1 | // setup constants
 2 | .alias myconst c32 as (1.0, 0.0, 0.5, 1.0)
 3 | 
 4 | // setup output map
 5 | .alias outpos  o0      as position
 6 | .alias outcol  o1      as color
 7 | .alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently
 8 | .alias outtex1 o3.xyzw as texcoord1
 9 | .alias outtex2 o4.xyzw as texcoord2
10 | 
11 | // setup uniform map, for use with SHDR_GetUniformRegister
12 | .alias projection     c0  // -c3
13 | .alias modelview      c4  // -c7
14 | .alias lightDirection c8
15 | .alias lightAmbient   c9
16 | 
17 | main:  // r1 = (in.pos.xyz, 1.0); myconst.w is the constant 1.0
18 | 	mov r1.xyz,  v0.xyz
19 | 	mov r1.w,    myconst.w
20 | 
21 | mdvl:  // tempreg = mdlvMtx * in.pos
22 | 	dp4 r0.x,  modelview[0],  r1
23 | 	dp4 r0.y,  modelview[1],  r1
24 | 	dp4 r0.z,  modelview[2],  r1
25 | 	mov r0.w,  myconst.w
26 | 
27 | proj:  // result.pos = projMtx * tempreg
28 | 	dp4 outpos.x,  projection[0],  r0
29 | 	dp4 outpos.y,  projection[1],  r0
30 | 	dp4 outpos.z,  projection[2],  r0
31 | 	dp4 outpos.w,  projection[3],  r0
32 | 
33 | tex:  // result.texcoord = in.texcoord; the unused texcoord outputs get (0, 0, 0, 1)
34 | 	mov outtex0,  v1
35 | 	mov outtex1,  myconst.yyyw
36 | 	mov outtex2,  myconst.yyyw
37 | 
38 | col:  // Hacky lighting: color = ambient.xyz + max(dot(L,N), 0.0) * ambient.www
39 | 	dp3 r0.xyz,     lightDirection.xyz, v2.xyz
40 | 	max r0.xyz,     myconst.yyy,        r0.xyz
41 | 	mul r0.xyz,     lightAmbient.www,   r0.xyz
42 | 	add outcol.xyz, lightAmbient.xyz,   r0.xyz
43 | 	mov outcol.w,   myconst.w
44 | 
45 | 	nop
46 | 	end
47 | 
48 | endmain:
49 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/data/texture.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neobrain/nihstro/f4d8659decbfe5d234f04134b5002b82dc515a44/examples/assembler/cube/data/texture.bin


--------------------------------------------------------------------------------
/examples/assembler/cube/source/_gs.s:
--------------------------------------------------------------------------------
 1 | .section ".text"
 2 | .arm
 3 | .align 4
 4 | .global _vboMemcpy50
 5 | 
 6 | # r0 : dst
 7 | # r1 : src
 8 | # fixed size 0x50 bytes (20 words), moved as two bursts of 10 registers (r2-r11)
 9 | _vboMemcpy50:
10 | 	push {r4-r11}        @ r4-r11 are used as scratch below and must be preserved
11 | 	ldmia r1!, {r2-r11}  @ first 40 bytes (r2-r12 would be 11 registers, i.e. 0x58 bytes in total)
12 | 	stmia r0!, {r2-r11}
13 | 	ldmia r1!, {r2-r11}  @ second 40 bytes
14 | 	stmia r0!, {r2-r11}
15 | 	pop {r4-r11}
16 | 	bx lr
17 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/source/gs.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <string.h>
  3 | #include <malloc.h>
  4 | #include <3ds.h>
  5 | 
  6 | #include "gs.h"
  7 | #include "math.h"
  8 | 
  9 | #define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
 10 | 
 11 | static void gsInitMatrixStack();
 12 | 
 13 | Handle linearAllocMutex;
 14 | 
 15 | static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
 16 | 
 17 | typedef struct // one recorded projection-matrix upload (filled in by gsUpdateTransformation)
 18 | {
 19 | 	u32 offset; // command-buffer offset reported by GPUCMD_GetBuffer when the upload was emitted
 20 | 	mtx44 data; // copy of the matrix that was uploaded at that offset
 21 | }bufferMatrix_s;
 22 | 
 23 | bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
 24 | int bufferMatrixListLength;
 25 | 
 26 | //----------------------
 27 | //   GS SYSTEM STUFF
 28 | //----------------------
 29 | // Empties the list of recorded matrix uploads by resetting its length to zero.
 30 | void initBufferMatrixList()
 31 | {
 32 | 	bufferMatrixListLength=0;
 33 | }
 34 | // Initialises the matrix stacks, the upload list and the allocator mutex; optionally binds `shader` and its matrix uniforms.
 35 | void gsInit(shaderProgram_s* shader)
 36 | {
 37 | 	gsInitMatrixStack();
 38 | 	initBufferMatrixList();
 39 | 	svcCreateMutex(&linearAllocMutex, false);
 40 | 	if(!shader)return; // without a shader the default uniform registers are kept
 41 | 
 42 | 	// look up the uniform registers the shader assigned to both matrices
 43 | 	gsMatrixStackRegisters[GS_PROJECTION]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection");
 44 | 	gsMatrixStackRegisters[GS_MODELVIEW]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview");
 45 | 	shaderProgramUse(shader);
 46 | }
 47 | // Releases the mutex handle created by gsInit.
 48 | void gsExit(void)
 49 | {
 50 | 	svcCloseHandle(linearAllocMutex);
 51 | }
 52 | // Begins a new frame: rewinds the GPU command buffer to offset 0 and forgets the recorded matrix uploads.
 53 | void gsStartFrame(void)
 54 | {
 55 | 	GPUCMD_SetBufferOffset(0);
 56 | 	initBufferMatrixList();
 57 | }
 58 | // Calls linearAlloc(size) while holding linearAllocMutex and returns its result unchanged.
 59 | void* gsLinearAlloc(size_t size)
 60 | {
 61 | 	void* block;
 62 | 
 63 | 	svcWaitSynchronization(linearAllocMutex, U64_MAX); // wait (no timeout) for the mutex
 64 | 	block=linearAlloc(size);
 65 | 	svcReleaseMutex(linearAllocMutex);
 66 | 
 67 | 	return block;
 68 | }
 69 | // Calls linearFree(mem) while holding linearAllocMutex (counterpart of gsLinearAlloc).
 70 | void gsLinearFree(void* mem)
 71 | {
 72 | 	svcWaitSynchronization(linearAllocMutex, U64_MAX);
 73 | 	linearFree(mem);
 74 | 	svcReleaseMutex(linearAllocMutex);
 75 | }
 76 | 
 77 | //----------------------
 78 | //  MATRIX STACK STUFF
 79 | //----------------------
 80 | 
 81 | static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
 82 | static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
 83 | static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
 84 | static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
 85 | static GS_MATRIX gsCurrentMatrixType;
 86 | // Resets every matrix stack to depth 0 holding an identity matrix, marks it dirty, and selects GS_PROJECTION.
 87 | static void gsInitMatrixStack()
 88 | {
 89 | 	int type;
 90 | 	for(type=0; type<GS_MATRIXTYPES; type++)
 91 | 	{
 92 | 		loadIdentity44((float*)gsMatrixStacks[type][0]);
 93 | 		gsMatrixStackOffsets[type]=0;
 94 | 		gsMatrixStackUpdated[type]=true;
 95 | 	}
 96 | 	gsCurrentMatrixType=GS_PROJECTION;
 97 | }
 98 | // Returns the top matrix of stack `m`, or NULL when `m` is not a valid stack id.
 99 | float* gsGetMatrix(GS_MATRIX m)
100 | {
101 | 	const bool valid=(m>=0 && m<GS_MATRIXTYPES);
102 | 
103 | 	return valid ? (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]] : NULL;
104 | }
105 | // Overwrites the top of stack `m` with sizeof(mtx44) bytes from `data`; returns -1 on a bad stack id or NULL data.
106 | int gsLoadMatrix(GS_MATRIX m, float* data)
107 | {
108 | 	float* top;
109 | 	if(!data || m<0 || m>=GS_MATRIXTYPES)return -1;
110 | 
111 | 	top=gsGetMatrix(m);
112 | 	memcpy(top, data, sizeof(mtx44));
113 | 	gsMatrixStackUpdated[m]=true; // schedule a re-upload on the next draw
114 | 	return 0;
115 | }
116 | // Duplicates the top of the current stack one level up; returns -1 when the stack is full or the mode is invalid.
117 | int gsPushMatrix()
118 | {
119 | 	const GS_MATRIX type=gsCurrentMatrixType;
120 | 	if(type<0 || type>=GS_MATRIXTYPES)return -1;
121 | 	if(gsMatrixStackOffsets[type]<0 || gsMatrixStackOffsets[type]>=GS_MATRIXSTACK_SIZE-1)return -1;
122 | 
123 | 	float* below=gsGetMatrix(type); // current top, about to become the entry underneath
124 | 	gsMatrixStackOffsets[type]++;
125 | 	float* above=gsGetMatrix(type);
126 | 	memcpy(above, below, sizeof(mtx44));
127 | 	return 0;
128 | }
129 | // Drops the top of the current stack and marks the stack dirty; returns -1 when there is nothing to pop.
130 | int gsPopMatrix()
131 | {
132 | 	const GS_MATRIX type=gsCurrentMatrixType;
133 | 	if(type<0 || type>=GS_MATRIXTYPES)return -1;
134 | 
135 | 	const u8 depth=gsMatrixStackOffsets[type];
136 | 	if(depth<1 || depth>=GS_MATRIXSTACK_SIZE)return -1; // empty stack or out-of-range offset
137 | 
138 | 	gsMatrixStackOffsets[type]=depth-1;
139 | 	gsMatrixStackUpdated[type]=true; // the top changed, so it must be uploaded again
140 | 	return 0;
141 | }
142 | // Selects which stack the matrix operations act on; returns -1 (leaving the mode unchanged) for an invalid id.
143 | int gsMatrixMode(GS_MATRIX m)
144 | {
145 | 	const bool valid=(m>=0 && m<GS_MATRIXTYPES);
146 | 
147 | 	if(valid)gsCurrentMatrixType=m;
148 | 
149 | 	return valid ? 0 : -1;
150 | }
151 | 
152 | //------------------------
153 | // MATRIX TRANSFORM STUFF
154 | //------------------------
155 | // Replaces the current top matrix with multMatrix44(top, data) and marks it dirty; returns -1 when `data` is NULL.
156 | int gsMultMatrix(float* data)
157 | {
158 | 	if(!data)return -1;
159 | 
160 | 	mtx44 product; // scratch result, so the top matrix is not overwritten while it is still an input
161 | 	float* top=gsGetMatrix(gsCurrentMatrixType);
162 | 	multMatrix44(top, data, (float*)product);
163 | 	memcpy(top, (float*)product, sizeof(mtx44));
164 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
165 | 
166 | 	return 0;
167 | }
168 | // Passes the current top matrix to loadIdentity44 and marks the stack dirty.
169 | void gsLoadIdentity()
170 | {
171 | 	loadIdentity44(gsGetMatrix(gsCurrentMatrixType));
172 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
173 | }
174 | // Passes the current top matrix and the given parameters to initProjectionMatrix, then marks the stack dirty.
175 | void gsProjectionMatrix(float fovy, float aspect, float near, float far)
176 | {
177 | 	initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far);
178 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
179 | }
180 | // Applies rotateMatrixX with angle `x` to the current top matrix and marks the stack dirty.
181 | void gsRotateX(float x)
182 | {
183 | 	rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false);
184 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
185 | }
186 | // Applies rotateMatrixY with angle `y` to the current top matrix and marks the stack dirty.
187 | void gsRotateY(float y)
188 | {
189 | 	rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false);
190 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
191 | }
192 | // Applies rotateMatrixZ with angle `z` to the current top matrix and marks the stack dirty.
193 | void gsRotateZ(float z)
194 | {
195 | 	rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false);
196 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
197 | }
198 | // Applies scaleMatrix with factors (x, y, z) to the current top matrix and marks the stack dirty.
199 | void gsScale(float x, float y, float z)
200 | {
201 | 	scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
202 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
203 | }
204 | // Applies translateMatrix with offsets (x, y, z) to the current top matrix and marks the stack dirty.
205 | void gsTranslate(float x, float y, float z)
206 | {
207 | 	translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
208 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
209 | }
210 | 
211 | //----------------------
212 | // MATRIX RENDER STUFF
213 | //----------------------
214 | // Sends the 16 floats of `m` to the vertex shader's float uniforms at `startreg` (GPU_SetFloatUniform is called with a count of 4).
215 | static void gsSetUniformMatrix(u32 startreg, float* m)
216 | {
217 | 	float reversed[16]; // each group of four floats is stored back-to-front (w, z, y, x)
218 | 
219 | 	reversed[0]=m[3]; //w
220 | 	reversed[1]=m[2]; //z
221 | 	reversed[2]=m[1]; //y
222 | 	reversed[3]=m[0]; //x
223 | 
224 | 	reversed[4]=m[7];
225 | 	reversed[5]=m[6];
226 | 	reversed[6]=m[5];
227 | 	reversed[7]=m[4];
228 | 
229 | 	reversed[8]=m[11];
230 | 	reversed[9]=m[10];
231 | 	reversed[10]=m[9];
232 | 	reversed[11]=m[8];
233 | 
234 | 	reversed[12]=m[15];
235 | 	reversed[13]=m[14];
236 | 	reversed[14]=m[13];
237 | 	reversed[15]=m[12];
238 | 
239 | 	GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)reversed, 4);
240 | }
241 | // Uploads every stack whose top matrix is flagged dirty; projection uploads are also recorded in bufferMatrixList.
242 | static int gsUpdateTransformation()
243 | {
244 | 	GS_MATRIX type;
245 | 	for(type=0; type<GS_MATRIXTYPES; type++)
246 | 	{
247 | 		if(!gsMatrixStackUpdated[type])continue; // unchanged since the last upload
248 | 
249 | 		if(type==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
250 | 		{
251 | 			bufferMatrix_s* slot=&bufferMatrixList[bufferMatrixListLength++];
252 | 			GPUCMD_GetBuffer(NULL, NULL, &slot->offset); // remember where this upload starts
253 | 			memcpy(slot->data, gsGetMatrix(type), sizeof(mtx44));
254 | 		}
255 | 		gsSetUniformMatrix(gsMatrixStackRegisters[type], gsGetMatrix(type));
256 | 		gsMatrixStackUpdated[type]=false;
257 | 	}
258 | 
259 | 	return 0;
260 | }
261 | // Rewrites each recorded projection upload in the command buffer with (recorded matrix x transformation) via multMatrix44.
262 | void gsAdjustBufferMatrices(mtx44 transformation)
263 | {
264 | 	int idx;
265 | 	u32* cmdBuffer;
266 | 	u32 endOffset;
267 | 	GPUCMD_GetBuffer(&cmdBuffer, NULL, &endOffset);
268 | 	for(idx=0; idx<bufferMatrixListLength; idx++)
269 | 	{
270 | 		const u32 recorded=bufferMatrixList[idx].offset;
271 | 		if(recorded+2>=endOffset)continue; //TODO : better check, need to account for param size
272 | 
273 | 		mtx44 adjusted;
274 | 		GPUCMD_SetBufferOffset(recorded); // re-emit the upload in place, over the original one
275 | 		multMatrix44((float*)bufferMatrixList[idx].data, (float*)transformation, (float*)adjusted);
276 | 		gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)adjusted);
277 | 	}
278 | 	// restore the write position that was active on entry
279 | 	GPUCMD_SetBufferOffset(endOffset);
280 | }
281 | 
282 | //----------------------
283 | //      VBO STUFF
284 | //----------------------
285 | // Puts `vbo` into the empty state (no buffers, zero sizes and vertex count); returns -1 for a NULL vbo, 0 otherwise.
286 | int gsVboInit(gsVbo_s* vbo)
287 | {
288 | 	if(!vbo)return -1;
289 | 
290 | 	vbo->data=NULL;
291 | 	vbo->currentSize=0;
292 | 	vbo->maxSize=0;
293 | 	vbo->numVertices=0; // was left stale before, e.g. after gsVboDestroy
294 | 	vbo->commands=NULL;
295 | 	vbo->commandsSize=0;
296 | 	return 0;
297 | }
298 | // Allocates `size` bytes of linear memory for `vbo` and resets its fill state; returns -1 on a NULL vbo or failed allocation.
299 | int gsVboCreate(gsVbo_s* vbo, u32 size)
300 | {
301 | 	if(!vbo)return -1;
302 | 
303 | 	vbo->data=gsLinearAlloc(size);
304 | 	vbo->numVertices=0;
305 | 	vbo->currentSize=0;
306 | 	// advertise zero capacity when the allocation failed, so gsVboAddData cannot write through a NULL buffer
307 | 	vbo->maxSize=vbo->data ? size : 0;
308 | 	return vbo->data ? 0 : -1;
309 | }
310 | // Returns the address currentSize bytes into the VBO's data buffer (its write position), or NULL for a NULL vbo.
311 | void* gsVboGetOffset(gsVbo_s* vbo)
312 | {
313 | 	if(!vbo)return NULL;
314 | 
315 | 	return (void*)((u8*)vbo->data+vbo->currentSize);
316 | }
317 | // Appends `size` bytes from `data` and adds `units` to the vertex count; returns -1 on bad arguments or insufficient room.
318 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
319 | {
320 | 	if(!vbo || !vbo->data || !data || !size)return -1;
321 | 	// unsigned-safe capacity check: the old s32 difference was compared as unsigned, so a negative remainder passed
322 | 	if(vbo->currentSize>vbo->maxSize || vbo->maxSize-vbo->currentSize<size)return -1;
323 | 	memcpy(gsVboGetOffset(vbo), data, size);
324 | 	vbo->currentSize+=size;
325 | 	vbo->numVertices+=units;
326 | 
327 | 	return 0;
328 | }
329 | // Currently a no-op apart from the NULL check: the cache flush below is commented out. Returns -1 for a NULL vbo.
330 | int gsVboFlushData(gsVbo_s* vbo)
331 | {
332 | 	if(!vbo)return -1;
333 | 
334 | 	//unnecessary if we use flushAndRun
335 | 	// GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
336 | 
337 | 	return 0;
338 | }
339 | // Frees the cached command list and the vertex buffer, then resets `vbo` via gsVboInit; returns -1 for a NULL vbo.
340 | int gsVboDestroy(gsVbo_s* vbo)
341 | {
342 | 	if(!vbo)return -1;
343 | 
344 | 	if(vbo->commands)free(vbo->commands); // allocated with memalign in gsVboPrecomputeCommands
345 | 	if(vbo->data)gsLinearFree(vbo->data); // allocated with gsLinearAlloc in gsVboCreate
346 | 	gsVboInit(vbo);
347 | 
348 | 	return 0;
349 | }
350 | 
351 | extern u32 debugValue[];
352 | // Emits ten single-parameter GPU commands that draw `n` vertices of type `primitive` from the vertex data at `data`.
353 | void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
354 | {
355 | 	//set attribute buffer address
356 | 	GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
357 | 	//set primitive type
358 | 	GPUCMD_AddSingleParam(0x0002025E, primitive);
359 | 	GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
360 | 	//index buffer not used for drawArrays but 0x000F0227 still required
361 | 	GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
362 | 	//pass number of vertices
363 | 	GPUCMD_AddSingleParam(0x000F0228, n);
364 | 
365 | 	GPUCMD_AddSingleParam(0x00010253, 0x00000001);
366 | 
367 | 	GPUCMD_AddSingleParam(0x00010245, 0x00000000);
368 | 	GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
369 | 	GPUCMD_AddSingleParam(0x00010245, 0x00000001);
370 | 	GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
371 | 
372 | 	// GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
373 | }
374 | // Records the draw commands for `vbo` into a scratch buffer and caches a heap copy in vbo->commands / vbo->commandsSize.
375 | // Returns -1 if vbo is NULL, commands are already cached, or allocation fails. Not thread safe (static scratch buffer).
376 | int gsVboPrecomputeCommands(gsVbo_s* vbo)
377 | {
378 | 	if(!vbo || vbo->commands)return -1;
379 | 
380 | 	static u32 tmpBuffer[128];
381 | 
382 | 	u32* savedAdr; u32 savedSize, savedOffset;
383 | 	GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);
384 | 	GPUCMD_SetBuffer(tmpBuffer, 128, 0); // redirect command emission into the scratch buffer
385 | 
386 | 	GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
387 | 
388 | 	GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize); // offset reached, used below as a count of 32-bit words
389 | 	// hand the real command buffer back before anything can fail (the old code leaked tmpBuffer on allocation failure)
390 | 	GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);
391 | 
392 | 	vbo->commands=memalign(0x4, vbo->commandsSize*4);
393 | 	if(!vbo->commands){ vbo->commandsSize=0; return -1; }
394 | 	memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);
395 | 	return 0;
396 | }
397 | 
398 | extern u32* gpuCmdBuf;
399 | extern u32 gpuCmdBufSize;
400 | extern u32 gpuCmdBufOffset;
401 | 
402 | void _vboMemcpy50(u32* dst, u32* src);
403 | // Appends `size` 32-bit words from `cmd` at the current command-buffer offset; 0x50-byte lists use _vboMemcpy50.
404 | void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
405 | {
406 | 	if(!cmd || !size)return;
407 | 
408 | 	u32* dst=&gpuCmdBuf[gpuCmdBufOffset];
409 | 	if(size*4!=0x50)memcpy(dst, cmd, size*4); else _vboMemcpy50(dst, cmd);
410 | 	gpuCmdBufOffset+=size;
411 | }
412 | // Draws `vbo` as triangles after uploading dirty matrices; returns -1 if the vbo is NULL, has no data, or is empty.
413 | int gsVboDraw(gsVbo_s* vbo)
414 | {
415 | 	if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;
416 | 
417 | 	gsUpdateTransformation(); // upload any matrix stacks flagged as updated
418 | 
419 | 	gsVboPrecomputeCommands(vbo); // returns -1 without doing anything once vbo->commands is cached
420 | 
421 | 	// u64 val=svcGetSystemTick();
422 | 	if(vbo->commands)
423 | 	{
424 | 		_GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
425 | 	}else{
426 | 		GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
427 | 	}
428 | 	// debugValue[5]+=(u32)(svcGetSystemTick()-val);
429 | 	// debugValue[6]++;
430 | 
431 | 	return 0;
432 | }
433 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/source/gs.h:
--------------------------------------------------------------------------------
 1 | #ifndef GS_H
 2 | #define GS_H
 3 | 
 4 | #include <3ds.h>
 5 | #include "math.h"
 6 | 
 7 | #define GS_MATRIXSTACK_SIZE (8)
 8 | 
 9 | typedef enum // identifies one of the matrix stacks managed by gs.c
10 | {
11 | 	GS_PROJECTION = 0,
12 | 	GS_MODELVIEW = 1,
13 | 	GS_MATRIXTYPES // number of stacks; used as an array size and loop bound, not a valid stack id
14 | }GS_MATRIX;
15 | 
16 | typedef struct // vertex buffer plus an optional cached list of draw commands
17 | {
18 | 	u8* data; // vertex storage obtained from gsLinearAlloc in gsVboCreate
19 | 	u32 currentSize; // in bytes
20 | 	u32 maxSize; // in bytes
21 | 	u32 numVertices; // running total of the `units` passed to gsVboAddData
22 | 	u32* commands; // cached draw commands built by gsVboPrecomputeCommands, or NULL
23 | 	u32 commandsSize; // length of `commands` in 32-bit words
24 | }gsVbo_s;
25 | 
26 | 
27 | void gsInit(shaderProgram_s* shader);
28 | void gsExit(void);
29 | 
30 | void gsStartFrame(void);
31 | void gsAdjustBufferMatrices(mtx44 transformation);
32 | 
33 | void* gsLinearAlloc(size_t size);
34 | void gsLinearFree(void* mem);
35 | 
36 | float* gsGetMatrix(GS_MATRIX m);
37 | int gsLoadMatrix(GS_MATRIX m, float* data);
38 | int gsPushMatrix();
39 | int gsPopMatrix();
40 | int gsMatrixMode(GS_MATRIX m);
41 | 
42 | void gsLoadIdentity();
43 | void gsProjectionMatrix(float fovy, float aspect, float near, float far);
44 | void gsRotateX(float x);
45 | void gsRotateY(float y);
46 | void gsRotateZ(float z);
47 | void gsScale(float x, float y, float z);
48 | void gsTranslate(float x, float y, float z);
49 | int gsMultMatrix(float* data);
50 | 
51 | int gsVboInit(gsVbo_s* vbo);
52 | int gsVboCreate(gsVbo_s* vbo, u32 size);
53 | int gsVboFlushData(gsVbo_s* vbo);
54 | int gsVboDestroy(gsVbo_s* vbo);
55 | int gsVboDraw(gsVbo_s* vbo);
56 | void* gsVboGetOffset(gsVbo_s* vbo);
57 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/source/main.c:
--------------------------------------------------------------------------------
  1 | ///////////////////////////////////////
  2 | //            GPU example            //
  3 | ///////////////////////////////////////
  4 | 
  5 | //this example is meant to show how to use the GPU to render a 3D object
  6 | //it also shows how to do stereoscopic 3D
  7 | //it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
  8 | //keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
  9 | 
 10 | #include <stdlib.h>
 11 | #include <stdio.h>
 12 | #include <string.h>
 13 | #include <math.h>
 14 | #include <3ds.h>
 15 | 
 16 | #include "math.h"
 17 | #include "gs.h"
 18 | 
 19 | #include "test_vsh_shbin.h"
 20 | #include "texture_bin.h"
 21 | 
 22 | //will be moved into ctrulib at some point
 23 | #define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080)
 24 | 
 25 | #define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0))
 26 | 
 27 | //transfer from GPU output buffer to actual framebuffer flags
 28 | #define DISPLAY_TRANSFER_FLAGS \
 29 | 	(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
 30 | 	 GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
 31 | 	 GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X))
 32 | 
 33 | //shader structure
 34 | DVLB_s* dvlb;
 35 | shaderProgram_s shader;
 36 | //texture data pointer
 37 | u32* texData;
 38 | //vbo structure
 39 | gsVbo_s vbo;
 40 | 
 41 | //GPU framebuffer address
 42 | u32* gpuOut=(u32*)0x1F119400;
 43 | //GPU depth buffer address
 44 | u32* gpuDOut=(u32*)0x1F370800;
 45 | 
 46 | //angle for the vertex lighting (cf test.vsh)
 47 | float lightAngle;
 48 | //object position and rotation angle
 49 | vect3Df_s position, angle;
 50 | 
 51 | //vertex structure
 52 | typedef struct // field order matches the per-vertex layout described above modelVboData
 53 | {
 54 | 	vect3Df_s position; // {x, y, z}
 55 | 	float texcoord[2]; // two texture coordinates
 56 | 	vect3Df_s normal; // {x, y, z}
 57 | }vertex_s;
 58 | 
 59 | //object data (cube)
 60 | //obviously this doesn't have to be defined manually, but we will here for the purposes of the example
 61 | //each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z}
 62 | //we're drawing triangles so three lines = one triangle
 63 | const vertex_s modelVboData[]=
 64 | {
 65 | 	//first face (PZ)
 66 | 		//first triangle
 67 | 		{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
 68 | 		{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
 69 | 		{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
 70 | 		//second triangle
 71 | 		{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
 72 | 		{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
 73 | 		{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
 74 | 	//second face (MZ)
 75 | 		//first triangle
 76 | 		{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
 77 | 		{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
 78 | 		{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
 79 | 		//second triangle
 80 | 		{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
 81 | 		{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
 82 | 		{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
 83 | 	//third face (PX)
 84 | 		//first triangle
 85 | 		{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
 86 | 		{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
 87 | 		{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
 88 | 		//second triangle
 89 | 		{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
 90 | 		{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
 91 | 		{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
 92 | 	//fourth face (MX)
 93 | 		//first triangle
 94 | 		{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
 95 | 		{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
 96 | 		{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
 97 | 		//second triangle
 98 | 		{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
 99 | 		{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
100 | 		{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
101 | 	//fifth face (PY)
102 | 		//first triangle
103 | 		{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
104 | 		{(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
105 | 		{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
106 | 		//second triangle
107 | 		{(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
108 | 		{(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
109 | 		{(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
110 | 	//sixth face (MY)
111 | 		//first triangle
112 | 		{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
113 | 		{(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
114 | 		{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
115 | 		//second triangle
116 | 		{(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
117 | 		{(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
118 | 		{(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
119 | };
120 | 
121 | //sets up texenv stage `num` with GPU_PREVIOUS as its only source and GPU_REPLACE as its combine mode (stolen from staplebutt)
122 | void GPU_SetDummyTexEnv(u8 num)
123 | {
124 | 	GPU_SetTexEnv(num,
125 | 		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), //NOTE(review): the paired arguments are presumably the RGB and alpha settings - confirm against ctrulib
126 | 		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
127 | 		GPU_TEVOPERANDS(0,0,0),
128 | 		GPU_TEVOPERANDS(0,0,0),
129 | 		GPU_REPLACE,
130 | 		GPU_REPLACE,
131 | 		0xFFFFFFFF);
132 | }
133 | 
134 | // topscreen
135 | void renderFrame()
136 | {
137 | 	GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
138 | 
139 | 	GPU_DepthMap(-1.0f, 0.0f);
140 | 	GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
141 | 	GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
142 | 	GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
143 | 	GPU_SetBlendingColor(0,0,0,0);
144 | 	GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
145 | 
146 | 	GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
147 | 	GPUCMD_AddWrite(GPUREG_0118, 0);
148 | 
149 | 	GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
150 | 	GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
151 | 
152 | 	GPU_SetTextureEnable(GPU_TEXUNIT0);
153 | 
154 | 	GPU_SetTexEnv(0,
155 | 		GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
156 | 		GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
157 | 		GPU_TEVOPERANDS(0,0,0),
158 | 		GPU_TEVOPERANDS(0,0,0),
159 | 		GPU_MODULATE, GPU_MODULATE,
160 | 		0xFFFFFFFF);
161 | 	GPU_SetDummyTexEnv(1);
162 | 	GPU_SetDummyTexEnv(2);
163 | 	GPU_SetDummyTexEnv(3);
164 | 	GPU_SetDummyTexEnv(4);
165 | 	GPU_SetDummyTexEnv(5);
166 | 
167 | 	//texturing stuff
168 | 		GPU_SetTexture(
169 | 			GPU_TEXUNIT0, //texture unit
170 | 			(u32*)osConvertVirtToPhys((u32)texData), //data buffer
171 | 			128, //texture width
172 | 			128, //texture height
173 | 			GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params
174 | 			GPU_RGBA8 //texture pixel format
175 | 		);
176 | 
177 | 		GPU_SetAttributeBuffers(
178 | 			3, //3 attributes: vertices, texcoords, and normals
179 | 			(u32*)osConvertVirtToPhys((u32)texData), //mesh buffer
180 | 			GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position)
181 | 			GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord)
182 | 			GPU_ATTRIBFMT(2, 3, GPU_FLOAT),  // GPU Input attribute register 2 (v2): 3 floats (normal)
183 | 			0xFFC,
184 | 			0x210,
185 | 			1,
186 | 			(u32[]){0x00000000},
187 | 			(u64[]){0x210},
188 | 			(u8[]){3}
189 | 		);
190 | 
191 | 	//setup lighting (this is specific to our shader)
192 | 		vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle)));
193 | 		GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1);
194 | 		GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1);
195 | 
196 | 	//initialize projection matrix to standard perspective stuff
197 | 	gsMatrixMode(GS_PROJECTION);
198 | 	gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
199 | 	gsRotateZ(M_PI/2); //because framebuffer is sideways...
200 | 
201 | 	//draw object
202 | 		gsMatrixMode(GS_MODELVIEW);
203 | 		gsPushMatrix();
204 | 			gsTranslate(position.x, position.y, position.z);
205 | 			gsRotateX(angle.x);
206 | 			gsRotateY(angle.y);
207 | 			gsVboDraw(&vbo);
208 | 		gsPopMatrix();
209 | 	GPU_FinishDrawing();
210 | }
211 | 
212 | int main(int argc, char** argv)
213 | {
214 | 
215 | 	gfxInitDefault();
216 | 
217 | 	//initialize GPU
218 | 	GPU_Init(NULL);
219 | 
220 | 	//let GFX know we're ok with doing stereoscopic 3D rendering
221 | 	gfxSet3D(true);
222 | 
223 | 	//allocate our GPU command buffers
224 | 	//they *have* to be on the linear heap
225 | 	u32 gpuCmdSize=0x40000;
226 | 	u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
227 | 	u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);
228 | 
229 | 	//actually reset the GPU
230 | 	GPU_Reset(NULL, gpuCmd, gpuCmdSize);
231 | 
232 | 	//load our vertex shader binary
233 | 	dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size);
234 | 	shaderProgramInit(&shader);
235 | 	shaderProgramSetVsh(&shader, &dvlb->DVLE[0]);
236 | 
237 | 	//initialize GS
238 | 	gsInit(&shader);
239 | 
240 | 	// Flush the command buffer so that the shader upload gets executed
241 | 	GPUCMD_Finalize();
242 | 	GPUCMD_FlushAndRun(NULL);
243 | 	gspWaitForP3D();
244 | 
245 | 	//create texture
246 | 	texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
247 | 	memcpy(texData, texture_bin, texture_bin_size);
248 | 
249 | 	//create VBO
250 | 	gsVboInit(&vbo);
251 | 	gsVboCreate(&vbo, sizeof(modelVboData));
252 | 	gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
253 | 	gsVboFlushData(&vbo);
254 | 
255 | 	//initialize object position and angle
256 | 	position=vect3Df(0.0f, 0.0f, -2.0f);
257 | 	angle=vect3Df(M_PI/4, M_PI/4, 0.0f);
258 | 
259 | 	//background color (blue)
260 | 	u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);
261 | 
262 | 	while(aptMainLoop())
263 | 	{
264 | 		//get current 3D slider state
265 | 		float slider=CONFIG_3D_SLIDERSTATE;
266 | 
267 | 		//controls
268 | 		hidScanInput();
269 | 		//START to exit to hbmenu
270 | 		if(keysDown()&KEY_START)break;
271 | 
272 | 		//A/B to change vertex lighting angle
273 | 		if(keysHeld()&KEY_A)lightAngle+=0.1f;
274 | 		if(keysHeld()&KEY_B)lightAngle-=0.1f;
275 | 
276 | 		//D-PAD to rotate object
277 | 		if(keysHeld()&KEY_DOWN)angle.x+=0.05f;
278 | 		if(keysHeld()&KEY_UP)angle.x-=0.05f;
279 | 		if(keysHeld()&KEY_LEFT)angle.y+=0.05f;
280 | 		if(keysHeld()&KEY_RIGHT)angle.y-=0.05f;
281 | 
282 | 		//R/L to bring object closer to or move it further from the camera
283 | 		if(keysHeld()&KEY_R)position.z+=0.1f;
284 | 		if(keysHeld()&KEY_L)position.z-=0.1f;
285 | 
286 | 		//generate our GPU command buffer for this frame
287 | 		gsStartFrame();
288 | 		renderFrame();
289 | 		GPUCMD_Finalize();
290 | 
291 | 		if(slider>0.0f)
292 | 		{
293 | 			//new and exciting 3D !
294 | 			//make a copy of left gpu buffer
295 | 			u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
296 | 			memcpy(gpuCmdRight, gpuCmd, offset*4);
297 | 
298 | 			//setup interaxial
299 | 			float interaxial=slider*0.12f;
300 | 
301 | 			//adjust left gpu buffer for 3D !
302 | 			{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
303 | 
304 | 			//draw left framebuffer
305 | 			GPUCMD_FlushAndRun(NULL);
306 | 
307 | 			//while GPU starts drawing the left buffer, adjust right one for 3D !
308 | 			GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
309 | 			{mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
310 | 
311 | 			//we wait for the left buffer to finish drawing
312 | 			gspWaitForP3D();
313 | 			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
314 | 			gspWaitForPPF();
315 | 
316 | 			//we draw the right buffer, wait for it to finish and then switch back to left one
317 | 			//clear the screen
318 | 			GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
319 | 			gspWaitForPSC0();
320 | 
321 | 			//draw the right framebuffer
322 | 			GPUCMD_FlushAndRun(NULL);
323 | 			gspWaitForP3D();
324 | 
325 | 			//transfer from GPU output buffer to actual framebuffer
326 | 			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
327 | 			gspWaitForPPF();
328 | 			GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
329 | 		}else{
330 | 			//boring old 2D !
331 | 
332 | 			//draw the frame
333 | 			GPUCMD_FlushAndRun(NULL);
334 | 			gspWaitForP3D();
335 | 
336 | 			//transfer from GPU output buffer to actual framebuffer
337 | 			GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
338 | 			gspWaitForPPF();
339 | 		}
340 | 
341 | 		//clear the screen
342 | 		GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
343 | 		gspWaitForPSC0();
344 | 		gfxSwapBuffersGpu();
345 | 
346 | 		gspWaitForEvent(GSPEVENT_VBlank0, true);
347 | 	}
348 | 
349 | 	gsExit();
350 | 	shaderProgramFree(&shader);
351 | 	DVLB_Free(dvlb);
352 | 	gfxExit();
353 | 	return 0;
354 | }
355 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/source/math.c:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <string.h>
  3 | 
  4 | #include "math.h"
  5 | 
  6 | void loadIdentity44(float* m)
  7 | {
  8 | 	int idx;
  9 | 	//a NULL destination is silently ignored
 10 | 	if(m==NULL)return;
 11 | 	for(idx=0;idx<16;idx++)m[idx]=(idx%5==0)?1.0f:0.0f; //indices 0, 5, 10 and 15 form the diagonal
 12 | }
 13 | 
 14 | void multMatrix44(float* m1, float* m2, float* m) //4x4: m = m1*m2, element (row j, column i) is stored at index i+j*4
 15 | {
 16 | 	int i, j, k; float r[16]; //the product is built in a scratch matrix so that m may alias m1 or m2
 17 | 	for(j=0;j<4;j++)for(i=0;i<4;i++){r[i+j*4]=m1[0+j*4]*m2[i+0*4]; for(k=1;k<4;k++)r[i+j*4]+=m1[k+j*4]*m2[i+k*4];}
 18 | 	memcpy(m, r, sizeof(r));
 19 | }
 20 | 
 21 | void translateMatrix(float* tm, float x, float y, float z)
 22 | {
 23 | 	//multiplies tm on the right by a translation matrix and stores the product back into tm
 24 | 	float product[16];
 25 | 	float shift[16]={
 26 | 		1.0f, 0.0f, 0.0f, x,
 27 | 		0.0f, 1.0f, 0.0f, y,
 28 | 		0.0f, 0.0f, 1.0f, z,
 29 | 		0.0f, 0.0f, 0.0f, 1.0f};
 30 | 	multMatrix44(tm,shift,product);
 31 | 	memcpy(tm,product,sizeof(product));
 32 | }
 33 | 
 34 | // 00 01 02 03
 35 | // 04 05 06 07
 36 | // 08 09 10 11
 37 | // 12 13 14 15
 38 | 
 39 | void rotateMatrixX(float* tm, float x, bool r)
 40 | {
 41 | 	//combines tm with a rotation about the X axis by angle x; r selects the side tm is multiplied on
 42 | 	const float c=cos(x), s=sin(x);
 43 | 	float product[16];
 44 | 	float rot[16]={
 45 | 		1.0f, 0.0f, 0.0f, 0.0f,
 46 | 		0.0f,    c,    s, 0.0f,
 47 | 		0.0f,   -s,    c, 0.0f,
 48 | 		0.0f, 0.0f, 0.0f, 1.0f};
 49 | 	if(r)multMatrix44(rot,tm,product); //r set: rotation on the left
 50 | 	else multMatrix44(tm,rot,product); //r clear: rotation on the right
 51 | 	memcpy(tm,product,sizeof(product));
 52 | }
 53 | 
 54 | void rotateMatrixY(float* tm, float x, bool r)
 55 | {
 56 | 	//combines tm with a rotation about the Y axis by angle x; r selects the side tm is multiplied on
 57 | 	const float c=cos(x), s=sin(x);
 58 | 	float product[16];
 59 | 	float rot[16]={
 60 | 		   c, 0.0f,    s, 0.0f,
 61 | 		0.0f, 1.0f, 0.0f, 0.0f,
 62 | 		  -s, 0.0f,    c, 0.0f,
 63 | 		0.0f, 0.0f, 0.0f, 1.0f};
 64 | 	if(r)multMatrix44(rot,tm,product); //r set: rotation on the left
 65 | 	else multMatrix44(tm,rot,product); //r clear: rotation on the right
 66 | 	memcpy(tm,product,sizeof(product));
 67 | }
 68 | 
 69 | void rotateMatrixZ(float* tm, float x, bool r)
 70 | {
 71 | 	//combines tm with a rotation about the Z axis by angle x; r selects the side tm is multiplied on
 72 | 	const float c=cos(x), s=sin(x);
 73 | 	float product[16];
 74 | 	float rot[16]={
 75 | 		   c,    s, 0.0f, 0.0f,
 76 | 		  -s,    c, 0.0f, 0.0f,
 77 | 		0.0f, 0.0f, 1.0f, 0.0f,
 78 | 		0.0f, 0.0f, 0.0f, 1.0f};
 79 | 	if(r)multMatrix44(rot,tm,product); //r set: rotation on the left
 80 | 	else multMatrix44(tm,rot,product); //r clear: rotation on the right
 81 | 	memcpy(tm,product,sizeof(product));
 82 | }
 83 | 
 84 | void scaleMatrix(float* tm, float x, float y, float z)
 85 | {
 86 | 	int row;
 87 | 	//every row has its first three entries scaled by x, y and z; the fourth entry is left untouched
 88 | 	for(row=0;row<4;row++){tm[row*4+0]*=x; tm[row*4+1]*=y; tm[row*4+2]*=z;}
 89 | }
 90 | 
 91 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
 92 | {
 93 | 	float top = near*tan(fovy/2); //fovy goes straight into tan(), so it is an angle in radians
 94 | 	float right = (top*aspect);
 95 | 	//mp: perspective matrix built from near, far, top and right
 96 | 	float mp[4*4];
 97 | 	
 98 | 	mp[0x0] = near/right;
 99 | 	mp[0x1] = 0.0f;
100 | 	mp[0x2] = 0.0f;
101 | 	mp[0x3] = 0.0f;
102 | 
103 | 	mp[0x4] = 0.0f;
104 | 	mp[0x5] = near/top;
105 | 	mp[0x6] = 0.0f;
106 | 	mp[0x7] = 0.0f;
107 | 
108 | 	mp[0x8] = 0.0f;
109 | 	mp[0x9] = 0.0f;
110 | 	mp[0xA] = -(far+near)/(far-near);
111 | 	mp[0xB] = -2.0f*(far*near)/(far-near);
112 | 
113 | 	mp[0xC] = 0.0f;
114 | 	mp[0xD] = 0.0f;
115 | 	mp[0xE] = -1.0f;
116 | 	mp[0xF] = 0.0f;
117 | 	//mp2: identity except for its third row, which maps z to 0.5*z-0.5*w when applied to a column vector (x,y,z,w)
118 | 	float mp2[4*4];
119 | 	loadIdentity44(mp2);
120 | 	mp2[0xA]=0.5;
121 | 	mp2[0xB]=-0.5;
122 | 	//m = mp2*mp: the depth adjustment is applied on top of the perspective matrix and the result is written to m
123 | 	multMatrix44(mp2, mp, m);
124 | }
125 | 
126 | vect3Df_s getMatrixColumn(float* m, u8 i)
127 | {
128 | 	if(!m || i>=4)return vect3Df(0,0,0);
129 | 	return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]);
130 | }
131 | 
132 | vect3Df_s getMatrixRow(float* m, u8 i)
133 | {
134 | 	if(!m || i>=4)return vect3Df(0,0,0);
135 | 	return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]);
136 | }
137 | 
138 | vect4Df_s getMatrixColumn4(float* m, u8 i)
139 | {
140 | 	if(!m || i>=4)return vect4Df(0,0,0,0);
141 | 	return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]);
142 | }
143 | 
144 | vect4Df_s getMatrixRow4(float* m, u8 i)
145 | {
146 | 	if(!m || i>=4)return vect4Df(0,0,0,0);
147 | 	return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]);
148 | }
149 | 


--------------------------------------------------------------------------------
/examples/assembler/cube/source/math.h:
--------------------------------------------------------------------------------
  1 | #ifndef MATH_H
  2 | #define MATH_H
  3 | 
  4 | #include <3ds/types.h>
  5 | #include <math.h>
  6 | 
  7 | typedef float mtx44[4][4];
  8 | typedef float mtx33[3][3];
  9 | 
 10 | typedef struct
 11 | {
 12 | 	s32 x, y, z;
 13 | }vect3Di_s;
 14 | 
 15 | static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
 16 | {
 17 | 	return (vect3Di_s){x,y,z};
 18 | }
 19 | 
 20 | static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
 21 | {
 22 | 	return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z};
 23 | }
 24 | 
 25 | static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
 26 | {
 27 | 	return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z};
 28 | }
 29 | 
 30 | static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
 31 | {
 32 | 	return (vect3Di_s){v.x*f,v.y*f,v.z*f};
 33 | }
 34 | 
 35 | typedef struct
 36 | {
 37 | 	float x, y, z;
 38 | }vect3Df_s;
 39 | 
 40 | static inline vect3Df_s vect3Df(float x, float y, float z)
 41 | {
 42 | 	return (vect3Df_s){x,y,z};
 43 | }
 44 | 
 45 | static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
 46 | {
 47 | 	return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
 48 | }
 49 | 
 50 | static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
 51 | {
 52 | 	return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
 53 | }
 54 | 
 55 | static inline vect3Df_s vmulf(vect3Df_s v, float f)
 56 | {
 57 | 	return (vect3Df_s){v.x*f,v.y*f,v.z*f};
 58 | }
 59 | 
 60 | static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
 61 | {
 62 | 	return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
 63 | }
 64 | 
 65 | static inline float vmagf(vect3Df_s v)
 66 | {
 67 | 	return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
 68 | }
 69 | 
 70 | static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
 71 | {
 72 | 	return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
 73 | }
 74 | 
 75 | static inline vect3Df_s vnormf(vect3Df_s v) //returns v divided by its length
 76 | {
 77 | 	const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
 78 | 	return (l>0.0f)?(vect3Df_s){v.x/l,v.y/l,v.z/l}:v; //a zero-length vector has no direction: hand it back unchanged instead of dividing by zero
 79 | }
 80 | 
 81 | typedef struct
 82 | {
 83 | 	float x, y, z, w;
 84 | }vect4Df_s;
 85 | 
 86 | static inline vect4Df_s vect4Df(float x, float y, float z, float w)
 87 | {
 88 | 	return (vect4Df_s){x,y,z,w};
 89 | }
 90 | 
 91 | static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
 92 | {
 93 | 	return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
 94 | }
 95 | 
 96 | static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
 97 | {
 98 | 	return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
 99 | }
100 | 
101 | static inline vect4Df_s vmulf4(vect4Df_s v, float f)
102 | {
103 | 	return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
104 | }
105 | 
106 | static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
107 | {
108 | 	return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
109 | }
110 | 
111 | static inline vect4Df_s vnormf4(vect4Df_s v) //returns v divided by its 4-component length
112 | {
113 | 	const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
114 | 	return (l>0.0f)?(vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l}:v; //a zero-length vector has no direction: hand it back unchanged instead of dividing by zero
115 | }
116 | 
117 | //interstuff
118 | static inline vect3Di_s vf2i(vect3Df_s v)
119 | {
120 | 	return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)};
121 | }
122 | 
123 | static inline vect3Df_s vi2f(vect3Di_s v)
124 | {
125 | 	return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z};
126 | }
127 | 
128 | void loadIdentity44(float* m);
129 | void multMatrix44(float* m1, float* m2, float* m);
130 | 
131 | void translateMatrix(float* tm, float x, float y, float z);
132 | void rotateMatrixX(float* tm, float x, bool r);
133 | void rotateMatrixY(float* tm, float x, bool r);
134 | void rotateMatrixZ(float* tm, float x, bool r);
135 | void scaleMatrix(float* tm, float x, float y, float z);
136 | 
137 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
138 | 
139 | vect3Df_s getMatrixColumn(float* m, u8 i);
140 | vect3Df_s getMatrixRow(float* m, u8 i);
141 | vect4Df_s getMatrixColumn4(float* m, u8 i);
142 | vect4Df_s getMatrixRow4(float* m, u8 i);
143 | 
144 | #endif
145 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/Makefile:
--------------------------------------------------------------------------------
  1 | #---------------------------------------------------------------------------------
  2 | .SUFFIXES:
  3 | #---------------------------------------------------------------------------------
  4 | 
  5 | ifeq ($(strip $(DEVKITARM)),)
  6 | $(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
  7 | endif
  8 | 
  9 | ifeq ($(strip $(NIHSTRO)),) # NIHSTRO names the directory holding nihstro-assemble; the %.vsh.o rule runs $(NIHSTRO)/nihstro-assemble
 10 | $(error "Please set NIHSTRO in your environment. export NIHSTRO=<directory containing nihstro-assemble>")
 11 | endif
 12 | 
 13 | TOPDIR ?= $(CURDIR)
 14 | include $(DEVKITARM)/3ds_rules
 15 | 
 16 | #---------------------------------------------------------------------------------
 17 | # TARGET is the name of the output
 18 | # BUILD is the directory where object files & intermediate files will be placed
 19 | # SOURCES is a list of directories containing source code
 20 | # DATA is a list of directories containing data files
 21 | # INCLUDES is a list of directories containing header files
 22 | #
 23 | # NO_SMDH: if set to anything, no SMDH file is generated.
 24 | # APP_TITLE is the name of the app stored in the SMDH file (Optional)
 25 | # APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
 26 | # APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
 27 | # ICON is the filename of the icon (.png), relative to the project folder.
 28 | #   If not set, it attempts to use one of the following (in this order):
 29 | #     - <Project name>.png
 30 | #     - icon.png
 31 | #     - <libctru folder>/default_icon.png
 32 | #---------------------------------------------------------------------------------
 33 | TARGET		:=	$(notdir $(CURDIR))
 34 | BUILD		:=	build
 35 | SOURCES		:=	source
 36 | DATA		:=	data
 37 | INCLUDES	:=	include
 38 | 
 39 | #---------------------------------------------------------------------------------
 40 | # options for code generation
 41 | #---------------------------------------------------------------------------------
 42 | ARCH	:=	-march=armv6k -mtune=mpcore -mfloat-abi=hard
 43 | 
 44 | CFLAGS	:=	-g -Wall -O2 -mword-relocations \
 45 | 			-fomit-frame-pointer -ffast-math \
 46 | 			$(ARCH)
 47 | 
 48 | CFLAGS	+=	$(INCLUDE) -DARM11 -D_3DS
 49 | 
 50 | CXXFLAGS	:= $(CFLAGS) -fno-rtti -std=gnu++11
 51 | 
 52 | ASFLAGS	:=	-g $(ARCH)
 53 | LDFLAGS	=	-specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
 54 | 
 55 | LIBS	:= -lctru -lm
 56 | 
 57 | #---------------------------------------------------------------------------------
 58 | # list of directories containing libraries, this must be the top level containing
 59 | # include and lib
 60 | #---------------------------------------------------------------------------------
 61 | LIBDIRS	:= $(CTRULIB)
 62 | 
 63 | 
 64 | #---------------------------------------------------------------------------------
 65 | # no real need to edit anything past this point unless you need to add additional
 66 | # rules for different file extensions
 67 | #---------------------------------------------------------------------------------
 68 | ifneq ($(BUILD),$(notdir $(CURDIR)))
 69 | #---------------------------------------------------------------------------------
 70 | 
 71 | export OUTPUT	:=	$(CURDIR)/$(TARGET)
 72 | export TOPDIR	:=	$(CURDIR)
 73 | 
 74 | export VPATH	:=	$(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
 75 | 			$(foreach dir,$(DATA),$(CURDIR)/$(dir))
 76 | 
 77 | export DEPSDIR	:=	$(CURDIR)/$(BUILD)
 78 | 
 79 | CFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
 80 | CPPFILES	:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
 81 | SFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
 82 | BINFILES	:=	$(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
 83 | 
 84 | #---------------------------------------------------------------------------------
 85 | # use CXX for linking C++ projects, CC for standard C
 86 | #---------------------------------------------------------------------------------
 87 | ifeq ($(strip $(CPPFILES)),)
 88 | #---------------------------------------------------------------------------------
 89 | 	export LD	:=	$(CC)
 90 | #---------------------------------------------------------------------------------
 91 | else
 92 | #---------------------------------------------------------------------------------
 93 | 	export LD	:=	$(CXX)
 94 | #---------------------------------------------------------------------------------
 95 | endif
 96 | #---------------------------------------------------------------------------------
 97 | 
 98 | export OFILES	:=	$(addsuffix .o,$(BINFILES)) \
 99 | 			$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
100 | 
101 | export INCLUDE	:=	$(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
102 | 			$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
103 | 			-I$(CURDIR)/$(BUILD)
104 | 
105 | export LIBPATHS	:=	$(foreach dir,$(LIBDIRS),-L$(dir)/lib)
106 | 
107 | ifeq ($(strip $(ICON)),)
108 | 	icons := $(wildcard *.png)
109 | 	ifneq (,$(findstring $(TARGET).png,$(icons)))
110 | 		export APP_ICON := $(TOPDIR)/$(TARGET).png
111 | 	else
112 | 		ifneq (,$(findstring icon.png,$(icons)))
113 | 			export APP_ICON := $(TOPDIR)/icon.png
114 | 		endif
115 | 	endif
116 | else
117 | 	export APP_ICON := $(TOPDIR)/$(ICON)
118 | endif
119 | 
120 | .PHONY: $(BUILD) clean all
121 | 
122 | #---------------------------------------------------------------------------------
123 | all: $(BUILD)
124 | 
125 | $(BUILD): # create the build directory if needed, then re-run this Makefile from inside it; $(MAKE) keeps the same make program, flags and jobserver
126 | 	@[ -d $@ ] || mkdir -p $@
127 | 	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
128 | 
129 | #---------------------------------------------------------------------------------
130 | clean:
131 | 	@echo clean ...
132 | 	@rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf test.vsh.shbin
133 | 
134 | 
135 | #---------------------------------------------------------------------------------
136 | else
137 | 
138 | DEPENDS	:=	$(OFILES:.o=.d)
139 | 
140 | #---------------------------------------------------------------------------------
141 | # main targets
142 | #---------------------------------------------------------------------------------
143 | ifeq ($(strip $(NO_SMDH)),)
144 | .PHONY: all
145 | all	:	$(OUTPUT).3dsx $(OUTPUT).smdh
146 | endif
147 | $(OUTPUT).3dsx	:	$(OUTPUT).elf
148 | $(OUTPUT).elf	:	$(OFILES)
149 | 
150 | #---------------------------------------------------------------------------------
151 | # you need a rule like this for each extension you use as binary data
152 | #---------------------------------------------------------------------------------
153 | %.bin.o	:	%.bin
154 | #---------------------------------------------------------------------------------
155 | 	@echo $(notdir $<)
156 | 	@$(bin2o)
157 | 
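158 | # WARNING: This is not the right way to do this! The rule writes the assembled .shbin outside the build directory (../) and hand-writes the matching header with echo. TODO: Do it right!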
159 | #---------------------------------------------------------------------------------
160 | %.vsh.o	:	%.vsh
161 | #---------------------------------------------------------------------------------
162 | 	@echo $(notdir $<)
163 | 	@$(NIHSTRO)/nihstro-assemble --output ../$(notdir $<).shbin $<
164 | 	@bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@
165 | 	@echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h
166 | 	@echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . _)`.h
167 | 	@echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h
168 | 
169 | -include $(DEPENDS)
170 | 
171 | #---------------------------------------------------------------------------------------
172 | endif
173 | #---------------------------------------------------------------------------------------
174 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/README.md:
--------------------------------------------------------------------------------
1 | cube lighting example
2 | =====================
3 | 
4 | An example similar to cube, but with some rudimentary vertex lighting effects. The shader used is somewhat more complex and involves a LOOP to implement multiple light sources.
5 | 
6 | Before trying to compile, make sure your NIHSTRO environment variable points to the directory nihstro-assemble resides in. Additionally, ctrulib in revision 1f52ac344d or similar is required, plus some patches to implement proper uniform setters.
7 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/data/test.vsh:
--------------------------------------------------------------------------------
 1 | // setup constants
 2 | .alias myconst c32 as (1.0, 0.0, 0.5, 1.0)
 3 | 
 4 | // setup output map
 5 | .alias outpos  o0      as position
 6 | .alias outcol  o1      as color
 7 | .alias outtex0 o2.xyzw as texcoord0 // Would like to use .xy instead, but this is not supported by ctrulib currently
 8 | .alias outtex1 o3.xyzw as texcoord1
 9 | .alias outtex2 o4.xyzw as texcoord2
10 | 
11 | // setup uniform map, for use with SHDR_GetUniformRegister
12 | .alias projection     c0-c3
13 | .alias modelview      c4-c7
14 | 
15 | .alias num_lights     i1
16 | 
17 | .alias light_dir      c8
18 | .alias light_diffuse  c9
19 | .alias light_ambient  c10
20 | .alias light_dir2     c11
21 | .alias light_diffuse2 c12
22 | .alias light_ambient2 c13
23 | 
24 | main:
25 | 	mov r1.xyz,  v0.xyz
26 | 	mov r1.w,    myconst.w
27 | 
28 | mdvl:  // tempreg = mdlvMtx * in.pos
29 | 	dp4 r0.x,  modelview[0],  r1
30 | 	dp4 r0.y,  modelview[1],  r1
31 | 	dp4 r0.z,  modelview[2],  r1
32 | 	mov r0.w,  myconst.w
33 | 
34 | proj:  // result.pos = projMtx * tempreg
35 | 	dp4 outpos.x,  projection[0],  r0
36 | 	dp4 outpos.y,  projection[1],  r0
37 | 	dp4 outpos.z,  projection[2],  r0
38 | 	dp4 outpos.w,  projection[3],  r0
39 | 
40 | tex:  // result.texcoord = in.texcoord
41 | 	mov outtex0,  v1.xyzw
42 | 	mov outtex1,  myconst.yyyw
43 | 	mov outtex2,  myconst.yyyw
44 | 
45 | lighting: // color = min(sum over all lights(diffuse * max(dot(L,N),0) + ambient), 1)
46 | 	mov r0, myconst.yyyw
47 | 	// r0 now holds (0, 0, 0, 1); every loop iteration below adds one light's contribution to r0.xyz
48 |     loop num_lights
49 | 		mov r1.xyz, myconst.yyy
50 | 		dp3 r1.xyz, light_dir[lcnt].xyz, v2.xyz
51 | 		max r1.xyz, r1.xyz, myconst.yyy
52 | 		mul r1.xyz, r1.xyz, light_diffuse[lcnt].xyz
53 | 		add r1.xyz, r1.xyz, light_ambient[lcnt].xyz
54 | 		add r0.xyz, r1.xyz, r0.xyz
55 | 		nop
56 | 	endloop
57 | 	min r0.xyz, r0.xyz, myconst.xxx
58 | 	// r0.xyz is now capped at 1.0 (myconst.x); r0.w still holds the 1.0 written before the loop
59 | 	mov outcol, r0
60 | 
61 | 
62 | 
63 | 	nop
64 | 	end
65 | 
66 | endmain:
67 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/data/texture.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/neobrain/nihstro/f4d8659decbfe5d234f04134b5002b82dc515a44/examples/assembler/cube_lighting/data/texture.bin


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/source/_gs.s:
--------------------------------------------------------------------------------
 1 | .section ".text"
 2 | .arm
 3 | .align 4
 4 | .global _vboMemcpy50
 5 | 
 6 | # r0 : dst
 7 | # r1 : src
 8 | # fixed size 0x50 -- NOTE(review): each ldmia/stmia pair below moves 11 registers (r2-r12, 44 bytes), so 0x58 bytes are copied in total; confirm which size is intended
 9 | _vboMemcpy50:
10 | 	push {r4-r11}
11 | 	ldmia r1!, {r2-r12}
12 | 	stmia r0!, {r2-r12}
13 | 	ldmia r1!, {r2-r12}
14 | 	stmia r0!, {r2-r12}
15 | 	pop {r4-r11}
16 | 	bx lr
17 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/source/gs.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <string.h>
  3 | #include <malloc.h>
  4 | #include <3ds.h>
  5 | 
  6 | #include "gs.h"
  7 | #include "math.h"
  8 | 
// Capacity of bufferMatrixList: four entries per matrix stack level.
#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)

// Defined further down; resets every matrix stack.
static void gsInitMatrixStack();

// Mutex taken around linearAlloc/linearFree in gsLinearAlloc/gsLinearFree.
Handle linearAllocMutex;

// Uniform register index for each matrix type (tentative definition; the
// initialised definition appears with the other matrix stack state).
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];

// One recorded projection matrix upload.
typedef struct
{
	u32 offset; // GPU command buffer offset at the time the matrix was uploaded
	mtx44 data; // copy of the matrix that was uploaded
}bufferMatrix_s;

// Projection uploads recorded since the list was last reset, and their count.
bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
int bufferMatrixListLength;
 25 | 
 26 | //----------------------
 27 | //   GS SYSTEM STUFF
 28 | //----------------------
 29 | 
 30 | void initBufferMatrixList()
 31 | {
 32 | 	bufferMatrixListLength=0;
 33 | }
 34 | 
 35 | void gsInit(shaderProgram_s* shader)
 36 | {
 37 | 	gsInitMatrixStack();
 38 | 	initBufferMatrixList();
 39 | 	svcCreateMutex(&linearAllocMutex, false);
 40 | 	if(shader)
 41 | 	{
 42 | 		gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection");
 43 | 		gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview");
 44 | 		shaderProgramUse(shader);
 45 | 	}
 46 | }
 47 | 
 48 | void gsExit(void)
 49 | {
 50 | 	svcCloseHandle(linearAllocMutex);
 51 | }
 52 | 
 53 | void gsStartFrame(void)
 54 | {
 55 | 	GPUCMD_SetBufferOffset(0);
 56 | 	initBufferMatrixList();
 57 | }
 58 | 
 59 | void* gsLinearAlloc(size_t size)
 60 | {
 61 | 	void* ret=NULL;
 62 | 
 63 | 	svcWaitSynchronization(linearAllocMutex, U64_MAX);
 64 | 	ret=linearAlloc(size);
 65 | 	svcReleaseMutex(linearAllocMutex);
 66 | 	
 67 | 	return ret;
 68 | }
 69 | 
 70 | void gsLinearFree(void* mem)
 71 | {
 72 | 	svcWaitSynchronization(linearAllocMutex, U64_MAX);
 73 | 	linearFree(mem);
 74 | 	svcReleaseMutex(linearAllocMutex);
 75 | }
 76 | 
 77 | //----------------------
 78 | //  MATRIX STACK STUFF
 79 | //----------------------
 80 | 
// One stack of GS_MATRIXSTACK_SIZE matrices per matrix type.
static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
// Uniform start register per matrix type; gsInit overwrites these when a shader is given.
static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
// Index of the current (top) matrix within each stack.
static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
// Set when a stack's current matrix changed and has not been uploaded yet.
static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
// Matrix type targeted by the push/pop/transform functions.
static GS_MATRIX gsCurrentMatrixType;
 86 | 
 87 | static void gsInitMatrixStack()
 88 | {
 89 | 	int i;
 90 | 	for(i=0; i<GS_MATRIXTYPES; i++)
 91 | 	{
 92 | 		gsMatrixStackOffsets[i]=0;
 93 | 		gsMatrixStackUpdated[i]=true;
 94 | 		loadIdentity44((float*)gsMatrixStacks[i][0]);
 95 | 	}
 96 | 	gsCurrentMatrixType=GS_PROJECTION;
 97 | }
 98 | 
 99 | float* gsGetMatrix(GS_MATRIX m)
100 | {
101 | 	if(m<0 || m>=GS_MATRIXTYPES)return NULL;
102 | 	
103 | 	return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]];
104 | }
105 | 
106 | int gsLoadMatrix(GS_MATRIX m, float* data)
107 | {
108 | 	if(m<0 || m>=GS_MATRIXTYPES || !data)return -1;
109 | 	
110 | 	memcpy(gsGetMatrix(m), data, sizeof(mtx44));
111 | 
112 | 	gsMatrixStackUpdated[m]=true;
113 | 
114 | 	return 0;
115 | }
116 | 
117 | int gsPushMatrix()
118 | {
119 | 	const GS_MATRIX m=gsCurrentMatrixType;
120 | 	if(m<0 || m>=GS_MATRIXTYPES)return -1;
121 | 	if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1;
122 | 
123 | 	float* cur=gsGetMatrix(m);
124 | 	gsMatrixStackOffsets[m]++;
125 | 	memcpy(gsGetMatrix(m), cur, sizeof(mtx44));
126 | 
127 | 	return 0;
128 | }
129 | 
130 | int gsPopMatrix()
131 | {
132 | 	const GS_MATRIX m=gsCurrentMatrixType;
133 | 	if(m<0 || m>=GS_MATRIXTYPES)return -1;
134 | 	if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1;
135 | 
136 | 	gsMatrixStackOffsets[m]--;
137 | 
138 | 	gsMatrixStackUpdated[m]=true;
139 | 
140 | 	return 0;
141 | }
142 | 
143 | int gsMatrixMode(GS_MATRIX m)
144 | {
145 | 	if(m<0 || m>=GS_MATRIXTYPES)return -1;
146 | 
147 | 	gsCurrentMatrixType=m;
148 | 
149 | 	return 0;
150 | }
151 | 
152 | //------------------------
153 | // MATRIX TRANSFORM STUFF
154 | //------------------------
155 | 
156 | int gsMultMatrix(float* data)
157 | {
158 | 	if(!data)return -1;
159 | 	
160 | 	mtx44 tmp;
161 | 	multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp);
162 | 	memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44));
163 | 
164 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
165 | 
166 | 	return 0;
167 | }
168 | 
169 | void gsLoadIdentity()
170 | {
171 | 	loadIdentity44(gsGetMatrix(gsCurrentMatrixType));
172 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
173 | }
174 | 
175 | void gsProjectionMatrix(float fovy, float aspect, float near, float far)
176 | {
177 | 	initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far);
178 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
179 | }
180 | 
181 | void gsRotateX(float x)
182 | {
183 | 	rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false);
184 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
185 | }
186 | 
187 | void gsRotateY(float y)
188 | {
189 | 	rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false);
190 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
191 | }
192 | 
193 | void gsRotateZ(float z)
194 | {
195 | 	rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false);
196 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
197 | }
198 | 
199 | void gsScale(float x, float y, float z)
200 | {
201 | 	scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
202 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
203 | }
204 | 
205 | void gsTranslate(float x, float y, float z)
206 | {
207 | 	translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
208 | 	gsMatrixStackUpdated[gsCurrentMatrixType]=true;
209 | }
210 | 
211 | //----------------------
212 | // MATRIX RENDER STUFF
213 | //----------------------
214 | 
215 | static void gsSetUniformMatrix(u32 startreg, float* m)
216 | {
217 | 	float param[16];
218 | 
219 | 	param[0x0]=m[3]; //w
220 | 	param[0x1]=m[2]; //z
221 | 	param[0x2]=m[1]; //y
222 | 	param[0x3]=m[0]; //x
223 | 
224 | 	param[0x4]=m[7];
225 | 	param[0x5]=m[6];
226 | 	param[0x6]=m[5];
227 | 	param[0x7]=m[4];
228 | 	
229 | 	param[0x8]=m[11];
230 | 	param[0x9]=m[10];
231 | 	param[0xa]=m[9];
232 | 	param[0xb]=m[8];
233 | 
234 | 	param[0xc]=m[15];
235 | 	param[0xd]=m[14];
236 | 	param[0xe]=m[13];
237 | 	param[0xf]=m[12];
238 | 
239 | 	GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4);
240 | }
241 | 
242 | static int gsUpdateTransformation()
243 | {
244 | 	GS_MATRIX m;
245 | 	for(m=0; m<GS_MATRIXTYPES; m++)
246 | 	{
247 | 		if(gsMatrixStackUpdated[m])
248 | 		{
249 | 			if(m==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
250 | 			{
251 | 				GPUCMD_GetBuffer(NULL, NULL, &bufferMatrixList[bufferMatrixListLength].offset);
252 | 				memcpy(bufferMatrixList[bufferMatrixListLength].data, gsGetMatrix(m), sizeof(mtx44));
253 | 				bufferMatrixListLength++;
254 | 			}
255 | 			gsSetUniformMatrix(gsMatrixStackRegisters[m], gsGetMatrix(m));
256 | 			gsMatrixStackUpdated[m]=false;
257 | 		}
258 | 	}
259 | 	return 0;
260 | }
261 | 
262 | void gsAdjustBufferMatrices(mtx44 transformation)
263 | {
264 | 	int i;
265 | 	u32* buffer;
266 | 	u32 offset;
267 | 	GPUCMD_GetBuffer(&buffer, NULL, &offset);
268 | 	for(i=0; i<bufferMatrixListLength; i++)
269 | 	{
270 | 		u32 o=bufferMatrixList[i].offset;
271 | 		if(o+2<offset) //TODO : better check, need to account for param size
272 | 		{
273 | 			mtx44 newMatrix;
274 | 			GPUCMD_SetBufferOffset(o);
275 | 			multMatrix44((float*)bufferMatrixList[i].data, (float*)transformation, (float*)newMatrix);
276 | 			gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)newMatrix);
277 | 		}
278 | 	}
279 | 	GPUCMD_SetBufferOffset(offset);
280 | }
281 | 
282 | //----------------------
283 | //      VBO STUFF
284 | //----------------------
285 | 
286 | int gsVboInit(gsVbo_s* vbo)
287 | {
288 | 	if(!vbo)return -1;
289 | 
290 | 	vbo->data=NULL;
291 | 	vbo->currentSize=0;
292 | 	vbo->maxSize=0;
293 | 	vbo->commands=NULL;
294 | 	vbo->commandsSize=0;
295 | 
296 | 	return 0;
297 | }
298 | 
299 | int gsVboCreate(gsVbo_s* vbo, u32 size)
300 | {
301 | 	if(!vbo)return -1;
302 | 
303 | 	vbo->data=gsLinearAlloc(size);
304 | 	vbo->numVertices=0;
305 | 	vbo->currentSize=0;
306 | 	vbo->maxSize=size;
307 | 
308 | 	return 0;
309 | }
310 | 
311 | void* gsVboGetOffset(gsVbo_s* vbo)
312 | {
313 | 	if(!vbo)return NULL;
314 | 
315 | 	return (void*)(&((u8*)vbo->data)[vbo->currentSize]);
316 | }
317 | 
318 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
319 | {
320 | 	if(!vbo || !data || !size)return -1;
321 | 	if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1;
322 | 
323 | 	memcpy(gsVboGetOffset(vbo), data, size);
324 | 	vbo->currentSize+=size;
325 | 	vbo->numVertices+=units;
326 | 
327 | 	return 0;
328 | }
329 | 
330 | int gsVboFlushData(gsVbo_s* vbo)
331 | {
332 | 	if(!vbo)return -1;
333 | 
334 | 	//unnecessary if we use flushAndRun
335 | 	// GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
336 | 
337 | 	return 0;
338 | }
339 | 
340 | int gsVboDestroy(gsVbo_s* vbo)
341 | {
342 | 	if(!vbo)return -1;
343 | 
344 | 	if(vbo->commands)free(vbo->commands);
345 | 	if(vbo->data)gsLinearFree(vbo->data);
346 | 	gsVboInit(vbo);
347 | 
348 | 	return 0;
349 | }
350 | 
351 | extern u32 debugValue[];
352 | 
// Appends the GPU commands for a non-indexed draw of n vertices of the given
// primitive type, reading attributes from data, to the current command buffer.
// data is converted with osConvertVirtToPhys and passed shifted right by 3.
// NOTE(review): the raw register IDs below are undocumented here; their
// meaning beyond the existing comments should be confirmed against GPU docs.
void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
{
	//set attribute buffer address
	GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
	//set primitive type
	GPUCMD_AddSingleParam(0x0002025E, primitive);
	GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
	//index buffer not used for drawArrays but 0x000F0227 still required
	GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
	//pass number of vertices
	GPUCMD_AddSingleParam(0x000F0228, n);

	GPUCMD_AddSingleParam(0x00010253, 0x00000001);

	GPUCMD_AddSingleParam(0x00010245, 0x00000000);
	GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
	GPUCMD_AddSingleParam(0x00010245, 0x00000001);
	GPUCMD_AddSingleParam(0x000F0231, 0x00000001);

	// GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
}
374 | 
375 | //not thread safe
376 | int gsVboPrecomputeCommands(gsVbo_s* vbo)
377 | {
378 | 	if(!vbo || vbo->commands)return -1;
379 | 
380 | 	static u32 tmpBuffer[128];
381 | 
382 | 	u32* savedAdr; u32 savedSize, savedOffset;
383 | 	GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);
384 | 	GPUCMD_SetBuffer(tmpBuffer, 128, 0);
385 | 
386 | 	GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
387 | 	
388 | 	GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);
389 | 	vbo->commands=memalign(0x4, vbo->commandsSize*4);
390 | 	if(!vbo->commands)return -1;
391 | 	memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);
392 | 
393 | 	GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);
394 | 
395 | 	return 0;
396 | }
397 | 
398 | extern u32* gpuCmdBuf;
399 | extern u32 gpuCmdBufSize;
400 | extern u32 gpuCmdBufOffset;
401 | 
402 | void _vboMemcpy50(u32* dst, u32* src);
403 | 
404 | void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
405 | {
406 | 	if(!cmd || !size)return;
407 | 
408 | 	if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd);
409 | 	else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4);
410 | 	gpuCmdBufOffset+=size;
411 | }
412 | 
413 | int gsVboDraw(gsVbo_s* vbo)
414 | {
415 | 	if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;
416 | 
417 | 	gsUpdateTransformation();
418 | 
419 | 	gsVboPrecomputeCommands(vbo);
420 | 
421 | 	// u64 val=svcGetSystemTick();
422 | 	if(vbo->commands)
423 | 	{
424 | 		_GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
425 | 	}else{
426 | 		GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
427 | 	}
428 | 	// debugValue[5]+=(u32)(svcGetSystemTick()-val);
429 | 	// debugValue[6]++;
430 | 
431 | 	return 0;
432 | }
433 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/source/gs.h:
--------------------------------------------------------------------------------
 1 | #ifndef GS_H
 2 | #define GS_H
 3 | 
 4 | #include <3ds.h>
 5 | #include "math.h"
 6 | 
 7 | #define GS_MATRIXSTACK_SIZE (8)
 8 | 
// Identifies one of the matrix stacks handled by the gs functions.
typedef enum
{
	GS_PROJECTION = 0,
	GS_MODELVIEW = 1,
	GS_MATRIXTYPES // number of matrix types (one past the last valid value)
}GS_MATRIX;
15 | 
// Descriptor of a vertex buffer plus optional precomputed draw commands.
typedef struct
{
	u8* data; // vertex data buffer
	u32 currentSize; // in bytes
	u32 maxSize; // in bytes
	u32 numVertices; // running total of the "units" passed to gsVboAddData
	u32* commands; // precomputed GPU draw commands, or NULL
	u32 commandsSize; // length of commands
}gsVbo_s;
25 | 
26 | 
27 | void gsInit(shaderProgram_s* shader);
28 | void gsExit(void);
29 | 
30 | void gsStartFrame(void);
31 | void gsAdjustBufferMatrices(mtx44 transformation);
32 | 
33 | void* gsLinearAlloc(size_t size);
34 | void gsLinearFree(void* mem);
35 | 
36 | float* gsGetMatrix(GS_MATRIX m);
37 | int gsLoadMatrix(GS_MATRIX m, float* data);
38 | int gsPushMatrix();
39 | int gsPopMatrix();
40 | int gsMatrixMode(GS_MATRIX m);
41 | 
42 | void gsLoadIdentity();
43 | void gsProjectionMatrix(float fovy, float aspect, float near, float far);
44 | void gsRotateX(float x);
45 | void gsRotateY(float y);
46 | void gsRotateZ(float z);
47 | void gsScale(float x, float y, float z);
48 | void gsTranslate(float x, float y, float z);
49 | int gsMultMatrix(float* data);
50 | 
51 | int gsVboInit(gsVbo_s* vbo);
52 | int gsVboCreate(gsVbo_s* vbo, u32 size);
53 | int gsVboFlushData(gsVbo_s* vbo);
54 | int gsVboDestroy(gsVbo_s* vbo);
55 | int gsVboDraw(gsVbo_s* vbo);
56 | void* gsVboGetOffset(gsVbo_s* vbo);
57 | int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/source/math.c:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <string.h>
  3 | 
  4 | #include "math.h"
  5 | 
  6 | void loadIdentity44(float* m)
  7 | {
  8 | 	if(!m)return;
  9 | 
 10 | 	memset(m, 0x00, 16*4);
 11 | 	m[0]=m[5]=m[10]=m[15]=1.0f;
 12 | }
 13 | 
 14 | void multMatrix44(float* m1, float* m2, float* m) //4x4
 15 | {
 16 | 	int i, j;
 17 | 	for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]);
 18 | 
 19 | }
 20 | 
 21 | void translateMatrix(float* tm, float x, float y, float z)
 22 | {
 23 | 	float rm[16], m[16];
 24 | 
 25 | 	loadIdentity44(rm);
 26 | 	rm[3]=x;
 27 | 	rm[7]=y;
 28 | 	rm[11]=z;
 29 | 	
 30 | 	multMatrix44(tm,rm,m);
 31 | 	memcpy(tm,m,16*sizeof(float));
 32 | }
 33 | 
 34 | // 00 01 02 03
 35 | // 04 05 06 07
 36 | // 08 09 10 11
 37 | // 12 13 14 15
 38 | 
 39 | void rotateMatrixX(float* tm, float x, bool r)
 40 | {
 41 | 	float rm[16], m[16];
 42 | 	memset(rm, 0x00, 16*4);
 43 | 	rm[0]=1.0f;
 44 | 	rm[5]=cos(x);
 45 | 	rm[6]=sin(x);
 46 | 	rm[9]=-sin(x);
 47 | 	rm[10]=cos(x);
 48 | 	rm[15]=1.0f;
 49 | 	if(!r)multMatrix44(tm,rm,m);
 50 | 	else multMatrix44(rm,tm,m);
 51 | 	memcpy(tm,m,16*sizeof(float));
 52 | }
 53 | 
 54 | void rotateMatrixY(float* tm, float x, bool r)
 55 | {
 56 | 	float rm[16], m[16];
 57 | 	memset(rm, 0x00, 16*4);
 58 | 	rm[0]=cos(x);
 59 | 	rm[2]=sin(x);
 60 | 	rm[5]=1.0f;
 61 | 	rm[8]=-sin(x);
 62 | 	rm[10]=cos(x);
 63 | 	rm[15]=1.0f;
 64 | 	if(!r)multMatrix44(tm,rm,m);
 65 | 	else multMatrix44(rm,tm,m);
 66 | 	memcpy(tm,m,16*sizeof(float));
 67 | }
 68 | 
 69 | void rotateMatrixZ(float* tm, float x, bool r)
 70 | {
 71 | 	float rm[16], m[16];
 72 | 	memset(rm, 0x00, 16*4);
 73 | 	rm[0]=cos(x);
 74 | 	rm[1]=sin(x);
 75 | 	rm[4]=-sin(x);
 76 | 	rm[5]=cos(x);
 77 | 	rm[10]=1.0f;
 78 | 	rm[15]=1.0f;
 79 | 	if(!r)multMatrix44(tm,rm,m);
 80 | 	else multMatrix44(rm,tm,m);
 81 | 	memcpy(tm,m,16*sizeof(float));
 82 | }
 83 | 
 84 | void scaleMatrix(float* tm, float x, float y, float z)
 85 | {
 86 | 	tm[0]*=x; tm[4]*=x; tm[8]*=x; tm[12]*=x;
 87 | 	tm[1]*=y; tm[5]*=y; tm[9]*=y; tm[13]*=y;
 88 | 	tm[2]*=z; tm[6]*=z; tm[10]*=z; tm[14]*=z;
 89 | }
 90 | 
 91 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
 92 | {
 93 | 	float top = near*tan(fovy/2);
 94 | 	float right = (top*aspect);
 95 | 
 96 | 	float mp[4*4];
 97 | 	
 98 | 	mp[0x0] = near/right;
 99 | 	mp[0x1] = 0.0f;
100 | 	mp[0x2] = 0.0f;
101 | 	mp[0x3] = 0.0f;
102 | 
103 | 	mp[0x4] = 0.0f;
104 | 	mp[0x5] = near/top;
105 | 	mp[0x6] = 0.0f;
106 | 	mp[0x7] = 0.0f;
107 | 
108 | 	mp[0x8] = 0.0f;
109 | 	mp[0x9] = 0.0f;
110 | 	mp[0xA] = -(far+near)/(far-near);
111 | 	mp[0xB] = -2.0f*(far*near)/(far-near);
112 | 
113 | 	mp[0xC] = 0.0f;
114 | 	mp[0xD] = 0.0f;
115 | 	mp[0xE] = -1.0f;
116 | 	mp[0xF] = 0.0f;
117 | 
118 | 	float mp2[4*4];
119 | 	loadIdentity44(mp2);
120 | 	mp2[0xA]=0.5;
121 | 	mp2[0xB]=-0.5;
122 | 
123 | 	multMatrix44(mp2, mp, m);
124 | }
125 | 
126 | vect3Df_s getMatrixColumn(float* m, u8 i)
127 | {
128 | 	if(!m || i>=4)return vect3Df(0,0,0);
129 | 	return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]);
130 | }
131 | 
132 | vect3Df_s getMatrixRow(float* m, u8 i)
133 | {
134 | 	if(!m || i>=4)return vect3Df(0,0,0);
135 | 	return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]);
136 | }
137 | 
138 | vect4Df_s getMatrixColumn4(float* m, u8 i)
139 | {
140 | 	if(!m || i>=4)return vect4Df(0,0,0,0);
141 | 	return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]);
142 | }
143 | 
144 | vect4Df_s getMatrixRow4(float* m, u8 i)
145 | {
146 | 	if(!m || i>=4)return vect4Df(0,0,0,0);
147 | 	return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]);
148 | }
149 | 


--------------------------------------------------------------------------------
/examples/assembler/cube_lighting/source/math.h:
--------------------------------------------------------------------------------
  1 | #ifndef MATH_H
  2 | #define MATH_H
  3 | 
  4 | #include <3ds/types.h>
  5 | #include <math.h>
  6 | 
  7 | typedef float mtx44[4][4];
  8 | typedef float mtx33[3][3];
  9 | 
 10 | typedef struct
 11 | {
 12 | 	s32 x, y, z;
 13 | }vect3Di_s;
 14 | 
 15 | static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
 16 | {
 17 | 	return (vect3Di_s){x,y,z};
 18 | }
 19 | 
 20 | static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
 21 | {
 22 | 	return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z};
 23 | }
 24 | 
 25 | static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
 26 | {
 27 | 	return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z};
 28 | }
 29 | 
 30 | static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
 31 | {
 32 | 	return (vect3Di_s){v.x*f,v.y*f,v.z*f};
 33 | }
 34 | 
 35 | typedef struct
 36 | {
 37 | 	float x, y, z;
 38 | }vect3Df_s;
 39 | 
 40 | static inline vect3Df_s vect3Df(float x, float y, float z)
 41 | {
 42 | 	return (vect3Df_s){x,y,z};
 43 | }
 44 | 
 45 | static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
 46 | {
 47 | 	return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
 48 | }
 49 | 
 50 | static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
 51 | {
 52 | 	return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
 53 | }
 54 | 
 55 | static inline vect3Df_s vmulf(vect3Df_s v, float f)
 56 | {
 57 | 	return (vect3Df_s){v.x*f,v.y*f,v.z*f};
 58 | }
 59 | 
 60 | static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
 61 | {
 62 | 	return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
 63 | }
 64 | 
 65 | static inline float vmagf(vect3Df_s v)
 66 | {
 67 | 	return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
 68 | }
 69 | 
 70 | static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
 71 | {
 72 | 	return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
 73 | }
 74 | 
 75 | static inline vect3Df_s vnormf(vect3Df_s v)
 76 | {
 77 | 	const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
 78 | 	return (vect3Df_s){v.x/l,v.y/l,v.z/l};
 79 | }
 80 | 
 81 | typedef struct
 82 | {
 83 | 	float x, y, z, w;
 84 | }vect4Df_s;
 85 | 
 86 | static inline vect4Df_s vect4Df(float x, float y, float z, float w)
 87 | {
 88 | 	return (vect4Df_s){x,y,z,w};
 89 | }
 90 | 
 91 | static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
 92 | {
 93 | 	return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
 94 | }
 95 | 
 96 | static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
 97 | {
 98 | 	return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
 99 | }
100 | 
101 | static inline vect4Df_s vmulf4(vect4Df_s v, float f)
102 | {
103 | 	return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
104 | }
105 | 
106 | static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
107 | {
108 | 	return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
109 | }
110 | 
111 | static inline vect4Df_s vnormf4(vect4Df_s v)
112 | {
113 | 	const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
114 | 	return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l};
115 | }
116 | 
117 | //interstuff
118 | static inline vect3Di_s vf2i(vect3Df_s v)
119 | {
120 | 	return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)};
121 | }
122 | 
123 | static inline vect3Df_s vi2f(vect3Di_s v)
124 | {
125 | 	return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z};
126 | }
127 | 
128 | void loadIdentity44(float* m);
129 | void multMatrix44(float* m1, float* m2, float* m);
130 | 
131 | void translateMatrix(float* tm, float x, float y, float z);
132 | void rotateMatrixX(float* tm, float x, bool r);
133 | void rotateMatrixY(float* tm, float x, bool r);
134 | void rotateMatrixZ(float* tm, float x, bool r);
135 | void scaleMatrix(float* tm, float x, float y, float z);
136 | 
137 | void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
138 | 
139 | vect3Df_s getMatrixColumn(float* m, u8 i);
140 | vect3Df_s getMatrixRow(float* m, u8 i);
141 | vect4Df_s getMatrixColumn4(float* m, u8 i);
142 | vect4Df_s getMatrixRow4(float* m, u8 i);
143 | 
144 | #endif
145 | 


--------------------------------------------------------------------------------
/examples/inline_assembler/simple/CMakeLists.txt:
--------------------------------------------------------------------------------
# Inline-assembler example: builds the `simple` executable from simple.cpp.
# NOTE(review): no include directories are set here; presumably the enclosing
# project makes the nihstro headers visible to this target — confirm.
add_executable(simple simple.cpp)
2 | 


--------------------------------------------------------------------------------
/examples/inline_assembler/simple/simple.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #include <fstream>
 29 | #include <iostream>
 30 | #include <iterator>
 31 | 
 32 | #include "nihstro/inline_assembly.h"
 33 | 
 34 | using namespace nihstro;
 35 | 
// Input registers read by the shader, and the temporaries it works in.
static const auto in_pos = SourceRegister::MakeInput(0);
static const auto in_tex = SourceRegister::MakeInput(1);
static const auto in_norm = SourceRegister::MakeInput(2);
static const auto backup_pos = SourceRegister::MakeTemporary(1);
static const auto temp_pos = SourceRegister::MakeTemporary(0);

// Float register 20; given the value (1.0, 0.0, 0.5, 1.0) via DeclareConstant below.
static const auto constant = SourceRegister::MakeFloat(20);

// Uniforms: two 4-register matrices (float registers 0-3 and 4-7) followed by
// the light direction (8) and ambient term (9).
static const SourceRegister projection[4] = { SourceRegister::MakeFloat(0), SourceRegister::MakeFloat(1), SourceRegister::MakeFloat(2), SourceRegister::MakeFloat(3) };
static const SourceRegister modelview[4] = { SourceRegister::MakeFloat(4), SourceRegister::MakeFloat(5), SourceRegister::MakeFloat(6), SourceRegister::MakeFloat(7) };
static const auto light_direction = SourceRegister::MakeFloat(8);
static const auto light_ambient = SourceRegister::MakeFloat(9);

// Output registers; their semantics are assigned with DeclareOutput below.
static const DestRegister out_pos = DestRegister::MakeOutput(0);
static const DestRegister out_col = DestRegister::MakeOutput(1);
static const DestRegister out_tex0 = DestRegister::MakeOutput(2);
static const DestRegister out_tex1 = DestRegister::MakeOutput(3);
static const DestRegister out_tex2 = DestRegister::MakeOutput(4);
 54 | 
// The example shader, assembled into a shader binary during static
// initialisation. main() writes these bytes to the output file.
const auto shbin = InlineAsm::CompileToShbin({
    // TODO: Declare output names
    // TODO: Declare constant
    // TODO: Declare uniform names
    // TODO: Explicitly set entry point
    InlineAsm::DeclareOutput(out_pos, OutputRegisterInfo::POSITION),
    InlineAsm::DeclareOutput(out_col, OutputRegisterInfo::COLOR),
    InlineAsm::DeclareOutput(out_tex0, OutputRegisterInfo::TEXCOORD0),
    InlineAsm::DeclareOutput(out_tex1, OutputRegisterInfo::TEXCOORD1),
    InlineAsm::DeclareOutput(out_tex2, OutputRegisterInfo::TEXCOORD2),

    InlineAsm::DeclareConstant(constant, 1.0, 0.0, 0.5, 1.0),

    InlineAsm::DeclareUniform(projection[0], projection[3], "projection"),
    InlineAsm::DeclareUniform(modelview[0], modelview[3], "modelview"),
    InlineAsm::DeclareUniform(light_direction, light_direction, "lightDirection"),
    InlineAsm::DeclareUniform(light_ambient, light_ambient, "lightAmbient"),

    // backup_pos = (in_pos.xyz, constant.w)
    { OpCode::Id::MOV, backup_pos, "xyz", in_pos, "xyz" },
    { OpCode::Id::MOV, backup_pos, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here! // TODO: Somehow, c4 gets written instead...

    // temp_pos.xyz = modelview rows dotted with backup_pos; temp_pos.w = constant.w
    { OpCode::Id::DP4, temp_pos, "x", modelview[0], backup_pos },
    { OpCode::Id::DP4, temp_pos, "y", modelview[1], backup_pos },
    { OpCode::Id::DP4, temp_pos, "z", modelview[2], backup_pos },
    { OpCode::Id::MOV, temp_pos, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here!

    // out_pos = projection rows dotted with temp_pos
    { OpCode::Id::DP4, out_pos, "x", projection[0], temp_pos },
    { OpCode::Id::DP4, out_pos, "y", projection[1], temp_pos },
    { OpCode::Id::DP4, out_pos, "z", projection[2], temp_pos },
    { OpCode::Id::DP4, out_pos, "w", projection[3], temp_pos },

    // Texture coordinates: pass through in_tex; the other two get constant.yyyw
    { OpCode::Id::MOV, out_tex0, in_tex },
    { OpCode::Id::MOV, out_tex1, constant, "yyyw" },
    { OpCode::Id::MOV, out_tex2, constant, "yyyw" },

    // Colour computed from the light direction, the normal and the ambient term
    { OpCode::Id::DP3, temp_pos, "xyz", light_direction, in_norm },
    { OpCode::Id::MAX, temp_pos, "xyz", constant, "yyy", temp_pos },
    { OpCode::Id::MUL, temp_pos, "xyz", light_ambient, "www", temp_pos },
    { OpCode::Id::ADD, out_col, "xyz", light_ambient, temp_pos },
    { OpCode::Id::MOV, out_col, "w", constant, "xyzw" }, // TODO: Would like to just specify "w" here!

    { OpCode::Id::NOP },
    { OpCode::Id::END }
});
 99 | 
100 | int main(int argc, char* argv[])
101 | {
102 |     if (argc < 2) {
103 |         std::cout << "Error: No filename given" << std::endl;
104 |         return 0;
105 |     }
106 | 
107 |     std::ofstream file(argv[1], std::ios::binary);
108 |     std::copy(shbin.begin(), shbin.end(), std::ostream_iterator<uint8_t>(file));
109 | 
110 |     std::cout << "Successfully compiled shader to " << argv[1] << "!" << std::endl;
111 | 
112 | 	return 0;
113 | }
114 | 


--------------------------------------------------------------------------------
/include/nihstro/bit_field.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | 
 29 | #pragma once
 30 | 
 31 | #include <limits>
 32 | #include <type_traits>
 33 | 
 34 | #ifndef __forceinline
 35 | #ifndef _WIN32
 36 | #define __forceinline inline __attribute__((always_inline))
 37 | #endif
 38 | #endif
 39 | 
 40 | namespace nihstro {
 41 | 
 42 | /*
 43 |  * Abstract bitfield class
 44 |  *
 45 |  * Allows endianness-independent access to individual bitfields within some raw
 46 |  * integer value. The assembly generated by this class is identical to the
 47 |  * usage of raw bitfields, so it's a perfectly fine replacement.
 48 |  *
 49 |  * For BitField<X,Y,Z>, X is the distance of the bitfield to the LSB of the
 50 |  * raw value, Y is the length in bits of the bitfield. Z is an integer type
 51 |  * which determines the sign of the bitfield. Z must have the same size as the
 52 |  * raw integer.
 53 |  *
 54 |  *
 55 |  * General usage:
 56 |  *
 57 |  * Create a new union with the raw integer value as a member.
 58 |  * Then for each bitfield you want to expose, add a BitField member
 59 |  * in the union. The template parameters are the bit offset and the number
 60 |  * of desired bits.
 61 |  *
 62 |  * Changes in the bitfield members will then get reflected in the raw integer
 63 |  * value and vice-versa.
 64 |  *
 65 |  *
 66 |  * Sample usage:
 67 |  *
 68 |  * union SomeRegister
 69 |  * {
 70 |  *     u32 hex;
 71 |  *
 72 |  *     BitField<0,7,u32> first_seven_bits;     // unsigned
 73 |  *     BitField<7,8,u32> next_eight_bits;      // unsigned
 74 |  *     BitField<3,15,s32> some_signed_fields;  // signed
 75 |  * };
 76 |  *
 77 |  * This is equivalent to the little-endian specific code:
 78 |  *
 79 |  * union SomeRegister
 80 |  * {
 81 |  *     u32 hex;
 82 |  *
 83 |  *     struct
 84 |  *     {
 85 |  *         u32 first_seven_bits : 7;
 86 |  *         u32 next_eight_bits : 8;
 87 |  *     };
 88 |  *     struct
 89 |  *     {
 90 |  *         u32 : 3; // padding
 91 |  *         s32 some_signed_fields : 15;
 92 |  *     };
 93 |  * };
 94 |  *
 95 |  *
 96 |  * Caveats:
 97 |  *
 98 |  * 1)
 99 |  * BitField provides automatic casting from and to the storage type where
100 |  * appropriate. However, when using non-typesafe functions like printf, an
101 |  * explicit cast must be performed on the BitField object to make sure it gets
102 |  * passed correctly, e.g.:
103 |  * printf("Value: %d", (s32)some_register.some_signed_fields);
104 |  *
105 |  * 2)
106 |  * Not really a caveat, but potentially irritating: This class is used in some
107 |  * packed structures that do not guarantee proper alignment. Therefore we have
108 |  * to use #pragma pack here not to pack the members of the class, but instead
109 |  * to break GCC's assumption that the members of the class are aligned on
110 |  * sizeof(StorageType).
111 |  * TODO(neobrain): Confirm that this is a proper fix and not just masking
112 |  * symptoms.
113 |  */
114 | #pragma pack(1)
115 | template<std::size_t position, std::size_t bits, typename T>
116 | struct BitField
117 | {
118 | private:
119 |     // This constructor might be considered ambiguous:
120 |     // Would it initialize the storage or just the bitfield?
121 |     // Hence, delete it. Use the assignment operator to set bitfield values!
122 |     BitField(T val) = delete;
123 | 
124 | public:
125 |     // Force default constructor to be created
126 |     // so that we can use this within unions
127 |     BitField() = default;
128 | 
129 | #ifndef _WIN32
130 |     // We explicitly delete the copy assigment operator here, because the
131 |     // default copy assignment would copy the full storage value, rather than
132 |     // just the bits relevant to this particular bit field.
133 |     // Ideally, we would just implement the copy assignment to copy only the
134 |     // relevant bits, but this requires compiler support for unrestricted
135 |     // unions.
136 |     // MSVC 2013 has no support for this, hence we disable this code on
137 |     // Windows (so that the default copy assignment operator will be used).
138 |     // For any C++11 conformant compiler we delete the operator to make sure
139 |     // we never use this inappropriate operator to begin with.
140 |     // TODO: Implement this operator properly once all target compilers
141 |     // support unrestricted unions.
142 |     // TODO: Actually, deleting and overriding this operator both cause more
143 |     // harm than anything. Instead, it's suggested to never use the copy
144 |     // constructor directly but instead invoke Assign() explicitly.
145 |     // BitField& operator=(const BitField&) = delete;
146 | #endif
147 | 
148 |     __forceinline BitField& operator=(T val)
149 |     {
150 |         Assign(val);
151 |         return *this;
152 |     }
153 | 
154 |     __forceinline operator typename std::add_const<T>::type() const
155 |     {
156 |         return Value();
157 |     }
158 | 
159 |     __forceinline void Assign(const T& value) {
160 |         storage = (storage & ~GetMask()) | ((((StorageType)value) << position) & GetMask());
161 |     }
162 | 
163 |     __forceinline typename std::add_const<T>::type Value() const
164 |     {
165 |         if (std::numeric_limits<T>::is_signed)
166 |         {
167 |             std::size_t shift = 8 * sizeof(T)-bits;
168 |             return (T)(((storage & GetMask()) << (shift - position)) >> shift);
169 |         }
170 |         else
171 |         {
172 |             return (T)((storage & GetMask()) >> position);
173 |         }
174 |     }
175 | 
176 | 	static size_t NumBits() {
177 | 		return bits;
178 | 	}
179 | 
180 | private:
181 |     // StorageType is T for non-enum types and the underlying type of T if
182 |     // T is an enumeration. Note that T is wrapped within an enable_if in the
183 |     // former case to workaround compile errors which arise when using
184 |     // std::underlying_type<T>::type directly.
185 |     typedef typename std::conditional < std::is_enum<T>::value,
186 |         std::underlying_type<T>,
187 |         std::enable_if < true, T >> ::type::type StorageType;
188 | 
189 |     // Unsigned version of StorageType
190 |     typedef typename std::make_unsigned<StorageType>::type StorageTypeU;
191 | 
192 |     __forceinline StorageType GetMask() const
193 |     {
194 |         return ((~(StorageTypeU)0) >> (8 * sizeof(T)-bits)) << position;
195 |     }
196 | 
197 |     StorageType storage;
198 | 
199 |     static_assert(bits + position <= 8 * sizeof(T), "Bitfield out of range");
200 | 
201 |     // And, you know, just in case people specify something stupid like bits=position=0x80000000
202 |     static_assert(position < 8 * sizeof(T), "Invalid position");
203 |     static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
204 |     static_assert(bits > 0, "Invalid number of bits");
205 |     static_assert(std::is_standard_layout<T>::value, "Invalid base type");
206 | };
207 | 
208 | /**
209 |  * Abstract bit flag class. This is basically a specialization  of BitField for single-bit fields.
210 |  * Instead of being cast to the underlying type, it acts like a boolean.
211 |  */
212 | template<std::size_t position, typename T>
213 | struct BitFlag : protected BitField<position, 1, T>
214 | {
215 | private:
216 |     BitFlag(T val) = delete;
217 | 
218 |     typedef BitField<position, 1, T> ParentType;
219 | 
220 | public:
221 |     BitFlag() = default;
222 | 
223 | #ifndef _WIN32
224 |     BitFlag& operator=(const BitFlag&) = delete;
225 | #endif
226 | 
227 |     __forceinline BitFlag& operator=(bool val)
228 |     {
229 |         Assign(val);
230 |         return *this;
231 |     }
232 | 
233 |     __forceinline operator bool() const
234 |     {
235 |         return Value();
236 |     }
237 | 
238 |     __forceinline void Assign(bool value) {
239 |         ParentType::Assign(value);
240 |     }
241 | 
242 |     __forceinline bool Value() const
243 |     {
244 |         return ParentType::Value() != 0;
245 |     }
246 | };
247 | #pragma pack()
248 | 
249 | } // namespace
250 | 


--------------------------------------------------------------------------------
/include/nihstro/float24.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <limits>

#include "bit_field.h"
 7 | 
 8 | namespace nihstro {
 9 | 
10 | inline uint32_t to_float24(float val) {
11 |     static_assert(std::numeric_limits<float>::is_iec559, "Compiler does not adhere to IEEE 754");
12 | 
13 |     union Float32 {
14 |         BitField< 0, 23, uint32_t> mant;
15 |         BitField<23,  8, uint32_t> biased_exp;
16 |         BitField<31,  1, uint32_t> sign;
17 | 
18 |         static int ExponentBias() {
19 |             return 127;
20 |         }
21 |     } f32 = reinterpret_cast<Float32&>(val);
22 | 
23 |     union Float24 {
24 |         uint32_t hex;
25 | 
26 |         BitField< 0, 16, uint32_t> mant;
27 |         BitField<16,  7, uint32_t> biased_exp;
28 |         BitField<23,  1, uint32_t> sign;
29 | 
30 |         static int ExponentBias() {
31 |             return 63;
32 |         }
33 |     } f24 = { 0 };
34 | 
35 |     int biased_exp = (int)f32.biased_exp - Float32::ExponentBias() + Float24::ExponentBias();
36 |     unsigned mant = (biased_exp >= 0) ? (f32.mant >> (f32.mant.NumBits() - f24.mant.NumBits())) : 0;
37 |     if (biased_exp >= (1 << f24.biased_exp.NumBits())) {
38 |         // TODO: Return +inf or -inf
39 |     }
40 | 
41 |     f24.biased_exp = std::max(0, biased_exp);
42 |     f24.mant = mant;
43 |     f24.sign = f32.sign.Value();
44 | 
45 |     return f24.hex;
46 | }
47 | 
48 | } // namespace
49 | 


--------------------------------------------------------------------------------
/include/nihstro/parser_assembly.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #pragma once
 29 | 
 30 | #include <array>
 31 | #include <cstdint>
 32 | #include <memory>
 33 | #include <vector>
 34 | #include <ostream>
 35 | 
 36 | #include <boost/optional.hpp>
 37 | #include <boost/variant.hpp>
 38 | 
 39 | #include "source_tree.h"
 40 | 
 41 | #include "shader_binary.h"
 42 | #include "shader_bytecode.h"
 43 | 
 44 | namespace nihstro {
 45 | 
 46 | struct InputSwizzlerMask {
 47 |     int num_components;
 48 | 
 49 |     enum Component : uint8_t {
 50 |         x = 0,
 51 |         y = 1,
 52 |         z = 2,
 53 |         w = 3,
 54 |     };
 55 |     std::array<Component,4> components;
 56 | 
 57 |     static InputSwizzlerMask FullMask() {
 58 |         return { 4, {x,y,z,w} };
 59 |     }
 60 | 
 61 |     bool operator == (const InputSwizzlerMask& oth) const {
 62 |         return this->num_components == oth.num_components && this->components == oth.components;
 63 |     }
 64 | 
 65 | 	// TODO: Move to implementation?
 66 |     friend std::ostream& operator<<(std::ostream& os, const Component& v) {
 67 |         switch(v) {
 68 |             case x:  return os << "x";
 69 |             case y:  return os << "y";
 70 |             case z:  return os << "z";
 71 |             case w:  return os << "w";
 72 |             default: return os << "?";
 73 |         }
 74 |     }
 75 |     friend std::ostream& operator<<(std::ostream& os, const InputSwizzlerMask& v) {
 76 |         if (!v.num_components)
 77 |             return os << "(empty_mask)";
 78 | 
 79 |         for (int i = 0; i < v.num_components; ++i)
 80 |             os << v.components[i];
 81 | 
 82 |         return os;
 83 |     }
 84 | 
 85 |     friend std::string to_string(const Component& v) {
 86 |         std::stringstream ss;
 87 |         ss << v;
 88 |         return ss.str();
 89 |     }
 90 | 
 91 |     friend std::string to_string(const InputSwizzlerMask& v) {
 92 |         std::stringstream ss;
 93 |         ss << v;
 94 |         return ss.str();
 95 |     }
 96 | };
 97 | 
 98 | using Identifier = std::string;
 99 | 
100 | // A sign, i.e. +1 or -1
101 | using Sign = int;
102 | 
103 | struct IntegerWithSign {
104 |     int sign;
105 |     unsigned value;
106 | 
107 |     int GetValue() const {
108 |         return sign * value;
109 |     }
110 | };
111 | 
112 | // Raw index + address register index
113 | struct IndexExpression : std::vector<boost::variant<IntegerWithSign, Identifier>> {
114 |     int GetCount() const {
115 |         return this->size();
116 |     }
117 | 
118 |     bool IsRawIndex(int arg) const {
119 |         return (*this)[arg].which() == 0;
120 |     }
121 | 
122 |     int GetRawIndex(int arg) const {
123 |         return boost::get<IntegerWithSign>((*this)[arg]).GetValue();
124 |     }
125 | 
126 |     bool IsAddressRegisterIdentifier(int arg) const {
127 |         return (*this)[arg].which() == 1;
128 |     }
129 | 
130 |     Identifier GetAddressRegisterIdentifier(int arg) const {
131 |         return boost::get<Identifier>((*this)[arg]);
132 |     }
133 | };
134 | 
135 | 
136 | struct Expression {
137 |     struct SignedIdentifier {
138 |         boost::optional<Sign> sign;
139 |         Identifier identifier;
140 |     } signed_identifier;
141 | 
142 |     boost::optional<IndexExpression> index;
143 |     std::vector<InputSwizzlerMask> swizzle_masks;
144 | 
145 |     int GetSign() const {
146 |         if (!RawSign())
147 |             return +1;
148 |         else
149 |             return *RawSign();
150 |     }
151 | 
152 |     const Identifier& GetIdentifier() const {
153 |         return RawIdentifier();
154 |     }
155 | 
156 |     bool HasIndexExpression() const {
157 |         return static_cast<bool>(RawIndex());
158 |     }
159 | 
160 |     const IndexExpression& GetIndexExpression() const {
161 |         return *RawIndex();
162 |     }
163 | 
164 |     const std::vector<InputSwizzlerMask>& GetSwizzleMasks() const {
165 |         return RawSwizzleMasks();
166 |     }
167 | 
168 | private:
169 |     const boost::optional<Sign>& RawSign() const {
170 |         return signed_identifier.sign;
171 |     }
172 | 
173 |     const Identifier& RawIdentifier() const {
174 |         return signed_identifier.identifier;
175 |     }
176 | 
177 |     const boost::optional<IndexExpression>& RawIndex() const {
178 |         return index;
179 |     }
180 | 
181 |     const std::vector<InputSwizzlerMask>& RawSwizzleMasks() const {
182 |         return swizzle_masks;
183 |     }
184 | };
185 | 
186 | struct ConditionInput {
187 |     bool invert;
188 |     Identifier identifier;
189 |     boost::optional<InputSwizzlerMask> swizzler_mask;
190 | 
191 |     bool GetInvertFlag() const {
192 |         return invert;
193 |     }
194 | 
195 |     const Identifier& GetIdentifier() const {
196 |         return identifier;
197 |     }
198 | 
199 |     bool HasSwizzleMask() const {
200 |         return static_cast<bool>(swizzler_mask);
201 |     }
202 | 
203 |     const InputSwizzlerMask& GetSwizzleMask() const {
204 |         return *swizzler_mask;
205 |     }
206 | };
207 | 
208 | struct Condition {
209 |     ConditionInput input1;
210 |     Instruction::FlowControlType::Op op;
211 |     ConditionInput input2;
212 | 
213 |     const ConditionInput& GetFirstInput() const {
214 |         return input1;
215 |     }
216 | 
217 |     Instruction::FlowControlType::Op GetConditionOp() const {
218 |         return op;
219 |     }
220 | 
221 |     const ConditionInput& GetSecondInput() const {
222 |         return input2;
223 |     }
224 | };
225 | 
226 | using StatementLabel = std::string;
227 | 
228 | struct StatementInstruction {
229 |     OpCode opcode;
230 |     std::vector<Expression> expressions;
231 | 
232 |     StatementInstruction() = default;
233 | 
234 |     // TODO: Obsolete constructor?
235 |     StatementInstruction(const OpCode& opcode) : opcode(opcode) {
236 |     }
237 | 
238 |     StatementInstruction(const OpCode& opcode, const std::vector<Expression> expressions) : opcode(opcode), expressions(expressions) {
239 |     }
240 | 
241 |     const OpCode& GetOpCode() const {
242 |         return opcode;
243 |     }
244 | 
245 |     const std::vector<Expression>& GetArguments() const {
246 |         return expressions;
247 |     }
248 | };
249 | using FloatOpInstruction = StatementInstruction;
250 | 
251 | struct CompareInstruction {
252 |     OpCode opcode;
253 |     std::vector<Expression> arguments;
254 |     std::vector<Instruction::Common::CompareOpType::Op> ops;
255 | 
256 |     const OpCode& GetOpCode() const {
257 |         return opcode;
258 |     }
259 | 
260 |     const Expression& GetSrc1() const {
261 |         return arguments[0];
262 |     }
263 | 
264 |     const Expression& GetSrc2() const {
265 |         return arguments[1];
266 |     }
267 | 
268 |     Instruction::Common::CompareOpType::Op GetOp1() const {
269 |         return ops[0];
270 |     }
271 | 
272 |     Instruction::Common::CompareOpType::Op GetOp2() const {
273 |         return ops[1];
274 |     }
275 | };
276 | 
277 | struct FlowControlInstruction {
278 |     OpCode opcode;
279 |     std::string target_label;
280 |     boost::optional<std::string> return_label;
281 |     boost::optional<Condition> condition;
282 | 
283 |     const OpCode& GetOpCode() const {
284 |         return opcode;
285 |     }
286 | 
287 |     const std::string& GetTargetLabel() const {
288 |         return target_label;
289 |     }
290 | 
291 |     bool HasReturnLabel() const {
292 |         return static_cast<bool>(return_label);
293 |     }
294 | 
295 |     const std::string& GetReturnLabel() const {
296 |         return *return_label;
297 |     }
298 | 
299 |     bool HasCondition() const {
300 |         return static_cast<bool>(condition);
301 |     }
302 | 
303 |     const Condition& GetCondition() const {
304 |         return *condition;
305 |     }
306 | };
307 | 
308 | struct SetEmitInstruction {
309 |     OpCode opcode;
310 |     unsigned vertex_id;
311 | 
312 |     struct Flags {
313 |         boost::optional<bool> primitive_flag;
314 |         boost::optional<bool> invert_flag;
315 |     } flags;
316 | 
317 |     bool PrimitiveFlag() const {
318 |         return flags.primitive_flag && *flags.primitive_flag;
319 |     }
320 | 
321 |     bool InvertFlag() const {
322 |         return flags.invert_flag && *flags.invert_flag;
323 |     }
324 | };
325 | 
// An alias declaration: binds alias_name to a register (or register range),
// optionally restricted by a swizzle mask and carrying extra data.
struct StatementDeclaration {
    // Name introduced by the declaration.
    std::string alias_name;
    Identifier identifier_start; /* aliased identifier (start register) */
    boost::optional<Identifier> identifier_end; /* aliased identifier (end register) */
    boost::optional<InputSwizzlerMask> swizzle_mask; // referring to the aliased identifier

    // Optional additional data attached to the declaration.
    struct Extra {
        // Constant components given in the declaration, if any.
        std::vector<float> constant_value;
        // Output semantic given in the declaration, if any.
        boost::optional<OutputRegisterInfo::Type> output_semantic;
    } extra;
};
337 | 
// Configuration object handed to the Parser constructor.
struct ParserContext {
    // There currently is no context
};
341 | 
342 | 
// Assembly parser front end. All parsing state lives in the private
// ParserImpl, which is defined outside this header (pimpl).
//
// The Parse* functions take the current position by reference and return a
// bool.
// NOTE(review): presumably the return value signals success and `begin` is
// advanced past the consumed input - confirm against the implementation.
struct Parser {
    using Iterator = SourceTreeIterator;

    Parser(const ParserContext& context);
    // Declared here and defined out of line, since ParserImpl is incomplete
    // in this header.
    ~Parser();

    // Skip whitespaces, blank lines, and comments; returns number of line breaks skipped.
    unsigned Skip(Iterator& begin, Iterator end);

    // Skip to the next line
    void SkipSingleLine(Iterator& begin, Iterator end);

    // Parse alias declaration including line ending
    bool ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* declaration);

    // Parse label declaration including line ending
    bool ParseLabel(Iterator& begin, Iterator end, StatementLabel* label);

    // Parse nothing but a single opcode
    bool ParseOpCode(Iterator& begin, Iterator end, OpCode* opcode);

    // Parse trivial instruction including line ending
    bool ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* opcode);

    // Parse float instruction including line ending
    bool ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* content);

    // Parse compare instruction including line ending
    bool ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content);

    // Parse flow control instruction including line ending
    bool ParseFlowControl(Iterator& begin, Iterator end, FlowControlInstruction* content);

    // Parse SetEmit instruction including line ending
    bool ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content);

private:
    struct ParserImpl;
    std::unique_ptr<ParserImpl> impl;
};
383 | 
384 | } // namespace
385 | 


--------------------------------------------------------------------------------
/include/nihstro/parser_shbin.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #pragma once
 29 | 
 30 | #include <fstream>
 31 | #include <map>
 32 | #include <string>
 33 | #include <sstream>
 34 | #include <vector>
 35 | 
 36 | #include "nihstro/shader_binary.h"
 37 | 
 38 | namespace nihstro {
 39 | 
 40 | struct ShaderInfo {
 41 |     std::vector<Instruction> code;
 42 |     std::vector<SwizzleInfo> swizzle_info;
 43 | 
 44 |     std::vector<ConstantInfo> constant_table;
 45 |     std::vector<LabelInfo> label_table;
 46 |     std::map<uint32_t, std::string> labels;
 47 |     std::vector<OutputRegisterInfo> output_register_info;
 48 |     std::vector<UniformInfo> uniform_table;
 49 | 
 50 |     void Clear() {
 51 |         code.clear();
 52 |         swizzle_info.clear();
 53 |         constant_table.clear();
 54 |         label_table.clear();
 55 |         labels.clear();
 56 |         output_register_info.clear();
 57 |         uniform_table.clear();
 58 |     }
 59 | 
 60 |     bool HasLabel(uint32_t offset) const {
 61 |         return labels.find(offset) != labels.end();
 62 |     }
 63 | 
 64 |     std::string GetLabel (uint32_t offset) const {
 65 |         auto it = labels.find(offset);
 66 |         if (it != labels.end())
 67 |             return it->second;
 68 |         return "";
 69 |     }
 70 | 
 71 |     template<typename T>
 72 |     std::string LookupDestName(const T& dest, const SwizzlePattern& swizzle) const {
 73 |         if (dest < 0x8) {
 74 |             // TODO: This one still needs some prettification in case
 75 |             //       multiple output_infos describing this output register
 76 |             //       are found.
 77 |             std::string ret;
 78 |             for (const auto& output_info : output_register_info) {
 79 |                 if (dest != output_info.id)
 80 |                     continue;
 81 | 
 82 |                 // Only display output register name if the output components it's mapped to are
 83 |                 // actually written to.
 84 |                 // swizzle.dest_mask and output_info.component_mask use different bit order,
 85 |                 // so we can't use AND them bitwise to check this.
 86 |                 int matching_mask = 0;
 87 |                 for (int i = 0; i < 4; ++i)
 88 |                     matching_mask |= output_info.component_mask & (swizzle.DestComponentEnabled(i) << i);
 89 | 
 90 |                 if (!matching_mask)
 91 |                     continue;
 92 | 
 93 |                 // Add a vertical bar so that we have at least *some*
 94 |                 // indication that we hit multiple matches.
 95 |                 if (!ret.empty())
 96 |                     ret += "|";
 97 | 
 98 |                 ret += output_info.GetSemanticName();
 99 |             }
100 |             if (!ret.empty())
101 |                 return ret;
102 |         } else if (dest.GetRegisterType() == RegisterType::Temporary) {
103 |             // TODO: Not sure if uniform_info can assign names to temporary registers.
104 |             //       If that is the case, we should check the table for better names here.
105 |             std::stringstream stream;
106 |             stream << "temp_" << std::hex << dest.GetIndex();
107 |             return stream.str();
108 |         }
109 |         return "(?)";
110 |     }
111 | 
112 |     template<class T>
113 |     std::string LookupSourceName(const T& source, unsigned addr_reg_index) const {
114 |         if (source.GetRegisterType() != RegisterType::Temporary) {
115 |             for (const auto& uniform_info : uniform_table) {
116 |                 // Magic numbers are needed because uniform info registers use the
117 |                 // range 0..0x10 for input registers and 0x10...0x70 for uniform registers,
118 |                 // i.e. there is a "gap" at the temporary registers, for which no
119 |                 // name can be assigned (?).
120 |                 int off = (source.GetRegisterType() == RegisterType::Input) ? 0 : 0x10;
121 |                 if (source - off >= uniform_info.basic.reg_start &&
122 |                     source - off <= uniform_info.basic.reg_end) {
123 |                     std::string name = uniform_info.name;
124 | 
125 |                     std::string index;
126 |                     bool is_array = uniform_info.basic.reg_end != uniform_info.basic.reg_start;
127 |                     if (is_array) {
128 |                         index += std::to_string(source - off - uniform_info.basic.reg_start);
129 |                     }
130 |                     if (addr_reg_index != 0) {
131 |                         index += (is_array) ? " + " : "";
132 |                         index += "a" + std::to_string(addr_reg_index - 1);
133 |                     }
134 | 
135 |                     if (!index.empty())
136 |                         name += "[" + index +  "]";
137 | 
138 |                     return name;
139 |                 }
140 |             }
141 |         }
142 | 
143 |         // Constants and uniforms really are the same internally
144 |         for (const auto& constant_info : constant_table) {
145 |             if (source - 0x20 == constant_info.regid) {
146 |                 return "const_" + std::to_string(constant_info.regid.Value());
147 |             }
148 |         }
149 | 
150 |         // For temporary registers, we at least print "temp_X" if no better name could be found.
151 |         if (source.GetRegisterType() == RegisterType::Temporary) {
152 |             std::stringstream stream;
153 |             stream << "temp_" << std::hex << source.GetIndex();
154 |             return stream.str();
155 |         }
156 | 
157 |         return "(?)";
158 |     }
159 | };
160 | 
// Reader for shader binary files. The member functions declared without a
// body are defined outside this header.
class ShbinParser {
public:
    // NOTE(review): presumably opens `filename` and fills the header members
    // below - confirm against the implementation.
    void ReadHeaders(const std::string& filename);

    // NOTE(review): presumably loads the DVLE with the given index into
    // shader_info - confirm against the implementation.
    void ReadDVLE(int dvle_index);

    const DVLBHeader& GetDVLBHeader() const {
        return dvlb_header;
    }

    const DVLPHeader& GetDVLPHeader() const {
        return dvlp_header;
    }

    // No bounds checking: index must be a valid position in dvle_headers.
    const DVLEHeader& GetDVLEHeader(int index) const {
        return dvle_headers[index];
    }

    // No bounds checking: dvle_index must be a valid position in dvle_filenames.
    const std::string& GetFilename(int dvle_index) const {
        return dvle_filenames[dvle_index];
    }

private:

    // Reads a null-terminated string from the given offset
    std::string ReadSymbol(uint32_t offset);

    // Stream the binary is read from.
    std::fstream file;


    DVLBHeader dvlb_header;
    DVLPHeader dvlp_header;

    uint32_t dvlp_offset;

public:
    // Per-DVLE data; the getters above index into these vectors.
    std::vector<uint32_t>    dvle_offsets;
    std::vector<DVLEHeader>  dvle_headers;
    std::vector<std::string> dvle_filenames;

    ShaderInfo shader_info;

    uint32_t main_offset;
};
205 | 
206 | 
207 | } // namespace
208 | 


--------------------------------------------------------------------------------
/include/nihstro/preprocessor.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 Tony Wasserka
 2 | // All rights reserved.
 3 | //
 4 | // Redistribution and use in source and binary forms, with or without
 5 | // modification, are permitted provided that the following conditions are met:
 6 | //
 7 | //     * Redistributions of source code must retain the above copyright
 8 | //       notice, this list of conditions and the following disclaimer.
 9 | //     * Redistributions in binary form must reproduce the above copyright
10 | //       notice, this list of conditions and the following disclaimer in the
11 | //       documentation and/or other materials provided with the distribution.
12 | //     * Neither the name of the owner nor the names of its contributors may
13 | //       be used to endorse or promote products derived from this software
14 | //       without specific prior written permission.
15 | //
16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 
28 | #pragma once
29 | 
// std::string is used in the declaration below; include it here so the
// header does not depend on what its includer happened to pull in first.
#include <string>

namespace nihstro {

// Only forward-declared here; callers that use the returned object need the
// full definition.
struct SourceTree;

// Declared here, defined elsewhere. Takes the path of an assembly source
// file and returns a SourceTree by value.
// NOTE(review): presumably the tree is the file with its preprocessor
// directives resolved - confirm against the implementation.
SourceTree PreprocessAssemblyFile(const std::string& filename);

} // namespace
37 | 


--------------------------------------------------------------------------------
/include/nihstro/shader_binary.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #pragma once
 29 | 
 30 | #include <cstdint>
 31 | 
 32 | #include "shader_bytecode.h"
 33 | 
 34 | namespace nihstro {
 35 | 
 36 | #pragma pack(1)
 37 | struct DVLBHeader { // Fixed-size header introduced by the "DVLB" magic word (8 bytes, see static_assert below)
 38 |     enum : uint32_t {
 39 |         MAGIC_WORD = 0x424C5644, // "DVLB" when the four bytes are read in little-endian order
 40 |     };
 41 | 
 42 |     uint32_t magic_word; // presumably compared against MAGIC_WORD by readers -- TODO confirm
 43 |     uint32_t num_programs; // number of entries in the DVLE offset table mentioned below
 44 | 
 45 |     // DVLE offset table with num_programs entries follows
 46 | };
 47 | static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size"); // two uint32_t fields, no padding
 48 | 
 49 | struct DVLPHeader { // Header introduced by the "DVLP" magic word; locates the program binary and the swizzle info
 50 |     enum : uint32_t {
 51 |         MAGIC_WORD = 0x504C5644, // "DVLP" when the four bytes are read in little-endian order
 52 |     };
 53 | 
 54 |     uint32_t magic_word;
 55 |     uint32_t version;
 56 |     uint32_t binary_offset;  // relative to DVLP start
 57 |     uint32_t binary_size_words; // size of the binary, counted in words rather than bytes
 58 |     uint32_t swizzle_info_offset; // presumably also relative to DVLP start -- TODO confirm
 59 |     uint32_t swizzle_info_num_entries; // entry count; entries are presumably SwizzleInfo records -- TODO confirm
 60 |     uint32_t filename_symbol_offset;
 61 | };
 62 | static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size"); // seven uint32_t fields, no padding
 63 | 
 64 | struct DVLEHeader { // Header introduced by the "DVLE" magic word; locates the per-program tables (0x40 bytes)
 65 |     enum : uint32_t {
 66 |         MAGIC_WORD = 0x454c5644, // "DVLE" when the four bytes are read in little-endian order
 67 |     };
 68 |     // Kind of shader this header belongs to; stored in a single byte
 69 |     enum class ShaderType : uint8_t {
 70 |         VERTEX = 0,
 71 |         GEOMETRY = 1,
 72 |     };
 73 |     // First 8 bytes: magic word, 2 padding bytes, the shader type and 1 more padding byte
 74 |     uint32_t magic_word;
 75 |     uint16_t pad1;
 76 |     ShaderType type;
 77 |     uint8_t pad2;
 78 | 
 79 |     // Offset within binary blob to program entry point
 80 |     uint32_t main_offset_words; // counted in words, as the name says
 81 |     uint32_t endmain_offset_words; // presumably where the main program ends -- TODO confirm
 82 | 
 83 |     uint32_t pad3;
 84 |     uint32_t pad4;
 85 | 
 86 |     // Table of constant values for single registers
 87 |     uint32_t constant_table_offset;
 88 |     uint32_t constant_table_size; // number of entries
 89 | 
 90 |     // Table of program code labels
 91 |     uint32_t label_table_offset;
 92 |     uint32_t label_table_size; // presumably a number of entries like constant_table_size -- TODO confirm
 93 | 
 94 |     // Table of output registers and their semantics
 95 |     uint32_t output_register_table_offset;
 96 |     uint32_t output_register_table_size;
 97 | 
 98 |     // Table of uniforms (which may span multiple registers) and their values
 99 |     uint32_t uniform_table_offset;
100 |     uint32_t uniform_table_size;
101 | 
102 |     // Table of null-terminated strings referenced by the tables above
103 |     uint32_t symbol_table_offset;
104 |     uint32_t symbol_table_size;
105 | 
106 | };
107 | static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size"); // 8 header bytes + 14 uint32_t fields
108 | 
109 | 
110 | struct SwizzleInfo { // A swizzle pattern followed by one 32-bit word whose meaning is not known
111 |     SwizzlePattern pattern; // type comes from shader_bytecode.h
112 |     uint32_t unknown;
113 | };
114 | 
115 | struct ConstantInfo { // One constant: a tagged first word followed by a 16-byte value interpreted according to the tag
116 |     enum Type : uint32_t {
117 |         Bool  = 0,
118 |         Int   = 1,
119 |         Float = 2
120 |     };
121 |     // First word: raw value plus bit-field views of it (type in the lowest 2 bits, regid 8 bits wide at bit 16)
122 |     union {
123 |         uint32_t full_first_word;
124 | 
125 |         BitField<0, 2, Type> type;
126 | 
127 |         BitField<16, 8, uint32_t> regid;
128 |     };
129 |     // Value: all members alias the same four words; which one is meaningful presumably depends on "type"
130 |     union {
131 |         uint32_t value_hex[4];
132 | 
133 |         BitField<0, 1, uint32_t> b; // single bit, presumably used when type == Bool
134 | 
135 |         struct {
136 |             uint8_t x;
137 |             uint8_t y;
138 |             uint8_t z;
139 |             uint8_t w;
140 |         } i; // four bytes, presumably used when type == Int
141 | 
142 |         struct {
143 |             // All of these are float24 values!
144 |             uint32_t x;
145 |             uint32_t y;
146 |             uint32_t z;
147 |             uint32_t w;
148 |         } f; // one 32-bit slot per component, presumably used when type == Float
149 |     };
150 | };
151 | 
152 | struct LabelInfo { // One program code label: four 32-bit words
153 |     BitField<0, 8, uint32_t> id; // lowest 8 bits of the first word
154 |     uint32_t program_offset;
155 |     uint32_t unk; // meaning unknown
156 |     uint32_t name_offset; // presumably an offset into the symbol table -- TODO confirm
157 | };
158 | 
159 | // One 64-bit output register descriptor. Every data member below is a view onto the same 64 bits.
160 | union OutputRegisterInfo {
161 |     // Semantic assigned to an output register (the values 4 and 7 have no name here)
162 |     enum Type : uint64_t {
163 |         POSITION   = 0,
164 |         QUATERNION = 1,
165 |         COLOR      = 2,
166 |         TEXCOORD0  = 3,
167 | 
168 |         TEXCOORD1  = 5,
169 |         TEXCOORD2  = 6,
170 | 
171 |         VIEW       = 8,
172 |     };
173 | 
174 |     // Copies the complete 64-bit word through the "hex" view
175 |     OutputRegisterInfo& operator =(const OutputRegisterInfo& oth) {
176 |         hex.Assign(oth.hex);
177 |         return *this;
178 |     }
179 | 
180 |     BitField< 0, 64, uint64_t> hex;
181 | 
182 |     BitField< 0, 16, Type> type;
183 |     BitField<16, 16, uint64_t> id;
184 |     BitField<32,  4, uint64_t> component_mask;
185 |     BitField<32, 32, uint64_t> descriptor;
186 | 
187 |     // Returns the enabled components as a string made of "x", "y", "z", "w" (in that order).
188 |     // Bit i of component_mask enables the i-th component.
189 |     const std::string GetMask() const {
190 |         static const char component_names[] = "xyzw";
191 | 
192 |         // Read the bit field once instead of once per component
193 |         const uint64_t enabled = component_mask;
194 | 
195 |         std::string result;
196 |         for (unsigned bit = 0; bit < 4; ++bit) {
197 |             if (enabled & (uint64_t(1) << bit))
198 |                 result += component_names[bit];
199 |         }
200 |         return result;
201 |     }
202 | 
203 |     // Returns the assembly-level name of the semantic stored in "type",
204 |     // or "out.unk" for any value without a name.
205 |     const std::string GetSemanticName() const {
206 |         const Type semantic = type;
207 |         switch (semantic) {
208 |         case POSITION:   return "out.pos";
209 |         case QUATERNION: return "out.quat";
210 |         case COLOR:      return "out.col";
211 |         case TEXCOORD0:  return "out.tex0";
212 |         case TEXCOORD1:  return "out.tex1";
213 |         case TEXCOORD2:  return "out.tex2";
214 |         case VIEW:       return "out.view";
215 |         default:         return "out.unk";
216 |         }
217 |     }
218 | };
210 | 
211 | // A uniform: the register range it covers (plus the offset of its symbol) and its name.
212 | struct UniformInfo {
213 |     struct {
214 |         // Classifies a raw register id by the range it falls into.
215 |         // Ids in [0x74, 0x78) and from 0x88 upwards are reported as Unknown.
216 |         static RegisterType GetType(uint32_t reg) {
217 |             if (reg < 0x10)
218 |                 return RegisterType::Input;
219 |             if (reg < 0x70)
220 |                 return RegisterType::FloatUniform;
221 |             if (reg < 0x74)
222 |                 return RegisterType::IntUniform;
223 |             if (0x78 <= reg && reg < 0x88)
224 |                 return RegisterType::BoolUniform;
225 |             return RegisterType::Unknown;
226 |         }
227 | 
228 |         // Converts a raw register id into an index relative to the start of its
229 |         // range, or -1 if the id does not belong to any known range.
230 |         static int GetIndex(uint32_t reg) {
231 |             const RegisterType kind = GetType(reg);
232 |             if (kind == RegisterType::Input)
233 |                 return reg;
234 |             if (kind == RegisterType::FloatUniform)
235 |                 return reg - 0x10;
236 |             if (kind == RegisterType::IntUniform)
237 |                 return reg - 0x70;
238 |             if (kind == RegisterType::BoolUniform)
239 |                 return reg - 0x78;
240 |             return -1;
241 |         }
242 | 
243 |         // Register type of the first and of the last register covered by this uniform
244 |         RegisterType GetStartType() const { return GetType(reg_start); }
245 |         RegisterType GetEndType() const { return GetType(reg_end); }
246 | 
247 |         // Range-relative index of the first and of the last register covered by this uniform
248 |         int GetStartIndex() const { return GetIndex(reg_start); }
249 |         int GetEndIndex() const { return GetIndex(reg_end); }
250 | 
251 |         uint32_t symbol_offset;
252 |         union {
253 |             BitField< 0, 16, uint32_t> reg_start;
254 |             BitField<16, 16, uint32_t> reg_end; // inclusive
255 |         };
256 |     } basic;
257 |     std::string name;
258 | };
255 | 
256 | #pragma pack()
257 | 
258 | } // namespace
259 | 


--------------------------------------------------------------------------------
/include/nihstro/source_tree.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #pragma once
 29 | 
 30 | #include <algorithm>
 31 | #include <list>
 32 | #include <string>
 33 | 
 34 | #include <boost/optional.hpp>
 35 | #include <boost/variant/recursive_wrapper.hpp>
 36 | 
 37 | namespace nihstro {
 38 | 
 39 | struct SourceTreeIterator;
 40 | struct Node;
 41 | 
 42 | // SequenceContainer
 43 | //
 44 | // A block of source text ("code") together with the child trees attached to it at
 45 | // character offsets within that text. Each child keeps a pointer back to its owner.
 46 | struct SourceTree {
 47 |     SourceTree() = default;
 48 | 
 49 |     // Copies the whole subtree; the direct children of the copy get the copy as parent.
 50 |     SourceTree(const SourceTree& oth);
 51 | 
 52 |     // Same as copy construction. Without this operator the implicitly generated
 53 |     // assignment would leave the children's "parent" pointing at the source tree.
 54 |     SourceTree& operator =(const SourceTree& oth);
 55 | 
 56 |     std::string code;
 57 | 
 58 |     struct {
 59 |         std::string filename;
 60 |     } file_info;
 61 | 
 62 |     // Owner of this tree (null unless set by Attach or copied from another tree)
 63 |     SourceTree* parent = nullptr;
 64 | 
 65 |     // ordered with respect to "position"
 66 |     // NOTE(review): Attach() always appends, so it looks like callers have to attach
 67 |     // children in increasing offset order -- confirm against the callers.
 68 |     std::list<Node> children;
 69 | 
 70 |     SourceTreeIterator begin();
 71 |     SourceTreeIterator end();
 72 | 
 73 |     // Attach the given tree, changing the child's owner to *this.
 74 |     SourceTree& Attach(SourceTree tree, std::string::difference_type offset);
 75 | };
 76 | 
 77 | // A child tree together with the place where it is attached in its parent
 78 | struct Node {
 79 |     SourceTree tree;
 80 | 
 81 |     std::string::difference_type offset_within_parent; // within "code"
 82 | };
 83 | 
 84 | inline SourceTree::SourceTree(const SourceTree& oth) : code(oth.code), file_info(oth.file_info), parent(oth.parent), children(oth.children) {
 85 |     // The copied children still name "oth" as their parent; re-point them at the copy.
 86 |     // (Deeper levels were already fixed up by the recursive copies of the child trees.)
 87 |     for (auto& child : children)
 88 |         child.tree.parent = this;
 89 | }
 90 | 
 91 | inline SourceTree& SourceTree::operator =(const SourceTree& oth) {
 92 |     // Copy first, then swap the contents in: this stays correct when "oth" is *this
 93 |     // or one of the trees stored inside this->children.
 94 |     SourceTree copy(oth);
 95 | 
 96 |     code.swap(copy.code);
 97 |     file_info.filename.swap(copy.file_info.filename);
 98 |     parent = copy.parent;
 99 | 
100 |     // Swapping the lists keeps the Node objects in place, so the parent pointers of
101 |     // deeper levels stay valid; only the direct children need to be re-pointed.
102 |     children.swap(copy.children);
103 |     for (auto& child : children)
104 |         child.tree.parent = this;
105 | 
106 |     return *this;
107 | }
108 | 
109 | inline SourceTree& SourceTree::Attach(SourceTree tree, std::string::difference_type offset) {
110 |     tree.parent = this;
111 |     // The Node stores a copy of "tree"; the copy constructor carries the parent over
112 |     children.push_back(Node{tree, offset});
113 |     return *this;
114 | }
 82 | 
 83 | // RandomAccessIterator
 84 | //
 85 | // Iterates over the characters of a SourceTree. Whenever "position" sits exactly on the
 86 | // offset of the current child node, the iterator is "within" that child and refers to the
 87 | // child's characters (recursively) rather than to the parent's own code.
 88 | struct SourceTreeIterator {
 89 |     using difference_type = std::string::iterator::difference_type;
 90 |     using reference = std::string::iterator::reference;
 91 |     using value_type = std::string::iterator::value_type;
 92 |     using pointer = std::string::iterator::pointer;
 93 |     using iterator_category = std::random_access_iterator_tag;
 94 | 
 95 |     // Creates an iterator that is not bound to any tree ("tree" is null). It must be
 96 |     // assigned from a valid iterator before anything else is done with it.
 97 |     SourceTreeIterator() {
 98 |     }
 99 | 
100 |     // Creates an iterator positioned at the start of "tree"
101 |     SourceTreeIterator(SourceTree& tree) : tree(&tree), position(tree.code.begin()), node_iterator(tree.children.begin()) {
102 |         UpdateChildIterator();
103 |     }
104 | 
105 |     SourceTreeIterator(const SourceTreeIterator&) = default;
106 | 
107 |     // Moves the iterator by n characters. Forward moves step through the children that
108 |     // lie on the way; backward moves are redone as a forward move from tree->begin().
109 |     SourceTreeIterator& operator += (difference_type n) {
110 |         if (n > 0) {
111 |             while (n) {
112 |                 if (child_iterator) {
113 |                     // Characters of this tree's own code between the cursor and the next child
114 |                     auto remaining_to_child = node_iterator->offset_within_parent - (position - tree->code.begin());
115 |                     if (remaining_to_child >= n) {
116 |                         // If the next child is more than n steps away, increase position by n and return
117 |                         // TODO: Should we make sure that we don't end up out-of-bounds here?
118 |                         position += n;
119 |                         UpdateNodeIterator();
120 |                         break;
121 |                     } else {
122 |                         // Otherwise, move current position to the child if it isn't there already
123 |                         position += remaining_to_child;
124 |                         n -= remaining_to_child;
125 |                         UpdateNodeIterator();
126 |                     }
127 | 
128 |                     if (child_iterator->get().StepsRemaining() > n) {
129 |                         // If child is larger than n, advance child by n and return
130 |                         child_iterator->get() += n;
131 |                         break;
132 |                     } else {
133 |                         // else step out of the child and increment next child iterator by one
134 |                         n -= child_iterator->get().StepsRemaining();
135 |                         if (node_iterator != tree->children.end())
136 |                             node_iterator++;
137 |                         UpdateChildIterator();
138 |                     }
139 |                 } else {
140 |                     // No child left ahead of the cursor: plain move within this tree's code
141 |                     // TODO: Should we make sure that we don't end up out-of-bounds here?
142 |                     position += n;
143 |                     UpdateNodeIterator();
144 |                     break;
145 |                 }
146 |             }
147 |         } else if (n < 0) {
148 |             // Reduce to n>0 case by starting from begin()
149 |             n = (*this - tree->begin()) + n;
150 |             *this = tree->begin() + n;
151 |         }
152 |         return *this;
153 |     }
154 | 
155 |     SourceTreeIterator& operator -= (difference_type n) {
156 |         *this += -n;
157 |         return *this;
158 |     }
159 | 
160 |     // Distance between two iterators, computed from each one's distance to the
161 |     // beginning of its own tree
162 |     difference_type operator -(SourceTreeIterator it) const {
163 |         return this->StepsGone() - it.StepsGone();
164 |     }
165 | 
166 |     // All comparisons are derived from the distance between the two iterators
167 |     bool operator < (const SourceTreeIterator& it) const {
168 |         return std::distance(*this, it) > 0;
169 |     }
170 | 
171 |     bool operator <= (const SourceTreeIterator& it) const {
172 |         return std::distance(*this, it) >= 0;
173 |     }
174 | 
175 |     bool operator > (const SourceTreeIterator& it) const {
176 |         return !(*this <= it);
177 |     }
178 | 
179 |     bool operator >= (const SourceTreeIterator& it) const {
180 |         return !(*this < it);
181 |     }
182 | 
183 |     bool operator == (const SourceTreeIterator& it) const {
184 |         return (*this <= it) && !(*this < it);
185 |     }
186 | 
187 |     bool operator != (const SourceTreeIterator& it) const {
188 |         return !(*this == it);
189 |     }
190 | 
191 |     reference operator* () {
192 |         return (*this)[0];
193 |     }
194 | 
195 |     // Pre-increment: advances by one character and returns this iterator by reference,
196 |     // as the standard iterator requirements ask for (this also avoids a copy).
197 |     SourceTreeIterator& operator++ () {
198 |         *this += 1;
199 |         return *this;
200 |     }
201 | 
202 |     // Post-increment: advances by one character and returns the previous state
203 |     SourceTreeIterator operator++ (int) {
204 |         auto it = *this;
205 |         *this += 1;
206 |         return it;
207 |     }
208 | 
209 |     SourceTreeIterator operator +(difference_type n) const {
210 |         SourceTreeIterator it2 = *this;
211 |         it2 += n;
212 |         return it2;
213 |     }
214 | 
215 |     SourceTreeIterator operator -(SourceTreeIterator::difference_type n) const {
216 |         return *this + (-n);
217 |     }
218 | 
219 |     // Character n steps away from the current one. If the target lies within a child,
220 |     // the character is taken from that child, otherwise from this tree's own code.
221 |     reference operator [] (difference_type n) {
222 |         auto it = (*this + n);
223 |         if (it.WithinChild())
224 |             return it.child_iterator->get()[0];
225 |         else return *it.position;
226 |     }
227 | 
228 |     // Get line number (one-based) within "tree"
229 |     unsigned GetLineNumber() const {
230 |         // Adding one for natural (i.e. one-based) line numbers
231 |         return std::count(tree->code.begin(), position, '\n') + 1;
232 |     }
233 | 
234 |     // Get line number (one-based) within the tree of the current child
235 |     unsigned GetCurrentLineNumber() const {
236 |         if (WithinChild())
237 |             return child_iterator->get().GetCurrentLineNumber();
238 | 
239 |         return GetLineNumber();
240 |     }
241 | 
242 |     // Get the filename recorded for the innermost tree the iterator is currently in
243 |     const std::string GetCurrentFilename() const {
244 |         if (WithinChild())
245 |             return child_iterator->get().GetCurrentFilename();
246 | 
247 |         return tree->file_info.filename;
248 |     }
249 | 
250 |     // Returns the iterator (this one or one of the nested child iterators) that belongs
251 |     // to "reference_tree".
252 |     // NOTE(review): child_iterator is dereferenced without a check, so reference_tree
253 |     // apparently has to be this tree or a tree reachable through the child iterators.
254 |     SourceTreeIterator GetParentIterator(const SourceTree* reference_tree) const {
255 |         if (tree == reference_tree) {
256 |             return *this;
257 |         } else {
258 |             return child_iterator->get().GetParentIterator(reference_tree);
259 |         }
260 |     }
261 | 
262 |     // Returns the innermost tree the iterator is currently in
263 |     SourceTree* GetCurrentTree() {
264 |         if (WithinChild())
265 |             return child_iterator->get().GetCurrentTree();
266 |         else
267 |             return tree;
268 |     }
269 | 
270 | private:
271 |     // Distance from this iterator to the end of its tree
272 |     difference_type StepsRemaining() const {
273 |         return std::distance(*this, tree->end());
274 |     }
275 | 
276 |     // Distance from the beginning of the tree to this iterator, including the
277 |     // characters of the children that were passed on the way
278 |     difference_type StepsGone() const {
279 |         auto it = tree->begin();
280 | 
281 |         difference_type diff = 0;
282 | 
283 |         // Advance reference iterator starting from the beginning until we reach *this,
284 |         // making sure that both the main position and the child iterator match.
285 |         while (it.position != position ||
286 |                ((bool)it.child_iterator ^ (bool)child_iterator) ||
287 |                (it.child_iterator && child_iterator && it.child_iterator->get() != child_iterator->get())) {
288 |             // Move to next child (if there is one), or abort if we reach the reference position
289 |             if (it.child_iterator) {
290 |                 auto distance_to_child = std::min(it.node_iterator->offset_within_parent - (it.position -it.tree->code.begin() ), position - it.position);
291 | 
292 |                 // Move to child or this->position
293 |                 diff += distance_to_child;
294 |                 it.position += distance_to_child;
295 | 
296 |                 if (it.position - it.tree->code.begin() == it.node_iterator->offset_within_parent) {
297 |                     if (node_iterator != tree->children.end() && it.node_iterator == node_iterator) {
298 |                         // Both iterators are at the same child: the rest is the distance inside it
299 |                         return diff + (child_iterator->get() - it.child_iterator->get());
300 |                     } else {
301 |                         // Move out of child
302 |                         diff += it.child_iterator->get().StepsRemaining();
303 |                     }
304 |                 } else {
305 |                     // We moved to this->position => done
306 |                     return diff;
307 |                 }
308 | 
309 |                 // Move to next child
310 |                 if (it.node_iterator != it.tree->children.end()) {
311 |                     it.node_iterator++;
312 |                     it.UpdateChildIterator();
313 |                 }
314 |             } else {
315 |                 // no child remaining, hence just move to the given position
316 |                 return diff + (position - it.position);
317 |             }
318 |         }
319 | 
320 |         return diff;
321 |     }
322 | 
323 |     // True if the cursor sits exactly on the offset of the current child node
324 |     bool WithinChild() const {
325 |         return child_iterator && position - tree->code.begin() == node_iterator->offset_within_parent;
326 |     }
327 | 
328 |     // Re-creates child_iterator at the start of the node that node_iterator refers to,
329 |     // or clears it if there is no such node
330 |     void UpdateChildIterator() {
331 |         if (node_iterator != tree->children.end())
332 |             child_iterator = boost::recursive_wrapper<SourceTreeIterator>(node_iterator->tree);
333 |         else
334 |             child_iterator = boost::none;
335 |     }
336 | 
337 |     void UpdateNodeIterator() {
338 |         // Move to the first node which is at the cursor or behind it
339 |         while (node_iterator != tree->children.end() && node_iterator->offset_within_parent < std::distance(tree->code.begin(), position)) {
340 |             node_iterator++;
341 |             UpdateChildIterator();
342 |         }
343 |     }
344 | 
345 |     // Null for a default-constructed iterator, so an unbound iterator never carries
346 |     // an indeterminate pointer around
347 |     SourceTree* tree = nullptr;
348 |     std::string::iterator position;
349 | 
350 |     boost::optional<boost::recursive_wrapper<SourceTreeIterator>> child_iterator; // points to current or next child
351 |     std::list<Node>::iterator node_iterator; // points to current or next node
352 | 
353 |     friend struct SourceTree;
354 | };
321 | 
322 | // Commuted form of "it + n": a copy of "it" advanced by n characters
323 | inline SourceTreeIterator operator +(SourceTreeIterator::difference_type n, const SourceTreeIterator& it) {
324 |     SourceTreeIterator advanced = it;
325 |     advanced += n;
326 |     return advanced;
327 | }
325 | 
326 | // Despite the operand order, this yields a copy of "it" moved back by n characters
327 | inline SourceTreeIterator operator -(SourceTreeIterator::difference_type n, const SourceTreeIterator& it) {
328 |     SourceTreeIterator moved_back = it;
329 |     moved_back -= n;
330 |     return moved_back;
331 | }
329 | 
330 | // Iterator positioned at the start of this tree
331 | inline SourceTreeIterator SourceTree::begin() {
332 |     SourceTreeIterator first(*this);
333 |     return first;
334 | }
333 | 
334 | // Past-the-end iterator: cursor behind the last character of "code", no child left to visit
335 | inline SourceTreeIterator SourceTree::end() {
336 |     SourceTreeIterator last(*this);
337 |     last.child_iterator = boost::none;
338 |     last.node_iterator = children.end();
339 |     last.position = code.end();
340 |     return last;
341 | }
341 | 
342 | } // namespace
343 | 


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2014 Tony Wasserka
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 |     * Redistributions of source code must retain the above copyright
 8 |       notice, this list of conditions and the following disclaimer.
 9 |     * Redistributions in binary form must reproduce the above copyright
10 |       notice, this list of conditions and the following disclaimer in the
11 |       documentation and/or other materials provided with the distribution.
12 |     * Neither the name of the owner nor the names of its contributors may
13 |       be used to endorse or promote products derived from this software
14 |       without specific prior written permission.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/src/parser_assembly.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | 
 29 | // Enable this for detailed XML overview of parser results
 30 | // #define BOOST_SPIRIT_DEBUG
 31 | 
 32 | #include <boost/fusion/include/adapt_struct.hpp>
 33 | #include <boost/phoenix/core/reference.hpp>
 34 | #include <boost/spirit/include/qi.hpp>
 35 | 
 36 | #include "nihstro/parser_assembly.h"
 37 | #include "nihstro/parser_assembly_private.h"
 38 | 
 39 | #include "nihstro/shader_binary.h"
 40 | #include "nihstro/shader_bytecode.h"
 41 | 
 42 | namespace spirit = boost::spirit;
 43 | namespace qi = boost::spirit::qi;
 44 | namespace ascii = boost::spirit::qi::ascii;
 45 | namespace phoenix = boost::phoenix;
 46 | 
 47 | using spirit::_1;
 48 | using spirit::_2;
 49 | using spirit::_3;
 50 | using spirit::_4;
 51 | 
 52 | using namespace nihstro;
 53 | 
 54 | // Adapt parser data structures for use with boost::spirit
 55 | // Members are listed in the order in which the setemit grammar below produces them: first "prim", then "inv".
 56 | BOOST_FUSION_ADAPT_STRUCT(
 57 |     SetEmitInstruction::Flags,
 58 |     (boost::optional<bool>, primitive_flag)
 59 |     (boost::optional<bool>, invert_flag)
 60 | )
 61 | // Same here: the setemit rule yields the opcode, then the vertex id, then the flags.
 62 | BOOST_FUSION_ADAPT_STRUCT(
 63 |     SetEmitInstruction,
 64 |     (OpCode, opcode)
 65 |     (unsigned, vertex_id)
 66 |     (SetEmitInstruction::Flags, flags)
 67 | )
 68 | // Lazily evaluated wrapper around ErrorHandler; the qi::on_error hooks below invoke it with the diagnostics object.
 69 | phoenix::function<ErrorHandler> error_handler;
 70 | 
 71 | // Grammar that recognizes a bare opcode mnemonic (case-insensitive, followed by whitespace).
 72 | //
 73 | // require_end_of_line == true:  only mnemonics from opcodes_trivial are accepted, and the
 74 | //                               statement is expected to end right after the mnemonic.
 75 | // require_end_of_line == false: a mnemonic from any of the opcode tables listed below is
 76 | //                               accepted, and parsing stops right behind it.
 77 | template<typename Iterator, bool require_end_of_line>
 78 | TrivialOpParser<Iterator, require_end_of_line>::TrivialOpParser(const ParserContext& context)
 79 |                 : TrivialOpParser::base_type(trivial_instruction),
 80 |                   common(context),
 81 |                   opcodes_trivial(common.opcodes_trivial),
 82 |                   opcodes_compare(common.opcodes_compare),
 83 |                   opcodes_float(common.opcodes_float),
 84 |                   opcodes_flowcontrol(common.opcodes_flowcontrol),
 85 |                   end_of_statement(common.end_of_statement),
 86 |                   diagnostics(common.diagnostics) {
 87 | 
 88 |         // Setup rules
 89 |         if (require_end_of_line) {
 90 |             opcode = qi::no_case[qi::lexeme[opcodes_trivial >> &ascii::space]];
 91 |             // ">" is an expectation: once the opcode matched, a missing end of statement is an error
 92 |             trivial_instruction = opcode > end_of_statement;
 93 |         } else {
 94 |             // The parentheses matter: ">>" binds tighter than "|", so without them the
 95 |             // whitespace lookahead would guard the last alternative only, and the earlier
 96 |             // tables would accept a word that merely starts with one of their mnemonics.
 97 |             opcode = qi::no_case[qi::lexeme[(opcodes_trivial | opcodes_compare | opcodes_float[0]
 98 |                                              | opcodes_float[1] | opcodes_float[2] | opcodes_float[3]
 99 |                                              | opcodes_flowcontrol[0] | opcodes_flowcontrol[1]) >> &ascii::space]];
100 |             trivial_instruction = opcode;
101 |         }
102 | 
103 |         // Debug names (only used when BOOST_SPIRIT_DEBUG is defined)
104 |         BOOST_SPIRIT_DEBUG_NODE(opcode);
105 |         BOOST_SPIRIT_DEBUG_NODE(trivial_instruction);
106 | 
107 |         // Error handling: report failed expectations through error_handler and fail the parse
108 |         qi::on_error<qi::fail>(trivial_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
109 | }
 99 | 
100 | template<typename Iterator>
101 | SetEmitParser<Iterator>::SetEmitParser(const ParserContext& context)
102 |                 : SetEmitParser::base_type(setemit_instruction),
103 |                   common(context),
104 |                   opcodes_setemit(common.opcodes_setemit),
105 |                   end_of_statement(common.end_of_statement),
106 |                   diagnostics(common.diagnostics) {
107 |         // Grammar for: <setemit opcode> <vertex id> [, prim] [, inv] <end of statement>
108 |         // Setup rules
109 |         // Each flag is introduced by its own comma
110 |         auto comma_rule = qi::lit(',');
111 |         // Case-insensitive mnemonic that must be followed by whitespace (lookahead only, not consumed)
112 |         opcode = qi::lexeme[qi::no_case[opcodes_setemit] >> &ascii::space];
113 |         // Flag keywords must not be followed by an alphanumeric character; a matched flag yields "true"
114 |         vertex_id = qi::uint_;
115 |         prim_flag = qi::lit("prim") >> &(!ascii::alnum) >> qi::attr(true);
116 |         inv_flag = qi::lit("inv") >> &(!ascii::alnum) >> qi::attr(true);
117 |         flags = ((comma_rule >> prim_flag) ^ (comma_rule >> inv_flag)); // "^": one or both flags, in any order
118 |         // If no flag is given, a default-constructed Flags object is used; end_of_statement is then expected (">")
119 |         setemit_instruction = ((opcode >> vertex_id) >> (flags | qi::attr(SetEmitInstruction::Flags{}))) > end_of_statement;
120 | 
121 |         // Debug names (only used when BOOST_SPIRIT_DEBUG is defined), followed by error handling
122 |         BOOST_SPIRIT_DEBUG_NODE(opcode);
123 |         BOOST_SPIRIT_DEBUG_NODE(vertex_id);
124 |         BOOST_SPIRIT_DEBUG_NODE(prim_flag);
125 |         BOOST_SPIRIT_DEBUG_NODE(inv_flag);
126 |         BOOST_SPIRIT_DEBUG_NODE(flags);
127 |         BOOST_SPIRIT_DEBUG_NODE(setemit_instruction);
128 |         // Report failed expectations through error_handler and fail the parse
129 |         qi::on_error<qi::fail>(setemit_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
130 | }
131 | 
132 | template<typename Iterator>
133 | LabelParser<Iterator>::LabelParser(const ParserContext& context)
134 |                 : LabelParser::base_type(label), common(context),
135 |                   end_of_statement(common.end_of_statement),
136 |                   identifier(common.identifier),
137 |                   diagnostics(common.diagnostics) {
138 |         // A label is an identifier followed by ':'. Once both matched, the end of the statement is expected (">").
139 |         label = identifier >> qi::lit(':') > end_of_statement;
140 |         // Debug name (only used when BOOST_SPIRIT_DEBUG is defined)
141 |         BOOST_SPIRIT_DEBUG_NODE(label);
142 |         // Report failed expectations through error_handler and fail the parse
143 |         qi::on_error<qi::fail>(label, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
144 | }
145 | template struct LabelParser<ParserIterator>; // explicit instantiation for ParserIterator
146 | 
147 | 
148 | struct Parser::ParserImpl {
149 |     using Iterator = SourceTreeIterator;
150 | 
151 |     // Constructs every statement grammar from the same parser context.
152 |     explicit ParserImpl(const ParserContext& context)
153 |         : label(context), plain_instruction(context), simple_instruction(context),
154 |           instruction(context), compare(context), flow_control(context),
155 |           setemit(context), declaration(context) {
156 |     }
157 | 
158 |     // Alternately applies the skipper grammar and consumes line breaks, starting at begin.
159 |     // Returns the number of line breaks that were consumed.
160 |     unsigned Skip(Iterator& begin, Iterator end) {
161 |         unsigned line_breaks = 0;
162 |         qi::parse(begin, end, skipper);
163 |         while (qi::parse(begin, end, qi::eol)) {
164 |             ++line_breaks;
165 |             qi::parse(begin, end, skipper);
166 |         }
167 |         return line_breaks;
168 |     }
169 | 
170 |     // Advances begin past the remainder of the current line, including its line break if there is one.
171 |     void SkipSingleLine(Iterator& begin, Iterator end) {
172 |         qi::parse(begin, end, *(qi::char_ - (qi::eol | qi::eoi)) >> (qi::eol | qi::eoi));
173 |     }
174 | 
175 |     // Each Parse* function applies the grammar of one statement kind at begin, using the shared
176 |     // skipper, and stores the result in *content (must not be null). Returns true on a match.
177 |     bool ParseLabel(Iterator& begin, Iterator end, StatementLabel* content) {
178 |         return ParseWith(begin, end, label, content);
179 |     }
180 | 
181 |     bool ParseOpCode(Iterator& begin, Iterator end, OpCode* content) {
182 |         return ParseWith(begin, end, plain_instruction, content);
183 |     }
184 | 
185 |     bool ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* content) {
186 |         return ParseWith(begin, end, simple_instruction, content);
187 |     }
188 | 
189 |     bool ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* content) {
190 |         return ParseWith(begin, end, instruction, content);
191 |     }
192 | 
193 |     bool ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content) {
194 |         return ParseWith(begin, end, compare, content);
195 |     }
196 | 
197 |     bool ParseFlowControl(Iterator& begin, Iterator end, FlowControlInstruction* content) {
198 |         return ParseWith(begin, end, flow_control, content);
199 |     }
200 | 
201 |     bool ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content) {
202 |         return ParseWith(begin, end, setemit, content);
203 |     }
204 | 
205 |     bool ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* content) {
206 |         return ParseWith(begin, end, declaration, content);
207 |     }
208 | 
209 | private:
210 |     // Shared body of the Parse* functions: checks the output pointer, then runs phrase_parse
211 |     // with the given grammar and the member skipper, writing the attribute to *content.
212 |     template<typename Grammar, typename Attribute>
213 |     bool ParseWith(Iterator& begin, Iterator end, const Grammar& grammar, Attribute* content) {
214 |         assert(content != nullptr);
215 |         return qi::phrase_parse(begin, end, grammar, skipper, *content);
216 |     }
217 | 
218 |     // Skipper handed to every parse call above.
219 |     AssemblySkipper<Iterator>   skipper;
220 | 
221 |     // One grammar per kind of statement; declared in the order the constructor initializes them.
222 |     LabelParser<Iterator>       label;
223 |     TrivialOpParser<Iterator, false> plain_instruction;
224 |     TrivialOpParser<Iterator, true>  simple_instruction;
225 |     FloatOpParser<Iterator>     instruction;
226 |     CompareParser<Iterator>     compare;
227 |     FlowControlParser<Iterator> flow_control;
228 |     SetEmitParser<Iterator> setemit;
229 |     DeclarationParser<Iterator> declaration;
230 | };
231 | 
232 | 
233 | // Creates the implementation object, which builds all grammars from the given context.
234 | Parser::Parser(const ParserContext& context) : impl(new ParserImpl(context)) {
235 | }
236 | // Defined in this file; NOTE(review): presumably because ParserImpl is only complete here - confirm against the header.
237 | Parser::~Parser() {
238 | }
239 | // The member functions below forward their arguments unchanged to the ParserImpl member of the same name.
240 | unsigned Parser::Skip(Iterator& begin, Iterator end) {
241 |     return impl->Skip(begin, end);
242 | }
243 | 
244 | void Parser::SkipSingleLine(Iterator& begin, Iterator end) {
245 |     impl->SkipSingleLine(begin, end);
246 | }
247 | 
248 | bool Parser::ParseLabel(Iterator& begin, Iterator end, StatementLabel* label) {
249 |     return impl->ParseLabel(begin, end, label);
250 | }
251 | 
252 | bool Parser::ParseOpCode(Iterator& begin, Iterator end, OpCode* opcode) {
253 |     return impl->ParseOpCode(begin, end, opcode);
254 | }
255 | 
256 | bool Parser::ParseSimpleInstruction(Iterator& begin, Iterator end, OpCode* opcode) {
257 |     return impl->ParseSimpleInstruction(begin, end, opcode);
258 | }
259 | 
260 | bool Parser::ParseFloatOp(Iterator& begin, Iterator end, FloatOpInstruction* instruction) {
261 |     return impl->ParseFloatOp(begin, end, instruction);
262 | }
263 | 
264 | bool Parser::ParseCompare(Iterator& begin, Iterator end, CompareInstruction* content) {
265 |     return impl->ParseCompare(begin, end, content);
266 | }
267 | 
268 | bool Parser::ParseFlowControl(Iterator& begin, Iterator end, FlowControlInstruction* content) {
269 |     return impl->ParseFlowControl(begin, end, content);
270 | }
271 | 
272 | bool Parser::ParseSetEmit(Iterator& begin, Iterator end, SetEmitInstruction* content) {
273 |     return impl->ParseSetEmit(begin, end, content);
274 | }
275 | 
276 | bool Parser::ParseDeclaration(Iterator& begin, Iterator end, StatementDeclaration* declaration) {
277 |     return impl->ParseDeclaration(begin, end, declaration);
278 | }
279 | 


--------------------------------------------------------------------------------
/src/parser_assembly/common.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | 
 29 | // Enable this for detailed XML overview of parser results
 30 | // #define BOOST_SPIRIT_DEBUG
 31 | 
 32 | #include <boost/fusion/include/adapt_struct.hpp>
 33 | #include <boost/spirit/include/qi.hpp>
 34 | 
 35 | #include "nihstro/parser_assembly.h"
 36 | #include "nihstro/parser_assembly_private.h"
 37 | 
 38 | #include "nihstro/shader_binary.h"
 39 | #include "nihstro/shader_bytecode.h"
 40 | 
 41 | namespace spirit = boost::spirit;
 42 | namespace qi = boost::spirit::qi;
 43 | namespace ascii = boost::spirit::qi::ascii;
 44 | namespace phoenix = boost::phoenix;
 45 | 
 46 | using spirit::_1;
 47 | using spirit::_2;
 48 | using spirit::_3;
 49 | using spirit::_4;
 50 | 
 51 | using namespace nihstro;
 52 | 
 53 | // Adapt parser data structures for use with boost::spirit
 54 | // IntegerWithSign is exposed as the fusion sequence (sign, value) so rules can fill it member by member.
 55 | BOOST_FUSION_ADAPT_STRUCT(
 56 |     IntegerWithSign,
 57 |     (int, sign)
 58 |     (unsigned, value)
 59 | )
 60 | 
 61 | /**
 62 |  * transform_attribute specialization: a rule exposing InputSwizzlerMask parses into a
 63 |  * std::vector<InputSwizzlerMask::Component>, which post() then copies into the mask.
 64 |  */
 65 | namespace boost { namespace spirit { namespace traits {
 66 | template<>
 67 | struct transform_attribute<InputSwizzlerMask, std::vector<InputSwizzlerMask::Component>, qi::domain>
 68 | {
 69 |     using Exposed = InputSwizzlerMask;
 70 |     using type = std::vector<InputSwizzlerMask::Component>;
 71 |     // Copies the parsed components into val; extra components are dropped, never written out of bounds.
 72 |     static void post(Exposed& val, const type& attr) {
 73 |         const size_t capacity = sizeof(val.components) / sizeof(val.components[0]);
 74 |         const size_t count = (attr.size() < capacity) ? attr.size() : capacity;
 75 |         val.num_components = count;
 76 |         for (size_t i = 0; i < count; ++i)
 77 |             val.components[i] = attr[i];
 78 |     }
 79 |     // Builds the intermediate vector from the components currently stored in val.
 80 |     static type pre(Exposed& val) {
 81 |         type vec;
 82 |         for (int i = 0; i < val.num_components; ++i)
 83 |             vec.push_back(val.components[i]);
 84 |         return vec;
 85 |     }
 86 |     static void fail(Exposed&) { } // nothing to undo when the parse fails
 87 | };
 88 | }}} // namespaces
 89 | // Fills the symbol tables and defines the rules that the statement grammars take from CommonRules.
 90 | template<>
 91 | CommonRules<ParserIterator>::CommonRules(const ParserContext& context) {
 92 |     // Setup symbol table
 93 |     opcodes_trivial.add
 94 |                ( "nop",      OpCode::Id::NOP      )
 95 |                ( "end",      OpCode::Id::END      )
 96 |                ( "emit",     OpCode::Id::EMIT     )
 97 |                ( "else",     OpCode::Id::ELSE     )
 98 |                ( "endif",    OpCode::Id::ENDIF    )
 99 |                ( "endloop",  OpCode::Id::ENDLOOP  );
100 |     // opcodes_float[i] holds the mnemonics that floatop.cpp pairs with i + 1 comma-separated arguments.
101 |     opcodes_float[0].add
102 |                ( "mova",     OpCode::Id::MOVA     );
103 | 
104 |     opcodes_float[1].add
105 |                ( "exp",      OpCode::Id::EX2      )
106 |                ( "log",      OpCode::Id::LG2      )
107 |                ( "lit",      OpCode::Id::LIT      )
108 |                ( "flr",      OpCode::Id::FLR      )
109 |                ( "rcp",      OpCode::Id::RCP      )
110 |                ( "rsq",      OpCode::Id::RSQ      )
111 |                ( "mov",      OpCode::Id::MOV      );
112 |     opcodes_float[2].add
113 |                ( "add",      OpCode::Id::ADD      )
114 |                ( "dp3",      OpCode::Id::DP3      )
115 |                ( "dp4",      OpCode::Id::DP4      )
116 |                ( "dph",      OpCode::Id::DPH      )
117 |                ( "dst",      OpCode::Id::DST      )
118 |                ( "mul",      OpCode::Id::MUL      )
119 |                ( "sge",      OpCode::Id::SGE      )
120 |                ( "slt",      OpCode::Id::SLT      )
121 |                ( "max",      OpCode::Id::MAX      )
122 |                ( "min",      OpCode::Id::MIN      );
123 |     opcodes_float[3].add
124 |                ( "mad",      OpCode::Id::MAD      );
125 | 
126 |     opcodes_compare.add
127 |                ( "cmp",      OpCode::Id::CMP      );
128 | 
129 |     opcodes_flowcontrol[0].add
130 |                ( "break",    OpCode::Id::BREAK    )
131 |                ( "breakc",   OpCode::Id::BREAKC   )
132 |                ( "if",       OpCode::Id::GEN_IF   )
133 |                ( "loop",     OpCode::Id::LOOP     );
134 |     opcodes_flowcontrol[1].add
135 |                ( "jmp",      OpCode::Id::GEN_JMP  )
136 |                ( "call",     OpCode::Id::GEN_CALL );
137 | 
138 |     opcodes_setemit.add
139 |                ( "setemitraw", OpCode::Id::SETEMIT );
140 |         // Sign characters and the factor each one maps to.
141 |         signs.add( "+", +1)
142 |                  ( "-", -1);
143 | 
144 |         // TODO: Add rgba/stq masks
145 |         swizzlers.add
146 |                      ( "x",    InputSwizzlerMask::x )
147 |                      ( "y",    InputSwizzlerMask::y )
148 |                      ( "z",    InputSwizzlerMask::z )
149 |                      ( "w",    InputSwizzlerMask::w );
150 |         // Any number of components is accepted here; attr_cast converts the parsed vector into an InputSwizzlerMask.
151 |         // TODO: Make sure this is followed by a space or *some* separator
152 |         // TODO: Use qi::repeat(1,4)(swizzlers) instead of Kleene [failed to work when I tried, so make this work!]
153 |         // TODO: Use qi::lexeme[swizzlers] [crashed when I tried, so make this work!]
154 |         swizzle_mask = qi::attr_cast<InputSwizzlerMask, std::vector<InputSwizzlerMask::Component>>(*swizzlers);
155 |         // An identifier starts with a letter or '_'; peek_identifier tests for one without consuming input.
156 |         identifier = qi::lexeme[qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z0-9_")];
157 |         peek_identifier = &identifier;
158 |         // Building blocks of the index expression that may appear between '[' and ']'.
159 |         uint_after_sign = qi::uint_; // TODO: NOT dot (or alphanum) after this to prevent floats..., TODO: overflows?
160 |         sign_with_uint = signs > uint_after_sign;
161 |         index_expression_first_term = (qi::attr(+1) >> qi::uint_) | (peek_identifier > identifier);
162 |         index_expression_following_terms = (qi::lit('+') >> peek_identifier > identifier) | sign_with_uint;
163 |         index_expression = (-index_expression_first_term)           // the first element has an optional sign
164 |                             >> (*index_expression_following_terms); // following elements have a mandatory sign
165 |         // Optional sign, identifier, optional "[index expression]" and any number of ".swizzle" suffixes.
166 |         expression = ((-signs) > peek_identifier > identifier) >> (-(qi::lit('[') > index_expression > qi::lit(']'))) >> *(qi::lit('.') > swizzle_mask);
167 |         // A statement ends at a line break or at the end of input; qi::omit discards the attribute.
168 |         end_of_statement = qi::omit[qi::eol | qi::eoi];
169 | 
170 |         // Error handling
171 |         BOOST_SPIRIT_DEBUG_NODE(identifier);
172 |         BOOST_SPIRIT_DEBUG_NODE(uint_after_sign);
173 |         BOOST_SPIRIT_DEBUG_NODE(index_expression);
174 |         BOOST_SPIRIT_DEBUG_NODE(peek_identifier);
175 |         BOOST_SPIRIT_DEBUG_NODE(expression);
176 |         BOOST_SPIRIT_DEBUG_NODE(swizzle_mask);
177 |         BOOST_SPIRIT_DEBUG_NODE(end_of_statement);
178 |         // Messages keyed by rule name; NOTE(review): presumably looked up by error_handler for the failed rule - confirm.
179 |         diagnostics.Add(swizzle_mask.name(), "Expected swizzle mask after period");
180 |         diagnostics.Add(peek_identifier.name(), "Expected identifier");
181 |         diagnostics.Add(uint_after_sign.name(), "Expected integer number after sign");
182 |         diagnostics.Add(index_expression.name(), "Expected index expression between '[' and ']'");
183 |         diagnostics.Add(expression.name(), "Expected expression of a known identifier");
184 |         diagnostics.Add(end_of_statement.name(), "Expected end of statement");
185 | }
186 | 


--------------------------------------------------------------------------------
/src/parser_assembly/compare.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | 
 29 | // Enable this for detailed XML overview of parser results
 30 | // #define BOOST_SPIRIT_DEBUG
 31 | 
 32 | #include <boost/fusion/include/adapt_struct.hpp>
 33 | #include <boost/phoenix/core/reference.hpp>
 34 | #include <boost/spirit/include/qi.hpp>
 35 | 
 36 | #include "nihstro/parser_assembly.h"
 37 | #include "nihstro/parser_assembly_private.h"
 38 | 
 39 | #include "nihstro/shader_binary.h"
 40 | #include "nihstro/shader_bytecode.h"
 41 | 
 42 | namespace spirit = boost::spirit;
 43 | namespace qi = boost::spirit::qi;
 44 | namespace ascii = boost::spirit::qi::ascii;
 45 | namespace phoenix = boost::phoenix;
 46 | 
 47 | using spirit::_1;
 48 | using spirit::_2;
 49 | using spirit::_3;
 50 | using spirit::_4;
 51 | 
 52 | using namespace nihstro;
 53 | 
 54 | // Adapt parser data structures for use with boost::spirit
 55 | // CompareInstruction members are listed in the order the cmp rule yields them: opcode, arguments, ops.
 56 | /*BOOST_FUSION_ADAPT_STRUCT(
 57 |     IntegerWithSign,
 58 |     (int, sign)
 59 |     (unsigned, value)
 60 | )
 61 | */
 62 | BOOST_FUSION_ADAPT_STRUCT(
 63 |     CompareInstruction,
 64 |     (OpCode, opcode)
 65 |     (std::vector<Expression>, arguments)
 66 |     (std::vector<Instruction::Common::CompareOpType::Op>, ops)
 67 | )
 68 | // Grammar for the compare instruction "cmp src1, src2, op1, op2" followed by the end of the statement.
 69 | template<>
 70 | CompareParser<ParserIterator>::CompareParser(const ParserContext& context)
 71 |                 : CompareParser::base_type(instruction),
 72 |                   common(context),
 73 |                   opcodes_compare(common.opcodes_compare),
 74 |                   expression(common.expression),
 75 |                   end_of_statement(common.end_of_statement),
 76 |                   diagnostics(common.diagnostics) {
 77 | 
 78 |         // TODO: Will this properly match >= ? (qi::symbols is documented to pick the longest matching entry, so it should; verify with a test.)
 79 |         compare_ops.add
 80 |                        ( "==", CompareOp::Equal )
 81 |                        ( "!=", CompareOp::NotEqual )
 82 |                        ( "<", CompareOp::LessThan )
 83 |                        ( "<=", CompareOp::LessEqual )
 84 |                        ( ">", CompareOp::GreaterThan )
 85 |                        ( ">=", CompareOp::GreaterEqual );
 86 | 
 87 |         // Setup rules
 88 | 
 89 |         auto comma_rule = qi::lit(',');
 90 |         // The mnemonic is matched case-insensitively and must be followed by whitespace (which is not consumed).
 91 |         opcode = qi::no_case[qi::lexeme[opcodes_compare >> &ascii::space]];
 92 |         compare_op = qi::lexeme[compare_ops];
 93 | 
 94 |         // cmp src1, src2, op1, op2
 95 |         // TODO: Also allow "cmp src1 op1 src2, src1 op2 src2"
 96 |         two_ops = compare_op > comma_rule > compare_op;
 97 |         two_expressions = expression > comma_rule > expression;
 98 |         instr[0] = opcode > two_expressions > comma_rule > two_ops;
 99 |         // Every element after the mnemonic is an expectation, so a partial match is reported as an error.
100 |         instruction = instr[0] > end_of_statement;
101 | 
102 |         // Error handling
103 |         BOOST_SPIRIT_DEBUG_NODE(instr[0]);
104 |         BOOST_SPIRIT_DEBUG_NODE(instruction);
105 |         // Hand failures of the top-level rule to error_handler together with the diagnostics table.
106 |         qi::on_error<qi::fail>(instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
107 | }
108 | 


--------------------------------------------------------------------------------
/src/parser_assembly/declaration.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | 
 29 | #include <boost/fusion/include/adapt_struct.hpp>
 30 | #include <boost/phoenix/core/reference.hpp>
 31 | #include <boost/spirit/include/qi.hpp>
 32 | 
 33 | #include "nihstro/parser_assembly.h"
 34 | #include "nihstro/parser_assembly_private.h"
 35 | 
 36 | #include "nihstro/shader_binary.h"
 37 | #include "nihstro/shader_bytecode.h"
 38 | 
 39 | using spirit::_1;
 40 | using spirit::_2;
 41 | using spirit::_3;
 42 | using spirit::_4;
 43 | 
 44 | using namespace nihstro;
 45 | 
 46 | // Adapt parser data structures for use with boost::spirit
 47 | // NOTE(review): ConditionInput is not referenced by the declaration grammar in this file; flowcontrol.cpp adapts it too.
 48 | BOOST_FUSION_ADAPT_STRUCT(
 49 |     ConditionInput,
 50 |     (bool, invert)
 51 |     (Identifier, identifier)
 52 |     (boost::optional<InputSwizzlerMask>, swizzler_mask)
 53 | )
 54 | // Trailing part of a declaration: a constant value or an output semantic (filled by const_or_semantic).
 55 | BOOST_FUSION_ADAPT_STRUCT(
 56 |     StatementDeclaration::Extra,
 57 |     (std::vector<float>, constant_value)
 58 |     (boost::optional<OutputRegisterInfo::Type>, output_semantic)
 59 | )
 60 | // Members are listed in the order the declaration rule yields them.
 61 | BOOST_FUSION_ADAPT_STRUCT(
 62 |     StatementDeclaration,
 63 |     (std::string, alias_name)
 64 |     (Identifier, identifier_start)
 65 |     (boost::optional<Identifier>, identifier_end)
 66 |     (boost::optional<InputSwizzlerMask>, swizzle_mask)
 67 |     (StatementDeclaration::Extra, extra)
 68 | )
 69 | 
 70 | // Manually define a swap() overload for qi::hold to work.
 71 | /*namespace boost {
 72 | namespace spirit {
 73 | void swap(nihstro::Condition& a, nihstro::Condition& b) {
 74 |     boost::fusion::swap(a, b);
 75 | }
 76 | }
 77 | }*/
 78 | // Grammar for ".alias <name> <identifier>[-<identifier>][.<swizzle>] [as <constant | output semantic>]".
 79 | template<>
 80 | DeclarationParser<ParserIterator>::DeclarationParser(const ParserContext& context)
 81 |                 : DeclarationParser::base_type(declaration),
 82 |                   common(context),
 83 |                   identifier(common.identifier), swizzle_mask(common.swizzle_mask),
 84 |                   end_of_statement(common.end_of_statement),
 85 |                   diagnostics(common.diagnostics) {
 86 | 
 87 |         // Setup symbol table
 88 |         output_semantics.add("position", OutputRegisterInfo::POSITION);
 89 |         output_semantics.add("quaternion", OutputRegisterInfo::QUATERNION);
 90 |         output_semantics.add("color", OutputRegisterInfo::COLOR);
 91 |         output_semantics.add("texcoord0", OutputRegisterInfo::TEXCOORD0);
 92 |         output_semantics.add("texcoord1", OutputRegisterInfo::TEXCOORD1);
 93 |         output_semantics.add("texcoord2", OutputRegisterInfo::TEXCOORD2);
 94 |         output_semantics.add("view", OutputRegisterInfo::VIEW);
 95 |         output_semantics_rule = qi::lexeme[output_semantics];
 96 | 
 97 |         // Setup rules
 98 |         // The keyword "alias" must be followed by a blank; the keyword itself yields no attribute.
 99 |         alias_identifier = qi::omit[qi::lexeme["alias" >> ascii::blank]] > identifier;
100 | 
101 |         // e.g. 5.4 or (1.1, 2, 3)
102 |         constant = (qi::repeat(1)[qi::float_]
103 |                                   | (qi::lit('(') > (qi::float_ % qi::lit(',')) > qi::lit(')')));
104 |         // Placeholders that consume no input and yield an empty vector / an empty optional.
105 |         dummy_const = qi::attr(std::vector<float>());
106 |         dummy_semantic = qi::attr(boost::optional<OutputRegisterInfo::Type>());
107 | 
108 |         // match a constant or a semantic, and fill the respective other one with a dummy
109 |         const_or_semantic = (dummy_const >> output_semantics_rule) | (constant >> dummy_semantic);
110 | 
111 |         // TODO: Would like to use +ascii::blank instead, but somehow that fails to parse lines like ".alias name o2.xy texcoord0" correctly
112 |         string_as = qi::omit[qi::no_skip[*/*+*/ascii::blank >> qi::lit("as") >> +ascii::blank]];
113 |         // After the register part: either "as" plus a constant or semantic, or nothing (both extras stay empty).
114 |         declaration = ((qi::lit('.') > alias_identifier) >> identifier >> -(qi::lit('-') > identifier) >> -(qi::lit('.') > swizzle_mask))
115 |                        >> (
116 |                             (string_as > const_or_semantic)
117 |                             | (dummy_const >> dummy_semantic)
118 |                           )
119 |                        > end_of_statement;
120 | 
121 |         // Error handling
122 |         output_semantics_rule.name("output semantic after \"as\"");
123 |         alias_identifier.name("known preprocessor directive (i.e. alias).");
124 |         const_or_semantic.name("constant or semantic after \"as\"");
125 |         // NOTE(review): BOOST_SPIRIT_DEBUG_NODE also assigns a name; confirm the names set above survive it.
126 |         BOOST_SPIRIT_DEBUG_NODE(output_semantics_rule);
127 |         BOOST_SPIRIT_DEBUG_NODE(constant);
128 |         BOOST_SPIRIT_DEBUG_NODE(alias_identifier);
129 |         BOOST_SPIRIT_DEBUG_NODE(const_or_semantic);
130 |         BOOST_SPIRIT_DEBUG_NODE(declaration);
131 |         // Hand failures of the top-level rule to error_handler together with the diagnostics table.
132 |         qi::on_error<qi::fail>(declaration, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
133 | }
134 | 


--------------------------------------------------------------------------------
/src/parser_assembly/floatop.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | 
 29 | // Enable this for detailed XML overview of parser results
 30 | // #define BOOST_SPIRIT_DEBUG
 31 | 
 32 | #include <boost/fusion/include/adapt_struct.hpp>
 33 | #include <boost/phoenix/core/reference.hpp>
 34 | #include <boost/spirit/include/qi.hpp>
 35 | 
 36 | #include "nihstro/parser_assembly.h"
 37 | #include "nihstro/parser_assembly_private.h"
 38 | 
 39 | #include "nihstro/shader_binary.h"
 40 | #include "nihstro/shader_bytecode.h"
 41 | 
 42 | namespace spirit = boost::spirit;
 43 | namespace qi = boost::spirit::qi;
 44 | namespace ascii = boost::spirit::qi::ascii;
 45 | namespace phoenix = boost::phoenix;
 46 | 
 47 | using spirit::_1;
 48 | using spirit::_2;
 49 | using spirit::_3;
 50 | using spirit::_4;
 51 | 
 52 | using namespace nihstro;
 53 | 
 54 | // Adapt parser data structures for use with boost::spirit
 55 | // NOTE(review): Parser::ParseFloatOp fills a FloatOpInstruction; presumably an alias of StatementInstruction - confirm in the header.
 56 | BOOST_FUSION_ADAPT_STRUCT(
 57 |     StatementInstruction,
 58 |     (OpCode, opcode)
 59 |     (std::vector<Expression>, expressions)
 60 | )
 61 | // Grammar for float instructions: a mnemonic followed by one to four comma-separated expressions.
 62 | template<>
 63 | FloatOpParser<ParserIterator>::FloatOpParser(const ParserContext& context)
 64 |                 : FloatOpParser::base_type(float_instruction),
 65 |                   common(context),
 66 |                   opcodes_float(common.opcodes_float),
 67 |                   expression(common.expression),
 68 |                   end_of_statement(common.end_of_statement),
 69 |                   diagnostics(common.diagnostics) {
 70 | 
 71 |         // Setup rules
 72 | 
 73 |         auto comma_rule = qi::lit(',');
 74 | 
 75 |         for (int i = 0; i < 4; ++i) {
 76 |             // Make sure that a mnemonic is always followed by a space (such that e.g. "addbla" fails to match)
 77 |             opcode[i] = qi::no_case[qi::lexeme[opcodes_float[i] >> &ascii::space]];
 78 |         }
 79 | 
 80 |         // chain of arguments for each group of opcodes
 81 |         expression_chain[0] = expression;
 82 |         for (int i = 1; i < 4; ++i) {
 83 |             expression_chain[i] = expression_chain[i - 1] >> comma_rule > expression;
 84 |         }
 85 |         // Group i pairs the mnemonics of opcodes_float[i] with a chain of exactly i + 1 expressions.
 86 |         // e.g. "add o1, t2, t5"
 87 |         float_instr[0] = opcode[0] > expression_chain[0];
 88 |         float_instr[1] = opcode[1] > expression_chain[1];
 89 |         float_instr[2] = opcode[2] > expression_chain[2];
 90 |         float_instr[3] = opcode[3] > expression_chain[3];
 91 |         // The groups are tried in order; whichever matches must be followed by the end of the statement.
 92 |         float_instruction %= (float_instr[0] | float_instr[1] | float_instr[2] | float_instr[3]) > end_of_statement;
 93 | 
 94 |         // Error handling
 95 |         BOOST_SPIRIT_DEBUG_NODE(opcode[0]);
 96 |         BOOST_SPIRIT_DEBUG_NODE(opcode[1]);
 97 |         BOOST_SPIRIT_DEBUG_NODE(opcode[2]);
 98 |         BOOST_SPIRIT_DEBUG_NODE(opcode[3]);
 99 | 
100 |         BOOST_SPIRIT_DEBUG_NODE(expression_chain[0]);
101 |         BOOST_SPIRIT_DEBUG_NODE(expression_chain[1]);
102 |         BOOST_SPIRIT_DEBUG_NODE(expression_chain[2]);
103 |         BOOST_SPIRIT_DEBUG_NODE(expression_chain[3]);
104 | 
105 |         BOOST_SPIRIT_DEBUG_NODE(float_instr[0]);
106 |         BOOST_SPIRIT_DEBUG_NODE(float_instr[1]);
107 |         BOOST_SPIRIT_DEBUG_NODE(float_instr[2]);
108 |         BOOST_SPIRIT_DEBUG_NODE(float_instr[3]);
109 |         BOOST_SPIRIT_DEBUG_NODE(float_instruction);
110 |         // Descriptions keyed by chain rule name; NOTE(review): presumably looked up by error_handler - confirm.
111 |         diagnostics.Add(expression_chain[0].name(), "one argument");
112 |         diagnostics.Add(expression_chain[1].name(), "two arguments");
113 |         diagnostics.Add(expression_chain[2].name(), "three arguments");
114 |         diagnostics.Add(expression_chain[3].name(), "four arguments");
115 |         // Hand failures of the top-level rule to error_handler together with the diagnostics table.
116 |         qi::on_error<qi::fail>(float_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
117 | }
118 | 


--------------------------------------------------------------------------------
/src/parser_assembly/flowcontrol.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | 
 29 | // Enable this for detailed XML overview of parser results
 30 | // #define BOOST_SPIRIT_DEBUG
 31 | 
 32 | #include <boost/fusion/include/adapt_struct.hpp>
 33 | #include <boost/fusion/include/swap.hpp>
 34 | #include <boost/phoenix/core/reference.hpp>
 35 | #include <boost/spirit/include/qi.hpp>
 36 | 
 37 | #include "nihstro/parser_assembly.h"
 38 | #include "nihstro/parser_assembly_private.h"
 39 | 
 40 | #include "nihstro/shader_binary.h"
 41 | #include "nihstro/shader_bytecode.h"
 42 | 
 43 | namespace spirit = boost::spirit;
 44 | namespace qi = boost::spirit::qi;
 45 | namespace ascii = boost::spirit::qi::ascii;
 46 | namespace phoenix = boost::phoenix;
 47 | 
 48 | using spirit::_1;
 49 | using spirit::_2;
 50 | using spirit::_3;
 51 | using spirit::_4;
 52 | 
 53 | using namespace nihstro;
 54 | 
// Adapt parser data structures for use with boost::spirit
//
// Each adaptation exposes the listed members, in the listed order, as a Fusion
// sequence. The order therefore has to match the order of the attributes that
// the corresponding grammar rules produce.

// Matches the layout of the condition_input rule: negation flag, identifier,
// optional swizzle mask.
BOOST_FUSION_ADAPT_STRUCT(
    ConditionInput,
    (bool, invert)
    (Identifier, identifier)
    (boost::optional<InputSwizzlerMask>, swizzler_mask)
)

// Matches the layout of the condition rule: first input, operator, second input.
BOOST_FUSION_ADAPT_STRUCT(
    Condition,
    (ConditionInput, input1)
    (Instruction::FlowControlType::Op, op)
    (ConditionInput, input2)
)

// Matches the layout of the instr[] rules: opcode, target label, optional
// return label, optional condition.
BOOST_FUSION_ADAPT_STRUCT(
    FlowControlInstruction,
    (OpCode, opcode)
    (std::string, target_label)
    (boost::optional<std::string>, return_label)
    (boost::optional<Condition>, condition)
)
 78 | 
// Manually define a swap() overload for qi::hold to work.
// The overload simply forwards to boost::fusion::swap, which operates on the
// Fusion adaptation of Condition declared above.
// NOTE(review): presumably this lives in boost::spirit so that the swap call
// issued from inside Spirit's hold directive can find it - confirm before moving it.
namespace boost {
namespace spirit {
void swap(nihstro::Condition& a, nihstro::Condition& b) {
    boost::fusion::swap(a, b);
}
}
}
 87 | 
// Explicit specialization of the FlowControlParser constructor for ParserIterator.
//
// Builds the grammar for flow control instructions. The start rule is
// flow_control_instruction; the opcode tables, the expression/identifier/swizzle
// mask rules, the end-of-statement rule and the diagnostics table are all taken
// from the shared `common` parser, which is constructed from `context`.
template<>
FlowControlParser<ParserIterator>::FlowControlParser(const ParserContext& context)
                : FlowControlParser::base_type(flow_control_instruction),
                  common(context),
                  opcodes_flowcontrol(common.opcodes_flowcontrol),
                  expression(common.expression),
                  identifier(common.identifier),
                  swizzle_mask(common.swizzle_mask),
                  end_of_statement(common.end_of_statement),
                  diagnostics(common.diagnostics) {

        // Textual operators that may join the two inputs of a condition
        condition_ops.add
                   ( "&&",    ConditionOp::And     )
                   ( "||",    ConditionOp::Or      );

        // Setup rules

        // Matches one blank character and discards its attribute
        auto blank_rule = qi::omit[ascii::blank];
        // Labels follow the same syntax as identifiers
        auto label_rule = identifier.alias();

        // Case-insensitive opcode lookup. The opcode must be followed by whitespace
        // (checked via lookahead, not consumed); lexeme disables skipping in between.
        opcode[0] = qi::lexeme[qi::no_case[opcodes_flowcontrol[0]] >> &ascii::space];
        opcode[1] = qi::lexeme[qi::no_case[opcodes_flowcontrol[1]] >> &ascii::space];

        condition_op = qi::lexeme[condition_ops];

        // Yields true if a "!" is present and false otherwise; never fails
        negation = qi::matches[qi::lit("!")];

        // [!]identifier[.swizzle] - once a '.' has been seen, a swizzle mask is mandatory
        condition_input = negation >> identifier >> -(qi::lit('.') > swizzle_mask);

        // May be a condition involving the conditional codes, or a reference to a uniform
        // The two-input form is tried first. qi::hold keeps its partially filled attribute
        // from leaking into the single-input fallback, which fills in ConditionOp::JustX
        // and a default-constructed second input.
        // TODO: Make sure we use qi::hold wherever necessary
        condition = qi::hold[condition_input >> condition_op >> condition_input]
                    | (condition_input >> qi::attr(ConditionOp::JustX) >> qi::attr(ConditionInput{}));

        // if condition
        instr[0] = opcode[0]
                   >> qi::attr("__dummy")  // Dummy label (set indirectly using else,endif, or endloop pseudo-instructions)
                   >> qi::attr(boost::optional<std::string>()) // Dummy return label
                   >> condition;

        // call target_label until return_label if condition
        // Both the "until" and the "if" clause are optional. Each keyword must be preceded
        // by a blank and, once matched, is required to be followed by another blank.
        instr[1] = opcode[1]
                   >> label_rule
                   >> -(qi::no_skip[(blank_rule >> qi::lit("until")) > blank_rule] >> label_rule)
                   >> -(qi::no_skip[(blank_rule >> qi::lit("if")) > blank_rule] >> condition);

        // Either instruction form, which must then be terminated by an end of statement
        flow_control_instruction %= (instr[0] | instr[1]) > end_of_statement;

        // Register rule names for the optional BOOST_SPIRIT_DEBUG output
        BOOST_SPIRIT_DEBUG_NODE(opcode[0]);
        BOOST_SPIRIT_DEBUG_NODE(opcode[1]);
        BOOST_SPIRIT_DEBUG_NODE(negation);
        BOOST_SPIRIT_DEBUG_NODE(condition_op);
        BOOST_SPIRIT_DEBUG_NODE(condition_input);
        BOOST_SPIRIT_DEBUG_NODE(condition);

        BOOST_SPIRIT_DEBUG_NODE(instr[0]);
        BOOST_SPIRIT_DEBUG_NODE(instr[1]);
        BOOST_SPIRIT_DEBUG_NODE(flow_control_instruction);

        // Error handling: a failed expectation point inside flow_control_instruction is
        // passed to error_handler together with the diagnostics table, and the rule then
        // reports a failed match (qi::fail).
        qi::on_error<qi::fail>(flow_control_instruction, error_handler(phoenix::ref(diagnostics), _1, _2, _3, _4));
}
150 | 


--------------------------------------------------------------------------------
/src/parser_shbin.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #include "nihstro/parser_shbin.h"
 29 | 
 30 | using namespace nihstro;
 31 | 
 32 | void ShbinParser::ReadHeaders(const std::string& filename) {
 33 |     file.exceptions(std::fstream::badbit | std::fstream::failbit | std::fstream::eofbit);
 34 |     file.open(filename, std::fstream::in | std::fstream::binary);
 35 | 
 36 |     file.seekg(0);
 37 |     file.read((char*)&dvlb_header, sizeof(dvlb_header));
 38 |     if (dvlb_header.magic_word != DVLBHeader::MAGIC_WORD) {
 39 |         std::stringstream stream;
 40 |         stream << "Wrong DVLB magic word: Got 0x" << std::hex << dvlb_header.magic_word;
 41 |         throw stream.str();
 42 |     }
 43 | 
 44 |     dvle_offsets.resize(dvlb_header.num_programs);
 45 |     dvle_headers.resize(dvlb_header.num_programs);
 46 |     for (auto& offset : dvle_offsets) {
 47 |         file.read((char*)&offset, sizeof(offset));
 48 |     }
 49 | 
 50 |     // DVLP comes directly after the DVLE offset table
 51 |     dvlp_offset = file.tellg();
 52 |     file.seekg(dvlp_offset);
 53 |     file.read((char*)&dvlp_header, sizeof(dvlp_header));
 54 |     if (dvlp_header.magic_word != DVLPHeader::MAGIC_WORD) {
 55 |         std::stringstream stream;
 56 |         stream << "Wrong DVLP magic word at offset " << std::hex << dvlp_offset << ": Got " << std::hex << dvlp_header.magic_word;
 57 |         throw stream.str();
 58 |     }
 59 | 
 60 |     for (int i = 0; i < dvlb_header.num_programs; ++i) {
 61 |         auto& dvle_header = dvle_headers[i];
 62 |         file.seekg(dvle_offsets[i]);
 63 |         file.read((char*)&dvle_header, sizeof(dvle_header));
 64 |         if (dvle_header.magic_word != DVLEHeader::MAGIC_WORD) {
 65 |             std::stringstream stream;
 66 |             stream << "Wrong DVLE header in DVLE #" << i << ": " << std::hex << dvle_header.magic_word;
 67 |             throw stream.str();
 68 |         }
 69 |     }
 70 | 
 71 |     // TODO: Is there indeed exactly one filename per DVLE?
 72 |     dvle_filenames.resize(dvlb_header.num_programs);
 73 |     uint32_t offset = dvlp_offset + dvlp_header.filename_symbol_offset;
 74 |     for (int i = 0; i < dvlb_header.num_programs; ++i) {
 75 |         auto& filename = dvle_filenames[i];
 76 |         filename = ReadSymbol(offset);
 77 |         offset += filename.length() + 1;
 78 |     }
 79 | 
 80 |     // Read shader binary code
 81 |     shader_info.code.resize(dvlp_header.binary_size_words);
 82 |     file.seekg(dvlp_offset + dvlp_header.binary_offset);
 83 |     file.read((char*)shader_info.code.data(), dvlp_header.binary_size_words * sizeof(Instruction));
 84 | 
 85 |     // Read operand descriptor table
 86 |     shader_info.swizzle_info.resize(dvlp_header.swizzle_info_num_entries);
 87 |     file.seekg(dvlp_offset + dvlp_header.swizzle_info_offset);
 88 |     file.read((char*)shader_info.swizzle_info.data(), dvlp_header.swizzle_info_num_entries * sizeof(SwizzleInfo));
 89 | }
 90 | 
 91 | void ShbinParser::ReadDVLE(int dvle_index) {
 92 |     // TODO: Check if we have called ReadHeaders() before!
 93 | 
 94 |     if (dvle_index >= dvlb_header.num_programs) {
 95 |         std::stringstream stream;
 96 |         stream << "Invalid DVLE index " << dvle_index << "given";
 97 |         throw stream.str();
 98 |     }
 99 | 
100 |     auto& dvle_header = dvle_headers[dvle_index];
101 |     auto& dvle_offset = dvle_offsets[dvle_index];
102 | 
103 |     uint32_t symbol_table_offset = dvle_offset + dvle_header.symbol_table_offset;
104 | 
105 |     shader_info.constant_table.resize(dvle_header.constant_table_size);
106 |     uint32_t constant_table_offset = dvle_offset + dvle_header.constant_table_offset;
107 |     file.seekg(constant_table_offset);
108 |     for (int i = 0; i < dvle_header.constant_table_size; ++i)
109 |         file.read((char*)&shader_info.constant_table[i], sizeof(ConstantInfo));
110 | 
111 |     shader_info.label_table.resize(dvle_header.label_table_size);
112 |     uint32_t label_table_offset = dvle_offset + dvle_header.label_table_offset;
113 |     file.seekg(label_table_offset);
114 |     for (int i = 0; i < dvle_header.label_table_size; ++i)
115 |         file.read((char*)&shader_info.label_table[i], sizeof(LabelInfo));
116 |     for (const auto& label_info : shader_info.label_table)
117 |         shader_info.labels.insert({label_info.program_offset, ReadSymbol(symbol_table_offset + label_info.name_offset)});
118 | 
119 |     shader_info.output_register_info.resize(dvle_header.output_register_table_size);
120 |     file.seekg(dvle_offset + dvle_header.output_register_table_offset);
121 |     for (auto& info : shader_info.output_register_info)
122 |         file.read((char*)&info, sizeof(OutputRegisterInfo));
123 | 
124 |     shader_info.uniform_table.resize(dvle_header.uniform_table_size);
125 |     uint32_t uniform_table_offset = dvle_offset + dvle_header.uniform_table_offset;
126 |     file.seekg(uniform_table_offset);
127 |     for (int i = 0; i < dvle_header.uniform_table_size; ++i)
128 |         file.read((char*)&shader_info.uniform_table[i].basic, sizeof(shader_info.uniform_table[i].basic));
129 |     for (auto& uniform_info : shader_info.uniform_table)
130 |         uniform_info.name = ReadSymbol(symbol_table_offset + uniform_info.basic.symbol_offset);
131 | 
132 |     main_offset = dvlp_offset + dvlp_header.binary_offset;
133 | }
134 | 
135 | std::string ShbinParser::ReadSymbol(uint32_t offset) {
136 |     std::string name;
137 |     file.seekg(offset);
138 |     std::getline(file, name, '\0');
139 |     return name;
140 | };
141 | 


--------------------------------------------------------------------------------
/src/preprocessor.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #include <nihstro/parser_assembly_private.h>
 29 | #include <nihstro/preprocessor.h>
 30 | #include <nihstro/source_tree.h>
 31 | 
 32 | #include <boost/spirit/include/qi.hpp>
 33 | 
 34 | #include <fstream>
 35 | 
 36 | namespace nihstro {
 37 | 
// Grammar that recognizes a single include directive of the form
//     .include "path"
// and yields the text between the quotes as its std::string attribute.
// Whitespace/comments are skipped according to AssemblySkipper.
template<typename Iterator>
struct IncludeParser : qi::grammar<Iterator, std::string(), AssemblySkipper<Iterator>> {
    using Skipper = AssemblySkipper<Iterator>;

    IncludeParser() : IncludeParser::base_type(include) {
        // 1. The keyword, which must be followed by whitespace (lookahead only).
        // 2. A quoted path made up of one or more characters from the listed set.
        // 3. End of line or end of input.
        // Parts 2 and 3 are expectation points ('>'): once the keyword has matched,
        // their failure is reported by Spirit as an expectation failure rather than
        // as an ordinary mismatch. No on_error handler is installed on this grammar.
        include = qi::lexeme[qi::lit(".include") >> &qi::ascii::space]
                  > qi::lexeme[qi::lit("\"") > +qi::char_("a-zA-Z0-9./_\\-") > qi::lit("\"")]
                  > qi::omit[qi::eol | qi::eoi];
    }

    // Start rule of the grammar
    qi::rule<Iterator, std::string(), Skipper> include;
};
 50 | 
 51 | 
 52 | SourceTree PreprocessAssemblyFile(const std::string& filename) {
 53 |     SourceTree tree;
 54 |     tree.file_info.filename = filename;
 55 | 
 56 |     std::ifstream input_file(filename);
 57 |     if (!input_file) {
 58 |         throw std::runtime_error("Could not open input file " + filename);
 59 |     }
 60 | 
 61 |     std::string prefix;
 62 |     {
 63 |         auto last_slash = filename.find_last_of("/");
 64 |         if (last_slash != std::string::npos)
 65 |             prefix = filename.substr(0, last_slash + 1);
 66 |     }
 67 | 
 68 |     input_file.seekg(0, std::ios::end);
 69 |     tree.code.resize(input_file.tellg());
 70 | 
 71 |     input_file.seekg(0, std::ios::beg);
 72 |     input_file.read(&tree.code[0], tree.code.size());
 73 |     input_file.close();
 74 | 
 75 |     auto cursor = tree.code.begin();
 76 | 
 77 |     IncludeParser<decltype(cursor)> include_parser;
 78 |     AssemblySkipper<decltype(cursor)> skipper;
 79 | 
 80 |     while (cursor != tree.code.end()) {
 81 |         std::string parsed_filename;
 82 |         auto cursor_prev = cursor;
 83 |         if (qi::phrase_parse(cursor, tree.code.end(), include_parser, skipper, parsed_filename)) {
 84 |             if (parsed_filename[0] == '/')
 85 |                 throw std::runtime_error("Given filename must be relative to the path of the including file");
 86 | 
 87 |             // TODO: Protect against circular inclusions
 88 |             auto newtree = PreprocessAssemblyFile(prefix + parsed_filename);
 89 |             tree.Attach(newtree, cursor_prev - tree.code.begin());
 90 |             cursor = tree.code.erase(cursor_prev, cursor);
 91 |             cursor = tree.code.insert(cursor, '\n');
 92 |         } else {
 93 |             // Skip this line
 94 |             qi::parse(cursor, tree.code.end(), *(qi::char_ - (qi::eol | qi::eoi)) >> (qi::eol | qi::eoi));
 95 |         }
 96 |     }
 97 |     return tree;
 98 | }
 99 | 
100 | } // namespace
101 | 


--------------------------------------------------------------------------------
/src/tests/source_tree_iterator.cpp:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Tony Wasserka
  2 | // All rights reserved.
  3 | //
  4 | // Redistribution and use in source and binary forms, with or without
  5 | // modification, are permitted provided that the following conditions are met:
  6 | //
  7 | //     * Redistributions of source code must retain the above copyright
  8 | //       notice, this list of conditions and the following disclaimer.
  9 | //     * Redistributions in binary form must reproduce the above copyright
 10 | //       notice, this list of conditions and the following disclaimer in the
 11 | //       documentation and/or other materials provided with the distribution.
 12 | //     * Neither the name of the owner nor the names of its contributors may
 13 | //       be used to endorse or promote products derived from this software
 14 | //       without specific prior written permission.
 15 | //
 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | #include <iostream>
 29 | #include "nihstro/source_tree.h"
 30 | 
 31 | #define BOOST_TEST_MODULE SourceTreeIterator
 32 | #include <boost/test/unit_test.hpp>
 33 | 
 34 | #include <iterator>
 35 | 
 36 | namespace std {
 37 | 
 38 | std::ostream& operator << (std::ostream& os, const nihstro::SourceTree& tree) {
 39 |     std::string::const_iterator it = tree.code.cbegin();
 40 |     for (auto& child : tree.children) {
 41 |         os << "\"";
 42 |         os << std::string(it, tree.code.cbegin() + child.offset_within_parent);
 43 |         os << "\"";
 44 |         os << " { ";
 45 |         os << child.tree;
 46 |         os << " } ";
 47 |         it = tree.code.cbegin() + child.offset_within_parent;
 48 |     }
 49 |     os << "\"" << std::string(it, tree.code.end()) << "\"";
 50 |     return os;
 51 | }
 52 | 
 53 | }
 54 | 
 55 | // Utility function to manually flatten the given tree into a string
 56 | static std::string FlattenTree(const nihstro::SourceTree& tree) {
 57 |     std::string ret;
 58 |     std::string::const_iterator it = tree.code.cbegin();
 59 |     for (auto& child : tree.children) {
 60 |         ret += std::string(it, tree.code.cbegin() + child.offset_within_parent);
 61 |         ret += FlattenTree(child.tree);
 62 |         it = tree.code.cbegin() + child.offset_within_parent;
 63 |     }
 64 |     ret += std::string(it, tree.code.end());
 65 |     return ret;
 66 | }
 67 | 
 68 | // Utility function to manually determine the size of the given tree
 69 | static std::string::size_type TreeSize(const nihstro::SourceTree& tree) {
 70 |     std::string::size_type ret = 0;
 71 |     for (auto& child : tree.children) {
 72 |         ret += TreeSize(child.tree);
 73 |     }
 74 |     ret += tree.code.length();
 75 |     return ret;
 76 | }
 77 | 
// Checks the iterator interface of `tree` against the reference helpers above:
//  - end() - begin() and std::distance(begin(), end()) must both equal TreeSize(tree)
//  - forward iteration (range-based for) must produce FlattenTree(tree)
//  - stepping backwards from end() - 1 to begin() must produce the reversed text
//
// The reverse check dereferences end() - 1, so the tree must contain at least one
// character. `tree` is expanded several times; pass a plain variable.
#define CHECK_TREE(tree) do { \
    /* Check length */ \
    BOOST_CHECK_EQUAL(tree.end() - tree.begin(), TreeSize(tree)); \
    BOOST_CHECK_EQUAL(std::distance(tree.begin(), tree.end()), TreeSize(tree)); \
    /* Check forward iteration */ \
    std::string flattened_tree; \
    for (auto& val : tree) \
        flattened_tree += val; \
    auto reference_flattened_tree = FlattenTree(tree); \
    BOOST_CHECK_EQUAL(flattened_tree, reference_flattened_tree); \
    BOOST_CHECK_EQUAL_COLLECTIONS(flattened_tree.begin(), flattened_tree.end(), \
                                  reference_flattened_tree.begin(), reference_flattened_tree.end()); \
    \
    /* Check reverse iteration */ \
    flattened_tree.clear(); \
    for (auto it = tree.end() - 1;; it -= 1) { \
        flattened_tree += *it; \
        if (it == tree.begin()) \
            break; \
    } \
    std::reverse(reference_flattened_tree.begin(), reference_flattened_tree.end()); \
    BOOST_CHECK_EQUAL(flattened_tree, reference_flattened_tree); \
    BOOST_CHECK_EQUAL_COLLECTIONS(flattened_tree.begin(), flattened_tree.end(), \
                                  reference_flattened_tree.begin(), reference_flattened_tree.end()); \
 \
} while (false)
104 | 
// A tree that consists of a single node without any children.
BOOST_AUTO_TEST_CASE(simple_tree) {
    nihstro::SourceTree tree;

    tree.code = "a b c";

    CHECK_TREE(tree);
}
112 | 
113 | BOOST_AUTO_TEST_CASE(nested_tree) {
114 |     nihstro::SourceTree tree;
115 |     nihstro::SourceTree child1;
116 |     nihstro::SourceTree child2;
117 | 
118 |     tree.code = "aXbXc";
119 |     child1.code = "child1";
120 |     child2.code = "child2";
121 | 	tree.Attach(child1, 1).Attach(child2, 3);
122 | 
123 |     CHECK_TREE(tree);
124 | }
125 | 
126 | BOOST_AUTO_TEST_CASE(deep_tree) {
127 |     nihstro::SourceTree tree;
128 |     nihstro::SourceTree child1;
129 |     nihstro::SourceTree child1_child1;
130 |     nihstro::SourceTree child1_child2;
131 |     nihstro::SourceTree child1_child2_child1;
132 |     nihstro::SourceTree child1_child3;
133 |     nihstro::SourceTree child2;
134 |     nihstro::SourceTree child3;
135 |     nihstro::SourceTree child3_child1;
136 |     nihstro::SourceTree child4;
137 | 
138 |     tree.code = "aaaXaaaXaaaXaaaXaaa";
139 |     child1.code = "FirstChild:bbbXbbbXbbbXbbb\n";
140 |     child1_child1.code = "FirstSubchildOfChild1:ccc";
141 |     child1_child2.code = "SecondSubchildOfChild1:dddXddd";
142 |     child1_child2_child1.code = "FirstSubsubchildOfSubchild2OfChild1:eee";
143 |     child1_child3.code = "ThirdSubchildOfChild1:fff";
144 |     child2.code = "SecondChild:ggg\n";
145 |     child3.code = "ThirdChild:hhhXhhh\n";
146 |     child3_child1.code = "FirstSubchildOfChild3:iii";
147 |     child4.code = "FourthChild:jjj\n";
148 | 
149 |     child1_child2.Attach(child1_child2_child1, 26);
150 |     child1.Attach(child1_child1, 14).Attach(child1_child2, 18).Attach(child1_child3, 22);
151 |     child3.Attach(child3_child1, 14);
152 |     tree.Attach(child1, 3).Attach(child2, 7).Attach(child3, 11).Attach(child4, 15);
153 | 
154 |     CHECK_TREE(tree);
155 | }
156 | 
// Boundary cases: a child attached before the first character of the parent's
// code, and a child attached after its last character.
BOOST_AUTO_TEST_CASE(subtree_at_begin_and_end) {
    nihstro::SourceTree tree;
    nihstro::SourceTree child1;
    tree.code = "aaa";
    child1.code = "bbb";

    // Child at the very beginning (offset 0)
    tree.Attach(child1, 0);
    CHECK_TREE(tree);

    // Detach it again and re-attach the same child at the very end
    tree.children.clear();
    tree.Attach(child1, tree.code.length());
    CHECK_TREE(tree);
}
170 | 


--------------------------------------------------------------------------------