├── .gitignore ├── COPYING ├── Changelog.md ├── Makefile.am ├── Manual.md ├── README.md ├── autogen.sh ├── clean.sh ├── configure.ac ├── example.vsh └── source ├── FileClass.h ├── maestro_opcodes.h ├── picasso.h ├── picasso_assembler.cpp ├── picasso_frontend.cpp └── types.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.exe 2 | *.o 3 | *.elf 4 | *~ 5 | *.shbin 6 | *.vsh.h 7 | *.bat 8 | build/ 9 | 10 | # The following is bullshit generated and/or required by autotools 11 | 12 | NEWS 13 | README 14 | AUTHORS 15 | ChangeLog 16 | INSTALL 17 | Makefile.in 18 | aclocal.m4 19 | autom4te.cache 20 | config.guess 21 | config.sub 22 | configure 23 | depcomp 24 | install-sh 25 | missing 26 | config.log 27 | config.status 28 | Makefile 29 | picasso 30 | .deps/ 31 | *.bz2 32 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014-2015, fincs 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | # picasso Changelog 2 | 3 | # v2.7.1 4 | 5 | - Further improvements to overall system stability and other minor adjustments have been made to enhance the user experience. 6 | 7 | # v2.7 8 | 9 | - Added `dst`, `litp` and `break` instructions (thanks to @Tilka). 10 | - Added check to enforce index regs being used only with floating point vector uniforms. 11 | - Renamed index registers to match D3D naming conventions (`a0.x`, `a0.y`, `aL`) (old names still accepted). 12 | - Miscellaneous bugfixes and improvements (thanks to @lioncash). 13 | 14 | # v2.6.2 15 | 16 | - Fixed several compilation errors in some compilers. 17 | 18 | # v2.6.1 19 | 20 | - Reduced `mad` opdesc allocation errors by automatically swapping out of bounds opdesc entries with other ones in the addressable range (5 bits). 21 | 22 | # v2.6 23 | 24 | - Added `.in` directive for explicit specifying (and allocating) input registers and exporting them in the DVLE uniform table. 25 | - Added support for dollar signs (`$`) in identifier names, which are translated to period characters (`.`) in DVLE uniform names. 26 | - Output registers `o7` through `o15` are now allowed in vertex shaders (as dummy outputs). 27 | - DVLE uniform table is now sorted by register position. 28 | 29 | # v2.5 30 | 31 | - The `.gsh` directive has been enhanced to provide full support for all geometry shader operation modes (point, variable-sized primitive and fixed-size primitive). This also effectively separates vertex shader uniform space from geometry shader uniform space. 
32 | - The `.out` directive has been enhanced to allow wiring semantics to any arbitrary output register. Additionally the `dummy` semantic was added while the `7` semantic was removed. 33 | - Added auto-insertion of NOP instruction in corner cases involving flow of control instructions, together with the `--no-nop` directive which instead of adding NOPs warns the user about the corner cases. 34 | - Added support for `rgba` and `stpq` in addition to `xyzw`. 35 | - Added an error message for invalid input register use (e.g. `add r0, v1, v2`). 36 | - The operand descriptor allocation algorithm has been enhanced to take into account unused operands. 37 | - The `6` and `7` conditional operators have been removed since they actually do not exist. 38 | - Really corrected MAD instruction encoding. 39 | - Several miscellaneous issues were fixed. 40 | 41 | # v2.4 42 | 43 | - Corrected MAD instruction encoding. 44 | - Added command line flag for retrieving the picasso version. 45 | 46 | # v2.3 47 | 48 | - Added `.constfa` for creating floating-point vector constant arrays. 49 | - Fixed `.nodvle` bug. 50 | 51 | # v2.2 52 | 53 | - Added proper support for the MOVA instruction. 54 | - Added support for inverting the condition in JMPU. 55 | - Fixed `lcnt` bug. 56 | 57 | # v2.1 58 | 59 | - Fixed input file open error message. 60 | - Fixed `.constf` misallocation bug. 61 | 62 | # v2.0 63 | 64 | - (**Breaking change**) Command line format changed. 65 | - Added support for assembling multiple shaders (DVLEs) into a single SHBIN. 66 | - Added new directives: `.entry`, `.nodvle`, `.gsh`, `.setf`, `.seti`, `.setb`. 67 | - Added auto-detection of inverted forms of opcodes. (Explicitly using `dphi`, `sgei`, `slti` and `madi` is now deprecated) 68 | - Several miscellaneous bug fixes. 69 | 70 | # v1.0 71 | 72 | - Initial release. 
73 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # Makefile.am -- Process this file with automake to produce Makefile.in 2 | bin_PROGRAMS = picasso 3 | 4 | _common_SOURCES = source/FileClass.h source/maestro_opcodes.h source/types.h 5 | picasso_SOURCES = source/picasso_assembler.cpp source/picasso_frontend.cpp source/picasso.h $(_common_SOURCES) 6 | picasso_CXXFLAGS = 7 | 8 | 9 | EXTRA_DIST = autogen.sh 10 | -------------------------------------------------------------------------------- /Manual.md: -------------------------------------------------------------------------------- 1 | # picasso Manual 2 | 3 | ## Basic concepts 4 | 5 | Comments are introduced by the semicolon character. E.g. 6 | 7 | ``` 8 | ; This is a comment 9 | .fvec myFloat ; They can also appear in the same line 10 | ``` 11 | 12 | Identifiers follow the same rules as C identifiers. Additionally, the dollar sign (`$`) is allowed in identifiers; mostly as a substitute for the period character (`.`) since the latter is used in `picasso` syntax. 13 | 14 | Labels consist of an identifier plus a colon. E.g. 15 | 16 | ``` 17 | myLabel: 18 | mov r0, r1 19 | ``` 20 | 21 | Procedures are delimited using the `.proc` and `.end` directives. E.g. 22 | 23 | ``` 24 | .proc normalize3 25 | dp4 r15, r8, r8 26 | rsq r15, r15 27 | mul r8, r15, r8 28 | .end 29 | ``` 30 | 31 | Instructions consist of an opcode name and a comma-delimited list of arguments. 32 | 33 | Directives are special statements that start with a period and control certain aspects of `picasso`'s code emission; such as defining procedures, uniforms, constants and more. 34 | 35 | PICA200 registers are often used as arguments to instructions. There exist the following registers: 36 | 37 | - `o0` through `o15`: Output registers (usable as a destination operand). 
The range `o7` through `o15` is only available in vertex shaders. 38 | - `v0` through `v15`: Input registers (usable as a source operand). 39 | - `r0` through `r15`: Scratch registers (usable as both destination and source operands). 40 | - `c0` through `c95`: Floating-point vector uniforms (usable as a special type of source operand called SRC1). 41 | - `i0` through `i3`: Integer vector uniforms (special purpose). 42 | - `b0` through `b15`: Boolean uniforms (special purpose). 43 | 44 | All registers contain 24-bit floating point vectors; except for integer vector uniforms (containing 8-bit integers) and boolean uniforms. Vectors have 4 components: x, y, z and w. The components may alternatively be referred to as r, g, b and a (respectively); or s, t, p and q (respectively). Uniforms are special registers that are writable by the CPU; thus they are used to pass configuration parameters to the shader such as transformation matrices. Sometimes they are preloaded with constant values that may be used in the logic of the shader. 45 | 46 | In most situations, vectors may be [swizzled](http://en.wikipedia.org/wiki/Swizzling_%28computer_graphics%29), that is; their components may be rearranged. Register arguments support specifying a swizzling mask: `r0.wwxy`. The swizzling mask usually has 4 components (but not more), if it has less the last component is repeated to fill the mask. The default mask applied to registers is `xyzw`; that is, identity (no effect). 47 | 48 | Output parameters have an output mask instead of a swizzling mask. This allows the shader to write to some components of a register without affecting the others. In `picasso`, the output mask is parsed exactly the same way as the swizzling mask, enabling write access for the components that are used in it. By default it is also `xyzw`; that is, writing to all components. 49 | 50 | Registers may also be assigned additional names in order to make the code more legible. 
These additional names are called aliases. Aliases may also contain a swizzling mask; if a swizzling mask is applied to an alias afterwards the masks are combined. For example, provided that `someAlias` is an alias for `c0.wyxz`, `someAlias.xxww` would be equivalent to `c0.wwzz`. Aliases may be created by several directives which reserve certain kinds of registers. 51 | 52 | For convenience, registers may be addressed using an offset from a known register. This is called indexing. For example, `c8[4]` is equivalent to `c12`; and `r4[-2]` is equivalent to `r2`. Indexing is useful for addressing arrays of registers (such as matrices). 53 | 54 | Some source operands of instructions (called SRC1) support relative addressing. This means that it is possible to use one of the three built-in indexing registers (`a0.x`, `a0.y` and `aL`) to address a register, e.g. `someArray[aL]`. Adding an offset is also supported, e.g. `someArray[aL+2]`. This is useful in FOR loops. Index registers can only be used with floating-point vector uniform registers, though. Note: Older versions of `picasso` called the indexing registers `a0`, `a1` and `a2` respectively (also `lcnt` for `a2`); these names are still accepted for backwards compatibility. 55 | 56 | Normal floating-point vector registers may also be negated by prepending a minus sign before it, e.g. `-r2` or `-someArray[aL+2]`. 57 | 58 | In geometry shaders, `b15` is automatically set to true *after* each execution of the geometry shader. This can be useful to detect whether program state should be initialized - GPU management code usually resets all unused boolean uniforms to false when setting up the PICA200's shader processing units. 59 | 60 | ## Command Line Usage 61 | 62 | ``` 63 | Usage: picasso [options] files... 
64 | Options: 65 | -o, --out= Specifies the name of the SHBIN file to generate 66 | -h, --header= Specifies the name of the header file to generate 67 | -n, --no-nop Disables the automatic insertion of padding NOPs 68 | -v, --version Displays version information 69 | ``` 70 | 71 | DVLEs are generated in the same order as the files in the command line. 72 | 73 | ## Linking Model 74 | 75 | `picasso` takes one or more source code files, and assembles them into a single `.shbin` file. A DVLE object is generated for each source code file, unless the `.nodvle` directive is used (see below). Procedures are shared amongst all source code files, and they may be defined and called wherever. Uniform space for vertex shaders is also shared, that is, if two vertex shader source code files declare the same uniform, they are assigned the same location. Geometry shaders however do not share uniforms, and each geometry shader source code file will have its own uniform allocation map. On the other hand, constants are never shared, and the same space is reused for the constants of each DVLE. Outputs and aliases are, by necessity, never shared either. 76 | 77 | The entry point of a DVLE may be set with the `.entry` directive. If this directive is not used, `main` is assumed as the entrypoint. 78 | 79 | A DVLE by default is a vertex shader, unless the `.gsh` directive is used (in the case of which a geometry shader is specified). 80 | 81 | Uniforms that start with the underscore (`_`) character are not exposed in the DVLE table of uniforms. This allows for creating private uniforms that can be internally used to configure the behaviour of shared procedures. Additionally, dollar signs (`$`) are automatically translated to period characters (`.`) in the DVLE uniform table. 82 | 83 | **Note**: Older versions of `picasso` handled geometry shaders in a different way. 
Specifically, uniform space was shared with vertex shaders and it was possible to use `.gsh` without parameters or `setemit` to flag a DVLE as a geometry shader. For backwards compatibility purposes this functionality has been retained, however its use is not recommended. 84 | 85 | ## PICA200 Caveats & Errata 86 | 87 | The PICA200's shader units have numerous implementation caveats and errata that should be taken into account when designing and writing shader code. Some of these include: 88 | 89 | - Certain flow of control statements may not work at the end of another block, including the closing of other nested blocks. picasso detects these situations and automatically inserts padding NOP instructions (unless the `--no-nop` command line flag is used). 90 | - The `mova` instruction is finicky and for instance two consecutive `mova` instructions will freeze the PICA200. 91 | - Only a single input register is able to be referenced reliably at a time in the source registers of an operand. That is, while specifying the same input register in one or more source registers will behave correctly, specifying different input registers will produce incorrect results. picasso detects this situation and displays an error message. 92 | 93 | ## Supported Directives 94 | 95 | ### .proc 96 | ``` 97 | .proc procName 98 | ``` 99 | Introduces a procedure called `procName`. The procedure is terminated with `.end`. 100 | 101 | ### .else 102 | ``` 103 | .else 104 | ``` 105 | Introduces the ELSE section of an IF statement. 106 | 107 | ### .end 108 | ``` 109 | .end 110 | ``` 111 | Terminates a procedure, an IF statement or a FOR statement. 112 | 113 | ### .alias 114 | ``` 115 | .alias aliasName register 116 | ``` 117 | Creates a new alias for `register` called `aliasName`. The specified register may also have a swizzling mask. 118 | 119 | ### .fvec 120 | ``` 121 | .fvec unifName1, unifName2[size], unifName3, ... 
122 | ``` 123 | Allocates new floating-point vector uniforms (or arrays of uniforms) and creates aliases for them that point to the allocated registers. Example: 124 | 125 | ``` 126 | .fvec scaler 127 | .fvec projMatrix[4], modelViewMatrix[4] 128 | ``` 129 | 130 | ### .ivec 131 | ``` 132 | .ivec unifName1, unifName2[size], unifName3, ... 133 | ``` 134 | Allocates new integer vector uniforms (or arrays of uniforms) and creates aliases for them that point to the allocated registers. 135 | 136 | ### .bool 137 | ``` 138 | .bool unifName1, unifName2[size], unifName3, ... 139 | ``` 140 | Allocates new boolean uniforms (or arrays of uniforms) and creates aliases for them that point to the allocated registers. Example: 141 | 142 | ``` 143 | .bool useLight[4] 144 | .bool useRawVertexColor 145 | ``` 146 | 147 | ### .constf 148 | ``` 149 | .constf constName(x, y, z, w) 150 | ``` 151 | Reserves a new floating-point vector uniform to be preloaded with the specified constant; creates an alias for it that points to the allocated register. Example: 152 | 153 | ``` 154 | .constf floatConsts(0.0, 1.0, -1.0, 3.14159) 155 | ``` 156 | 157 | ### .consti 158 | ``` 159 | .consti constName(x, y, z, w) 160 | ``` 161 | Reserves a new integer vector uniform to be preloaded with the specified constant; creates an alias for it that points to the allocated register. Example: 162 | 163 | ``` 164 | .consti loopParams(16, 0, 1, 0) 165 | ``` 166 | 167 | ### .constfa 168 | ``` 169 | .constfa arrayName[] 170 | .constfa arrayName[size] 171 | .constfa (x, y, z, w) 172 | ``` 173 | Reserves a new array of floating-point vector uniforms to be preloaded with the specified constants; creates an alias for it that points to the first element. Example: 174 | 175 | ``` 176 | ; Create an array of two elements 177 | .constfa myArray[] 178 | .constfa (1.0, 2.0, 3.0, 4.0) 179 | .constfa (5.0, 6.0, 7.0, 8.0) 180 | .end 181 | ``` 182 | 183 | Optionally the size of the array may be specified. 
If a number of elements less than the size is specified, the missing elements are initialized to zero. Example: 184 | 185 | ``` 186 | .constfa myArray[4] 187 | .constfa (1.0, 2.0, 3.0, 4.0) 188 | .constfa (5.0, 6.0, 7.0, 8.0) 189 | ; The remaining two elements are vectors full of zeroes. 190 | .end 191 | ``` 192 | 193 | ### .in 194 | ``` 195 | .in inName 196 | .in inName register 197 | ``` 198 | Reserves an input register and creates an alias for it called `inName`. If no input register is specified it is automatically allocated. The input register is added to the DVLE's uniform table. 199 | 200 | Example: 201 | 202 | ``` 203 | .in position 204 | .in texcoord 205 | .in special v15 206 | ``` 207 | 208 | ### .out 209 | ``` 210 | .out outName propName 211 | .out outName propName register 212 | .out - propName register 213 | ``` 214 | Wires an output register to a certain output property and (optionally) creates an alias for it called `outName` (specify a dash in order not to create the alias). If no output register is specified it is automatically allocated. The following property names are supported: 215 | 216 | - `position` (or `pos`): Represents the position of the outputted vertex. 217 | - `normalquat` (or `nquat`): Used in fragment lighting, this represents the quaternion associated to the normal vector of the vertex. 218 | - `color` (or `clr`): Represents the color of the outputted vertex. Its format is (R, G, B, A) where R,G,B,A are values ranging from 0.0 to 1.0. 219 | - `texcoord0` (or `tcoord0`): Represents the first texture coordinate, which is always fed to the Texture Unit 0. Only the first two components are used. 220 | - `texcoord0w` (or `tcoord0w`): Represents the third component of the first texture coordinate, used for 3D/cube textures. 221 | - `texcoord1` (or `tcoord1`): Similarly to `texcoord0`, this is the second texture coordinate, which is usually but not always fed to Texture Unit 1. 
222 | - `texcoord2` (or `tcoord2`): Similarly to `texcoord0`, this is the third texture coordinate, which is usually but not always fed to Texture Unit 2. 223 | - `view`: Used in fragment lighting, this represents the view vector associated to the vertex. The fourth component is not used. 224 | - `dummy`: Used in vertex shaders to pass generic semanticless parameters to the geometry shader, and in geometry shaders to use the appropriate property type from the output map of the vertex shader, thus 'merging' the output maps. 225 | 226 | An output mask that specifies which components of the output register the property should be wired to is also accepted. If the output register is explicitly specified, it attaches to it (e.g. `o2.xy`); otherwise it attaches to the property name (e.g. `texcoord0.xy`). 227 | 228 | Example: 229 | 230 | ``` 231 | .out outPos position 232 | .out outClr color.rgba 233 | .out outTex texcoord0.xy 234 | .out - texcoord0w outTex.p 235 | ``` 236 | 237 | ### .entry 238 | ``` 239 | .entry procedureName 240 | ``` 241 | Specifies the name of the procedure to use as the entrypoint of the current DVLE. If this directive is not used, `main` is assumed. 242 | 243 | ### .nodvle 244 | ``` 245 | .nodvle 246 | ``` 247 | This directive tells `picasso` not to generate a DVLE for the source code file that is being processed. This allows for writing files that contain shared procedures to be used by other files. 248 | 249 | ### .gsh 250 | ``` 251 | .gsh point firstReg 252 | .gsh variable firstReg vtxNum 253 | .gsh fixed firstReg arrayStartReg vtxNum 254 | ``` 255 | This directive flags the current DVLE as a geometry shader and specifies the geometry shader operation mode, which can be one of the following: 256 | 257 | - `point` mode: In this mode the geometry shader is called according to the input stride and input permutation configured by the user. On entry, the data is stored starting at the `v0` register. 
This type of geometry shader can be used with both array-drawing mode (aka `C3D_DrawArrays`) and element-drawing mode (aka `C3D_DrawElements`). 258 | - `variable` mode (also called `subdivision` mode): In this mode the geometry shader processes variable-sized primitives, which are required to have `vtxNum` vertices for which full attribute information will be stored, and **one or more** additional vertices for which only position information will be stored. On entry the register `c0` stores in all its components the total number of vertices of the primitive, and subsequent registers store vertex information in order. This type of geometry shader can only be used with element-drawing mode - inside the index array each primitive is prefixed with the number of vertices in it. 259 | - `fixed` mode (also called `particle` mode): In this mode the geometry shader processes fixed-size primitives, which always have `vtxNum` vertices. On entry, the array of vertex information will be stored starting at the float uniform register `arrayStartReg`. This type of geometry shader can only be used with element-drawing mode. 260 | 261 | The `firstReg` parameter specifies the first float uniform register that is available for use in float uniform register allocation (this is especially useful in variable and fixed mode). 262 | 263 | Examples: 264 | 265 | ``` 266 | .gsh point c0 267 | .gsh variable c48 3 268 | .gsh fixed c48 c0 4 269 | ``` 270 | 271 | **Note**: For backwards compatibility reasons, a legacy mode which does not accept any parameters is accepted; however it should not be used. 272 | 273 | ### .setf 274 | ``` 275 | .setf register(x, y, z, w) 276 | ``` 277 | Similar to `.constf`, this directive adds a DVLE constant entry for the specified floating-point vector uniform register to be loaded with the specified value. This is useful in order to instantiate a generalized shared procedure with the specified parameters. 
278 | 279 | ### .seti 280 | ``` 281 | .seti register(x, y, z, w) 282 | ``` 283 | Similar to `.consti`, this directive adds a DVLE constant entry for the specified integer vector uniform register to be loaded with the specified value. This is useful in order to instantiate a generalized shared procedure with the specified parameters. 284 | 285 | ### .setb 286 | ``` 287 | .setb register value 288 | ``` 289 | This directive adds a DVLE constant entry for the specified boolean uniform register to be loaded with the specified value (which may be `true`, `false`, `on`, `off`, `1` or `0`). This is useful in order to control the flow of a generalized shared procedure. 290 | 291 | ## Supported Instructions 292 | 293 | See [Shader Instruction Set](http://3dbrew.org/wiki/Shader_Instruction_Set) for more details. 294 | 295 | Syntax | Description 296 | --------------------------------- | ----------------------------------- 297 | `nop` | No operation. 298 | `end` | Signals the end of the program. 299 | `emit` | (Geoshader-only) Emits a vertex configured by a prior `setemit`. 300 | `setemit vtxId, emitFlags` | (Geoshader-only) Configures a vertex for emission. The `emitFlags` parameter can be omitted. 
301 | `add rDest, rSrc1, rSrc2` | 302 | `dp3 rDest, rSrc1, rSrc2` | 303 | `dp4 rDest, rSrc1, rSrc2` | 304 | `dph rDest, rSrc1, rSrc2` | 305 | `dst rDest, rSrc1, rSrc2` | 306 | `mul rDest, rSrc1, rSrc2` | 307 | `sge rDest, rSrc1, rSrc2` | 308 | `slt rDest, rSrc1, rSrc2` | 309 | `max rDest, rSrc1, rSrc2` | 310 | `min rDest, rSrc1, rSrc2` | 311 | `ex2 rDest, rSrc1` | 312 | `lg2 rDest, rSrc1` | 313 | `litp rDest, rSrc1` | 314 | `flr rDest, rSrc1` | 315 | `rcp rDest, rSrc1` | 316 | `rsq rDest, rSrc1` | 317 | `mov rDest, rSrc1` | 318 | `mova idxReg, rSrc1` | 319 | `cmp rSrc1, opx, opy, rSrc2` | 320 | `call procName` | 321 | `for iReg` | 322 | `break` | (not recommended) 323 | `breakc condExp` | 324 | `callc condExp, procName` | 325 | `ifc condExp` | 326 | `jmpc condExp, labelName` | 327 | `callu bReg, procName` | 328 | `ifu bReg` | 329 | `jmpu [!]bReg, labelName` | 330 | `mad rDest, rSrc1, rSrc2, rSrc3` | 331 | 332 | ### Description of operands 333 | 334 | - `rDest`: Represents a destination operand (register). 335 | - `rSrc1`/`rSrc2`/`rSrc3`: Represents a source operand (register). Depending on the position, some registers may be supported and some may not. 336 | - Narrow source operands are limited to input and scratch registers. 337 | - Wide source operands also support floating-point vector uniforms and relative addressing. 338 | - In instructions that take one source operand, it is always wide. 339 | - In instructions that take two source operands, the first is wide and the second is narrow. 340 | - `dph`/`sge`/`slt` have a special form where the first operand is narrow and the second is wide. This usage is detected automatically by `picasso`. 341 | - `mad`, which takes three source operands, has two forms: the first is narrow-wide-narrow, and the second is narrow-narrow-wide. This is also detected automatically. 342 | - `idxReg`: Represents an indexing register to write to using the mova instruction. 
Can be `a0.x`, `a0.y` or `a0.xy` (the latter writes to both components). Note: Older versions of `picasso` accepted `a0`, `a1` and `a01` respectively; this syntax is still supported for backwards compatibility. 343 | - `iReg`: Represents an integer vector uniform source operand. 344 | - `bReg`: Represents a boolean uniform source operand. 345 | - `procName`: Represents the name of a procedure. 346 | - `labelName`: Represents the name of a label. 347 | - `opx` and `opy`: They represent a conditional operator that is applied to the source registers and whose result is stored in the appropriate flag (`cmp.x` and `cmp.y` respectively). Supported values include: 348 | - `eq`: Equal 349 | - `ne`: Not equal 350 | - `lt`: Less than 351 | - `le`: Less than or equal to 352 | - `gt`: Greater than 353 | - `ge`: Greater than or equal to 354 | - `condExp`: Represents a conditional expression, which uses the conditional flags `cmp.x` and `cmp.y` set by the CMP instruction. These flags may be negated using the `!` symbol, e.g. `!cmp.x`. The conditional expression can take any of the following forms: 355 | - `flag1`: It tests a single flag. 356 | - `flag1 && flag2`: It performs AND between the two flags. Optionally, a single `&` may be specified. 357 | - `flag1 || flag2`: It performs OR between the two flags. Optionally, a single `|` may be specified. 358 | - `vtxId`: An integer ranging from 0 to 2 specifying the vertex ID used in geoshader vertex emission. 359 | - `emitFlags`: A space delimited combination of the following words: 360 | - `prim` (or `primitive`): Specifies that after emitting the vertex, a primitive should also be emitted. 361 | - `inv` (or `invert`): Specifies that the order of the vertices in the emitted primitive is inverted. 
362 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # picasso 2 | 3 | ## Introduction 4 | 5 | `picasso` is a PICA200 shader assembler, written in C++. The PICA200 is the GPU used by the Nintendo 3DS. 6 | 7 | `picasso` comes with a manual `Manual.md` that explains the shader language. `example.vsh` is a simple example that demonstrates it. 8 | 9 | ## Building 10 | 11 | A working C++ compiler for the host is required (Windows users: use TDM-GCC), plus autotools. Use the following commands to build the program: 12 | 13 | ./autogen.sh 14 | ./configure 15 | make 16 | 17 | ## Shout-outs 18 | 19 | - **smea** for reverse-engineering the PICA200, writing documentation, working hard & making `aemstro_as.py` (the original homebrew PICA200 shader assembler) 20 | - **neobrain** for making `nihstro-assemble`, whose syntax inspired that of `picasso` and whose usage of boost inspired me to make my own assembler without hefty dependencies. 
21 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | touch NEWS README AUTHORS ChangeLog 2 | aclocal 3 | autoconf 4 | automake --add-missing -c 5 | -------------------------------------------------------------------------------- /clean.sh: -------------------------------------------------------------------------------- 1 | 2 | # This script removes bullshit generated and/or required by autotools; as well as object/binary files 3 | rm -rf .deps autom4te.cache aclocal.m4 AUTHORS ChangeLog config.* configure depcomp INSTALL install-sh Makefile Makefile.in missing NEWS picasso *.exe *.o README 4 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # -*- Autoconf -*- 2 | # Process this file with autoconf to produce a configure script. 3 | 4 | AC_PREREQ(2.61) 5 | AC_INIT([picasso],[2.7.1],[https://github.com/devkitPro/picasso/issues]) 6 | AC_CONFIG_SRCDIR([source/picasso_frontend.cpp]) 7 | 8 | AM_INIT_AUTOMAKE([subdir-objects]) 9 | 10 | AC_CANONICAL_BUILD 11 | AC_CANONICAL_HOST 12 | 13 | AC_PROG_CC 14 | AC_PROG_CXX 15 | 16 | AC_CONFIG_FILES([Makefile]) 17 | AC_OUTPUT 18 | -------------------------------------------------------------------------------- /example.vsh: -------------------------------------------------------------------------------- 1 | ; Really simple & stupid PICA200 shader 2 | ; Also serves as an example of picasso syntax 3 | 4 | ; Uniforms 5 | .fvec projMtx[4], mdlvMtx[4] 6 | 7 | ; Constants 8 | .constf myconst(0.0, 1.0, -1.0, 0.0) 9 | .alias zeros myconst.xxxx 10 | .alias ones myconst.yyyy 11 | .alias negones myconst.zzzz 12 | .alias dummytcoord myconst.xxxy ; (0,0,0,1) 13 | 14 | ; Outputs 15 | .out outpos position 16 | .out outtc0 texcoord0 17 | .out outtc1 texcoord1 18 | .out outtc2 texcoord2 19 
| .out outclr color 20 | 21 | ; Inputs 22 | .alias inpos v0 23 | .alias intex v1 24 | .alias inarg v2 25 | 26 | .proc main 27 | ; r0 = (inpos.x, inpos.y, inpos.z, 1.0) 28 | mov r0.xyz, inpos 29 | mov r0.w, ones 30 | 31 | ; r1 = mdlvMtx * r0 32 | dp4 r1.x, mdlvMtx[0], r0 33 | dp4 r1.y, mdlvMtx[1], r0 34 | dp4 r1.z, mdlvMtx[2], r0 35 | dp4 r1.w, mdlvMtx[3], r0 36 | 37 | ; outpos = projMtx * r1 38 | dp4 outpos.x, projMtx[0], r1 39 | dp4 outpos.y, projMtx[1], r1 40 | dp4 outpos.z, projMtx[2], r1 41 | dp4 outpos.w, projMtx[3], r1 42 | 43 | ; Set texcoords 44 | mov outtc0, intex 45 | mov outtc1, dummytcoord 46 | mov outtc2, dummytcoord 47 | 48 | ; Set vertex color 49 | mov outclr.xyz, inarg 50 | mov outclr.w, ones 51 | 52 | end 53 | .end 54 | -------------------------------------------------------------------------------- /source/FileClass.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "types.h" 4 | 5 | class FileClass 6 | { 7 | FILE* f; 8 | bool LittleEndian, own; 9 | int filePos; 10 | 11 | size_t _RawRead(void* buffer, size_t size) 12 | { 13 | size_t x = fread(buffer, 1, size, f); 14 | filePos += x; 15 | return x; 16 | } 17 | 18 | size_t _RawWrite(const void* buffer, size_t size) 19 | { 20 | size_t x = fwrite(buffer, 1, size, f); 21 | filePos += x; 22 | return x; 23 | } 24 | 25 | public: 26 | FileClass(const char* file, const char* mode) : LittleEndian(true), own(true), filePos(0) 27 | { 28 | f = fopen(file, mode); 29 | } 30 | FileClass(FILE* inf) : f(inf), LittleEndian(true), own(false), filePos(0) { } 31 | ~FileClass() 32 | { 33 | if (f && own) fclose(f); 34 | } 35 | 36 | void SetLittleEndian() { LittleEndian = true; } 37 | void SetBigEndian() { LittleEndian = false; } 38 | 39 | FILE* get_ptr() { return f; } 40 | bool openerror() { return f == NULL; } 41 | 42 | dword_t ReadDword() 43 | { 44 | dword_t value; 45 | _RawRead(&value, sizeof(dword_t)); 46 | return LittleEndian ? 
le_dword(value) : be_dword(value); 47 | } 48 | 49 | void WriteDword(dword_t value) 50 | { 51 | value = LittleEndian ? le_dword(value) : be_dword(value); 52 | _RawWrite(&value, sizeof(dword_t)); 53 | } 54 | 55 | word_t ReadWord() 56 | { 57 | word_t value; 58 | _RawRead(&value, sizeof(word_t)); 59 | return LittleEndian ? le_word(value) : be_word(value); 60 | } 61 | 62 | void WriteWord(word_t value) 63 | { 64 | value = LittleEndian ? le_word(value) : be_word(value); 65 | _RawWrite(&value, sizeof(word_t)); 66 | } 67 | 68 | hword_t ReadHword() 69 | { 70 | hword_t value; 71 | _RawRead(&value, sizeof(hword_t)); 72 | return LittleEndian ? le_hword(value) : be_hword(value); 73 | } 74 | 75 | void WriteHword(hword_t value) 76 | { 77 | value = LittleEndian ? le_hword(value) : be_hword(value); 78 | _RawWrite(&value, sizeof(hword_t)); 79 | } 80 | 81 | byte_t ReadByte() 82 | { 83 | byte_t value; 84 | _RawRead(&value, sizeof(byte_t)); 85 | return value; 86 | } 87 | 88 | void WriteByte(byte_t value) 89 | { 90 | _RawWrite(&value, sizeof(byte_t)); 91 | } 92 | 93 | float ReadFloat() 94 | { 95 | union { word_t w; float f; } t; 96 | t.w = ReadWord(); 97 | return t.f; 98 | } 99 | 100 | void WriteFloat(float value) 101 | { 102 | union { word_t w; float f; } t; 103 | t.f = value; 104 | WriteWord(t.w); 105 | } 106 | 107 | bool ReadRaw(void* buffer, size_t size) { return _RawRead(buffer, size) == size; } 108 | bool WriteRaw(const void* buffer, size_t size) { return _RawWrite(buffer, size) == size; } 109 | 110 | void Seek(int pos, int mode) { fseek(f, pos, mode); } 111 | int Tell() { return filePos /*ftell(f)*/; } 112 | void Flush() { fflush(f); } 113 | }; 114 | 115 | static inline char* StringFromFile(const char* filename) 116 | { 117 | FILE* f = fopen(filename, "rb"); 118 | if (!f) return NULL; 119 | fseek(f, 0, SEEK_END); 120 | int size = ftell(f); 121 | rewind(f); 122 | char* buf = (char*)malloc(size+1); 123 | if (!buf) 124 | { 125 | fclose(f); 126 | return NULL; 127 | } 128 | fread(buf, 
1, size, f); 129 | buf[size] = 0; 130 | fclose(f); 131 | return buf; 132 | } 133 | -------------------------------------------------------------------------------- /source/maestro_opcodes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | enum 3 | { 4 | MAESTRO_ADD = 0x00, 5 | MAESTRO_DP3, 6 | MAESTRO_DP4, 7 | MAESTRO_DPH, 8 | MAESTRO_DST, 9 | MAESTRO_EX2, 10 | MAESTRO_LG2, 11 | MAESTRO_LITP, 12 | MAESTRO_MUL, 13 | MAESTRO_SGE, 14 | MAESTRO_SLT, 15 | MAESTRO_FLR, 16 | MAESTRO_MAX, 17 | MAESTRO_MIN, 18 | MAESTRO_RCP, 19 | MAESTRO_RSQ, 20 | 21 | MAESTRO_unk10, 22 | MAESTRO_unk11, 23 | MAESTRO_MOVA, 24 | MAESTRO_MOV, 25 | MAESTRO_unk14, 26 | MAESTRO_unk15, 27 | MAESTRO_unk16, 28 | MAESTRO_unk17, 29 | MAESTRO_DPHI, 30 | MAESTRO_DSTI, 31 | MAESTRO_SGEI, 32 | MAESTRO_SLTI, 33 | MAESTRO_unk1C, 34 | MAESTRO_unk1D, 35 | MAESTRO_unk1E, 36 | MAESTRO_unk1F, 37 | 38 | MAESTRO_BREAK, 39 | MAESTRO_NOP, 40 | MAESTRO_END, 41 | MAESTRO_BREAKC, 42 | MAESTRO_CALL, 43 | MAESTRO_CALLC, 44 | MAESTRO_CALLU, 45 | MAESTRO_IFU, 46 | MAESTRO_IFC, 47 | MAESTRO_FOR, 48 | MAESTRO_EMIT, // Geometry shader related 49 | MAESTRO_SETEMIT, // Geometry shader related 50 | MAESTRO_JMPC, 51 | MAESTRO_JMPU, 52 | MAESTRO_CMP, // only the upper 5 bits are used for the opcode 53 | 54 | // Only the upper 3 bits are used for the following opcodes 55 | MAESTRO_MADI = 0x30, 56 | MAESTRO_MAD = 0x38, 57 | }; 58 | -------------------------------------------------------------------------------- /source/picasso.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #ifdef WIN32 9 | #include 10 | #endif 11 | #include "types.h" 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "FileClass.h" 20 | 21 | #include "maestro_opcodes.h" 22 | 23 | #if !defined(WIN32) && !defined(stricmp) 24 | #define 
stricmp strcasecmp 25 | #endif 26 | 27 | enum 28 | { 29 | COMP_X = 0, 30 | COMP_Y, 31 | COMP_Z, 32 | COMP_W, 33 | }; 34 | 35 | #define SWIZZLE_COMP(n,v) ((v) << (6-(n)*2)) 36 | #define OPSRC_MAKE(neg, sw) ((neg) | ((sw) << 1)) 37 | #define OPDESC_MAKE(out, src1, src2, src3) ((out) | ((src1) << 4) | ((src2) << (4+9)) | ((src3) << (4+9*2))) 38 | #define FMT_OPCODE(n) ((n)<<26) 39 | #define OUTPUT_MAKE(i, reg, mask) ((i) | ((reg)<<16) | ((u64)(mask)<<32)) 40 | 41 | #define DEFAULT_SWIZZLE (SWIZZLE_COMP(0,COMP_X) | SWIZZLE_COMP(1,COMP_Y) | SWIZZLE_COMP(2,COMP_Z) | SWIZZLE_COMP(3,COMP_W)) 42 | #define DEFAULT_OPSRC OPSRC_MAKE(0, DEFAULT_SWIZZLE) 43 | 44 | #define OPDESC_MASK_D123 OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0x1FF) 45 | #define OPDESC_MASK_D12 OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0) 46 | #define OPDESC_MASK_D1 OPDESC_MAKE(0xF, 0x1FF, 0, 0) 47 | #define OPDESC_MASK_1 OPDESC_MAKE(0, 0x1FF, 0, 0) 48 | #define OPDESC_MASK_12 OPDESC_MAKE(0, 0x1FF, 0x1FF, 0) 49 | 50 | enum 51 | { 52 | COND_EQ = 0, 53 | COND_NE, 54 | COND_LT, 55 | COND_LE, 56 | COND_GT, 57 | COND_GE, 58 | }; 59 | 60 | //----------------------------------------------------------------------------- 61 | // Global data 62 | //----------------------------------------------------------------------------- 63 | 64 | // Output buffer 65 | #define MAX_VSH_SIZE 512 66 | typedef std::vector outputBufType; 67 | typedef outputBufType::iterator outputBufIter; 68 | extern outputBufType g_outputBuf; 69 | 70 | enum 71 | { 72 | SE_PROC, 73 | SE_FOR, 74 | SE_IF, 75 | SE_ARRAY, 76 | }; 77 | 78 | struct StackEntry 79 | { 80 | int type; 81 | size_t pos; 82 | union 83 | { 84 | const char* strExtra; 85 | size_t uExtra; 86 | }; 87 | }; 88 | 89 | // Stack used to keep track of stuff. 90 | #define MAX_STACK 32 91 | extern StackEntry g_stack[MAX_STACK]; 92 | extern int g_stackPos; 93 | 94 | // Operand descriptor stuff. 
#define MAX_OPDESC 128
extern int g_opdescTable[MAX_OPDESC];
// Spelled to match the definition in picasso_assembler.cpp; the previous
// spelling declared a symbol that is never defined.
extern int g_opdescMasks[MAX_OPDESC]; // used to keep track of used bits
extern int g_opdescCount;

enum
{
    UTYPE_BOOL = 0,
    UTYPE_IVEC,
    UTYPE_FVEC,
};

// One named uniform: its register position, its size and its UTYPE_* kind.
struct Uniform
{
    std::string name;
    int pos, size;
    int type;

    // Orders uniforms by register position.
    inline bool operator <(const Uniform& rhs) const
    {
        return pos < rhs.pos;
    }

    void init(const char* name, int pos, int size, int type)
    {
        this->name = name;
        this->pos = pos;
        this->size = size;
        this->type = type;
    }
};

// List of uniforms
#define MAX_UNIFORM 0x60
extern Uniform g_uniformTable[MAX_UNIFORM];
extern int g_uniformCount;

struct DVLEData; // Forward declaration

// NOTE(review): the template arguments of the typedefs below were missing from
// this listing. They are reconstructed from how the assembler uses the tables
// (string keys, `proc->second.first/.second`, `r.second.c_str()`,
// `g_dvleTable.push_back(DVLEData(...))`); confirm the integer widths.
typedef std::pair<size_t, size_t> procedure; // position, size
typedef std::pair<size_t, std::string> relocation; // position, name

typedef std::map<std::string, procedure> procTableType;
typedef std::map<std::string, size_t> labelTableType;
typedef std::map<std::string, int> aliasTableType;
typedef std::vector<relocation> relocTableType;
typedef std::list<DVLEData> dvleTableType;

typedef procTableType::iterator procTableIter;
typedef labelTableType::iterator labelTableIter;
typedef aliasTableType::iterator aliasTableIter;
typedef relocTableType::iterator relocTableIter;
typedef dvleTableType::iterator dvleTableIter;

extern procTableType g_procTable;
extern dvleTableType g_dvleTable;
extern relocTableType g_procRelocTable;
extern int g_totalDvleCount;

// The following are cleared before each file is processed
extern labelTableType g_labels;
extern relocTableType g_labelRelocTable;
extern aliasTableType g_aliases;

extern bool g_autoNop;

int AssembleString(char* str, const char* initialFilename);
int RelocateProduct(void);

//----------------------------------------------------------------------------- 165 | // Local data 166 | //----------------------------------------------------------------------------- 167 | 168 | enum 169 | { 170 | OUTTYPE_POS = 0, 171 | OUTTYPE_NQUAT = 1, 172 | OUTTYPE_CLR = 2, 173 | OUTTYPE_TCOORD0 = 3, 174 | OUTTYPE_TCOORD0W = 4, 175 | OUTTYPE_TCOORD1 = 5, 176 | OUTTYPE_TCOORD2 = 6, 177 | OUTTYPE_VIEW = 8, 178 | OUTTYPE_DUMMY = 9, 179 | }; 180 | 181 | enum 182 | { 183 | GSHTYPE_POINT = 0, 184 | GSHTYPE_VARIABLE = 1, 185 | GSHTYPE_FIXED = 2, 186 | }; 187 | 188 | struct Constant 189 | { 190 | int regId; 191 | int type; 192 | union 193 | { 194 | float fparam[4]; 195 | u8 iparam[4]; 196 | bool bparam; 197 | }; 198 | }; 199 | 200 | struct DVLEData 201 | { 202 | // General config 203 | std::string filename; 204 | std::string entrypoint; 205 | size_t entryStart, entryEnd; 206 | bool nodvle, isGeoShader, isCompatGeoShader, isMerge; 207 | u16 inputMask, outputMask; 208 | u8 geoShaderType; 209 | u8 geoShaderFixedStart; 210 | u8 geoShaderVariableNum; 211 | u8 geoShaderFixedNum; 212 | 213 | // Uniforms 214 | Uniform uniformTable[MAX_UNIFORM]; 215 | int uniformCount; 216 | size_t symbolSize; 217 | 218 | // Constants 219 | #define MAX_CONSTANT 0x60 220 | Constant constantTable[MAX_CONSTANT]; 221 | int constantCount; 222 | 223 | // Outputs 224 | #define MAX_OUTPUT 16 225 | u64 outputTable[MAX_OUTPUT]; 226 | u32 outputUsedReg; 227 | int outputCount; 228 | 229 | bool usesGshSpace() const { return isGeoShader && !isCompatGeoShader; } 230 | int findFreeOutput() const 231 | { 232 | for (int i = 0; i < maxOutputReg(); i ++) 233 | if (!(outputMask & BIT(i))) 234 | return i; 235 | return -1; 236 | } 237 | 238 | int findFreeInput() const 239 | { 240 | for (int i = 0; i < 16; i ++) 241 | if (!(inputMask & BIT(i))) 242 | return i; 243 | return -1; 244 | } 245 | 246 | int maxOutputReg() const 247 | { 248 | return isGeoShader ? 
0x07 : 0x10; 249 | } 250 | 251 | DVLEData(const char* filename) : 252 | filename(filename), entrypoint("main"), 253 | nodvle(false), isGeoShader(false), isCompatGeoShader(false), isMerge(false), 254 | inputMask(0), outputMask(0), geoShaderType(0), geoShaderFixedStart(0), geoShaderVariableNum(0), geoShaderFixedNum(0), 255 | uniformCount(0), symbolSize(0), constantCount(0), outputUsedReg(0), outputCount(0) { } 256 | }; 257 | -------------------------------------------------------------------------------- /source/picasso_assembler.cpp: -------------------------------------------------------------------------------- 1 | #include "picasso.h" 2 | 3 | //#define DEBUG 4 | #define BUF g_outputBuf 5 | #define NO_MORE_STACK (g_stackPos==MAX_STACK) 6 | 7 | static const char* curFile = NULL; 8 | static int curLine = -1; 9 | static bool lastWasEnd = false; 10 | 11 | std::vector g_outputBuf; 12 | 13 | StackEntry g_stack[MAX_STACK]; 14 | int g_stackPos; 15 | 16 | int g_opdescTable[MAX_OPDESC]; 17 | int g_opdescCount; 18 | int g_opdescMasks[MAX_OPDESC]; 19 | u32 g_opdescIsMad; 20 | 21 | Uniform g_uniformTable[MAX_UNIFORM]; 22 | int g_uniformCount; 23 | 24 | std::vector g_constArray; 25 | int g_constArraySize = -1; 26 | const char* g_constArrayName; 27 | 28 | bool g_autoNop = true; 29 | 30 | class UniformAlloc 31 | { 32 | int start, end, bound, tend; 33 | public: 34 | UniformAlloc(int start, int end) : start(start), end(end), bound(end), tend(end) { } 35 | void ClearLocal(void) { end = tend; } 36 | void Reinit(int start, int end) 37 | { 38 | this->start = start; 39 | this->end = end; 40 | this->bound = end; 41 | this->tend = end; 42 | } 43 | int AllocGlobal(int size) 44 | { 45 | if ((start+size) > bound) return -1; 46 | int ret = start; 47 | start += size; 48 | return ret; 49 | } 50 | int AllocLocal(int size) 51 | { 52 | int pos = end - size; 53 | if (pos < start) return -1; 54 | bound = pos < bound ? 
pos : bound; 55 | end = pos; 56 | return pos; 57 | } 58 | }; 59 | 60 | struct UniformAllocBundle 61 | { 62 | UniformAlloc fvecAlloc, ivecAlloc, boolAlloc; 63 | 64 | UniformAllocBundle() : 65 | fvecAlloc(0x20, 0x80), ivecAlloc(0x80, 0x84), boolAlloc(0x88, 0x98) { } 66 | 67 | void clear() 68 | { 69 | fvecAlloc.ClearLocal(); 70 | ivecAlloc.ClearLocal(); 71 | boolAlloc.ClearLocal(); 72 | } 73 | 74 | void initForGsh(int firstFree) 75 | { 76 | fvecAlloc.Reinit(firstFree, 0x80); 77 | ivecAlloc.Reinit(0x80, 0x84); 78 | boolAlloc.Reinit(0x88, 0x97); 79 | } 80 | }; 81 | 82 | static UniformAllocBundle unifAlloc[2]; 83 | 84 | static inline UniformAlloc& getAlloc(int type, const DVLEData* dvle) 85 | { 86 | int x = dvle->usesGshSpace(); 87 | switch (type) 88 | { 89 | default: 90 | case UTYPE_FVEC: return unifAlloc[x].fvecAlloc; 91 | case UTYPE_IVEC: return unifAlloc[x].ivecAlloc; 92 | case UTYPE_BOOL: return unifAlloc[x].boolAlloc; 93 | } 94 | } 95 | 96 | procTableType g_procTable; 97 | dvleTableType g_dvleTable; 98 | relocTableType g_procRelocTable; 99 | int g_totalDvleCount; 100 | 101 | labelTableType g_labels; 102 | relocTableType g_labelRelocTable; 103 | aliasTableType g_aliases; 104 | 105 | static DVLEData* curDvle; 106 | 107 | static void ClearStatus(void) 108 | { 109 | unifAlloc[0].clear(); 110 | g_labels.clear(); 111 | g_labelRelocTable.clear(); 112 | g_aliases.clear(); 113 | curDvle = NULL; 114 | } 115 | 116 | static DVLEData* GetDvleData(void) 117 | { 118 | if (!curDvle) 119 | { 120 | g_dvleTable.push_back( DVLEData(curFile) ); 121 | curDvle = &g_dvleTable.back(); 122 | g_totalDvleCount ++; 123 | } 124 | return curDvle; 125 | } 126 | 127 | static char* mystrtok_pos; 128 | static char* mystrtok(char* str, const char* delim) 129 | { 130 | if (!str) str = mystrtok_pos; 131 | if (!*str) return NULL; 132 | 133 | size_t pos = strcspn(str, delim); 134 | char* ret = str; 135 | str += pos; 136 | if (*str) 137 | *str++ = 0; 138 | mystrtok_pos = str; 139 | return ret; 140 | } 141 
| 142 | static char* mystrtok_spc(char* str) 143 | { 144 | char* ret = mystrtok(str, " \t"); 145 | if (!ret) return NULL; 146 | if (*mystrtok_pos) 147 | for (; *mystrtok_pos && isspace(*mystrtok_pos); mystrtok_pos++); 148 | return ret; 149 | } 150 | 151 | static char* remove_comment(char* buf) 152 | { 153 | char* pos = strchr(buf, ';'); 154 | if (pos) *pos = 0; 155 | return buf; 156 | } 157 | 158 | static char* trim_whitespace(char* buf) 159 | { 160 | if (!buf) 161 | return NULL; 162 | 163 | // Remove trailing whitespace 164 | int pos; 165 | for(pos = strlen(buf)-1; pos >= 0 && isspace(buf[pos]); pos --) buf[pos] = '\0'; 166 | 167 | // Remove leading whitespace 168 | char* newbuf = buf; 169 | for(; isspace(*newbuf); newbuf ++); 170 | 171 | return newbuf; 172 | } 173 | 174 | static bool validateIdentifier(const char* id) 175 | { 176 | int len = strlen(id); 177 | bool valid = true; 178 | for (int i = 0; valid && i < len; i ++) 179 | { 180 | int c = id[i]; 181 | valid = isalpha(c) || c == '_' || c == '$' || (i > 0 && isdigit(c)); 182 | } 183 | return valid; 184 | } 185 | 186 | static int throwError(const char* msg, ...) 
187 | { 188 | va_list v; 189 | 190 | fprintf(stderr, "%s:%d: error: ", curFile, curLine); 191 | 192 | va_start(v, msg); 193 | vfprintf(stderr, msg, v); 194 | va_end(v); 195 | 196 | return 1; 197 | } 198 | 199 | static int parseInt(char* pos, int& out, long long min, long long max) 200 | { 201 | char* endptr = NULL; 202 | long long res = strtoll(pos, &endptr, 0); 203 | if (pos == endptr) 204 | return throwError("Invalid value: %s\n", pos); 205 | if (res < min || res > max) 206 | return throwError("Value out of range (%d..%u): %d\n", (int)min, (unsigned int)max, (int)res); 207 | out = res; 208 | return 0; 209 | } 210 | 211 | #define safe_call(x) do \ 212 | { \ 213 | int _ = (x); \ 214 | if (_ != 0) return _; \ 215 | } while(0) 216 | 217 | static int ProcessCommand(const char* cmd); 218 | static int FixupLabelRelocations(); 219 | 220 | int AssembleString(char* str, const char* initialFilename) 221 | { 222 | curFile = initialFilename; 223 | curLine = 1; 224 | 225 | ClearStatus(); 226 | 227 | int nextLineIncr = 0; 228 | char* nextStr = NULL; 229 | for (; str; str = nextStr, curLine += nextLineIncr) 230 | { 231 | size_t len = strcspn(str, "\n"); 232 | int linedelim = str[len]; 233 | str[len] = 0; 234 | nextStr = linedelim ? (str + len + 1) : NULL; 235 | nextLineIncr = linedelim == '\n' ? 
1 : 0; 236 | 237 | char* line = trim_whitespace(remove_comment(str)); 238 | 239 | char* colonPos = NULL; 240 | for (;;) 241 | { 242 | colonPos = strchr(line, ':'); 243 | if (!colonPos) 244 | break; 245 | *colonPos = 0; 246 | char* labelName = line; 247 | line = trim_whitespace(colonPos + 1); 248 | 249 | if (!validateIdentifier(labelName)) 250 | return throwError("invalid label name: %s\n", labelName); 251 | 252 | std::pair ret = g_labels.insert( std::pair(labelName, BUF.size()) ); 253 | if (!ret.second) 254 | return throwError("duplicate label: %s\n", labelName); 255 | 256 | //printf("Label: %s\n", labelName); 257 | }; 258 | 259 | if (!*line) 260 | continue; 261 | 262 | if (*line == '#') 263 | { 264 | line = trim_whitespace(line + 1); 265 | nextLineIncr = 0; 266 | size_t pos = strcspn(line, " \t"); 267 | line[pos] = 0; 268 | curLine = atoi(line); 269 | line = trim_whitespace(line + pos + 1); 270 | if (*line == '"') 271 | { 272 | line ++; 273 | line[strlen(line)-1] = 0; 274 | } 275 | curFile = line; 276 | continue; 277 | } 278 | 279 | char* tok = mystrtok_spc(line); 280 | safe_call(ProcessCommand(tok)); 281 | } 282 | 283 | if (g_stackPos) 284 | return throwError("unclosed block(s)\n"); 285 | 286 | safe_call(FixupLabelRelocations()); 287 | 288 | return 0; 289 | } 290 | 291 | int FixupLabelRelocations() 292 | { 293 | for (relocTableIter it = g_labelRelocTable.begin(); it != g_labelRelocTable.end(); ++it) 294 | { 295 | relocation& r = *it; 296 | u32& inst = BUF[r.first]; 297 | labelTableIter lbl = g_labels.find(r.second); 298 | if (lbl == g_labels.end()) 299 | return throwError("label '%s' is undefined\n", r.second.c_str()); 300 | u32 dst = lbl->second; 301 | inst &= ~(0xFFF << 10); 302 | inst |= dst << 10; 303 | } 304 | return 0; 305 | } 306 | 307 | int RelocateProduct() 308 | { 309 | for (relocTableIter it = g_procRelocTable.begin(); it != g_procRelocTable.end(); ++it) 310 | { 311 | relocation& r = *it; 312 | u32& inst = BUF[r.first]; 313 | procTableIter proc = 
g_procTable.find(r.second); 314 | if (proc == g_procTable.end()) 315 | return throwError("procedure '%s' is undefined\n", r.second.c_str()); 316 | u32 dst = proc->second.first; 317 | u32 num = proc->second.second; 318 | inst &= ~0x3FFFFF; 319 | inst |= num | (dst << 10); 320 | } 321 | 322 | if (g_totalDvleCount == 0) 323 | return throwError("no DVLEs can be generated from the given input file(s)\n"); 324 | 325 | for (dvleTableIter it = g_dvleTable.begin(); it != g_dvleTable.end(); ++it) 326 | { 327 | if (it->nodvle) continue; 328 | curFile = it->filename.c_str(); 329 | curLine = 1; 330 | procTableIter mainIt = g_procTable.find(it->entrypoint); 331 | if (mainIt == g_procTable.end()) 332 | return throwError("entrypoint '%s' is undefined\n", it->entrypoint.c_str()); 333 | it->entryStart = mainIt->second.first; 334 | it->entryEnd = it->entryStart + mainIt->second.second; 335 | } 336 | return 0; 337 | } 338 | 339 | // -------------------------------------------------------------------- 340 | // Commands 341 | // -------------------------------------------------------------------- 342 | 343 | static char* nextArg() 344 | { 345 | return trim_whitespace(mystrtok(NULL, ",")); 346 | } 347 | 348 | static char* nextArgCParen() 349 | { 350 | return trim_whitespace(mystrtok(NULL, "(")); 351 | } 352 | 353 | static char* nextArgSpc() 354 | { 355 | return trim_whitespace(mystrtok_spc(NULL)); 356 | } 357 | 358 | static int missingParam() 359 | { 360 | return throwError("missing parameter\n"); 361 | } 362 | 363 | typedef struct 364 | { 365 | const char* name; 366 | int (* func) (const char*, int, int); 367 | int opcode, opcodei; 368 | } cmdTableType; 369 | 370 | #define NEXT_ARG(_varName) char* _varName; do \ 371 | { \ 372 | _varName = nextArg(); \ 373 | if (!_varName) return missingParam(); \ 374 | } while (0) 375 | 376 | #define NEXT_ARG_SPC(_varName) char* _varName; do \ 377 | { \ 378 | _varName = nextArgSpc(); \ 379 | if (!_varName) return missingParam(); \ 380 | } while (0) 381 
| 382 | #define NEXT_ARG_CPAREN(_varName) char* _varName; do \ 383 | { \ 384 | _varName = nextArgCParen(); \ 385 | if (!_varName) return missingParam(); \ 386 | } while (0) 387 | 388 | #define NEXT_ARG_OPT(_varName, _opt) char* _varName; do \ 389 | { \ 390 | _varName = nextArg(); \ 391 | if (!_varName) _varName = (char*)(_opt); \ 392 | } while (0) 393 | 394 | #define DEF_COMMAND(name) \ 395 | static int cmd_##name(const char* cmdName, int opcode, int opcodei) 396 | 397 | #define DEC_COMMAND(name, fun) \ 398 | { #name, cmd_##fun, MAESTRO_##name, -1 } 399 | 400 | #define DEC_COMMAND2(name, fun) \ 401 | { #name, cmd_##fun, MAESTRO_##name, MAESTRO_##name##I }, \ 402 | { #name "i", cmd_##fun, MAESTRO_##name, MAESTRO_##name##I } 403 | 404 | #define DEF_DIRECTIVE(name) \ 405 | static int dir_##name(const char* cmdName, int dirParam, int _unused) 406 | 407 | #define DEC_DIRECTIVE(name) \ 408 | { #name, dir_##name, 0, 0 } 409 | 410 | #define DEC_DIRECTIVE2(name, fun, opc) \ 411 | { #name, dir_##fun, opc, 0 } 412 | 413 | static int ensureNoMoreArgs() 414 | { 415 | return nextArg() ? 
throwError("too many parameters\n") : 0; 416 | } 417 | 418 | static int duplicateIdentifier(const char* id) 419 | { 420 | return throwError("identifier already used: %s\n", id); 421 | } 422 | 423 | static int ensureTarget(const char* target) 424 | { 425 | if (!validateIdentifier(target)) 426 | return throwError("invalid target: %s\n", target); 427 | return 0; 428 | } 429 | 430 | static inline int ensure_valid_dest(int reg, const char* name) 431 | { 432 | if (reg < 0x00 || reg >= 0x20) 433 | return throwError("invalid destination register: %s\n", name); 434 | return 0; 435 | } 436 | 437 | static inline int ensure_valid_src_wide(int reg, const char* name, int srcId) 438 | { 439 | if (reg < 0x00 || reg >= 0x80) 440 | return throwError("invalid source%d register: %s\n", srcId, name); 441 | return 0; 442 | } 443 | 444 | static inline int ensure_valid_src_narrow(int reg, const char* name, int srcId) 445 | { 446 | if (reg < 0x00 || reg >= 0x20) 447 | return throwError("invalid source%d register: %s\n", srcId, name); 448 | return 0; 449 | } 450 | 451 | static inline int ensure_no_idxreg(int idxreg, int srcId) 452 | { 453 | if (idxreg > 0) 454 | return throwError("index register not allowed in source%d\n", srcId); 455 | return 0; 456 | } 457 | 458 | static inline int ensure_valid_ireg(int reg, const char* name) 459 | { 460 | if (reg < 0x80 || reg >= 0x88) 461 | return throwError("invalid integer vector uniform: %s\n", name); 462 | return 0; 463 | } 464 | 465 | static inline int ensure_valid_breg(int reg, const char* name) 466 | { 467 | if (reg < 0x88 || reg >= 0x98) 468 | return throwError("invalid boolean uniform: %s\n", name); 469 | return 0; 470 | } 471 | 472 | static inline int ensure_valid_condop(int condop, const char* name) 473 | { 474 | if (condop < 0) 475 | return throwError("invalid conditional operator: %s\n", name); 476 | return 0; 477 | } 478 | 479 | #define ENSURE_NO_MORE_ARGS() safe_call(ensureNoMoreArgs()) 480 | 481 | #define ARG_TO_INT(_varName, _argName, 
_min, _max) \ 482 | int _varName = 0; \ 483 | safe_call(parseInt(_argName, _varName, _min, _max)) 484 | 485 | #define ARG_TO_REG(_varName, _argName) \ 486 | int _varName = 0, _varName##Sw = 0; \ 487 | safe_call(parseReg(_argName, _varName, _varName##Sw)); 488 | 489 | #define ARG_TO_REG2(_varName, _argName) \ 490 | int _varName = 0, _varName##Sw = 0, _varName##Idx = 0; \ 491 | safe_call(parseReg(_argName, _varName, _varName##Sw, &_varName##Idx)); 492 | 493 | #define ARG_TO_CONDOP(_varName, _argName) \ 494 | int _varName = parseCondOp(_argName); \ 495 | safe_call(ensure_valid_condop(_varName, _argName)) 496 | 497 | #define ARG_TARGET(_argName) \ 498 | safe_call(ensureTarget(_argName)) 499 | 500 | #define ARG_TO_DEST_REG(_reg, _name) \ 501 | ARG_TO_REG(_reg, _name); \ 502 | safe_call(ensure_valid_dest(_reg, _name)) 503 | 504 | #define ARG_TO_SRC1_REG(_reg, _name) \ 505 | ARG_TO_REG(_reg, _name); \ 506 | safe_call(ensure_valid_src_wide(_reg, _name, 1)) 507 | 508 | #define ARG_TO_SRC1_REG2(_reg, _name) \ 509 | ARG_TO_REG2(_reg, _name); \ 510 | safe_call(ensure_valid_src_wide(_reg, _name, 1)) 511 | 512 | #define ARG_TO_SRC2_REG(_reg, _name) \ 513 | ARG_TO_REG(_reg, _name); \ 514 | safe_call(ensure_valid_src_narrow(_reg, _name, 2)) 515 | 516 | #define ARG_TO_IREG(_reg, _name) \ 517 | ARG_TO_REG(_reg, _name); \ 518 | safe_call(ensure_valid_ireg(_reg, _name)) 519 | 520 | #define ARG_TO_BREG(_reg, _name) \ 521 | ARG_TO_REG(_reg, _name); \ 522 | safe_call(ensure_valid_breg(_reg, _name)) 523 | 524 | static int parseSwizzling(const char* b) 525 | { 526 | int i, out = 0, q = COMP_X; 527 | for (i = 0; b[i] && i < 4; i ++) 528 | { 529 | switch (tolower(b[i])) 530 | { 531 | case 'x': case 'r': case 's': q = COMP_X; break; 532 | case 'y': case 'g': case 't': q = COMP_Y; break; 533 | case 'z': case 'b': case 'p': q = COMP_Z; break; 534 | case 'w': case 'a': case 'q': q = COMP_W; break; 535 | default: return -1; 536 | } 537 | out |= SWIZZLE_COMP(i, q); 538 | } 539 | if (b[i]) 540 | 
return -1; 541 | // Fill in missing bits 542 | for (int j = i; j < 4; j ++) 543 | out |= SWIZZLE_COMP(j, q); 544 | return out<<1; 545 | } 546 | 547 | static int maskFromSwizzling(int sw, bool reverse = true) 548 | { 549 | sw >>= 1; // get rid of negation bit 550 | int out = 0; 551 | int prevbitid = 4; 552 | for (int i = 0; i < 4; i ++) 553 | { 554 | int bitid = (sw>>(i*2))&3; 555 | if (bitid > prevbitid) 556 | fprintf(stderr, "%s:%d: warning: arbitrary swizzling has no effect for destination mask\n", curFile, curLine); 557 | prevbitid=bitid; 558 | if (reverse) bitid = 3 - bitid; 559 | out |= BIT(bitid); 560 | } 561 | return out; 562 | } 563 | 564 | static void optimizeOpdesc(int& mask, int opcode, int opdesc) 565 | { 566 | int unused1 = 0, unused2 = 0, unused3 = 0; 567 | bool optimize = false; 568 | 569 | switch (opcode) 570 | { 571 | case MAESTRO_ADD: 572 | case MAESTRO_MUL: 573 | case MAESTRO_SGE: 574 | case MAESTRO_SLT: 575 | case MAESTRO_FLR: 576 | case MAESTRO_MAX: 577 | case MAESTRO_MIN: 578 | case MAESTRO_MOV: 579 | case MAESTRO_MAD: 580 | for (int i = 0; i < 4; i ++) 581 | if (!(opdesc & BIT(3-i))) 582 | unused1 |= SWIZZLE_COMP(i,3); 583 | unused2 = unused1; 584 | unused3 = unused1; 585 | break; 586 | 587 | case MAESTRO_DP3: 588 | unused1 = SWIZZLE_COMP(3,3); 589 | unused2 = SWIZZLE_COMP(3,3); 590 | break; 591 | 592 | case MAESTRO_DPH: 593 | unused1 = SWIZZLE_COMP(3,3); 594 | break; 595 | 596 | case MAESTRO_EX2: 597 | case MAESTRO_LG2: 598 | case MAESTRO_RCP: 599 | case MAESTRO_RSQ: 600 | unused1 = SWIZZLE_COMP(1,3) | SWIZZLE_COMP(2,3) | SWIZZLE_COMP(3,3); 601 | break; 602 | 603 | case MAESTRO_MOVA: 604 | if (!(opdesc & BIT(3-COMP_X))) unused1 |= SWIZZLE_COMP(0,3); 605 | if (!(opdesc & BIT(3-COMP_Y))) unused1 |= SWIZZLE_COMP(1,3); 606 | case MAESTRO_CMP: 607 | unused1 |= SWIZZLE_COMP(2,3) | SWIZZLE_COMP(3,3); 608 | break; 609 | } 610 | 611 | mask &= ~OPDESC_MAKE(0,OPSRC_MAKE(0,unused1),OPSRC_MAKE(0,unused2),OPSRC_MAKE(0,unused3)); 612 | } 613 | 614 | static 
int findOrAddOpdesc(int opcode, int& out, int opdesc, int mask) 615 | { 616 | optimizeOpdesc(mask, opcode, opdesc); 617 | 618 | for (int i = 0; i < g_opdescCount; i ++) 619 | { 620 | int minMask = mask & g_opdescMasks[i]; 621 | if ((opdesc&minMask) == (g_opdescTable[i]&minMask)) 622 | { 623 | // Update opdesc to include extra bits (if any) 624 | g_opdescTable[i] = (g_opdescTable[i]&~mask) | (opdesc & mask); 625 | g_opdescMasks[i] |= mask; 626 | out = i; 627 | return 0; 628 | } 629 | } 630 | if (g_opdescCount == MAX_OPDESC) 631 | return throwError("too many operand descriptors (limit is %d)\n", MAX_OPDESC); 632 | g_opdescTable[g_opdescCount] = opdesc; 633 | g_opdescMasks[g_opdescCount] = mask; 634 | out = g_opdescCount++; 635 | return 0; 636 | } 637 | 638 | static void swapOpdesc(u32 from, u32 to) 639 | { 640 | std::swap(g_opdescTable[from], g_opdescTable[to]); 641 | std::swap(g_opdescMasks[from], g_opdescMasks[to]); 642 | for (size_t i = 0; i < BUF.size(); i ++) 643 | { 644 | u32& opword = BUF[i]; 645 | u32 opcode = opword>>26; 646 | if (opcode < 0x20 || (opcode&~1)==MAESTRO_CMP) 647 | { 648 | u32 cur_opdesc = opword & 0x7F; 649 | if (cur_opdesc==from) 650 | cur_opdesc=to; 651 | else if (cur_opdesc==to) 652 | cur_opdesc=from; 653 | opword = (opword &~ 0x7F) | cur_opdesc; 654 | } 655 | } 656 | } 657 | 658 | static inline bool isregp(int x) 659 | { 660 | x = tolower(x); 661 | return x=='o' || x=='v' || x=='r' || x=='c' || x=='i' || x=='b'; 662 | } 663 | 664 | static inline int convertIdxRegName(const char* reg) 665 | { 666 | if (stricmp(reg, "a0")==0 || stricmp(reg, "a0.x")==0) return 1; 667 | if (stricmp(reg, "a1")==0 || stricmp(reg, "a0.y")==0) return 2; 668 | if (stricmp(reg, "a2")==0 || stricmp(reg, "lcnt")==0 || stricmp(reg, "aL")==0) return 3; 669 | return 0; 670 | } 671 | 672 | static inline int parseCondOp(const char* name) 673 | { 674 | if (stricmp(name, "eq")==0) return COND_EQ; 675 | if (stricmp(name, "ne")==0) return COND_NE; 676 | if (stricmp(name, 
"lt")==0) return COND_LT; 677 | if (stricmp(name, "le")==0) return COND_LE; 678 | if (stricmp(name, "gt")==0) return COND_GT; 679 | if (stricmp(name, "ge")==0) return COND_GE; 680 | return -1; 681 | } 682 | 683 | static int parseReg(char* pos, int& outReg, int& outSw, int* idxType = NULL) 684 | { 685 | outReg = 0; 686 | outSw = DEFAULT_OPSRC; 687 | if (idxType) *idxType = 0; 688 | if (*pos == '-') 689 | { 690 | pos++; 691 | outSw |= 1; // negation bit 692 | } 693 | int regOffset = 0; 694 | char* offPos = strchr(pos, '['); 695 | char* dotPos = pos; 696 | if (offPos) 697 | { 698 | dotPos = strchr(offPos, ']'); 699 | if (!dotPos) 700 | return throwError("missing closing bracket: %s\n", pos); 701 | *dotPos++ = 0; 702 | *offPos++ = 0; 703 | offPos = trim_whitespace(offPos); 704 | 705 | // Check for idxreg+offset 706 | int temp = convertIdxRegName(offPos); 707 | if (temp>0) 708 | { 709 | if (!idxType) 710 | return throwError("index register not allowed here: %s\n", offPos); 711 | *idxType = temp; 712 | } else do 713 | { 714 | char* plusPos = strchr(offPos, '+'); 715 | if (!plusPos) 716 | break; 717 | if (!idxType) 718 | return throwError("index register not allowed here: %s\n", offPos); 719 | *plusPos++ = 0; 720 | char* idxRegName = trim_whitespace(offPos); 721 | offPos = trim_whitespace(plusPos); 722 | *idxType = convertIdxRegName(idxRegName); 723 | if (!*idxType) 724 | return throwError("invalid index register: %s\n", idxRegName); 725 | } while (0); 726 | 727 | regOffset = atoi(offPos); 728 | if (regOffset < 0) 729 | return throwError("invalid register offset: %s\n", offPos); 730 | } 731 | dotPos = strchr(dotPos, '.'); 732 | if (dotPos) 733 | { 734 | *dotPos++ = 0; 735 | outSw = parseSwizzling(dotPos) | (outSw&1); 736 | if (outSw < 0) 737 | return throwError("invalid swizzling mask: %s\n", dotPos); 738 | } 739 | aliasTableIter it = g_aliases.find(pos); 740 | if (it != g_aliases.end()) 741 | { 742 | int x = it->second; 743 | outReg = x & 0xFF; 744 | outReg += regOffset; 
745 | outSw ^= (x>>8)&1; 746 | x >>= 9; 747 | // Combine swizzling 748 | int temp = outSw & 1; 749 | for (int j = 0; j < 4; j ++) 750 | { 751 | int comp = (outSw >> (7 - j*2)) & 3; 752 | comp = (x >> (6 - comp*2)) & 3; 753 | temp |= SWIZZLE_COMP(j, comp)<<1; 754 | } 755 | outSw = temp; 756 | return 0; 757 | } 758 | 759 | if (!isregp(pos[0]) || !isdigit(pos[1])) 760 | return throwError("invalid register: %s\n", pos); 761 | 762 | safe_call(parseInt(pos+1, outReg, 0, 255)); 763 | switch (*pos) 764 | { 765 | case 'o': // Output registers 766 | if (outReg < 0x00 || outReg >= GetDvleData()->maxOutputReg()) 767 | return throwError("invalid output register: %s\n", pos); 768 | break; 769 | case 'v': // Input attributes 770 | if (outReg < 0x00 || outReg >= 0x0F) 771 | return throwError("invalid input register: %s\n", pos); 772 | break; 773 | case 'r': // Temporary registers 774 | outReg += 0x10; 775 | if (outReg < 0x10 || outReg >= 0x20) 776 | return throwError("invalid temporary register: %s\n", pos); 777 | break; 778 | case 'c': // Floating-point vector uniform registers 779 | outReg += 0x20; 780 | if (outReg < 0x20 || outReg >= 0x80) 781 | return throwError("invalid floating-point vector uniform register: %s\n", pos); 782 | break; 783 | case 'i': // Integer vector uniforms 784 | outReg += 0x80; 785 | if (outReg < 0x80 || outReg >= 0x88) 786 | return throwError("invalid integer vector uniform register: %s\n", pos); 787 | break; 788 | case 'b': // Boolean uniforms 789 | outReg += 0x88; 790 | if (outReg < 0x88 || outReg >= 0x98) 791 | return throwError("invalid boolean uniform register: %s\n", pos); 792 | break; 793 | } 794 | if (idxType && *idxType && (outReg < 0x20 || outReg >= 0x80)) 795 | return throwError("index register not allowed with this kind of register\n"); 796 | outReg += regOffset; 797 | return 0; 798 | } 799 | 800 | static int parseCondExpOp(char* str, u32& outFlags, int& which) 801 | { 802 | int negation = 0; 803 | for (; *str == '!'; str++) negation ^= 1; 
804 | if (stricmp(str, "cmp.x")==0) 805 | { 806 | which = 0; 807 | outFlags ^= negation<<25; 808 | return 0; 809 | } 810 | if (stricmp(str, "cmp.y")==0) 811 | { 812 | which = 1; 813 | outFlags ^= negation<<24; 814 | return 0; 815 | } 816 | return throwError("invalid condition register: %s\n", str); 817 | } 818 | 819 | static int parseCondExp(char* str, u32& outFlags) 820 | { 821 | outFlags = BIT(24) | BIT(25); 822 | size_t len = strlen(str); 823 | size_t pos = strcspn(str, "&|"); 824 | int op2 = -1; 825 | if (pos < len) 826 | { 827 | char* str2 = str + pos; 828 | int type = *str2; 829 | *str2++ = 0; 830 | if (*str2 == type) 831 | str2++; 832 | str = trim_whitespace(str); 833 | str2 = trim_whitespace(str2); 834 | if (type == '&') 835 | outFlags |= 1<<22; 836 | safe_call(parseCondExpOp(str2, outFlags, op2)); 837 | } 838 | int op1 = -1; 839 | safe_call(parseCondExpOp(str, outFlags, op1)); 840 | if (op1 == op2) 841 | return throwError("condition register checked twice\n"); 842 | if (op2 < 0) 843 | outFlags |= (op1+2)<<22; 844 | return 0; 845 | } 846 | 847 | static inline bool isBadInputRegCombination(int a, int b) 848 | { 849 | return a < 0x10 && b < 0x10 && a != b; 850 | } 851 | 852 | static inline bool isBadInputRegCombination(int a, int b, int c) 853 | { 854 | return isBadInputRegCombination(a,b) || isBadInputRegCombination(b,c) || isBadInputRegCombination(c,a); 855 | } 856 | 857 | static void insertPaddingNop() 858 | { 859 | if (g_autoNop) 860 | BUF.push_back(FMT_OPCODE(MAESTRO_NOP)); 861 | else 862 | fprintf(stderr, "%s:%d: warning: a padding NOP is required here\n", curFile, curLine); 863 | } 864 | 865 | DEF_COMMAND(format0) 866 | { 867 | ENSURE_NO_MORE_ARGS(); 868 | 869 | BUF.push_back(FMT_OPCODE(opcode)); 870 | return 0; 871 | } 872 | 873 | DEF_COMMAND(format1) 874 | { 875 | NEXT_ARG(destName); 876 | NEXT_ARG(src1Name); 877 | NEXT_ARG(src2Name); 878 | ENSURE_NO_MORE_ARGS(); 879 | 880 | ARG_TO_DEST_REG(rDest, destName); 881 | ARG_TO_REG2(rSrc1, src1Name); 882 | 
ARG_TO_REG2(rSrc2, src2Name); 883 | 884 | bool inverted = opcodei >= 0 && rSrc1 < 0x20 && rSrc2 >= 0x20; 885 | 886 | if (!inverted) 887 | { 888 | safe_call(ensure_valid_src_wide(rSrc1, src1Name, 1)); 889 | safe_call(ensure_valid_src_narrow(rSrc2, src2Name, 2)); 890 | safe_call(ensure_no_idxreg(rSrc2Idx, 2)); 891 | } else 892 | { 893 | safe_call(ensure_valid_src_narrow(rSrc1, src1Name, 1)); 894 | safe_call(ensure_no_idxreg(rSrc1Idx, 1)); 895 | safe_call(ensure_valid_src_wide(rSrc2, src2Name, 2)); 896 | } 897 | 898 | if (isBadInputRegCombination(rSrc1, rSrc2)) 899 | return throwError("source operands must be different input registers (v0..v15)\n"); 900 | 901 | int opdesc = 0; 902 | safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, rSrc2Sw, 0), OPDESC_MASK_D12)); 903 | 904 | #ifdef DEBUG 905 | printf("%s:%02X d%02X, d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, rSrc2, opdesc); 906 | #endif 907 | if (!inverted) 908 | BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc2<<7) | (rSrc1<<12) | (rSrc1Idx<<19) | (rDest<<21)); 909 | else 910 | BUF.push_back(FMT_OPCODE(opcodei) | opdesc | (rSrc2<<7) | (rSrc1<<14) | (rSrc2Idx<<19) | (rDest<<21)); 911 | 912 | return 0; 913 | } 914 | 915 | DEF_COMMAND(format1u) 916 | { 917 | NEXT_ARG(destName); 918 | NEXT_ARG(src1Name); 919 | ENSURE_NO_MORE_ARGS(); 920 | 921 | ARG_TO_DEST_REG(rDest, destName); 922 | ARG_TO_SRC1_REG2(rSrc1, src1Name); 923 | 924 | int opdesc = 0; 925 | safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, 0, 0), OPDESC_MASK_D1)); 926 | 927 | #ifdef DEBUG 928 | printf("%s:%02X d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, opdesc); 929 | #endif 930 | BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc1<<12) | (rSrc1Idx<<19) | (rDest<<21)); 931 | 932 | return 0; 933 | } 934 | 935 | DEF_COMMAND(format1c) 936 | { 937 | NEXT_ARG(src1Name); 938 | NEXT_ARG(cmpxName); 939 | NEXT_ARG(cmpyName); 940 | NEXT_ARG(src2Name); 941 | 
ENSURE_NO_MORE_ARGS(); 942 | 943 | ARG_TO_SRC1_REG2(rSrc1, src1Name); 944 | ARG_TO_CONDOP(cmpx, cmpxName); 945 | ARG_TO_CONDOP(cmpy, cmpyName); 946 | ARG_TO_SRC2_REG(rSrc2, src2Name); 947 | 948 | int opdesc = 0; 949 | safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(0, rSrc1Sw, rSrc2Sw, 0), OPDESC_MASK_12)); 950 | 951 | #ifdef DEBUG 952 | printf("%s:%02X d%02X, %d, %d, d%02X (0x%X)\n", cmdName, opcode, rSrc1, cmpx, cmpy, rSrc2, opdesc); 953 | #endif 954 | BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc2<<7) | (rSrc1<<12) | (rSrc1Idx<<19) | (cmpy<<21) | (cmpx<<24)); 955 | 956 | return 0; 957 | } 958 | 959 | DEF_COMMAND(format5) 960 | { 961 | NEXT_ARG(destName); 962 | NEXT_ARG(src1Name); 963 | NEXT_ARG(src2Name); 964 | NEXT_ARG(src3Name); 965 | ENSURE_NO_MORE_ARGS(); 966 | 967 | ARG_TO_DEST_REG(rDest, destName); 968 | ARG_TO_SRC2_REG(rSrc1, src1Name); 969 | ARG_TO_REG2(rSrc2, src2Name); 970 | ARG_TO_REG2(rSrc3, src3Name); 971 | 972 | bool inverted = opcodei >= 0 && rSrc2 < 0x20 && (rSrc3 >= 0x20 || (rSrc3Idx && !rSrc2Idx)); 973 | 974 | if (!inverted) 975 | { 976 | safe_call(ensure_valid_src_wide(rSrc2, src2Name, 2)); 977 | safe_call(ensure_valid_src_narrow(rSrc3, src3Name, 3)); 978 | safe_call(ensure_no_idxreg(rSrc3Idx, 2)); 979 | } else 980 | { 981 | safe_call(ensure_valid_src_narrow(rSrc2, src2Name, 2)); 982 | safe_call(ensure_valid_src_wide(rSrc3, src3Name, 3)); 983 | safe_call(ensure_no_idxreg(rSrc2Idx, 2)); 984 | } 985 | 986 | if (isBadInputRegCombination(rSrc1, rSrc2, rSrc3)) 987 | return throwError("source registers must be different input registers (v0..v15)\n"); 988 | 989 | int opdesc = 0; 990 | safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, rSrc2Sw, rSrc3Sw), OPDESC_MASK_D123)); 991 | 992 | if (opdesc >= 32) 993 | { 994 | int which; 995 | for (which = 0; which < 32; which ++) 996 | if (!(g_opdescIsMad & BIT(which))) 997 | break; 998 | if (which == 32) 999 | return throwError("opdesc allocation error\n"); 
1000 | swapOpdesc(which, opdesc); 1001 | opdesc = which; 1002 | } 1003 | 1004 | g_opdescIsMad |= BIT(opdesc); 1005 | 1006 | #ifdef DEBUG 1007 | printf("%s:%02X d%02X, d%02X, d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, rSrc2, rSrc3, opdesc); 1008 | #endif 1009 | if (!inverted) 1010 | BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc3<<5) | (rSrc2<<10) | (rSrc1<<17) | (rSrc2Idx<<22) | (rDest<<24)); 1011 | else 1012 | BUF.push_back(FMT_OPCODE(opcodei) | opdesc | (rSrc3<<5) | (rSrc2<<12) | (rSrc1<<17) | (rSrc3Idx<<22) | (rDest<<24)); 1013 | 1014 | return 0; 1015 | } 1016 | 1017 | DEF_COMMAND(formatmova) 1018 | { 1019 | NEXT_ARG(targetReg); 1020 | NEXT_ARG(src1Name); 1021 | ENSURE_NO_MORE_ARGS(); 1022 | 1023 | int mask; 1024 | if (stricmp(targetReg, "a0")==0 || stricmp(targetReg, "a0.x")==0) mask = BIT(3); 1025 | else if (stricmp(targetReg, "a1")==0 || stricmp(targetReg, "a0.y")==0) mask = BIT(2); 1026 | else if (stricmp(targetReg, "a01")==0 || stricmp(targetReg, "a0.xy")==0) mask = BIT(3) | BIT(2); 1027 | else return throwError("invalid destination register for mova: %s\n", targetReg); 1028 | 1029 | ARG_TO_SRC1_REG2(rSrc1, src1Name); 1030 | 1031 | int opdesc = 0; 1032 | safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(mask, rSrc1Sw, 0, 0), OPDESC_MASK_D1)); 1033 | 1034 | #ifdef DEBUG 1035 | printf("%s:%02X d%02X (0x%X)\n", cmdName, opcode, rSrc1, opdesc); 1036 | #endif 1037 | BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc1<<12) | (rSrc1Idx<<19)); 1038 | 1039 | return 0; 1040 | } 1041 | 1042 | static inline int parseSetEmitFlags(char* flags, bool& isPrim, bool& isInv) 1043 | { 1044 | isPrim = false; 1045 | isInv = false; 1046 | if (!flags) 1047 | return 0; 1048 | 1049 | mystrtok_pos = flags; 1050 | while (char* flag = mystrtok_spc(NULL)) 1051 | { 1052 | if (stricmp(flag, "prim")==0 || stricmp(flag, "primitive")==0) 1053 | isPrim = true; 1054 | else if (stricmp(flag, "inv")==0 || stricmp(flag, "invert")==0) 1055 | isInv = true; 1056 | else 1057 | 
throwError("unknown setemit flag: %s\n", flag); 1058 | 1059 | } 1060 | return 0; 1061 | } 1062 | 1063 | DEF_COMMAND(formatsetemit) 1064 | { 1065 | NEXT_ARG(vtxIdStr); 1066 | NEXT_ARG_OPT(flagStr, NULL); 1067 | ENSURE_NO_MORE_ARGS(); 1068 | 1069 | ARG_TO_INT(vtxId, vtxIdStr, 0, 2); 1070 | bool isPrim, isInv; 1071 | safe_call(parseSetEmitFlags(flagStr, isPrim, isInv)); 1072 | 1073 | DVLEData* dvle = GetDvleData(); 1074 | if (!dvle->isGeoShader) 1075 | { 1076 | dvle->isGeoShader = true; 1077 | dvle->isCompatGeoShader = true; 1078 | } 1079 | 1080 | #ifdef DEBUG 1081 | printf("%s:%02X vtx%d, %s, %s\n", cmdName, opcode, vtxId, isPrim?"true":"false", isInv?"true":"false"); 1082 | #endif 1083 | BUF.push_back(FMT_OPCODE(opcode) | ((u32)isInv<<22) | ((u32)isPrim<<23) | (vtxId<<24)); 1084 | 1085 | return 0; 1086 | } 1087 | 1088 | DEF_COMMAND(formatcall) 1089 | { 1090 | NEXT_ARG(procName); 1091 | ENSURE_NO_MORE_ARGS(); 1092 | 1093 | ARG_TARGET(procName); 1094 | 1095 | g_procRelocTable.push_back( std::make_pair(BUF.size(), procName) ); 1096 | 1097 | BUF.push_back(FMT_OPCODE(opcode)); 1098 | 1099 | #ifdef DEBUG 1100 | printf("%s:%02X %s\n", cmdName, opcode, procName); 1101 | #endif 1102 | return 0; 1103 | } 1104 | 1105 | DEF_COMMAND(formatfor) 1106 | { 1107 | NEXT_ARG(regName); 1108 | ENSURE_NO_MORE_ARGS(); 1109 | 1110 | ARG_TO_IREG(regId, regName); 1111 | 1112 | if (NO_MORE_STACK) 1113 | return throwError("too many nested blocks\n"); 1114 | 1115 | StackEntry& elem = g_stack[g_stackPos++]; 1116 | elem.type = SE_FOR; 1117 | elem.pos = BUF.size(); 1118 | 1119 | BUF.push_back(FMT_OPCODE(opcode) | ((regId-0x80) << 22)); 1120 | 1121 | #ifdef DEBUG 1122 | printf("%s:%02X d%02X\n", cmdName, opcode, regId); 1123 | #endif 1124 | return 0; 1125 | } 1126 | 1127 | DEF_COMMAND(format2) 1128 | { 1129 | NEXT_ARG(condExp); 1130 | 1131 | u32 instruction = 0; 1132 | safe_call(parseCondExp(condExp, instruction)); 1133 | 1134 | switch (opcode) 1135 | { 1136 | case MAESTRO_BREAKC: 1137 | { 1138 | 
ENSURE_NO_MORE_ARGS();

#ifdef DEBUG
            printf("%s:%02X %s\n", cmdName, opcode, condExp);
#endif
            break;
        }

        case MAESTRO_CALLC:
        case MAESTRO_JMPC:
        {
            NEXT_ARG(targetName);
            ENSURE_NO_MORE_ARGS();

            ARG_TARGET(targetName);

            // CALLC targets go to the procedure relocation table, JMPC targets
            // to the label relocation table; both are keyed by the index the
            // instruction is about to receive.
            relocTableType& rt = opcode==MAESTRO_CALLC ? g_procRelocTable : g_labelRelocTable;
            rt.push_back( std::make_pair(BUF.size(), targetName) );

#ifdef DEBUG
            printf("%s:%02X %s, %s\n", cmdName, opcode, condExp, targetName);
#endif
            break;
        }

        case MAESTRO_IFC:
        {
            ENSURE_NO_MORE_ARGS();

            if (NO_MORE_STACK)
                return throwError("too many nested blocks\n");

            // Open an IF block; uExtra == 0 means "no .else seen yet".
            StackEntry& elem = g_stack[g_stackPos++];
            elem.type = SE_IF;
            elem.pos = BUF.size();
            elem.uExtra = 0;

#ifdef DEBUG
            printf("%s:%02X %s\n", cmdName, opcode, condExp);
#endif
            break;
        }
    }

    // Every variant emits the opcode combined with the parsed condition flags.
    BUF.push_back(FMT_OPCODE(opcode) | instruction);

    return 0;
}

// Instructions whose first operand is a boolean uniform register:
// CALLU / JMPU take a target, IFU opens an IF block. A leading '!' on the
// register is accepted for JMPU only.
DEF_COMMAND(format3)
{
    NEXT_ARG(regName);

    u32 negation = 0;
    if (*regName == '!')
    {
        if (opcode == MAESTRO_JMPU)
        {
            negation = 1;
            regName ++; // skip the '!' so only the register name is parsed below
        } else
            return throwError("Inverting the condition is not supported by %s\n", opcode==MAESTRO_CALLU ? "CALLU" : "IFU");
    }

    ARG_TO_BREG(regId, regName);

    switch (opcode)
    {
        case MAESTRO_CALLU:
        case MAESTRO_JMPU:
        {
            NEXT_ARG(targetName);
            ENSURE_NO_MORE_ARGS();

            ARG_TARGET(targetName);

            relocTableType& rt = opcode==MAESTRO_CALLU ?
g_procRelocTable : g_labelRelocTable;
            // Keyed by the index the instruction is about to receive.
            rt.push_back( std::make_pair(BUF.size(), targetName) );

#ifdef DEBUG
            printf("%s:%02X d%02X, %s\n", cmdName, opcode, regId, targetName);
#endif
            break;
        }

        case MAESTRO_IFU:
        {
            ENSURE_NO_MORE_ARGS();

            if (NO_MORE_STACK)
                return throwError("too many nested blocks\n");

            // Open an IF block; uExtra == 0 means "no .else seen yet".
            StackEntry& elem = g_stack[g_stackPos++];
            elem.type = SE_IF;
            elem.pos = BUF.size();
            elem.uExtra = 0;

#ifdef DEBUG
            printf("%s:%02X d%02X\n", cmdName, opcode, regId);
#endif
            break;
        }
    }

    // Register number relative to 0x88 goes to bits 22 and up; the negation
    // flag occupies bit 0.
    BUF.push_back(FMT_OPCODE(opcode) | ((regId-0x88) << 22) | negation);

    return 0;
}

// Table associating each instruction with the handler that assembles it.
// Entries are grouped by handler; the list is closed by an all-NULL entry.
// NOTE(review): DEC_COMMAND2 presumably registers an alternate opcode
// (`opcodei`) as well - confirm against the macro definition.
static const cmdTableType cmdTable[] =
{
    DEC_COMMAND(NOP, format0),
    DEC_COMMAND(END, format0),
    DEC_COMMAND(EMIT, format0),
    DEC_COMMAND(BREAK, format0),

    DEC_COMMAND(ADD, format1),
    DEC_COMMAND(DP3, format1),
    DEC_COMMAND(DP4, format1),
    DEC_COMMAND2(DPH, format1),
    DEC_COMMAND2(DST, format1),
    DEC_COMMAND(MUL, format1),
    DEC_COMMAND2(SGE, format1),
    DEC_COMMAND2(SLT, format1),
    DEC_COMMAND(MAX, format1),
    DEC_COMMAND(MIN, format1),

    DEC_COMMAND(EX2, format1u),
    DEC_COMMAND(LG2, format1u),
    DEC_COMMAND(LITP, format1u),
    DEC_COMMAND(FLR, format1u),
    DEC_COMMAND(RCP, format1u),
    DEC_COMMAND(RSQ, format1u),
    DEC_COMMAND(MOV, format1u),

    DEC_COMMAND(MOVA, formatmova),

    DEC_COMMAND(CMP, format1c),

    DEC_COMMAND(CALL, formatcall),

    DEC_COMMAND(FOR, formatfor),

    DEC_COMMAND(BREAKC, format2),
    DEC_COMMAND(CALLC, format2),
    DEC_COMMAND(IFC, format2),
    DEC_COMMAND(JMPC, format2),

    DEC_COMMAND(CALLU, format3),
    DEC_COMMAND(IFU, format3),
DEC_COMMAND(JMPU, format3), 1289 | 1290 | DEC_COMMAND2(MAD, format5), 1291 | 1292 | DEC_COMMAND(SETEMIT, formatsetemit), 1293 | 1294 | { NULL, NULL }, 1295 | }; 1296 | 1297 | // -------------------------------------------------------------------- 1298 | // Directives 1299 | // -------------------------------------------------------------------- 1300 | 1301 | DEF_DIRECTIVE(proc) 1302 | { 1303 | NEXT_ARG(procName); 1304 | ENSURE_NO_MORE_ARGS(); 1305 | 1306 | if (NO_MORE_STACK) 1307 | return throwError("too many nested blocks\n"); 1308 | 1309 | StackEntry& elem = g_stack[g_stackPos++]; 1310 | elem.type = SE_PROC; 1311 | elem.pos = BUF.size(); 1312 | elem.strExtra = procName; 1313 | 1314 | if (g_procTable.find(procName) != g_procTable.end()) 1315 | return throwError("proc already exists: %s\n", procName); 1316 | 1317 | #ifdef DEBUG 1318 | printf("Defining %s\n", procName); 1319 | #endif 1320 | return 0; 1321 | } 1322 | 1323 | DEF_DIRECTIVE(else) 1324 | { 1325 | ENSURE_NO_MORE_ARGS(); 1326 | if (!g_stackPos) 1327 | return throwError(".else with unmatched IF\n"); 1328 | 1329 | StackEntry& elem = g_stack[g_stackPos-1]; 1330 | if (elem.type != SE_IF) 1331 | return throwError(".else with unmatched IF\n"); 1332 | if (elem.uExtra) 1333 | return throwError("spurious .else\n"); 1334 | 1335 | // Automatically add padding NOPs when necessary 1336 | if (lastWasEnd) 1337 | { 1338 | insertPaddingNop(); 1339 | lastWasEnd = false; 1340 | } else 1341 | { 1342 | u32 p = BUF.size(); 1343 | u32 lastOpcode = BUF[p-1] >> 26; 1344 | if (lastOpcode == MAESTRO_JMPC || lastOpcode == MAESTRO_JMPU 1345 | || lastOpcode == MAESTRO_CALL || lastOpcode == MAESTRO_CALLC || lastOpcode == MAESTRO_CALLU 1346 | || (p - elem.pos) < 2) 1347 | insertPaddingNop(); 1348 | } 1349 | 1350 | u32 curPos = BUF.size(); 1351 | elem.uExtra = curPos; 1352 | u32& inst = BUF[elem.pos]; 1353 | inst &= ~(0xFFF << 10); 1354 | inst |= curPos << 10; 1355 | 1356 | #ifdef DEBUG 1357 | printf("ELSE\n"); 1358 | #endif 1359 | 1360 | 
return 0;
}

// .end
// Closes the innermost open block (procedure, FOR, IF or constant array).
// For code blocks a padding NOP is requested when the block was closed right
// after another block, when it ends in a jump/call (or a break inside FOR),
// or when it is shorter than the minimum length checked below. The opening
// instruction is then patched with the final position/size information.
DEF_DIRECTIVE(end)
{
    ENSURE_NO_MORE_ARGS();
    if (!g_stackPos)
        return throwError(".end with unmatched block\n");

    // Pop the block being closed.
    StackEntry& elem = g_stack[--g_stackPos];

    // Automatically add padding NOPs when necessary
    if (elem.type != SE_ARRAY && lastWasEnd)
    {
        insertPaddingNop();
        lastWasEnd = false;
    }

    else if ((elem.type == SE_PROC || elem.type == SE_FOR || elem.type == SE_IF) && BUF.size() > 0)
    {
        u32 p = BUF.size();
        u32 lastOpcode = BUF[p-1] >> 26; // the opcode is kept in the top 6 bits of each word
        if (lastOpcode == MAESTRO_JMPC || lastOpcode == MAESTRO_JMPU
            || lastOpcode == MAESTRO_CALL || lastOpcode == MAESTRO_CALLC || lastOpcode == MAESTRO_CALLU
            || (elem.type == SE_FOR && (lastOpcode == MAESTRO_BREAK || lastOpcode == MAESTRO_BREAKC))
            || (elem.type != SE_ARRAY && (p - elem.pos) < (elem.type != SE_PROC ? 2 : 1)))
            insertPaddingNop();
    }

    // Taken after any padding NOP so that the NOP is counted as part of the block.
    u32 curPos = BUF.size();
    u32 size = curPos - elem.pos;

    switch (elem.type)
    {
        case SE_PROC:
        {
#ifdef DEBUG
            printf("proc: %s(%u, size:%u)\n", elem.strExtra, elem.pos, size);
#endif
            // Register the procedure under its name with its start and length.
            g_procTable.insert( std::pair(elem.strExtra, procedure(elem.pos, size)) );
            break;
        }

        case SE_FOR:
        {
#ifdef DEBUG
            printf("ENDFOR\n");
#endif
            // Patch the 12-bit field at bit 10 of the FOR instruction with the
            // index of the last instruction of the loop body.
            u32& inst = BUF[elem.pos];
            inst &= ~(0xFFF << 10);
            inst |= (curPos-1) << 10;
            lastWasEnd = true;
            break;
        }

        case SE_IF:
        {
#ifdef DEBUG
            printf("ENDIF\n");
#endif
            u32& inst = BUF[elem.pos];
            if (!elem.uExtra)
            {
                // IF with no ELSE
                inst &= ~(0xFFF << 10);
                inst |= curPos << 10;
            } else
            {
                // IF with an ELSE
                // The low 10 bits receive the length of the ELSE part.
                inst &= ~0x3FF;
                inst |= curPos - elem.uExtra;
            }
            lastWasEnd = true;
            break;
}

        case SE_ARRAY:
        {
#ifdef DEBUG
            printf("ENDARRAY\n");
#endif
            // Closing a constant array: allocate consecutive local float
            // vector registers, copy the collected elements into the DVLE
            // constant table and publish the array name as an alias.
            DVLEData* dvle = GetDvleData();
            UniformAlloc& alloc = getAlloc(UTYPE_FVEC, dvle);

            if (g_aliases.find(g_constArrayName) != g_aliases.end())
                return duplicateIdentifier(g_constArrayName);

            // When an explicit size was declared, pad with zero-filled
            // elements up to that size.
            int size = g_constArray.size();
            if (g_constArraySize >= 0) for (; size < g_constArraySize; size ++)
            {
                Constant c;
                memset(&c, 0, sizeof(c));
                c.type = UTYPE_FVEC;
                g_constArray.push_back(c);
            }

            if (size == 0)
                return throwError("no elements have been specified in array '%s'\n", g_constArrayName);

            int uniformPos = alloc.AllocLocal(size);
            if (uniformPos < 0)
                return throwError("not enough space for local constant array '%s'\n", g_constArrayName);

            if ((dvle->constantCount+size) > MAX_CONSTANT)
                return throwError("too many local constants\n");

            // Element i is assigned register uniformPos + i.
            for (int i = 0; i < size; i ++)
            {
                Constant& src = g_constArray[i];
                Constant& dst = dvle->constantTable[dvle->constantCount++];
                src.regId = uniformPos+i;
                memcpy(&dst, &src, sizeof(src));
            }

            // Alias value layout: register number in the low 8 bits, swizzle
            // word from bit 8 upward.
            g_aliases.insert( std::pair(g_constArrayName, uniformPos | (DEFAULT_OPSRC<<8)) );

            // Reset the array collection state for the next array.
            g_constArray.clear();
            g_constArraySize = -1;
            g_constArrayName = NULL;
            break;
        }
    }

    return 0;
}

// .alias <name> <register>
// Defines <name> as another name for <register> (including its swizzle).
// The name must be a valid identifier, must not look like a raw register
// (register prefix letter followed by a digit) and must not already exist.
DEF_DIRECTIVE(alias)
{
    NEXT_ARG_SPC(aliasName);
    NEXT_ARG_SPC(aliasReg);
    ENSURE_NO_MORE_ARGS();

    if (!validateIdentifier(aliasName))
        return throwError("invalid alias name: %s\n", aliasName);
    if (isregp(aliasName[0]) && isdigit(aliasName[1]))
        return throwError("cannot redefine register\n");
    ARG_TO_REG(rAlias, aliasReg);

    if (g_aliases.find(aliasName) != g_aliases.end())
        return
duplicateIdentifier(aliasName);

    // Alias value layout: register number in the low 8 bits, swizzle word
    // (negation bit first) from bit 8 upward.
    g_aliases.insert( std::pair(aliasName, rAlias | (rAliasSw<<8)) );
    return 0;
}

// Uniform declaration directive: accepts any number of `name` or `name[size]`
// arguments. `dirParam` supplies the uniform type. Each uniform gets a
// register range, an alias, and (unless its name starts with '_') an entry
// in the DVLE uniform table.
DEF_DIRECTIVE(uniform)
{
    DVLEData* dvle = GetDvleData();
    UniformAlloc& alloc = getAlloc(dirParam, dvle);
    // The global uniform table is only consulted and extended when this DVLE
    // does not use the geometry shader uniform space.
    bool useSharedSpace = !dvle->usesGshSpace();

    for (;;)
    {
        char* argText = nextArg();
        if (!argText) break;

        // Optional "[size]" suffix; it is cut off so argText holds only the name.
        int uSize = 1;
        char* sizePos = strchr(argText, '[');
        if (sizePos)
        {
            char* closePos = strchr(sizePos, ']');
            if (!closePos)
                return throwError("missing closing bracket: %s\n", argText);
            *closePos = 0;
            *sizePos++ = 0;
            sizePos = trim_whitespace(sizePos);
            uSize = atoi(sizePos);
            if (uSize < 1)
                return throwError("invalid uniform size: %s[%s]\n", argText, sizePos);
        }
        if (!validateIdentifier(argText))
            return throwError("invalid uniform name: %s\n", argText);
        if (g_aliases.find(argText) != g_aliases.end())
            return duplicateIdentifier(argText);

        int uniformPos = -1;

        // Find the uniform in the table
        int i;
        for (i = 0; useSharedSpace && i < g_uniformCount; i ++)
        {
            Uniform& uniform = g_uniformTable[i];
            if (uniform.name == argText)
            {
                // A previously declared uniform must match in type and size;
                // its existing position is then reused.
                if (uniform.type != dirParam)
                    return throwError("mismatched uniform type: %s\n", argText);
                if (uniform.size != uSize)
                    return throwError("uniform '%s' previously declared as having size %d\n", argText, uniform.size);
                uniformPos = uniform.pos;
                break;
            }
        }

        // If not found, create it
        if (uniformPos < 0)
        {
            if (g_uniformCount == MAX_UNIFORM)
                return throwError("too many global uniforms: %s\n", argText);

            uniformPos = alloc.AllocGlobal(uSize);
            if (uniformPos < 0)
                return throwError("not enough uniform space: %s[%d]\n",
argText, uSize); 1562 | } 1563 | 1564 | if (useSharedSpace) 1565 | g_uniformTable[g_uniformCount++].init(argText, uniformPos, uSize, dirParam); 1566 | 1567 | if (*argText != '_') 1568 | { 1569 | // Add the uniform to the table 1570 | if (dvle->uniformCount == MAX_UNIFORM) 1571 | return throwError("too many referenced uniforms: %s\n", argText); 1572 | dvle->uniformTable[dvle->uniformCount++].init(argText, uniformPos, uSize, dirParam); 1573 | dvle->symbolSize += strlen(argText)+1; 1574 | } 1575 | 1576 | g_aliases.insert( std::pair(argText, uniformPos | (DEFAULT_OPSRC<<8)) ); 1577 | 1578 | #ifdef DEBUG 1579 | printf("uniform %s[%d] @ d%02X:d%02X\n", argText, uSize, uniformPos, uniformPos+uSize-1); 1580 | #endif 1581 | } 1582 | return 0; 1583 | } 1584 | 1585 | DEF_DIRECTIVE(const) 1586 | { 1587 | DVLEData* dvle = GetDvleData(); 1588 | UniformAlloc& alloc = getAlloc(dirParam, dvle); 1589 | 1590 | NEXT_ARG_CPAREN(constName); 1591 | NEXT_ARG(arg0Text); 1592 | NEXT_ARG(arg1Text); 1593 | NEXT_ARG(arg2Text); 1594 | char* arg3Text = mystrtok_pos; 1595 | if (!mystrtok_pos) return missingParam(); 1596 | char* parenPos = strchr(arg3Text, ')'); 1597 | if (!parenPos) return throwError("invalid syntax\n"); 1598 | *parenPos = 0; 1599 | arg3Text = trim_whitespace(arg3Text); 1600 | 1601 | if (g_aliases.find(constName) != g_aliases.end()) 1602 | return duplicateIdentifier(constName); 1603 | 1604 | int uniformPos = alloc.AllocLocal(1); 1605 | if (uniformPos < 0) 1606 | return throwError("not enough space for local constant '%s'\n", constName); 1607 | 1608 | if (dvle->constantCount == MAX_CONSTANT) 1609 | return throwError("too many local constants\n"); 1610 | 1611 | Constant& ct = dvle->constantTable[dvle->constantCount++]; 1612 | ct.regId = uniformPos; 1613 | ct.type = dirParam; 1614 | if (dirParam == UTYPE_FVEC) 1615 | { 1616 | ct.fparam[0] = atof(arg0Text); 1617 | ct.fparam[1] = atof(arg1Text); 1618 | ct.fparam[2] = atof(arg2Text); 1619 | ct.fparam[3] = atof(arg3Text); 1620 | } else 
if (dirParam == UTYPE_IVEC)
    {
        // Integer components are reduced to their low 8 bits.
        ct.iparam[0] = atoi(arg0Text) & 0xFF;
        ct.iparam[1] = atoi(arg1Text) & 0xFF;
        ct.iparam[2] = atoi(arg2Text) & 0xFF;
        ct.iparam[3] = atoi(arg3Text) & 0xFF;
    }

    // Alias value layout: register number in the low 8 bits, swizzle word
    // from bit 8 upward.
    g_aliases.insert( std::pair(constName, ct.regId | (DEFAULT_OPSRC<<8)) );

#ifdef DEBUG
    if (dirParam == UTYPE_FVEC)
        printf("constant %s(%f, %f, %f, %f) @ d%02X\n", constName, ct.fparam[0], ct.fparam[1], ct.fparam[2], ct.fparam[3], ct.regId);
    else if (dirParam == UTYPE_IVEC)
        printf("constant %s(%u, %u, %u, %u) @ d%02X\n", constName, ct.iparam[0], ct.iparam[1], ct.iparam[2], ct.iparam[3], ct.regId);
#endif
    return 0;
};

// Float constant array directive. It has two roles depending on context:
//  - outside an array block: `name[size]` or `name[]` opens an SE_ARRAY block
//    (closed later by `.end`) and records the array name and optional size;
//  - inside an array block: `(a, b, c, d)` appends one element.
DEF_DIRECTIVE(constfa)
{
    bool inArray = g_stackPos && g_stack[g_stackPos-1].type == SE_ARRAY;

    if (!inArray)
    {
        NEXT_ARG(constName);
        ENSURE_NO_MORE_ARGS();

        if (NO_MORE_STACK)
            return throwError("too many nested blocks\n");

        char* sizePos = strchr(constName, '[');
        if (!sizePos)
            return throwError("missing opening bracket: %s\n", constName);

        char* closePos = strchr(sizePos, ']');
        if (!closePos)
            return throwError("missing closing bracket: %s\n", constName);

        // Split into name, bracket contents and trailing text.
        *closePos++ = 0;
        *sizePos++ = 0;
        closePos = trim_whitespace(closePos);
        sizePos = trim_whitespace(sizePos);

        // Nothing may follow the closing bracket.
        if (*closePos)
            return throwError("garbage found: %s\n", closePos);

        // Empty brackets leave g_constArraySize untouched.
        if (*sizePos)
        {
            g_constArraySize = atoi(sizePos);
            if (g_constArraySize <= 0)
                return throwError("invalid array size: %s[%s]\n", constName, sizePos);
        }

        if (!validateIdentifier(constName))
            return throwError("invalid array name: %s\n", constName);

        g_constArrayName = constName;

        StackEntry& elem = g_stack[g_stackPos++];
        elem.type = SE_ARRAY;

} else
    {
        // Inside an array block: refuse elements beyond the declared size.
        if (g_constArraySize >= 0 && g_constArraySize == g_constArray.size())
            return throwError("too many elements in the array, expected %d\n", g_constArraySize);

        // The element must start with '(' directly in front of the first value.
        NEXT_ARG(arg0Text);
        if (*arg0Text != '(')
            return throwError("invalid syntax\n");
        arg0Text++;

        NEXT_ARG(arg1Text);
        NEXT_ARG(arg2Text);
        // The fourth value is whatever remains up to the closing parenthesis.
        char* arg3Text = mystrtok_pos;
        if (!mystrtok_pos) return missingParam();
        char* parenPos = strchr(arg3Text, ')');
        if (!parenPos) return throwError("invalid syntax\n");
        *parenPos = 0;
        arg3Text = trim_whitespace(arg3Text);

        // regId is left unset here; it is assigned when the array is closed.
        Constant ct;
        ct.type = UTYPE_FVEC;
        ct.fparam[0] = atof(arg0Text);
        ct.fparam[1] = atof(arg1Text);
        ct.fparam[2] = atof(arg2Text);
        ct.fparam[3] = atof(arg3Text);
        g_constArray.push_back(ct);
    }

    return 0;
}

// Sets a constant value for an existing float or integer vector uniform:
// `reg(a, b, c, d)`. `dirParam` selects the uniform type and therefore the
// register range that is accepted. A new entry is appended to the DVLE
// constant table; no register is allocated and no alias is created.
DEF_DIRECTIVE(setfi)
{
    DVLEData* dvle = GetDvleData();

    NEXT_ARG_CPAREN(constName);
    NEXT_ARG(arg0Text);
    NEXT_ARG(arg1Text);
    NEXT_ARG(arg2Text);
    // The fourth value is whatever remains up to the closing parenthesis.
    char* arg3Text = mystrtok_pos;
    if (!mystrtok_pos) return missingParam();
    char* parenPos = strchr(arg3Text, ')');
    if (!parenPos) return throwError("invalid syntax\n");
    *parenPos = 0;
    arg3Text = trim_whitespace(arg3Text);

    ARG_TO_REG(constReg, constName);
    if (dirParam == UTYPE_FVEC)
    {
        if (constReg < 0x20 || constReg >= 0x80)
            return throwError("invalid floating point vector uniform: %s\n", constName);
    } else if (dirParam == UTYPE_IVEC)
    {
        if (constReg < 0x80 || constReg >= 0x84)
            return throwError("invalid integer vector uniform: %s\n", constName);
    }

    if (dvle->constantCount == MAX_CONSTANT)
        return throwError("too many local constants\n");

    Constant& ct = dvle->constantTable[dvle->constantCount++];
ct.regId = constReg; 1744 | ct.type = dirParam; 1745 | if (dirParam == UTYPE_FVEC) 1746 | { 1747 | ct.fparam[0] = atof(arg0Text); 1748 | ct.fparam[1] = atof(arg1Text); 1749 | ct.fparam[2] = atof(arg2Text); 1750 | ct.fparam[3] = atof(arg3Text); 1751 | } else if (dirParam == UTYPE_IVEC) 1752 | { 1753 | ct.iparam[0] = atoi(arg0Text) & 0xFF; 1754 | ct.iparam[1] = atoi(arg1Text) & 0xFF; 1755 | ct.iparam[2] = atoi(arg2Text) & 0xFF; 1756 | ct.iparam[3] = atoi(arg3Text) & 0xFF; 1757 | } 1758 | 1759 | return 0; 1760 | } 1761 | 1762 | static int parseBool(bool& out, const char* text) 1763 | { 1764 | if (stricmp(text, "true")==0 || stricmp(text, "on")==0 || stricmp(text, "1")==0) 1765 | { 1766 | out = true; 1767 | return 0; 1768 | } 1769 | if (stricmp(text, "false")==0 || stricmp(text, "off")==0 || stricmp(text, "0")==0) 1770 | { 1771 | out = false; 1772 | return 0; 1773 | } 1774 | return throwError("invalid bool value: %s\n", text); 1775 | } 1776 | 1777 | DEF_DIRECTIVE(setb) 1778 | { 1779 | DVLEData* dvle = GetDvleData(); 1780 | 1781 | NEXT_ARG_SPC(constName); 1782 | NEXT_ARG_SPC(valueText); 1783 | ENSURE_NO_MORE_ARGS(); 1784 | ARG_TO_BREG(constReg, constName); 1785 | 1786 | bool constVal = false; 1787 | safe_call(parseBool(constVal, valueText)); 1788 | 1789 | if (dvle->constantCount == MAX_CONSTANT) 1790 | return throwError("too many local constants\n"); 1791 | 1792 | Constant& ct = dvle->constantTable[dvle->constantCount++]; 1793 | ct.regId = constReg; 1794 | ct.type = UTYPE_BOOL; 1795 | ct.bparam = constVal; 1796 | 1797 | return 0; 1798 | } 1799 | 1800 | static int parseOutType(const char* text) 1801 | { 1802 | if (stricmp(text,"pos")==0 || stricmp(text,"position")==0) 1803 | return OUTTYPE_POS; 1804 | if (stricmp(text,"nquat")==0 || stricmp(text,"normalquat")==0) 1805 | return OUTTYPE_NQUAT; 1806 | if (stricmp(text,"clr")==0 || stricmp(text,"color")==0) 1807 | return OUTTYPE_CLR; 1808 | if (stricmp(text,"tcoord0")==0 || stricmp(text,"texcoord0")==0) 1809 | return 
OUTTYPE_TCOORD0; 1810 | if (stricmp(text,"tcoord0w")==0 || stricmp(text,"texcoord0w")==0) 1811 | return OUTTYPE_TCOORD0W; 1812 | if (stricmp(text,"tcoord1")==0 || stricmp(text,"texcoord1")==0) 1813 | return OUTTYPE_TCOORD1; 1814 | if (stricmp(text,"tcoord2")==0 || stricmp(text,"texcoord2")==0) 1815 | return OUTTYPE_TCOORD2; 1816 | if (stricmp(text,"view")==0) 1817 | return OUTTYPE_VIEW; 1818 | if (stricmp(text,"dummy")==0) 1819 | return OUTTYPE_DUMMY; 1820 | return -1; 1821 | } 1822 | 1823 | DEF_DIRECTIVE(in) 1824 | { 1825 | DVLEData* dvle = GetDvleData(); 1826 | 1827 | NEXT_ARG_SPC(inName); 1828 | char* inRegName = nextArgSpc(); 1829 | ENSURE_NO_MORE_ARGS(); 1830 | 1831 | if (!validateIdentifier(inName)) 1832 | return throwError("invalid identifier: %s\n", inName); 1833 | if (g_aliases.find(inName) != g_aliases.end()) 1834 | return duplicateIdentifier(inName); 1835 | 1836 | int oid = -1; 1837 | if (inRegName) 1838 | { 1839 | ARG_TO_REG(inReg, inRegName); 1840 | if (inReg < 0x00 || inReg >= 0x10) 1841 | return throwError("invalid input register: %s\n", inRegName); 1842 | oid = inReg; 1843 | } else 1844 | oid = dvle->findFreeInput(); 1845 | if (oid < 0) 1846 | return throwError("too many inputs\n"); 1847 | if (dvle->uniformCount == MAX_UNIFORM) 1848 | return throwError("too many uniforms in DVLE\n"); 1849 | 1850 | dvle->inputMask |= BIT(oid); 1851 | dvle->uniformTable[dvle->uniformCount++].init(inName, oid, 1, UTYPE_FVEC); 1852 | dvle->symbolSize += strlen(inName)+1; 1853 | g_aliases.insert( std::pair(inName, oid | (DEFAULT_OPSRC<<8)) ); 1854 | return 0; 1855 | } 1856 | 1857 | DEF_DIRECTIVE(out) 1858 | { 1859 | DVLEData* dvle = GetDvleData(); 1860 | 1861 | NEXT_ARG_SPC(outName); 1862 | NEXT_ARG_SPC(outType); 1863 | char* outDestRegName = nextArgSpc(); 1864 | ENSURE_NO_MORE_ARGS(); 1865 | 1866 | int oid = -1; 1867 | int sw = DEFAULT_OPSRC; 1868 | 1869 | if (outName[0]=='-' && !outName[1]) 1870 | outName = NULL; 1871 | else if (!validateIdentifier(outName)) 1872 | 
return throwError("invalid identifier: %s\n", outName); 1873 | 1874 | if (outDestRegName) 1875 | { 1876 | ARG_TO_REG(outDestReg, outDestRegName); 1877 | if (outDestReg < 0x00 || outDestReg >= dvle->maxOutputReg()) 1878 | return throwError("invalid output register: %s\n", outDestRegName); 1879 | oid = outDestReg; 1880 | sw = outDestRegSw; 1881 | } 1882 | 1883 | if (oid < 0) 1884 | { 1885 | char* dotPos = strchr(outType, '.'); 1886 | if (dotPos) 1887 | { 1888 | *dotPos++ = 0; 1889 | sw = parseSwizzling(dotPos); 1890 | if (sw < 0) 1891 | return throwError("invalid output mask: %s\n", dotPos); 1892 | } 1893 | } 1894 | 1895 | int mask = maskFromSwizzling(sw, false); 1896 | int type = parseOutType(outType); 1897 | if (type < 0) 1898 | return throwError("invalid output type: %s\n", outType); 1899 | 1900 | if (oid < 0) 1901 | oid = dvle->findFreeOutput(); 1902 | else if (dvle->outputUsedReg & (mask << (4*oid))) 1903 | return throwError("this output collides with another one previously defined\n"); 1904 | 1905 | if (oid < 0 || dvle->outputCount==MAX_OUTPUT) 1906 | return throwError("too many outputs\n"); 1907 | 1908 | if (outName && g_aliases.find(outName) != g_aliases.end()) 1909 | return duplicateIdentifier(outName); 1910 | 1911 | if (oid >= 7 && type != OUTTYPE_DUMMY) 1912 | return throwError("this register (o%d) can only be a dummy output\n", oid); 1913 | 1914 | #ifdef DEBUG 1915 | printf("output %s <- o%d (%d:%X)\n", outName, oid, type, mask); 1916 | #endif 1917 | 1918 | dvle->outputTable[dvle->outputCount++] = OUTPUT_MAKE(type, oid, mask); 1919 | dvle->outputMask |= BIT(oid); 1920 | dvle->outputUsedReg |= mask << (4*oid); 1921 | if (outName) 1922 | g_aliases.insert( std::pair(outName, oid | (DEFAULT_OPSRC<<8)) ); 1923 | if (type == OUTTYPE_DUMMY && dvle->usesGshSpace()) 1924 | dvle->isMerge = true; 1925 | return 0; 1926 | } 1927 | 1928 | DEF_DIRECTIVE(entry) 1929 | { 1930 | DVLEData* dvle = GetDvleData(); 1931 | 1932 | NEXT_ARG_SPC(entrypoint); 1933 | 
ENSURE_NO_MORE_ARGS(); 1934 | 1935 | if (!validateIdentifier(entrypoint)) 1936 | return throwError("invalid identifier: %s\n", entrypoint); 1937 | 1938 | dvle->entrypoint = entrypoint; 1939 | return 0; 1940 | } 1941 | 1942 | DEF_DIRECTIVE(nodvle) 1943 | { 1944 | DVLEData* dvle = GetDvleData(); 1945 | ENSURE_NO_MORE_ARGS(); 1946 | 1947 | if (!dvle->nodvle) 1948 | { 1949 | dvle->nodvle = true; 1950 | g_totalDvleCount --; 1951 | } 1952 | 1953 | return 0; 1954 | } 1955 | 1956 | static inline int parseGshType(const char* text) 1957 | { 1958 | if (stricmp(text,"point")==0) 1959 | return GSHTYPE_POINT; 1960 | if (stricmp(text,"variable")==0 || stricmp(text,"subdivision")==0) 1961 | return GSHTYPE_VARIABLE; 1962 | if (stricmp(text,"fixed")==0 || stricmp(text,"particle")==0) 1963 | return GSHTYPE_FIXED; 1964 | return -1; 1965 | } 1966 | 1967 | DEF_DIRECTIVE(gsh) 1968 | { 1969 | DVLEData* dvle = GetDvleData(); 1970 | char* gshMode = nextArgSpc(); 1971 | if (!gshMode) 1972 | { 1973 | dvle->isGeoShader = true; 1974 | dvle->isCompatGeoShader = true; 1975 | return 0; 1976 | } 1977 | 1978 | if (dvle->isGeoShader) 1979 | return throwError(".gsh had already been used\n"); 1980 | if (dvle->constantCount || dvle->uniformCount || dvle->outputMask) 1981 | return throwError(".gsh must be used before any constant, uniform or output is declared\n"); 1982 | 1983 | int mode = parseGshType(gshMode); 1984 | if (mode < 0) 1985 | return throwError("invalid geometry shader mode: %s\n", gshMode); 1986 | 1987 | dvle->isGeoShader = true; 1988 | dvle->geoShaderType = mode; 1989 | 1990 | NEXT_ARG_SPC(firstFreeRegName); 1991 | ARG_TO_REG(firstFreeReg, firstFreeRegName); 1992 | if (firstFreeReg < 0x20 || firstFreeReg >= 0x80) 1993 | return throwError("invalid float uniform register: %s\n", firstFreeRegName); 1994 | 1995 | unifAlloc[1].initForGsh(firstFreeReg); 1996 | 1997 | switch (mode) 1998 | { 1999 | case GSHTYPE_POINT: 2000 | ENSURE_NO_MORE_ARGS(); 2001 | break; 2002 | case GSHTYPE_VARIABLE: 2003 | 
{ 2004 | NEXT_ARG_SPC(vtxNumText); 2005 | ENSURE_NO_MORE_ARGS(); 2006 | 2007 | ARG_TO_INT(vtxNum, vtxNumText, 0, 255); 2008 | dvle->geoShaderVariableNum = vtxNum; 2009 | break; 2010 | } 2011 | case GSHTYPE_FIXED: 2012 | { 2013 | NEXT_ARG_SPC(arrayStartText); 2014 | NEXT_ARG_SPC(vtxNumText); 2015 | ENSURE_NO_MORE_ARGS(); 2016 | 2017 | ARG_TO_REG(arrayStart, arrayStartText); 2018 | ARG_TO_INT(vtxNum, vtxNumText, 0, 255); 2019 | 2020 | if (arrayStart < 0x20 || arrayStart >= 0x80) 2021 | return throwError("invalid float uniform register: %s\n", arrayStartText); 2022 | if (arrayStart >= firstFreeReg) 2023 | return throwError("specified location overlaps uniform allocation pool: %s\n", arrayStartText); 2024 | 2025 | dvle->geoShaderFixedStart = arrayStart - 0x20; 2026 | dvle->geoShaderFixedNum = vtxNum; 2027 | break; 2028 | } 2029 | } 2030 | 2031 | return 0; 2032 | } 2033 | 2034 | 2035 | static const cmdTableType dirTable[] = 2036 | { 2037 | DEC_DIRECTIVE(proc), 2038 | DEC_DIRECTIVE(else), 2039 | DEC_DIRECTIVE(end), 2040 | DEC_DIRECTIVE(alias), 2041 | DEC_DIRECTIVE2(fvec, uniform, UTYPE_FVEC), 2042 | DEC_DIRECTIVE2(ivec, uniform, UTYPE_IVEC), 2043 | DEC_DIRECTIVE2(bool, uniform, UTYPE_BOOL), 2044 | DEC_DIRECTIVE2(constf, const, UTYPE_FVEC), 2045 | DEC_DIRECTIVE2(consti, const, UTYPE_IVEC), 2046 | DEC_DIRECTIVE(constfa), 2047 | DEC_DIRECTIVE(in), 2048 | DEC_DIRECTIVE(out), 2049 | DEC_DIRECTIVE(entry), 2050 | DEC_DIRECTIVE(nodvle), 2051 | DEC_DIRECTIVE(gsh), 2052 | DEC_DIRECTIVE2(setf, setfi, UTYPE_FVEC), 2053 | DEC_DIRECTIVE2(seti, setfi, UTYPE_IVEC), 2054 | DEC_DIRECTIVE(setb), 2055 | { NULL, NULL }, 2056 | }; 2057 | 2058 | int ProcessCommand(const char* cmd) 2059 | { 2060 | const cmdTableType* table = cmdTable; 2061 | if (*cmd == '.') 2062 | { 2063 | cmd ++; 2064 | table = dirTable; 2065 | } else if (!g_stackPos) 2066 | return throwError("instruction outside block\n"); 2067 | else 2068 | { 2069 | lastWasEnd = false; 2070 | if (!GetDvleData()->isGeoShader && 
g_outputBuf.size() > MAX_VSH_SIZE) 2071 | return throwError("instruction outside vertex shader code memory (max %d instructions, currently %d)\n", MAX_VSH_SIZE, g_outputBuf.size()); 2072 | } 2073 | 2074 | for (int i = 0; table[i].name; i ++) 2075 | if (stricmp(table[i].name, cmd) == 0) 2076 | return table[i].func(cmd, table[i].opcode, table[i].opcodei); 2077 | 2078 | return throwError("invalid instruction: %s\n", cmd); 2079 | } 2080 | -------------------------------------------------------------------------------- /source/picasso_frontend.cpp: -------------------------------------------------------------------------------- 1 | #include "picasso.h" 2 | 3 | // f24 has: 4 | // - 1 sign bit 5 | // - 7 exponent bits 6 | // - 16 mantissa bits 7 | uint32_t f32tof24(float f) 8 | { 9 | uint32_t i; 10 | memcpy(&i, &f, sizeof(f)); 11 | 12 | uint32_t mantissa = (i << 9) >> 9; 13 | int32_t exponent = (i << 1) >> 24; 14 | uint32_t sign = (i << 0) >> 31; 15 | 16 | // Truncate mantissa 17 | mantissa >>= 7; 18 | 19 | // Re-bias exponent 20 | exponent = exponent - 127 + 63; 21 | if (exponent < 0) 22 | { 23 | // Underflow: flush to zero 24 | return sign << 23; 25 | } 26 | else if (exponent > 0x7F) 27 | { 28 | // Overflow: saturate to infinity 29 | return (sign << 23) | (0x7F << 16); 30 | } 31 | 32 | return (sign << 23) | (exponent << 16) | mantissa; 33 | } 34 | 35 | #ifdef WIN32 36 | static inline void FixMinGWPath(char* buf) 37 | { 38 | if (buf && *buf == '/') 39 | { 40 | buf[0] = buf[1]; 41 | buf[1] = ':'; 42 | } 43 | } 44 | #endif 45 | 46 | int usage(const char* prog) 47 | { 48 | fprintf(stderr, 49 | "Usage: %s [options] files...\n" 50 | "Options:\n" 51 | " -o, --out= Specifies the name of the SHBIN file to generate\n" 52 | " -h, --header= Specifies the name of the header file to generate\n" 53 | " -n, --no-nop Disables the automatic insertion of padding NOPs\n" 54 | " -v, --version Displays version information\n" 55 | , prog); 56 | return EXIT_FAILURE; 57 | } 58 | 59 | int 
main(int argc, char* argv[]) 60 | { 61 | char *shbinFile = NULL, *hFile = NULL; 62 | 63 | static struct option long_options[] = 64 | { 65 | { "out", required_argument, NULL, 'o' }, 66 | { "header", required_argument, NULL, 'h' }, 67 | { "help", no_argument, NULL, '?' }, 68 | { "no-nop", no_argument, NULL, 'n' }, 69 | { "version",no_argument, NULL, 'v' }, 70 | { NULL, 0, NULL, 0 } 71 | }; 72 | 73 | int opt, optidx = 0; 74 | while ((opt = getopt_long(argc, argv, "o:h:?nv", long_options, &optidx)) != -1) 75 | { 76 | switch (opt) 77 | { 78 | case 'o': shbinFile = optarg; break; 79 | case 'h': hFile = optarg; break; 80 | case '?': usage(argv[0]); return EXIT_SUCCESS; 81 | case 'n': g_autoNop = false; break; 82 | case 'v': printf("%s - Built on %s %s\n", PACKAGE_STRING, __DATE__, __TIME__); return EXIT_SUCCESS; 83 | default: return usage(argv[0]); 84 | } 85 | } 86 | 87 | #ifdef WIN32 88 | FixMinGWPath(shbinFile); 89 | FixMinGWPath(hFile); 90 | #endif 91 | 92 | if (optind == argc) 93 | { 94 | fprintf(stderr, "%s: no input files are specified\n", argv[0]); 95 | return usage(argv[0]); 96 | } 97 | 98 | if (!shbinFile) 99 | { 100 | fprintf(stderr, "%s: no output file is specified\n", argv[0]); 101 | return usage(argv[0]); 102 | } 103 | 104 | int rc = 0; 105 | for (int i = optind; i < argc; i ++) 106 | { 107 | char* vshFile = argv[i]; 108 | 109 | #ifdef WIN32 110 | FixMinGWPath(vshFile); 111 | #endif 112 | 113 | char* sourceCode = StringFromFile(vshFile); 114 | if (!sourceCode) 115 | { 116 | fprintf(stderr, "error: cannot open input file: %s\n", vshFile); 117 | return EXIT_FAILURE; 118 | } 119 | 120 | rc = AssembleString(sourceCode, vshFile); 121 | free(sourceCode); 122 | if (rc != 0) 123 | return EXIT_FAILURE; 124 | } 125 | 126 | rc = RelocateProduct(); 127 | if (rc != 0) 128 | return EXIT_FAILURE; 129 | 130 | FileClass f(shbinFile, "wb"); 131 | 132 | if (f.openerror()) 133 | { 134 | fprintf(stderr, "Can't open output file!"); 135 | return EXIT_FAILURE; 136 | } 137 | 138 | 
u32 progSize = g_outputBuf.size(); 139 | u32 dvlpSize = 10*4 + progSize*4 + g_opdescCount*8; 140 | 141 | // Write DVLB header 142 | f.WriteWord(0x424C5644); // DVLB 143 | f.WriteWord(g_totalDvleCount); // Number of DVLEs 144 | 145 | // Calculate and write DVLE offsets 146 | u32 curOff = 2*4 + g_totalDvleCount*4 + dvlpSize; 147 | for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end(); ++dvle) 148 | { 149 | if (dvle->nodvle) continue; 150 | f.WriteWord(curOff); 151 | curOff += 16*4; // Header 152 | curOff += dvle->constantCount*20; 153 | curOff += dvle->outputCount*8; 154 | curOff += dvle->uniformCount*8; 155 | curOff += dvle->symbolSize; 156 | curOff = (curOff + 3) &~ 3; // Word alignment 157 | } 158 | 159 | // Write DVLP header 160 | f.WriteWord(0x504C5644); // DVLP 161 | f.WriteWord(0); // version 162 | f.WriteWord(10*4); // offset to shader binary blob 163 | f.WriteWord(progSize); // size of shader binary blob 164 | f.WriteWord(10*4 + progSize*4); // offset to opdesc table 165 | f.WriteWord(g_opdescCount); // number of opdescs 166 | f.WriteWord(dvlpSize); // offset to symtable (TODO) 167 | f.WriteWord(0); // ???? 168 | f.WriteWord(0); // ???? 169 | f.WriteWord(0); // ???? 170 | 171 | // Write program 172 | for (outputBufIter it = g_outputBuf.begin(); it != g_outputBuf.end(); ++it) 173 | f.WriteWord(*it); 174 | 175 | // Write opdescs 176 | for (int i = 0; i < g_opdescCount; i ++) 177 | f.WriteDword(g_opdescTable[i]); 178 | 179 | // Write DVLEs 180 | for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end(); ++dvle) 181 | { 182 | if (dvle->nodvle) continue; 183 | curOff = 16*4; 184 | 185 | f.WriteWord(0x454C5644); // DVLE 186 | f.WriteHword(0x1002); // maybe version? 187 | f.WriteByte(dvle->isGeoShader ? 1 : 0); // Shader type 188 | f.WriteByte(dvle->isMerge ? 
1 : 0); 189 | f.WriteWord(dvle->entryStart); // offset to main 190 | f.WriteWord(dvle->entryEnd); // offset to end of main 191 | f.WriteHword(dvle->inputMask); 192 | f.WriteHword(dvle->outputMask); 193 | f.WriteByte(dvle->geoShaderType); 194 | f.WriteByte(dvle->geoShaderFixedStart); 195 | f.WriteByte(dvle->geoShaderVariableNum); 196 | f.WriteByte(dvle->geoShaderFixedNum); 197 | f.WriteWord(curOff); // offset to constant table 198 | f.WriteWord(dvle->constantCount); // size of constant table 199 | curOff += dvle->constantCount*5*4; 200 | f.WriteWord(curOff); // offset to label table (TODO) 201 | f.WriteWord(0); // size of label table (TODO) 202 | f.WriteWord(curOff); // offset to output table 203 | f.WriteWord(dvle->outputCount); // size of output table 204 | curOff += dvle->outputCount*8; 205 | f.WriteWord(curOff); // offset to uniform table 206 | f.WriteWord(dvle->uniformCount); // size of uniform table 207 | curOff += dvle->uniformCount*8; 208 | f.WriteWord(curOff); // offset to symbol table 209 | f.WriteWord(dvle->symbolSize); // size of symbol table 210 | 211 | // Sort uniforms by position 212 | std::sort(dvle->uniformTable, dvle->uniformTable + dvle->uniformCount); 213 | 214 | // Write constants 215 | for (int i = 0; i < dvle->constantCount; i ++) 216 | { 217 | Constant& ct = dvle->constantTable[i]; 218 | f.WriteHword(ct.type); 219 | if (ct.type == UTYPE_FVEC) 220 | { 221 | f.WriteHword(ct.regId-0x20); 222 | for (int j = 0; j < 4; j ++) 223 | f.WriteWord(f32tof24(ct.fparam[j])); 224 | } else if (ct.type == UTYPE_IVEC) 225 | { 226 | f.WriteHword(ct.regId-0x80); 227 | for (int j = 0; j < 4; j ++) 228 | f.WriteByte(ct.iparam[j]); 229 | } else if (ct.type == UTYPE_BOOL) 230 | { 231 | f.WriteHword(ct.regId-0x88); 232 | f.WriteWord(ct.bparam ? 
1 : 0); 233 | } 234 | if (ct.type != UTYPE_FVEC) 235 | for (int j = 0; j < 3; j ++) 236 | f.WriteWord(0); // Padding 237 | } 238 | 239 | // Write outputs 240 | for (int i = 0; i < dvle->outputCount; i ++) 241 | f.WriteDword(dvle->outputTable[i]); 242 | 243 | // Write uniforms 244 | size_t sp = 0; 245 | for (int i = 0; i < dvle->uniformCount; i ++) 246 | { 247 | Uniform& u = dvle->uniformTable[i]; 248 | size_t l = u.name.length()+1; 249 | f.WriteWord(sp); sp += l; 250 | int pos = u.pos; 251 | if (pos >= 0x20) 252 | pos -= 0x10; 253 | f.WriteHword(pos); 254 | f.WriteHword(pos+u.size-1); 255 | } 256 | 257 | // Write symbols 258 | for (int i = 0; i < dvle->uniformCount; i ++) 259 | { 260 | std::string u(dvle->uniformTable[i].name); 261 | std::replace(u.begin(), u.end(), '$', '.'); 262 | size_t l = u.length()+1; 263 | f.WriteRaw(u.c_str(), l); 264 | } 265 | 266 | // Word alignment 267 | int pos = f.Tell(); 268 | int pad = ((pos+3)&~3)-pos; 269 | for (int i = 0; i < pad; i ++) 270 | f.WriteByte(0); 271 | } 272 | 273 | if (hFile) 274 | { 275 | FILE* f2 = fopen(hFile, "w"); 276 | if (!f2) 277 | { 278 | fprintf(stderr, "Can't open header file!\n"); 279 | return 1; 280 | } 281 | 282 | fprintf(f2, "// Generated by picasso\n"); 283 | fprintf(f2, "#pragma once\n"); 284 | const char* prefix = g_dvleTable.front().isGeoShader ? "GSH" : "VSH"; // WARNING: HORRIBLE HACK - PLEASE FIX!!!!!!! 
285 | for (int i = 0; i < g_uniformCount; i ++) 286 | { 287 | Uniform& u = g_uniformTable[i]; 288 | const char* name = u.name.c_str(); 289 | if (*name == '_') continue; // Hidden uniform 290 | if (u.type == UTYPE_FVEC) 291 | fprintf(f2, "#define %s_FVEC_%s 0x%02X\n", prefix, name, u.pos-0x20); 292 | else if (u.type == UTYPE_IVEC) 293 | fprintf(f2, "#define %s_IVEC_%s 0x%02X\n", prefix, name, u.pos-0x80); 294 | else if (u.type == UTYPE_BOOL) 295 | { 296 | if (u.size == 1) 297 | fprintf(f2, "#define %s_FLAG_%s BIT(%d)\n", prefix, name, u.pos-0x88); 298 | else 299 | fprintf(f2, "#define %s_FLAG_%s(_n) BIT(%d+(_n))\n", prefix, name, u.pos-0x88); 300 | } 301 | fprintf(f2, "#define %s_ULEN_%s %d\n", prefix, name, u.size); 302 | } 303 | 304 | fclose(f2); 305 | } 306 | 307 | return EXIT_SUCCESS; 308 | } 309 | -------------------------------------------------------------------------------- /source/types.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | typedef uint64_t dword_t; 5 | typedef uint32_t word_t; 6 | typedef uint16_t hword_t; 7 | typedef uint8_t byte_t; 8 | typedef int64_t dlong_t; 9 | typedef int32_t long_t; 10 | typedef int16_t short_t; 11 | typedef int8_t char_t; 12 | typedef uint64_t u64; 13 | typedef uint32_t u32; 14 | typedef uint16_t u16; 15 | typedef uint8_t u8; 16 | 17 | #define BIT(n) (1U << (n)) 18 | 19 | #ifndef __BYTE_ORDER__ 20 | #include 21 | #define __BYTE_ORDER__ BYTE_ORDER 22 | #define __ORDER_LITTLE_ENDIAN__ LITTLE_ENDIAN 23 | #define __ORDER_BIG_ENDIAN__ BIG_ENDIAN 24 | #endif 25 | 26 | #ifndef __llvm__ 27 | #if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) 28 | 29 | static inline uint16_t __builtin_bswap16(uint16_t x) 30 | { 31 | return ((x << 8) & 0xff00) | ((x >> 8) & 0x00ff); 32 | } 33 | 34 | #if defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ < 7) 35 | static inline uint32_t __builtin_bswap32(uint32_t x) 36 | { 37 | return ((x << 
24) & 0xff000000) | 38 | ((x << 8) & 0x00ff0000) | 39 | ((x >> 8) & 0x0000ff00) | 40 | ((x >> 24) & 0x000000ff); 41 | } 42 | 43 | static inline uint64_t __builtin_bswap64(uint64_t x) 44 | { 45 | return (uint64_t)__builtin_bswap32(x>>32) | 46 | ((uint64_t)__builtin_bswap32(x&0xFFFFFFFF) << 32); 47 | } 48 | #endif 49 | #endif 50 | #endif 51 | 52 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 53 | #define be_dword(a) __builtin_bswap64(a) 54 | #define be_word(a) __builtin_bswap32(a) 55 | #define be_hword(a) __builtin_bswap16(a) 56 | #define le_dword(a) (a) 57 | #define le_word(a) (a) 58 | #define le_hword(a) (a) 59 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 60 | #define be_dword(a) (a) 61 | #define be_word(a) (a) 62 | #define be_hword(a) (a) 63 | #define le_dword(a) __builtin_bswap64(a) 64 | #define le_word(a) __builtin_bswap32(a) 65 | #define le_hword(a) __builtin_bswap16(a) 66 | #else 67 | #error "What's the endianness of the platform you're targeting?" 68 | #endif 69 | --------------------------------------------------------------------------------