├── .gitignore
├── COPYING
├── Changelog.md
├── Makefile.am
├── Manual.md
├── README.md
├── autogen.sh
├── clean.sh
├── configure.ac
├── example.vsh
└── source
    ├── FileClass.h
    ├── maestro_opcodes.h
    ├── picasso.h
    ├── picasso_assembler.cpp
    ├── picasso_frontend.cpp
    └── types.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.exe
 2 | *.o
 3 | *.elf
 4 | *~
 5 | *.shbin
 6 | *.vsh.h
 7 | *.bat
 8 | build/
 9 | 
10 | # The following is bullshit generated and/or required by autotools
11 | 
12 | NEWS
13 | README
14 | AUTHORS
15 | ChangeLog
16 | INSTALL
17 | Makefile.in
18 | aclocal.m4
19 | autom4te.cache
20 | config.guess
21 | config.sub
22 | configure
23 | depcomp
24 | install-sh
25 | missing
26 | config.log
27 | config.status
28 | Makefile
29 | picasso
30 | .deps/
31 | *.bz2
32 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014-2015, fincs
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/Changelog.md:
--------------------------------------------------------------------------------
 1 | # picasso Changelog
 2 | 
 3 | # v2.7.1
 4 | 
 5 | - Further improvements to overall system stability and other minor adjustments have been made to enhance the user experience.
 6 | 
 7 | # v2.7
 8 | 
 9 | - Added `dst`, `litp` and `break` instructions (thanks to @Tilka).
10 | - Added check to enforce index regs being used only with floating point vector uniforms.
11 | - Renamed index registers to match D3D naming conventions (`a0.x`, `a0.y`, `aL`) (old names still accepted).
12 | - Miscellaneous bugfixes and improvements (thanks to @lioncash).
13 | 
14 | # v2.6.2
15 | 
16 | - Fixed several compilation errors in some compilers.
17 | 
18 | # v2.6.1
19 | 
20 | - Reduced `mad` opdesc allocation errors by automatically swapping out of bounds opdesc entries with other ones in the addressable range (5 bits).
21 | 
22 | # v2.6
23 | 
24 | - Added `.in` directive for explicitly specifying (and allocating) input registers and exporting them in the DVLE uniform table.
25 | - Added support for dollar signs (`$`) in identifier names, which are translated to period characters (`.`) in DVLE uniform names.
26 | - Output registers `o7` through `o15` are now allowed in vertex shaders (as dummy outputs).
27 | - DVLE uniform table is now sorted by register position.
28 | 
29 | # v2.5
30 | 
31 | - The `.gsh` directive has been enhanced to provide full support for all geometry shader operation modes (point, variable-sized primitive and fixed-size primitive). This also effectively separates vertex shader uniform space from geometry shader uniform space.
32 | - The `.out` directive has been enhanced to allow wiring semantics to any arbitrary output register. Additionally the `dummy` semantic was added while the `7` semantic was removed.
33 | - Added auto-insertion of NOP instruction in corner cases involving flow of control instructions, together with the `--no-nop` command line flag which instead of adding NOPs warns the user about the corner cases.
34 | - Added support for `rgba` and `stpq` in addition to `xyzw`.
35 | - Added an error message for invalid input register use (e.g. `add r0, v1, v2`).
36 | - The operand descriptor allocation algorithm has been enhanced to take into account unused operands.
37 | - The `6` and `7` conditional operators have been removed since they actually do not exist.
38 | - Really corrected MAD instruction encoding.
39 | - Several miscellaneous issues were fixed.
40 | 
41 | # v2.4
42 | 
43 | - Corrected MAD instruction encoding.
44 | - Added command line flag for retrieving the picasso version.
45 | 
46 | # v2.3
47 | 
48 | - Added `.constfa` for creating floating-point vector constant arrays.
49 | - Fixed `.nodvle` bug.
50 | 
51 | # v2.2
52 | 
53 | - Added proper support for the MOVA instruction.
54 | - Added support for inverting the condition in JMPU.
55 | - Fixed `lcnt` bug.
56 | 
57 | # v2.1
58 | 
59 | - Fixed input file open error message.
60 | - Fixed `.constf` misallocation bug.
61 | 
62 | # v2.0
63 | 
64 | - (**Breaking change**) Command line format changed.
65 | - Added support for assembling multiple shaders (DVLEs) into a single SHBIN.
66 | - Added new directives: `.entry`, `.nodvle`, `.gsh`, `.setf`, `.seti`, `.setb`.
67 | - Added auto-detection of inverted forms of opcodes. (Explicitly using `dphi`, `sgei`, `slti` and `madi` is now deprecated)
68 | - Several miscellaneous bug fixes.
69 | 
70 | # v1.0
71 | 
72 | - Initial release.
73 | 


--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
 1 | # Makefile.am -- Process this file with automake to produce Makefile.in
 2 | bin_PROGRAMS = picasso
 3 | 
 4 | _common_SOURCES	=	source/FileClass.h source/maestro_opcodes.h source/types.h
 5 | picasso_SOURCES	=	source/picasso_assembler.cpp source/picasso_frontend.cpp source/picasso.h $(_common_SOURCES)
 6 | picasso_CXXFLAGS	=
 7 | 
 8 | 
 9 | EXTRA_DIST = autogen.sh
10 | 


--------------------------------------------------------------------------------
/Manual.md:
--------------------------------------------------------------------------------
  1 | # picasso Manual
  2 | 
  3 | ## Basic concepts
  4 | 
  5 | Comments are introduced by the semicolon character. E.g.
  6 | 
  7 | ```
  8 | ; This is a comment
  9 | .fvec myFloat ; They can also appear in the same line
 10 | ```
 11 | 
 12 | Identifiers follow the same rules as C identifiers. Additionally, the dollar sign (`$`) is allowed in identifiers; mostly as a substitute for the period character (`.`) since the latter is used in `picasso` syntax.
 13 | 
 14 | Labels consist of an identifier plus a colon. E.g.
 15 | 
 16 | ```
 17 | myLabel:
 18 | 	mov r0, r1
 19 | ```
 20 | 
 21 | Procedures are delimited using the `.proc` and `.end` directives. E.g.
 22 | 
 23 | ```
 24 | .proc normalize3
 25 | 	dp4 r15, r8, r8
 26 | 	rsq r15, r15
 27 | 	mul r8, r15, r8
 28 | .end
 29 | ```
 30 | 
 31 | Instructions consist of an opcode name and a comma-delimited list of arguments.
 32 | 
 33 | Directives are special statements that start with a period and control certain aspects of `picasso`'s code emission; such as defining procedures, uniforms, constants and more.
 34 | 
 35 | PICA200 registers are often used as arguments to instructions. There exist the following registers:
 36 | 
 37 | - `o0` through `o15`: Output registers (usable as a destination operand). The range `o7` through `o15` is only available in vertex shaders.
 38 | - `v0` through `v15`: Input registers (usable as a source operand).
 39 | - `r0` through `r15`: Scratch registers (usable as both destination and source operands).
 40 | - `c0` through `c95`: Floating-point vector uniforms (usable as a special type of source operand called SRC1).
 41 | - `i0` through `i3`: Integer vector uniforms (special purpose).
 42 | - `b0` through `b15`: Boolean uniforms (special purpose).
 43 | 
 44 | All registers contain 24-bit floating point vectors; except for integer vector uniforms (containing 8-bit integers) and boolean uniforms. Vectors have 4 components: x, y, z and w. The components may alternatively be referred to as r, g, b and a (respectively); or s, t, p and q (respectively). Uniforms are special registers that are writable by the CPU; thus they are used to pass configuration parameters to the shader such as transformation matrices. Sometimes they are preloaded with constant values that may be used in the logic of the shader.
 45 | 
 46 | In most situations, vectors may be [swizzled](http://en.wikipedia.org/wiki/Swizzling_%28computer_graphics%29); that is, their components may be rearranged. Register arguments support specifying a swizzling mask, e.g. `r0.wwxy`. The swizzling mask usually has 4 components (never more); if it has fewer, the last component is repeated to fill the mask. The default mask applied to registers is `xyzw`; that is, identity (no effect).
 47 | 
 48 | Output parameters have an output mask instead of a swizzling mask. This allows the shader to write to some components of a register without affecting the others. In `picasso`, the output mask is parsed exactly the same way as the swizzling mask, enabling write access for the components that are used in it. By default it is also `xyzw`; that is, writing to all components.
 49 | 
 50 | Registers may also be assigned additional names in order to make the code more legible. These additional names are called aliases. Aliases may also contain a swizzling mask; if a swizzling mask is applied to an alias afterwards the masks are combined. For example, provided that `someAlias` is an alias for `c0.wyxz`, `someAlias.xxww` would be equivalent to `c0.wwzz`. Aliases may be created by several directives which reserve certain kinds of registers.
 51 | 
 52 | For convenience, registers may be addressed using an offset from a known register. This is called indexing. For example, `c8[4]` is equivalent to `c12`; and `r4[-2]` is equivalent to `r2`. Indexing is useful for addressing arrays of registers (such as matrices).
 53 | 
 54 | Some source operands of instructions (called SRC1) support relative addressing. This means that it is possible to use one of the three built-in indexing registers (`a0.x`, `a0.y` and `aL`) to address a register, e.g. `someArray[aL]`. Adding an offset is also supported, e.g. `someArray[aL+2]`. This is useful in FOR loops. Index registers can only be used with floating-point vector uniform registers, though. Note: Older versions of `picasso` called the indexing registers `a0`, `a1` and `a2` respectively (also `lcnt` for `a2`); these names are still accepted for backwards compatibility.
 55 | 
 56 | Normal floating-point vector registers may also be negated by prepending a minus sign to them, e.g. `-r2` or `-someArray[aL+2]`.
 57 | 
 58 | In geometry shaders, `b15` is automatically set to true *after* each execution of the geometry shader. This can be useful to detect whether program state should be initialized - GPU management code usually resets all unused boolean uniforms to false when setting up the PICA200's shader processing units.
 59 | 
 60 | ## Command Line Usage
 61 | 
 62 | ```
 63 | Usage: picasso [options] files...
 64 | Options:
 65 |   -o, --out=<file>        Specifies the name of the SHBIN file to generate
 66 |   -h, --header=<file>     Specifies the name of the header file to generate
 67 |   -n, --no-nop            Disables the automatic insertion of padding NOPs
 68 |   -v, --version           Displays version information
 69 | ```
 70 | 
 71 | DVLEs are generated in the same order as the files in the command line.
 72 | 
 73 | ## Linking Model
 74 | 
 75 | `picasso` takes one or more source code files, and assembles them into a single `.shbin` file. A DVLE object is generated for each source code file, unless the `.nodvle` directive is used (see below). Procedures are shared amongst all source code files, and they may be defined and called wherever. Uniform space for vertex shaders is also shared, that is, if two vertex shader source code files declare the same uniform, they are assigned the same location. Geometry shaders however do not share uniforms, and each geometry shader source code file will have its own uniform allocation map. On the other hand, constants are never shared, and the same space is reused for the constants of each DVLE. Outputs and aliases are, by necessity, never shared either.
 76 | 
 77 | The entry point of a DVLE may be set with the `.entry` directive. If this directive is not used, `main` is assumed as the entrypoint.
 78 | 
 79 | A DVLE by default is a vertex shader, unless the `.gsh` directive is used (in the case of which a geometry shader is specified).
 80 | 
 81 | Uniforms that start with the underscore (`_`) character are not exposed in the DVLE table of uniforms. This allows for creating private uniforms that can be internally used to configure the behaviour of shared procedures. Additionally, dollar signs (`$`) are automatically translated to period characters (`.`) in the DVLE uniform table.
 82 | 
 83 | **Note**: Older versions of `picasso` handled geometry shaders in a different way. Specifically, uniform space was shared with vertex shaders and it was possible to use `.gsh` without parameters or `setemit` to flag a DVLE as a geometry shader. For backwards compatibility purposes this functionality has been retained, however its use is not recommended.
 84 | 
 85 | ## PICA200 Caveats & Errata
 86 | 
 87 | The PICA200's shader units have numerous implementation caveats and errata that should be taken into account when designing and writing shader code. Some of these include:
 88 | 
 89 | - Certain flow of control statements may not work at the end of another block, including the closing of other nested blocks. picasso detects these situations and automatically inserts padding NOP instructions (unless the `--no-nop` command line flag is used).
 90 | - The `mova` instruction is finicky and for instance two consecutive `mova` instructions will freeze the PICA200.
 91 | - Only a single input register is able to be referenced reliably at a time in the source registers of an operand. That is, while specifying the same input register in one or more source registers will behave correctly, specifying different input registers will produce incorrect results. picasso detects this situation and displays an error message.
 92 | 
 93 | ## Supported Directives
 94 | 
 95 | ### .proc
 96 | ```
 97 | .proc procName
 98 | ```
 99 | Introduces a procedure called `procName`. The procedure is terminated with `.end`.
100 | 
101 | ### .else
102 | ```
103 | .else
104 | ```
105 | Introduces the ELSE section of an IF statement.
106 | 
107 | ### .end
108 | ```
109 | .end
110 | ```
111 | Terminates a procedure, an IF statement or a FOR statement.
112 | 
113 | ### .alias
114 | ```
115 | .alias aliasName register
116 | ```
117 | Creates a new alias for `register` called `aliasName`. The specified register may also have a swizzling mask.
118 | 
119 | ### .fvec
120 | ```
121 | .fvec unifName1, unifName2[size], unifName3, ...
122 | ```
123 | Allocates new floating-point vector uniforms (or arrays of uniforms) and creates aliases for them that point to the allocated registers. Example:
124 | 
125 | ```
126 | .fvec scaler
127 | .fvec projMatrix[4], modelViewMatrix[4]
128 | ```
129 | 
130 | ### .ivec
131 | ```
132 | .ivec unifName1, unifName2[size], unifName3, ...
133 | ```
134 | Allocates new integer vector uniforms (or arrays of uniforms) and creates aliases for them that point to the allocated registers.
135 | 
136 | ### .bool
137 | ```
138 | .bool unifName1, unifName2[size], unifName3, ...
139 | ```
140 | Allocates new boolean uniforms (or arrays of uniforms) and creates aliases for them that point to the allocated registers. Example:
141 | 
142 | ```
143 | .bool useLight[4]
144 | .bool useRawVertexColor
145 | ```
146 | 
147 | ### .constf
148 | ```
149 | .constf constName(x, y, z, w)
150 | ```
151 | Reserves a new floating-point vector uniform to be preloaded with the specified constant; creates an alias for it that points to the allocated register. Example:
152 | 
153 | ```
154 | .constf floatConsts(0.0, 1.0, -1.0, 3.14159)
155 | ```
156 | 
157 | ### .consti
158 | ```
159 | .consti constName(x, y, z, w)
160 | ```
161 | Reserves a new integer vector uniform to be preloaded with the specified constant; creates an alias for it that points to the allocated register. Example:
162 | 
163 | ```
164 | .consti loopParams(16, 0, 1, 0)
165 | ```
166 | 
167 | ### .constfa
168 | ```
169 | .constfa arrayName[]
170 | .constfa arrayName[size]
171 | .constfa (x, y, z, w)
172 | ```
173 | Reserves a new array of floating-point vector uniforms to be preloaded with the specified constants; creates an alias for it that points to the first element. Example:
174 | 
175 | ```
176 | ; Create an array of two elements
177 | .constfa myArray[]
178 | .constfa (1.0, 2.0, 3.0, 4.0)
179 | .constfa (5.0, 6.0, 7.0, 8.0)
180 | .end
181 | ```
182 | 
183 | Optionally the size of the array may be specified. If a number of elements less than the size is specified, the missing elements are initialized to zero. Example:
184 | 
185 | ```
186 | .constfa myArray[4]
187 | .constfa (1.0, 2.0, 3.0, 4.0)
188 | .constfa (5.0, 6.0, 7.0, 8.0)
189 | ; The remaining two elements are vectors full of zeroes.
190 | .end
191 | ```
192 | 
193 | ### .in
194 | ```
195 | .in inName
196 | .in inName register
197 | ```
198 | Reserves an input register and creates an alias for it called `inName`. If no input register is specified it is automatically allocated. The input register is added to the DVLE's uniform table.
199 | 
200 | Example:
201 | 
202 | ```
203 | .in position
204 | .in texcoord
205 | .in special v15
206 | ```
207 | 
208 | ### .out
209 | ```
210 | .out outName propName
211 | .out outName propName register
212 | .out - propName register
213 | ```
214 | Wires an output register to a certain output property and (optionally) creates an alias for it called `outName` (specify a dash in order not to create the alias). If no output register is specified it is automatically allocated. The following property names are supported:
215 | 
216 | - `position` (or `pos`): Represents the position of the outputted vertex.
217 | - `normalquat` (or `nquat`): Used in fragment lighting, this represents the quaternion associated to the normal vector of the vertex.
218 | - `color` (or `clr`): Represents the color of the outputted vertex. Its format is (R, G, B, A) where R,G,B,A are values ranging from 0.0 to 1.0.
219 | - `texcoord0` (or `tcoord0`): Represents the first texture coordinate, which is always fed to the Texture Unit 0. Only the first two components are used.
220 | - `texcoord0w` (or `tcoord0w`): Represents the third component of the first texture coordinate, used for 3D/cube textures.
221 | - `texcoord1` (or `tcoord1`): Similarly to `texcoord0`, this is the second texture coordinate, which is usually but not always fed to Texture Unit 1.
222 | - `texcoord2` (or `tcoord2`): Similarly to `texcoord0`, this is the third texture coordinate, which is usually but not always fed to Texture Unit 2.
223 | - `view`: Used in fragment lighting, this represents the view vector associated to the vertex. The fourth component is not used.
224 | - `dummy`: Used in vertex shaders to pass generic semanticless parameters to the geometry shader, and in geometry shaders to use the appropriate property type from the output map of the vertex shader, thus 'merging' the output maps.
225 | 
226 | An output mask that specifies which components of the output register the property should be wired to is also accepted. If the output register is explicitly specified, it attaches to it (e.g. `o2.xy`); otherwise it attaches to the property name (e.g. `texcoord0.xy`).
227 | 
228 | Example:
229 | 
230 | ```
231 | .out outPos position
232 | .out outClr color.rgba
233 | .out outTex texcoord0.xy
234 | .out -      texcoord0w outTex.p
235 | ```
236 | 
237 | ### .entry
238 | ```
239 | .entry procedureName
240 | ```
241 | Specifies the name of the procedure to use as the entrypoint of the current DVLE. If this directive is not used, `main` is assumed.
242 | 
243 | ### .nodvle
244 | ```
245 | .nodvle
246 | ```
247 | This directive tells `picasso` not to generate a DVLE for the source code file that is being processed. This allows for writing files that contain shared procedures to be used by other files.
248 | 
249 | ### .gsh
250 | ```
251 | .gsh point firstReg
252 | .gsh variable firstReg vtxNum
253 | .gsh fixed firstReg arrayStartReg vtxNum
254 | ```
255 | This directive flags the current DVLE as a geometry shader and specifies the geometry shader operation mode, which can be one of the following:
256 | 
257 | - `point` mode: In this mode the geometry shader is called according to the input stride and input permutation configured by the user. On entry, the data is stored starting at the `v0` register. This type of geometry shader can be used with both array-drawing mode (aka `C3D_DrawArrays`) and element-drawing mode (aka `C3D_DrawElements`).
258 | - `variable` mode (also called `subdivision` mode): In this mode the geometry shader processes variable-sized primitives, which are required to have `vtxNum` vertices for which full attribute information will be stored, and **one or more** additional vertices for which only position information will be stored. On entry the register `c0` stores in all its components the total number of vertices of the primitive, and subsequent registers store vertex information in order. This type of geometry shader can only be used with element-drawing mode - inside the index array each primitive is prefixed with the number of vertices in it.
259 | - `fixed` mode (also called `particle` mode): In this mode the geometry shader processes fixed-size primitives, which always have `vtxNum` vertices. On entry, the array of vertex information will be stored starting at the float uniform register `arrayStartReg`. This type of geometry shader can only be used with element-drawing mode.
260 | 
261 | The `firstReg` parameter specifies the first float uniform register that is available for use in float uniform register allocation (this is especially useful in variable and fixed mode).
262 | 
263 | Examples:
264 | 
265 | ```
266 | .gsh point c0
267 | .gsh variable c48 3
268 | .gsh fixed c48 c0 4
269 | ```
270 | 
271 | **Note**: For backwards compatibility reasons, a legacy mode which does not accept any parameters is accepted; however it should not be used.
272 | 
273 | ### .setf
274 | ```
275 | .setf register(x, y, z, w)
276 | ```
277 | Similar to `.constf`, this directive adds a DVLE constant entry for the specified floating-point vector uniform register to be loaded with the specified value. This is useful in order to instantiate a generalized shared procedure with the specified parameters.
278 | 
279 | ### .seti
280 | ```
281 | .seti register(x, y, z, w)
282 | ```
283 | Similar to `.consti`, this directive adds a DVLE constant entry for the specified integer vector uniform register to be loaded with the specified value. This is useful in order to instantiate a generalized shared procedure with the specified parameters.
284 | 
285 | ### .setb
286 | ```
287 | .setb register value
288 | ```
289 | This directive adds a DVLE constant entry for the specified boolean uniform register to be loaded with the specified value (which may be `true`, `false`, `on`, `off`, `1` or `0`). This is useful in order to control the flow of a generalized shared procedure.
290 | 
291 | ## Supported Instructions
292 | 
293 | See [Shader Instruction Set](http://3dbrew.org/wiki/Shader_Instruction_Set) for more details.
294 | 
295 | Syntax                            | Description
296 | --------------------------------- | -----------------------------------
297 | `nop`                             | No operation.
298 | `end`                             | Signals the end of the program.
299 | `emit`                            | (Geoshader-only) Emits a vertex configured by a prior `setemit`.
300 | `setemit vtxId, emitFlags`        | (Geoshader-only) Configures a vertex for emission. The `emitFlags` parameter can be omitted.
301 | `add rDest, rSrc1, rSrc2`         |
302 | `dp3 rDest, rSrc1, rSrc2`         |
303 | `dp4 rDest, rSrc1, rSrc2`         |
304 | `dph rDest, rSrc1, rSrc2`         |
305 | `dst rDest, rSrc1, rSrc2`         |
306 | `mul rDest, rSrc1, rSrc2`         |
307 | `sge rDest, rSrc1, rSrc2`         |
308 | `slt rDest, rSrc1, rSrc2`         |
309 | `max rDest, rSrc1, rSrc2`         |
310 | `min rDest, rSrc1, rSrc2`         |
311 | `ex2 rDest, rSrc1`                |
312 | `lg2 rDest, rSrc1`                |
313 | `litp rDest, rSrc1`               |
314 | `flr rDest, rSrc1`                |
315 | `rcp rDest, rSrc1`                |
316 | `rsq rDest, rSrc1`                |
317 | `mov rDest, rSrc1`                |
318 | `mova idxReg, rSrc1`              |
319 | `cmp rSrc1, opx, opy, rSrc2`      |
320 | `call procName`                   |
321 | `for iReg`                        |
322 | `break`                           | (not recommended)
323 | `breakc condExp`                  |
324 | `callc condExp, procName`         |
325 | `ifc condExp`                     |
326 | `jmpc condExp, labelName`         |
327 | `callu bReg, procName`            |
328 | `ifu bReg`                        |
329 | `jmpu [!]bReg, labelName`         |
330 | `mad rDest, rSrc1, rSrc2, rSrc3`  |
331 | 
332 | ### Description of operands
333 | 
334 | - `rDest`: Represents a destination operand (register).
335 | - `rSrc1`/`rSrc2`/`rSrc3`: Represents a source operand (register). Depending on the position, some registers may be supported and some may not.
336 | 	- Narrow source operands are limited to input and scratch registers.
337 | 	- Wide source operands also support floating-point vector uniforms and relative addressing.
338 | 	- In instructions that take one source operand, it is always wide.
339 | 	- In instructions that take two source operands, the first is wide and the second is narrow.
340 | 	- `dph`/`sge`/`slt` have a special form where the first operand is narrow and the second is wide. This usage is detected automatically by `picasso`.
341 | 	- `mad`, which takes three source operands, has two forms: the first is narrow-wide-narrow, and the second is narrow-narrow-wide. This is also detected automatically.
342 | - `idxReg`: Represents an indexing register to write to using the mova instruction. Can be `a0.x`, `a0.y` or `a0.xy` (the latter writes to both components). Note: Older versions of `picasso` accepted `a0`, `a1` and `a01` respectively; this syntax is still supported for backwards compatibility.
343 | - `iReg`: Represents an integer vector uniform source operand.
344 | - `bReg`: Represents a boolean uniform source operand.
345 | - `procName`: Represents the name of a procedure.
346 | - `labelName`: Represents the name of a label.
347 | - `opx` and `opy`: They represent a conditional operator that is applied to the source registers and whose result is stored in the appropriate flag (`cmp.x` and `cmp.y` respectively). Supported values include:
348 | 	- `eq`: Equal
349 | 	- `ne`: Not equal
350 | 	- `lt`: Less than
351 | 	- `le`: Less than or equal to
352 | 	- `gt`: Greater than
353 | 	- `ge`: Greater than or equal to
354 | - `condExp`: Represents a conditional expression, which uses the conditional flags `cmp.x` and `cmp.y` set by the CMP instruction. These flags may be negated using the `!` symbol, e.g. `!cmp.x`. The conditional expression can take any of the following forms:
355 | 	- `flag1`: It tests a single flag.
356 | 	- `flag1 && flag2`: It performs AND between the two flags. Optionally, a single `&` may be specified.
357 | 	- `flag1 || flag2`: It performs OR between the two flags. Optionally, a single `|` may be specified.
358 | - `vtxId`: An integer ranging from 0 to 2 specifying the vertex ID used in geoshader vertex emission.
359 | - `emitFlags`: A space delimited combination of the following words:
360 | 	- `prim` (or `primitive`): Specifies that after emitting the vertex, a primitive should also be emitted.
361 | 	- `inv` (or `invert`): Specifies that the order of the vertices in the emitted primitive is inverted.
362 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # picasso
 2 | 
 3 | ## Introduction
 4 | 
 5 | `picasso` is a PICA200 shader assembler, written in C++. The PICA200 is the GPU used by the Nintendo 3DS.
 6 | 
 7 | `picasso` comes with a manual `Manual.md` that explains the shader language. `example.vsh` is a simple example that demonstrates it.
 8 | 
 9 | ## Building
10 | 
11 | A working C++ compiler for the host is required (Windows users: use TDM-GCC), plus autotools. Use the following commands to build the program:
12 | 
13 |     ./autogen.sh
14 |     ./configure
15 |     make
16 | 
17 | ## Shout-outs
18 | 
19 | - **smea** for reverse-engineering the PICA200, writing documentation, working hard & making `aemstro_as.py` (the original homebrew PICA200 shader assembler)
20 | - **neobrain** for making `nihstro-assemble`, whose syntax inspired that of `picasso` and whose usage of boost inspired me to make my own assembler without hefty dependencies.
21 | 


--------------------------------------------------------------------------------
/autogen.sh:
--------------------------------------------------------------------------------
1 | touch NEWS README AUTHORS ChangeLog  # make sure the placeholder files that autotools requires exist
2 | aclocal  # generate aclocal.m4
3 | autoconf  # produce the configure script from configure.ac
4 | automake --add-missing -c  # produce Makefile.in from Makefile.am; install missing helper scripts as copies (-c)
5 | 


--------------------------------------------------------------------------------
/clean.sh:
--------------------------------------------------------------------------------
1 | 
2 | # This script removes bullshit generated and/or required by autotools; as well as object/binary files
3 | rm -rf .deps autom4te.cache aclocal.m4 AUTHORS ChangeLog config.* configure depcomp INSTALL install-sh Makefile Makefile.in missing NEWS picasso *.exe *.o README
4 | 


--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
 1 | #                                               -*- Autoconf -*-
 2 | # Process this file with autoconf to produce a configure script.
 3 | 
 4 | AC_PREREQ(2.61)
 5 | AC_INIT([picasso],[2.7.1],[https://github.com/devkitPro/picasso/issues])
 6 | AC_CONFIG_SRCDIR([source/picasso_frontend.cpp])
 7 | 
 8 | AM_INIT_AUTOMAKE([subdir-objects])
 9 | 
10 | AC_CANONICAL_BUILD
11 | AC_CANONICAL_HOST
12 | 
13 | AC_PROG_CC
14 | AC_PROG_CXX
15 | 
16 | AC_CONFIG_FILES([Makefile])
17 | AC_OUTPUT
18 | 


--------------------------------------------------------------------------------
/example.vsh:
--------------------------------------------------------------------------------
 1 | ; Really simple & stupid PICA200 shader
 2 | ; Also serves as an example of picasso syntax
 3 | 
 4 | ; Uniforms
 5 | .fvec projMtx[4], mdlvMtx[4]
 6 | 
 7 | ; Constants
 8 | .constf myconst(0.0, 1.0, -1.0, 0.0)
 9 | .alias zeros myconst.xxxx
10 | .alias ones myconst.yyyy
11 | .alias negones myconst.zzzz
12 | .alias dummytcoord myconst.xxxy ; (0,0,0,1)
13 | 
14 | ; Outputs
15 | .out outpos position
16 | .out outtc0 texcoord0
17 | .out outtc1 texcoord1
18 | .out outtc2 texcoord2
19 | .out outclr color
20 | 
21 | ; Inputs
22 | .alias inpos v0
23 | .alias intex v1
24 | .alias inarg v2
25 | 
26 | .proc main
27 | 	; r0 = (inpos.x, inpos.y, inpos.z, 1.0)
28 | 	mov r0.xyz, inpos
29 | 	mov r0.w, ones
30 | 	
31 | 	; r1 = mdlvMtx * r0
32 | 	dp4 r1.x, mdlvMtx[0], r0
33 | 	dp4 r1.y, mdlvMtx[1], r0
34 | 	dp4 r1.z, mdlvMtx[2], r0
35 | 	dp4 r1.w, mdlvMtx[3], r0
36 | 	
37 | 	; outpos = projMtx * r1
38 | 	dp4 outpos.x, projMtx[0], r1
39 | 	dp4 outpos.y, projMtx[1], r1
40 | 	dp4 outpos.z, projMtx[2], r1
41 | 	dp4 outpos.w, projMtx[3], r1
42 | 	
43 | 	; Set texcoords
44 | 	mov outtc0, intex
45 | 	mov outtc1, dummytcoord
46 | 	mov outtc2, dummytcoord
47 | 	
48 | 	; Set vertex color
49 | 	mov outclr.xyz, inarg
50 | 	mov outclr.w, ones
51 | 	
52 | 	end
53 | .end
54 | 


--------------------------------------------------------------------------------
/source/FileClass.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <stdio.h>
  3 | #include "types.h"
  4 | 
  5 | class FileClass // Thin wrapper around a stdio FILE* offering endian-aware integer/float I/O and a manually tracked position.
  6 | {
  7 | 	FILE* f;
  8 | 	bool LittleEndian, own; // own: true when this object opened f itself and must close it
  9 | 	int filePos; // byte position reported by Tell(); tracked here instead of calling ftell() on every query
 10 | 
 11 | 	size_t _RawRead(void* buffer, size_t size)
 12 | 	{
 13 | 		size_t x = fread(buffer, 1, size, f);
 14 | 		filePos += x;
 15 | 		return x;
 16 | 	}
 17 | 
 18 | 	size_t _RawWrite(const void* buffer, size_t size)
 19 | 	{
 20 | 		size_t x = fwrite(buffer, 1, size, f);
 21 | 		filePos += x;
 22 | 		return x;
 23 | 	}
 24 | 
 25 | public:
 26 | 	FileClass(const char* file, const char* mode) : LittleEndian(true), own(true), filePos(0)
 27 | 	{
 28 | 		f = fopen(file, mode); // may be NULL; callers are expected to check openerror()
 29 | 	}
 30 | 	FileClass(FILE* inf) : f(inf), LittleEndian(true), own(false), filePos(0) { }
 31 | 	~FileClass()
 32 | 	{
 33 | 		if (f && own) fclose(f);
 34 | 	}
 35 | 
 36 | 	void SetLittleEndian() { LittleEndian = true; }
 37 | 	void SetBigEndian() { LittleEndian = false; }
 38 | 
 39 | 	FILE* get_ptr() { return f; }
 40 | 	bool openerror() { return f == NULL; }
 41 | 
 42 | 	dword_t ReadDword()
 43 | 	{
 44 | 		dword_t value = 0; // well-defined result (0) when the read comes up short
 45 | 		_RawRead(&value, sizeof(dword_t));
 46 | 		return LittleEndian ? le_dword(value) : be_dword(value);
 47 | 	}
 48 | 
 49 | 	void WriteDword(dword_t value)
 50 | 	{
 51 | 		value = LittleEndian ? le_dword(value) : be_dword(value);
 52 | 		_RawWrite(&value, sizeof(dword_t));
 53 | 	}
 54 | 
 55 | 	word_t ReadWord()
 56 | 	{
 57 | 		word_t value = 0; // well-defined result (0) when the read comes up short
 58 | 		_RawRead(&value, sizeof(word_t));
 59 | 		return LittleEndian ? le_word(value) : be_word(value);
 60 | 	}
 61 | 
 62 | 	void WriteWord(word_t value)
 63 | 	{
 64 | 		value = LittleEndian ? le_word(value) : be_word(value);
 65 | 		_RawWrite(&value, sizeof(word_t));
 66 | 	}
 67 | 
 68 | 	hword_t ReadHword()
 69 | 	{
 70 | 		hword_t value = 0; // well-defined result (0) when the read comes up short
 71 | 		_RawRead(&value, sizeof(hword_t));
 72 | 		return LittleEndian ? le_hword(value) : be_hword(value);
 73 | 	}
 74 | 
 75 | 	void WriteHword(hword_t value)
 76 | 	{
 77 | 		value = LittleEndian ? le_hword(value) : be_hword(value);
 78 | 		_RawWrite(&value, sizeof(hword_t));
 79 | 	}
 80 | 
 81 | 	byte_t ReadByte()
 82 | 	{
 83 | 		byte_t value = 0; // well-defined result (0) when the read comes up short
 84 | 		_RawRead(&value, sizeof(byte_t));
 85 | 		return value;
 86 | 	}
 87 | 
 88 | 	void WriteByte(byte_t value)
 89 | 	{
 90 | 		_RawWrite(&value, sizeof(byte_t));
 91 | 	}
 92 | 
 93 | 	float ReadFloat()
 94 | 	{
 95 | 		union { word_t w; float f; } t; // reinterpret the bits read by ReadWord() as a float
 96 | 		t.w = ReadWord();
 97 | 		return t.f;
 98 | 	}
 99 | 
100 | 	void WriteFloat(float value)
101 | 	{
102 | 		union { word_t w; float f; } t;
103 | 		t.f = value;
104 | 		WriteWord(t.w);
105 | 	}
106 | 	// Raw transfers succeed only when exactly `size` bytes were moved.
107 | 	bool ReadRaw(void* buffer, size_t size) { return _RawRead(buffer, size) == size; }
108 | 	bool WriteRaw(const void* buffer, size_t size) { return _RawWrite(buffer, size) == size; }
109 | 
110 | 	void Seek(int pos, int mode) { if (fseek(f, pos, mode) == 0) { long p = ftell(f); if (p >= 0) filePos = (int)p; } } // resync so Tell() stays correct after a successful seek
111 | 	int Tell() { return filePos /*ftell(f)*/; }
112 | 	void Flush() { fflush(f); }
113 | };
114 | 
115 | static inline char* StringFromFile(const char* filename) // Loads a whole file into a malloc'd, NUL-terminated buffer (the caller frees it); returns NULL on any failure.
116 | {
117 | 	FILE* f = fopen(filename, "rb");
118 | 	if (!f) return NULL;
119 | 	// Measure the file; size stays negative when the stream cannot be seeked or measured.
120 | 	long size = fseek(f, 0, SEEK_END) == 0 ? ftell(f) : -1L;
121 | 	rewind(f);
122 | 	char* buf = size >= 0 ? (char*)malloc((size_t)size + 1) : NULL;
123 | 	if (!buf)
124 | 	{
125 | 		fclose(f);
126 | 		return NULL;
127 | 	}
128 | 	size_t got = fread(buf, 1, (size_t)size, f);
129 | 	buf[got] = 0; // terminate after the bytes actually read, so a short read never exposes uninitialized memory
130 | 	fclose(f);
131 | 	return buf;
132 | }
133 | 


--------------------------------------------------------------------------------
/source/maestro_opcodes.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | enum // Opcode numbers; each enumerator is one more than the previous unless a value is written out
 3 | {
 4 | 	MAESTRO_ADD = 0x00,
 5 | 	MAESTRO_DP3,
 6 | 	MAESTRO_DP4,
 7 | 	MAESTRO_DPH,
 8 | 	MAESTRO_DST,
 9 | 	MAESTRO_EX2,
10 | 	MAESTRO_LG2,
11 | 	MAESTRO_LITP,
12 | 	MAESTRO_MUL,
13 | 	MAESTRO_SGE,
14 | 	MAESTRO_SLT,
15 | 	MAESTRO_FLR,
16 | 	MAESTRO_MAX,
17 | 	MAESTRO_MIN,
18 | 	MAESTRO_RCP,
19 | 	MAESTRO_RSQ, // 0x0F
20 | 
21 | 	MAESTRO_unk10, // 0x10; every unkXX name spells out its own hexadecimal value
22 | 	MAESTRO_unk11,
23 | 	MAESTRO_MOVA,
24 | 	MAESTRO_MOV,
25 | 	MAESTRO_unk14,
26 | 	MAESTRO_unk15,
27 | 	MAESTRO_unk16,
28 | 	MAESTRO_unk17,
29 | 	MAESTRO_DPHI,
30 | 	MAESTRO_DSTI,
31 | 	MAESTRO_SGEI,
32 | 	MAESTRO_SLTI,
33 | 	MAESTRO_unk1C,
34 | 	MAESTRO_unk1D,
35 | 	MAESTRO_unk1E,
36 | 	MAESTRO_unk1F,
37 | 
38 | 	MAESTRO_BREAK, // 0x20
39 | 	MAESTRO_NOP,
40 | 	MAESTRO_END,
41 | 	MAESTRO_BREAKC,
42 | 	MAESTRO_CALL,
43 | 	MAESTRO_CALLC,
44 | 	MAESTRO_CALLU,
45 | 	MAESTRO_IFU,
46 | 	MAESTRO_IFC,
47 | 	MAESTRO_FOR,
48 | 	MAESTRO_EMIT, // Geometry shader related
49 | 	MAESTRO_SETEMIT, // Geometry shader related
50 | 	MAESTRO_JMPC,
51 | 	MAESTRO_JMPU,
52 | 	MAESTRO_CMP, // 0x2E; only the upper 5 bits are used for the opcode
53 | 
54 | 	// Only the upper 3 bits are used for the following opcodes
55 | 	MAESTRO_MADI = 0x30,
56 | 	MAESTRO_MAD = 0x38,
57 | };
58 | 


--------------------------------------------------------------------------------
/source/picasso.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include <ctype.h>
  6 | #include <stdarg.h>
  7 | #include <getopt.h>
  8 | #ifdef WIN32
  9 | #include <fcntl.h>
 10 | #endif
 11 | #include "types.h"
 12 | 
 13 | #include <vector>
 14 | #include <list>
 15 | #include <map>
 16 | #include <string>
 17 | #include <algorithm>
 18 | 
 19 | #include "FileClass.h"
 20 | 
 21 | #include "maestro_opcodes.h"
 22 | 
 23 | #if !defined(WIN32) && !defined(stricmp)
 24 | #define stricmp strcasecmp
 25 | #endif
 26 | 
 27 | enum // Vector component indices; these are the 2-bit values packed by SWIZZLE_COMP
 28 | {
 29 | 	COMP_X = 0,
 30 | 	COMP_Y,
 31 | 	COMP_Z,
 32 | 	COMP_W,
 33 | };
 34 | 
 35 | #define SWIZZLE_COMP(n,v) ((v) << (6-(n)*2)) // component v for slot n; slot 0 lands in bits 6..7, slot 3 in bits 0..1
 36 | #define OPSRC_MAKE(neg, sw) ((neg) | ((sw) << 1)) // bit 0: neg, bits 1 and up: swizzle
 37 | #define OPDESC_MAKE(out, src1, src2, src3) ((out) | ((src1) << 4) | ((src2) << (4+9)) | ((src3) << (4+9*2))) // out at bit 0, then src1/src2/src3 at bit offsets 4, 13 and 22
 38 | #define FMT_OPCODE(n) ((n)<<26) // places n at bit 26 and up
 39 | #define OUTPUT_MAKE(i, reg, mask) ((i) | ((reg)<<16) | ((u64)(mask)<<32)) // i in the low bits, reg from bit 16, mask from bit 32 (widened to u64 first)
 40 | // Identity swizzle (x, y, z, w) and the corresponding source operand with neg = 0
 41 | #define DEFAULT_SWIZZLE (SWIZZLE_COMP(0,COMP_X) | SWIZZLE_COMP(1,COMP_Y) | SWIZZLE_COMP(2,COMP_Z) | SWIZZLE_COMP(3,COMP_W))
 42 | #define DEFAULT_OPSRC OPSRC_MAKE(0, DEFAULT_SWIZZLE)
 43 | // Descriptor masks: D selects the 4-bit out field, each digit selects the 9-bit field of that source
 44 | #define OPDESC_MASK_D123 OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0x1FF)
 45 | #define OPDESC_MASK_D12  OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0)
 46 | #define OPDESC_MASK_D1   OPDESC_MAKE(0xF, 0x1FF, 0,     0)
 47 | #define OPDESC_MASK_1    OPDESC_MAKE(0,   0x1FF, 0,     0)
 48 | #define OPDESC_MASK_12   OPDESC_MAKE(0,   0x1FF, 0x1FF, 0)
 49 | 
 50 | enum // Comparison operators; parseCondOp() in picasso_assembler.cpp maps the mnemonics "eq", "ne", "lt", "le", "gt", "ge" to these
 51 | {
 52 | 	COND_EQ = 0,
 53 | 	COND_NE,
 54 | 	COND_LT,
 55 | 	COND_LE,
 56 | 	COND_GT,
 57 | 	COND_GE,
 58 | };
 59 | 
 60 | //-----------------------------------------------------------------------------
 61 | // Global data
 62 | //-----------------------------------------------------------------------------
 63 | 
 64 | // Output buffer
 65 | #define MAX_VSH_SIZE 512
 66 | typedef std::vector<u32> outputBufType;
 67 | typedef outputBufType::iterator outputBufIter;
 68 | extern outputBufType g_outputBuf;
 69 | 
 70 | enum // NOTE(review): presumably the block kinds stored in StackEntry::type (procedure, for, if, array) — confirm against the directive handlers
 71 | {
 72 | 	SE_PROC,
 73 | 	SE_FOR,
 74 | 	SE_IF,
 75 | 	SE_ARRAY,
 76 | };
 77 | 
 78 | struct StackEntry // One element of g_stack
 79 | {
 80 | 	int type; // NOTE(review): presumably one of the SE_* values — confirm
 81 | 	size_t pos; // NOTE(review): presumably an output-buffer position recorded when the block was opened — confirm
 82 | 	union // extra payload: either a string or an unsigned value, sharing storage
 83 | 	{
 84 | 		const char* strExtra;
 85 | 		size_t uExtra;
 86 | 	};
 87 | };
 88 | 
 89 | // Stack used to keep track of stuff.
 90 | #define MAX_STACK 32
 91 | extern StackEntry g_stack[MAX_STACK];
 92 | extern int g_stackPos;
 93 | 
 94 | // Operand descriptor stuff.
 95 | #define MAX_OPDESC 128
 96 | extern int g_opdescTable[MAX_OPDESC];
 97 | extern int g_opdescMasks[MAX_OPDESC]; // used to keep track of used bits (spelling now matches the definition in picasso_assembler.cpp)
 98 | extern int g_opdescCount;
 99 | 
100 | enum // Uniform kinds; getAlloc() in picasso_assembler.cpp picks the bool / integer-vector / float-vector allocator from these
101 | {
102 | 	UTYPE_BOOL = 0,
103 | 	UTYPE_IVEC,
104 | 	UTYPE_FVEC,
105 | };
106 | 
107 | struct Uniform // A named uniform with a position, a size and a type; ordered by position.
108 | {
109 | 	std::string name;
110 | 	int pos, size;
111 | 	int type;
112 | 
113 | 	// Strict ordering that looks at the position only.
114 | 	inline bool operator <(const Uniform& rhs) const { return rhs.pos > pos; }
115 | 
116 | 	// Overwrites all four fields in one call.
117 | 	void init(const char* name, int pos, int size, int type)
118 | 	{
119 | 		Uniform& self = *this;
120 | 		self.type = type;
121 | 		self.size = size;
122 | 		self.pos = pos;
123 | 		self.name = name;
124 | 	}
125 | };
126 | 
127 | // List of uniforms
128 | #define MAX_UNIFORM 0x60
129 | extern Uniform g_uniformTable[MAX_UNIFORM];
130 | extern int g_uniformCount;
131 | 
132 | struct DVLEData; // Forward declaration
133 | 
134 | typedef std::pair<size_t, size_t> procedure; // position, size
135 | typedef std::pair<size_t, std::string> relocation; // position, name
136 | 
137 | typedef std::map<std::string, procedure> procTableType;
138 | typedef std::map<std::string, size_t> labelTableType;
139 | typedef std::map<std::string, int> aliasTableType;
140 | typedef std::vector<relocation> relocTableType;
141 | typedef std::list<DVLEData> dvleTableType;
142 | 
143 | typedef procTableType::iterator procTableIter;
144 | typedef labelTableType::iterator labelTableIter;
145 | typedef aliasTableType::iterator aliasTableIter;
146 | typedef relocTableType::iterator relocTableIter;
147 | typedef dvleTableType::iterator dvleTableIter;
148 | 
149 | extern procTableType g_procTable;
150 | extern dvleTableType g_dvleTable;
151 | extern relocTableType g_procRelocTable;
152 | extern int g_totalDvleCount;
153 | 
154 | // The following are cleared before each file is processed
155 | extern labelTableType g_labels;
156 | extern relocTableType g_labelRelocTable;
157 | extern aliasTableType g_aliases;
158 | 
159 | extern bool g_autoNop;
160 | 
161 | int AssembleString(char* str, const char* initialFilename);
162 | int RelocateProduct(void);
163 | 
164 | //-----------------------------------------------------------------------------
165 | // Local data
166 | //-----------------------------------------------------------------------------
167 | 
168 | enum // NOTE(review): presumably the output semantics selectable by .out directives (position, color, texcoordN, ...) — confirm against the directive parser
169 | {
170 | 	OUTTYPE_POS      = 0,
171 | 	OUTTYPE_NQUAT    = 1,
172 | 	OUTTYPE_CLR      = 2,
173 | 	OUTTYPE_TCOORD0  = 3,
174 | 	OUTTYPE_TCOORD0W = 4,
175 | 	OUTTYPE_TCOORD1  = 5,
176 | 	OUTTYPE_TCOORD2  = 6,
177 | 	OUTTYPE_VIEW     = 8, // note: the value 7 is not assigned
178 | 	OUTTYPE_DUMMY    = 9,
179 | };
180 | 
181 | enum
182 | {
183 | 	GSHTYPE_POINT    = 0,
184 | 	GSHTYPE_VARIABLE = 1,
185 | 	GSHTYPE_FIXED    = 2,
186 | };
187 | 
188 | struct Constant // A constant value associated with a register id
189 | {
190 | 	int regId;
191 | 	int type; // NOTE(review): presumably a UTYPE_* value telling which union member is meaningful — confirm
192 | 	union // the three representations share storage
193 | 	{
194 | 		float fparam[4];
195 | 		u8 iparam[4];
196 | 		bool bparam;
197 | 	};
198 | };
199 | 
200 | struct DVLEData // State gathered for one DVLE; instances are created by GetDvleData() in picasso_assembler.cpp and stored (by copy) in g_dvleTable.
201 | {
202 | 	// General config
203 | 	std::string filename;
204 | 	std::string entrypoint;
205 | 	size_t entryStart, entryEnd; // zero until RelocateProduct() resolves the entrypoint
206 | 	bool nodvle, isGeoShader, isCompatGeoShader, isMerge;
207 | 	u16 inputMask, outputMask;
208 | 	u8 geoShaderType;
209 | 	u8 geoShaderFixedStart;
210 | 	u8 geoShaderVariableNum;
211 | 	u8 geoShaderFixedNum;
212 | 
213 | 	// Uniforms
214 | 	Uniform uniformTable[MAX_UNIFORM];
215 | 	int uniformCount;
216 | 	size_t symbolSize;
217 | 
218 | 	// Constants
219 | 	#define MAX_CONSTANT 0x60
220 | 	Constant constantTable[MAX_CONSTANT];
221 | 	int constantCount;
222 | 
223 | 	// Outputs
224 | 	#define MAX_OUTPUT 16
225 | 	u64 outputTable[MAX_OUTPUT];
226 | 	u32 outputUsedReg;
227 | 	int outputCount;
228 | 
229 | 	bool usesGshSpace() const { return isGeoShader && !isCompatGeoShader; }
230 | 	int findFreeOutput() const // lowest index below maxOutputReg() whose outputMask bit is clear, or -1
231 | 	{
232 | 		for (int i = 0; i < maxOutputReg(); i ++)
233 | 			if (!(outputMask & BIT(i)))
234 | 				return i;
235 | 		return -1;
236 | 	}
237 | 
238 | 	int findFreeInput() const // lowest index below 16 whose inputMask bit is clear, or -1
239 | 	{
240 | 		for (int i = 0; i < 16; i ++)
241 | 			if (!(inputMask & BIT(i)))
242 | 				return i;
243 | 		return -1;
244 | 	}
245 | 
246 | 	int maxOutputReg() const // 7 when isGeoShader is set, 16 otherwise
247 | 	{
248 | 		return isGeoShader ? 0x07 : 0x10;
249 | 	}
250 | 	// All scalar members start at zero/false (entryStart/entryEnd included, since the object is copied before they are resolved); the entrypoint defaults to "main".
251 | 	DVLEData(const char* filename) :
252 | 		filename(filename), entrypoint("main"), entryStart(0), entryEnd(0),
253 | 		nodvle(false), isGeoShader(false), isCompatGeoShader(false), isMerge(false),
254 | 		inputMask(0), outputMask(0), geoShaderType(0), geoShaderFixedStart(0), geoShaderVariableNum(0), geoShaderFixedNum(0),
255 | 		uniformCount(0), symbolSize(0), constantCount(0), outputUsedReg(0), outputCount(0) { }
256 | };
257 | 


--------------------------------------------------------------------------------
/source/picasso_assembler.cpp:
--------------------------------------------------------------------------------
   1 | #include "picasso.h"
   2 | 
   3 | //#define DEBUG
   4 | #define BUF g_outputBuf
   5 | #define NO_MORE_STACK (g_stackPos==MAX_STACK)
   6 | 
   7 | static const char* curFile = NULL;
   8 | static int curLine = -1;
   9 | static bool lastWasEnd = false;
  10 | 
  11 | std::vector<u32> g_outputBuf;
  12 | 
  13 | StackEntry g_stack[MAX_STACK];
  14 | int g_stackPos;
  15 | 
  16 | int g_opdescTable[MAX_OPDESC];
  17 | int g_opdescCount;
  18 | int g_opdescMasks[MAX_OPDESC];
  19 | u32 g_opdescIsMad;
  20 | 
  21 | Uniform g_uniformTable[MAX_UNIFORM];
  22 | int g_uniformCount;
  23 | 
  24 | std::vector<Constant> g_constArray;
  25 | int g_constArraySize = -1;
  26 | const char* g_constArrayName;
  27 | 
  28 | bool g_autoNop = true;
  29 | 
  30 | class UniformAlloc
  31 | {
  32 | 	int start, end, bound, tend;
  33 | public:
  34 | 	UniformAlloc(int start, int end) : start(start), end(end), bound(end), tend(end) { }
  35 | 	void ClearLocal(void) { end = tend; }
  36 | 	void Reinit(int start, int end)
  37 | 	{
  38 | 		this->start = start;
  39 | 		this->end = end;
  40 | 		this->bound = end;
  41 | 		this->tend = end;
  42 | 	}
  43 | 	int AllocGlobal(int size)
  44 | 	{
  45 | 		if ((start+size) > bound) return -1;
  46 | 		int ret = start;
  47 | 		start += size;
  48 | 		return ret;
  49 | 	}
  50 | 	int AllocLocal(int size)
  51 | 	{
  52 | 		int pos = end - size;
  53 | 		if (pos < start) return -1;
  54 | 		bound = pos < bound ? pos : bound;
  55 | 		end = pos;
  56 | 		return pos;
  57 | 	}
  58 | };
  59 | 
   60 | struct UniformAllocBundle // One allocator per uniform kind: float vectors, integer vectors and booleans
   61 | {
   62 | 	UniformAlloc fvecAlloc, ivecAlloc, boolAlloc;
   63 | 
   64 | 	UniformAllocBundle() :
   65 | 		fvecAlloc(0x20, 0x80), ivecAlloc(0x80, 0x84), boolAlloc(0x88, 0x98) { }
   66 | 
   67 | 	void clear() // releases the local allocations of all three allocators
   68 | 	{
   69 | 		fvecAlloc.ClearLocal();
   70 | 		ivecAlloc.ClearLocal();
   71 | 		boolAlloc.ClearLocal();
   72 | 	}
   73 | 
   74 | 	void initForGsh(int firstFree) // re-seeds the ranges: float vectors begin at firstFree, and the bool range ends at 0x97 rather than the default 0x98
   75 | 	{
   76 | 		fvecAlloc.Reinit(firstFree, 0x80);
   77 | 		ivecAlloc.Reinit(0x80, 0x84);
   78 | 		boolAlloc.Reinit(0x88, 0x97);
   79 | 	}
   80 | };
  81 | 
  82 | static UniformAllocBundle unifAlloc[2];
  83 | 
  84 | static inline UniformAlloc& getAlloc(int type, const DVLEData* dvle)
  85 | {
  86 | 	int x = dvle->usesGshSpace();
  87 | 	switch (type)
  88 | 	{
  89 | 		default:
  90 | 		case UTYPE_FVEC: return unifAlloc[x].fvecAlloc;
  91 | 		case UTYPE_IVEC: return unifAlloc[x].ivecAlloc;
  92 | 		case UTYPE_BOOL: return unifAlloc[x].boolAlloc;
  93 | 	}
  94 | }
  95 | 
  96 | procTableType g_procTable;
  97 | dvleTableType g_dvleTable;
  98 | relocTableType g_procRelocTable;
  99 | int g_totalDvleCount;
 100 | 
 101 | labelTableType g_labels;
 102 | relocTableType g_labelRelocTable;
 103 | aliasTableType g_aliases;
 104 | 
 105 | static DVLEData* curDvle;
 106 | 
  107 | static void ClearStatus(void) // Resets the state that AssembleString() rebuilds for every input
  108 | {
  109 | 	unifAlloc[0].clear(); // only bundle 0 has its local allocations released here
  110 | 	g_labels.clear();
  111 | 	g_labelRelocTable.clear();
  112 | 	g_aliases.clear();
  113 | 	curDvle = NULL; // the next GetDvleData() call will create a fresh DVLE
  114 | }
 115 | 
 116 | static DVLEData* GetDvleData(void)
 117 | {
 118 | 	if (!curDvle)
 119 | 	{
 120 | 		g_dvleTable.push_back( DVLEData(curFile) );
 121 | 		curDvle = &g_dvleTable.back();
 122 | 		g_totalDvleCount ++;
 123 | 	}
 124 | 	return curDvle;
 125 | }
 126 | 
 127 | static char* mystrtok_pos;
 128 | static char* mystrtok(char* str, const char* delim)
 129 | {
 130 | 	if (!str) str = mystrtok_pos;
 131 | 	if (!*str) return NULL;
 132 | 
 133 | 	size_t pos = strcspn(str, delim);
 134 | 	char* ret = str;
 135 | 	str += pos;
 136 | 	if (*str)
 137 | 		*str++ = 0;
 138 | 	mystrtok_pos = str;
 139 | 	return ret;
 140 | }
 141 | 
 142 | static char* mystrtok_spc(char* str)
 143 | {
 144 | 	char* ret = mystrtok(str, " \t");
 145 | 	if (!ret) return NULL;
 146 | 	if (*mystrtok_pos)
 147 | 		for (; *mystrtok_pos && isspace(*mystrtok_pos); mystrtok_pos++);
 148 | 	return ret;
 149 | }
 150 | 
 151 | static char* remove_comment(char* buf)
 152 | {
 153 | 	char* pos = strchr(buf, ';');
 154 | 	if (pos) *pos = 0;
 155 | 	return buf;
 156 | }
 157 | 
 158 | static char* trim_whitespace(char* buf)
 159 | {
 160 | 	if (!buf)
 161 | 		return NULL;
 162 | 
 163 | 	// Remove trailing whitespace
 164 | 	int pos;
 165 | 	for(pos = strlen(buf)-1; pos >= 0 && isspace(buf[pos]); pos --) buf[pos] = '\0';
 166 | 
 167 | 	// Remove leading whitespace
 168 | 	char* newbuf = buf;
 169 | 	for(; isspace(*newbuf); newbuf ++);
 170 | 
 171 | 	return newbuf;
 172 | }
 173 | 
 174 | static bool validateIdentifier(const char* id)
 175 | {
 176 | 	int len = strlen(id);
 177 | 	bool valid = true;
 178 | 	for (int i = 0; valid && i < len; i ++)
 179 | 	{
 180 | 		int c = id[i];
 181 | 		valid = isalpha(c) || c == '_' || c == '$' || (i > 0 && isdigit(c));
 182 | 	}
 183 | 	return valid;
 184 | }
 185 | 
 186 | static int throwError(const char* msg, ...)
 187 | {
 188 | 	va_list v;
 189 | 
 190 | 	fprintf(stderr, "%s:%d: error: ", curFile, curLine);
 191 | 
 192 | 	va_start(v, msg);
 193 | 	vfprintf(stderr, msg, v);
 194 | 	va_end(v);
 195 | 
 196 | 	return 1;
 197 | }
 198 | 
 199 | static int parseInt(char* pos, int& out, long long min, long long max)
 200 | {
 201 | 	char* endptr = NULL;
 202 | 	long long res = strtoll(pos, &endptr, 0);
 203 | 	if (pos == endptr)
 204 | 		return throwError("Invalid value: %s\n", pos);
 205 | 	if (res < min || res > max)
 206 | 		return throwError("Value out of range (%d..%u): %d\n", (int)min, (unsigned int)max, (int)res);
 207 | 	out = res;
 208 | 	return 0;
 209 | }
 210 | 
 211 | #define safe_call(x) do \
 212 | 	{ \
 213 | 		int _ = (x); \
 214 | 		if (_ != 0) return _; \
 215 | 	} while(0)
 216 | 
 217 | static int ProcessCommand(const char* cmd);
 218 | static int FixupLabelRelocations();
 219 | 
 220 | int AssembleString(char* str, const char* initialFilename)
 221 | {
 222 | 	curFile = initialFilename;
 223 | 	curLine = 1;
 224 | 
 225 | 	ClearStatus();
 226 | 
 227 | 	int nextLineIncr = 0;
 228 | 	char* nextStr = NULL;
 229 | 	for (; str; str = nextStr, curLine += nextLineIncr)
 230 | 	{
 231 | 		size_t len = strcspn(str, "\n");
 232 | 		int linedelim = str[len];
 233 | 		str[len] = 0;
 234 | 		nextStr = linedelim ? (str + len + 1) : NULL;
 235 | 		nextLineIncr = linedelim == '\n' ? 1 : 0;
 236 | 
 237 | 		char* line = trim_whitespace(remove_comment(str));
 238 | 
 239 | 		char* colonPos = NULL;
 240 | 		for (;;)
 241 | 		{
 242 | 			colonPos = strchr(line, ':');
 243 | 			if (!colonPos)
 244 | 				break;
 245 | 			*colonPos = 0;
 246 | 			char* labelName = line;
 247 | 			line = trim_whitespace(colonPos + 1);
 248 | 
 249 | 			if (!validateIdentifier(labelName))
 250 | 				return throwError("invalid label name: %s\n", labelName);
 251 | 
 252 | 			std::pair<labelTableIter,bool> ret = g_labels.insert( std::pair<std::string,size_t>(labelName, BUF.size()) );
 253 | 			if (!ret.second)
 254 | 				return throwError("duplicate label: %s\n", labelName);
 255 | 
 256 | 			//printf("Label: %s\n", labelName);
 257 | 		};
 258 | 
 259 | 		if (!*line)
 260 | 			continue;
 261 | 
 262 | 		if (*line == '#')
 263 | 		{
 264 | 			line = trim_whitespace(line + 1);
 265 | 			nextLineIncr = 0;
 266 | 			size_t pos = strcspn(line, " \t");
 267 | 			line[pos] = 0;
 268 | 			curLine = atoi(line);
 269 | 			line = trim_whitespace(line + pos + 1);
 270 | 			if (*line == '"')
 271 | 			{
 272 | 				line ++;
 273 | 				line[strlen(line)-1] = 0;
 274 | 			}
 275 | 			curFile = line;
 276 | 			continue;
 277 | 		}
 278 | 
 279 | 		char* tok = mystrtok_spc(line);
 280 | 		safe_call(ProcessCommand(tok));
 281 | 	}
 282 | 
 283 | 	if (g_stackPos)
 284 | 		return throwError("unclosed block(s)\n");
 285 | 
 286 | 	safe_call(FixupLabelRelocations());
 287 | 	
 288 | 	return 0;
 289 | }
 290 | 
 291 | int FixupLabelRelocations()
 292 | {
 293 | 	for (relocTableIter it = g_labelRelocTable.begin(); it != g_labelRelocTable.end(); ++it)
 294 | 	{
 295 | 		relocation& r = *it;
 296 | 		u32& inst = BUF[r.first];
 297 | 		labelTableIter lbl = g_labels.find(r.second);
 298 | 		if (lbl == g_labels.end())
 299 | 			return throwError("label '%s' is undefined\n", r.second.c_str());
 300 | 		u32 dst = lbl->second;
 301 | 		inst &= ~(0xFFF << 10);
 302 | 		inst |= dst << 10;
 303 | 	}
 304 | 	return 0;
 305 | }
 306 | 
  307 | int RelocateProduct() // Resolves procedure references and each DVLE's entry point; returns 0 on success, non-zero after reporting an error
  308 | {
  309 | 	for (relocTableIter it = g_procRelocTable.begin(); it != g_procRelocTable.end(); ++it)
  310 | 	{
  311 | 		relocation& r = *it; // (position in the output buffer, procedure name)
  312 | 		u32& inst = BUF[r.first];
  313 | 		procTableIter proc = g_procTable.find(r.second);
  314 | 		if (proc == g_procTable.end())
  315 | 			return throwError("procedure '%s' is undefined\n", r.second.c_str());
  316 | 		u32 dst = proc->second.first; // procedure position
  317 | 		u32 num = proc->second.second; // procedure size
  318 | 		inst &= ~0x3FFFFF; // clear the low 22 bits...
  319 | 		inst |= num | (dst << 10); // ...then store the size in the low bits and the position from bit 10 (neither value is range-checked here)
  320 | 	}
  321 | 
  322 | 	if (g_totalDvleCount == 0)
  323 | 		return throwError("no DVLEs can be generated from the given input file(s)\n");
  324 | 
  325 | 	for (dvleTableIter it = g_dvleTable.begin(); it != g_dvleTable.end(); ++it)
  326 | 	{
  327 | 		if (it->nodvle) continue;
  328 | 		curFile = it->filename.c_str(); // so that throwError() reports against this DVLE's file
  329 | 		curLine = 1;
  330 | 		procTableIter mainIt = g_procTable.find(it->entrypoint);
  331 | 		if (mainIt == g_procTable.end())
  332 | 			return throwError("entrypoint '%s' is undefined\n", it->entrypoint.c_str());
  333 | 		it->entryStart = mainIt->second.first;
  334 | 		it->entryEnd = it->entryStart + mainIt->second.second; // start + size
  335 | 	}
  336 | 	return 0;
  337 | }
 338 | 
 339 | // --------------------------------------------------------------------
 340 | // Commands
 341 | // --------------------------------------------------------------------
 342 | 
  343 | static char* nextArg() // Next comma-delimited argument of the line being tokenized, whitespace-trimmed; NULL when none is left
  344 | {
  345 | 	return trim_whitespace(mystrtok(NULL, ","));
  346 | }
 347 | 
  348 | static char* nextArgCParen() // Next token ending at a '(' (or at the end of the line), whitespace-trimmed; NULL when none is left
  349 | {
  350 | 	return trim_whitespace(mystrtok(NULL, "("));
  351 | }
 352 | 
  353 | static char* nextArgSpc() // Next space/tab-delimited token, whitespace-trimmed; NULL when none is left
  354 | {
  355 | 	return trim_whitespace(mystrtok_spc(NULL));
  356 | }
 357 | 
  358 | static int missingParam() // Reports "missing parameter" and returns throwError()'s non-zero result
  359 | {
  360 | 	return throwError("missing parameter\n");
  361 | }
 362 | 
  363 | typedef struct // One row of a command/directive table
  364 | {
  365 | 	const char* name; // mnemonic or directive name
  366 | 	int (* func) (const char*, int, int); // handler; DEF_COMMAND names its parameters (cmdName, opcode, opcodei), DEF_DIRECTIVE (cmdName, dirParam, _unused)
  367 | 	int opcode, opcodei; // NOTE(review): presumably forwarded as the handler's 2nd and 3rd arguments — the dispatcher is not visible here, confirm
  368 | } cmdTableType;
 369 | 
 370 | #define NEXT_ARG(_varName) char* _varName; do \
 371 | 	{ \
 372 | 		_varName = nextArg(); \
 373 | 		if (!_varName) return missingParam(); \
 374 | 	} while (0)
 375 | 
 376 | #define NEXT_ARG_SPC(_varName) char* _varName; do \
 377 | 	{ \
 378 | 		_varName = nextArgSpc(); \
 379 | 		if (!_varName) return missingParam(); \
 380 | 	} while (0)
 381 | 
 382 | #define NEXT_ARG_CPAREN(_varName) char* _varName; do \
 383 | 	{ \
 384 | 		_varName = nextArgCParen(); \
 385 | 		if (!_varName) return missingParam(); \
 386 | 	} while (0)
 387 | 
 388 | #define NEXT_ARG_OPT(_varName, _opt) char* _varName; do \
 389 | 	{ \
 390 | 		_varName = nextArg(); \
 391 | 		if (!_varName) _varName = (char*)(_opt); \
 392 | 	} while (0)
 393 | 
 394 | #define DEF_COMMAND(name) \
 395 | 	static int cmd_##name(const char* cmdName, int opcode, int opcodei)
 396 | 
 397 | #define DEC_COMMAND(name, fun) \
 398 | 	{ #name, cmd_##fun, MAESTRO_##name, -1 }
 399 | 
 400 | #define DEC_COMMAND2(name, fun) \
 401 | 	{ #name, cmd_##fun, MAESTRO_##name, MAESTRO_##name##I }, \
 402 | 	{ #name "i", cmd_##fun, MAESTRO_##name, MAESTRO_##name##I }
 403 | 
 404 | #define DEF_DIRECTIVE(name) \
 405 | 	static int dir_##name(const char* cmdName, int dirParam, int _unused)
 406 | 
 407 | #define DEC_DIRECTIVE(name) \
 408 | 	{ #name, dir_##name, 0, 0 }
 409 | 
 410 | #define DEC_DIRECTIVE2(name, fun, opc) \
 411 | 	{ #name, dir_##fun, opc, 0 }
 412 | 
 413 | static int ensureNoMoreArgs()
 414 | {
 415 | 	return nextArg() ? throwError("too many parameters\n") : 0;
 416 | }
 417 | 
  418 | static int duplicateIdentifier(const char* id) // Reports that id is already in use and returns throwError()'s non-zero result
  419 | {
  420 | 	return throwError("identifier already used: %s\n", id);
  421 | }
 422 | 
 423 | static int ensureTarget(const char* target)
 424 | {
 425 | 	if (!validateIdentifier(target))
 426 | 		return throwError("invalid target: %s\n", target);
 427 | 	return 0;
 428 | }
 429 | 
 430 | static inline int ensure_valid_dest(int reg, const char* name)
 431 | {
 432 | 	if (reg < 0x00 || reg >= 0x20)
 433 | 		return throwError("invalid destination register: %s\n", name);
 434 | 	return 0;
 435 | }
 436 | 
 437 | static inline int ensure_valid_src_wide(int reg, const char* name, int srcId)
 438 | {
 439 | 	if (reg < 0x00 || reg >= 0x80)
 440 | 		return throwError("invalid source%d register: %s\n", srcId, name);
 441 | 	return 0;
 442 | }
 443 | 
 444 | static inline int ensure_valid_src_narrow(int reg, const char* name, int srcId)
 445 | {
 446 | 	if (reg < 0x00 || reg >= 0x20)
 447 | 		return throwError("invalid source%d register: %s\n", srcId, name);
 448 | 	return 0;
 449 | }
 450 | 
 451 | static inline int ensure_no_idxreg(int idxreg, int srcId)
 452 | {
 453 | 	if (idxreg > 0)
 454 | 		return throwError("index register not allowed in source%d\n", srcId);
 455 | 	return 0;
 456 | }
 457 | 
 458 | static inline int ensure_valid_ireg(int reg, const char* name)
 459 | {
 460 | 	if (reg < 0x80 || reg >= 0x88)
 461 | 		return throwError("invalid integer vector uniform: %s\n", name);
 462 | 	return 0;
 463 | }
 464 | 
 465 | static inline int ensure_valid_breg(int reg, const char* name)
 466 | {
 467 | 	if (reg < 0x88 || reg >= 0x98)
 468 | 		return throwError("invalid boolean uniform: %s\n", name);
 469 | 	return 0;
 470 | }
 471 | 
 472 | static inline int ensure_valid_condop(int condop, const char* name)
 473 | {
 474 | 	if (condop < 0)
 475 | 		return throwError("invalid conditional operator: %s\n", name);
 476 | 	return 0;
 477 | }
 478 | 
 479 | #define ENSURE_NO_MORE_ARGS() safe_call(ensureNoMoreArgs())
 480 | 
 481 | #define ARG_TO_INT(_varName, _argName, _min, _max) \
 482 | 	int _varName = 0; \
 483 | 	safe_call(parseInt(_argName, _varName, _min, _max))
 484 | 
 485 | #define ARG_TO_REG(_varName, _argName) \
 486 | 	int _varName = 0, _varName##Sw = 0; \
 487 | 	safe_call(parseReg(_argName, _varName, _varName##Sw));
 488 | 
 489 | #define ARG_TO_REG2(_varName, _argName) \
 490 | 	int _varName = 0, _varName##Sw = 0, _varName##Idx = 0; \
 491 | 	safe_call(parseReg(_argName, _varName, _varName##Sw, &_varName##Idx));
 492 | 
 493 | #define ARG_TO_CONDOP(_varName, _argName) \
 494 | 	int _varName = parseCondOp(_argName); \
 495 | 	safe_call(ensure_valid_condop(_varName, _argName))
 496 | 
 497 | #define ARG_TARGET(_argName) \
 498 | 	safe_call(ensureTarget(_argName))
 499 | 
 500 | #define ARG_TO_DEST_REG(_reg, _name) \
 501 | 	ARG_TO_REG(_reg, _name); \
 502 | 	safe_call(ensure_valid_dest(_reg, _name))
 503 | 
 504 | #define ARG_TO_SRC1_REG(_reg, _name) \
 505 | 	ARG_TO_REG(_reg, _name); \
 506 | 	safe_call(ensure_valid_src_wide(_reg, _name, 1))
 507 | 
 508 | #define ARG_TO_SRC1_REG2(_reg, _name) \
 509 | 	ARG_TO_REG2(_reg, _name); \
 510 | 	safe_call(ensure_valid_src_wide(_reg, _name, 1))
 511 | 
 512 | #define ARG_TO_SRC2_REG(_reg, _name) \
 513 | 	ARG_TO_REG(_reg, _name); \
 514 | 	safe_call(ensure_valid_src_narrow(_reg, _name, 2))
 515 | 
 516 | #define ARG_TO_IREG(_reg, _name) \
 517 | 	ARG_TO_REG(_reg, _name); \
 518 | 	safe_call(ensure_valid_ireg(_reg, _name))
 519 | 
 520 | #define ARG_TO_BREG(_reg, _name) \
 521 | 	ARG_TO_REG(_reg, _name); \
 522 | 	safe_call(ensure_valid_breg(_reg, _name))
 523 | 
 524 | static int parseSwizzling(const char* b)
 525 | {
 526 | 	int i, out = 0, q = COMP_X;
 527 | 	for (i = 0; b[i] && i < 4; i ++)
 528 | 	{
 529 | 		switch (tolower(b[i]))
 530 | 		{
 531 | 			case 'x': case 'r': case 's': q = COMP_X; break;
 532 | 			case 'y': case 'g': case 't': q = COMP_Y; break;
 533 | 			case 'z': case 'b': case 'p': q = COMP_Z; break;
 534 | 			case 'w': case 'a': case 'q': q = COMP_W; break;
 535 | 			default: return -1;
 536 | 		}
 537 | 		out |= SWIZZLE_COMP(i, q);
 538 | 	}
 539 | 	if (b[i])
 540 | 		return -1;
 541 | 	// Fill in missing bits
 542 | 	for (int j = i; j < 4; j ++)
 543 | 		out |= SWIZZLE_COMP(j, q);
 544 | 	return out<<1;
 545 | }
 546 | 
  547 | static int maskFromSwizzling(int sw, bool reverse = true) // Builds a 4-bit mask with one bit set per component that appears in the swizzle sw
  548 | {
  549 | 	sw >>= 1; // get rid of negation bit
  550 | 	int out = 0;
  551 | 	int prevbitid = 4; // larger than any component, so the first slot examined never warns
  552 | 	for (int i = 0; i < 4; i ++)
  553 | 	{
  554 | 		int bitid = (sw>>(i*2))&3; // i = 0 reads the lowest two bits, i.e. the last swizzle slot, so slots are visited last to first
  555 | 		if (bitid > prevbitid) // components are not in ascending order when read first to last
  556 | 			fprintf(stderr, "%s:%d: warning: arbitrary swizzling has no effect for destination mask\n", curFile, curLine);
  557 | 		prevbitid=bitid;
  558 | 		if (reverse) bitid = 3 - bitid; // reversed layout: component x maps to bit 3, w to bit 0
  559 | 		out |= BIT(bitid);
  560 | 	}
  561 | 	return out;
  562 | }
 563 | 
  564 | static void optimizeOpdesc(int& mask, int opcode, int opdesc) // Removes from mask the source-swizzle bits selected below for this opcode
  565 | {
  566 | 	int unused1 = 0, unused2 = 0, unused3 = 0; // per-source swizzle slots to drop from the mask
  567 | 	bool optimize = false; // (never read)
  568 | 
  569 | 	switch (opcode)
  570 | 	{
  571 | 		case MAESTRO_ADD:
  572 | 		case MAESTRO_MUL:
  573 | 		case MAESTRO_SGE:
  574 | 		case MAESTRO_SLT:
  575 | 		case MAESTRO_FLR:
  576 | 		case MAESTRO_MAX:
  577 | 		case MAESTRO_MIN:
  578 | 		case MAESTRO_MOV:
  579 | 		case MAESTRO_MAD:
  580 | 			for (int i = 0; i < 4; i ++) // slot i is dropped for all three sources when bit (3-i) of the descriptor's out field is clear
  581 | 				if (!(opdesc & BIT(3-i)))
  582 | 					unused1 |= SWIZZLE_COMP(i,3);
  583 | 			unused2 = unused1;
  584 | 			unused3 = unused1;
  585 | 			break;
  586 | 
  587 | 		case MAESTRO_DP3: // slot 3 of both sources is dropped
  588 | 			unused1 = SWIZZLE_COMP(3,3);
  589 | 			unused2 = SWIZZLE_COMP(3,3);
  590 | 			break;
  591 | 
  592 | 		case MAESTRO_DPH: // slot 3 of the first source only
  593 | 			unused1 = SWIZZLE_COMP(3,3);
  594 | 			break;
  595 | 
  596 | 		case MAESTRO_EX2:
  597 | 		case MAESTRO_LG2:
  598 | 		case MAESTRO_RCP:
  599 | 		case MAESTRO_RSQ: // only slot 0 of the first source is kept
  600 | 			unused1 = SWIZZLE_COMP(1,3) | SWIZZLE_COMP(2,3) | SWIZZLE_COMP(3,3);
  601 | 			break;
  602 | 
  603 | 		case MAESTRO_MOVA: // slots 0/1 follow the out field; execution then continues into the CMP case (no break)
  604 | 			if (!(opdesc & BIT(3-COMP_X))) unused1 |= SWIZZLE_COMP(0,3);
  605 | 			if (!(opdesc & BIT(3-COMP_Y))) unused1 |= SWIZZLE_COMP(1,3);
  606 | 		case MAESTRO_CMP: // slots 2 and 3 of the first source are dropped
  607 | 			unused1 |= SWIZZLE_COMP(2,3) | SWIZZLE_COMP(3,3);
  608 | 			break;
  609 | 	}
  610 | 
  611 | 	mask &= ~OPDESC_MAKE(0,OPSRC_MAKE(0,unused1),OPSRC_MAKE(0,unused2),OPSRC_MAKE(0,unused3));
  612 | }
 613 | 
  614 | static int findOrAddOpdesc(int opcode, int& out, int opdesc, int mask) // Stores in out the index of a compatible descriptor, reusing an existing table entry when possible; 0 on success
  615 | {
  616 | 	optimizeOpdesc(mask, opcode, opdesc); // narrow mask before comparing
  617 | 
  618 | 	for (int i = 0; i < g_opdescCount; i ++)
  619 | 	{
  620 | 		int minMask = mask & g_opdescMasks[i]; // bits that both this request and entry i care about
  621 | 		if ((opdesc&minMask) == (g_opdescTable[i]&minMask)) // entry i agrees on every shared bit
  622 | 		{
  623 | 			// Update opdesc to include extra bits (if any)
  624 | 			g_opdescTable[i] = (g_opdescTable[i]&~mask) | (opdesc & mask);
  625 | 			g_opdescMasks[i] |= mask;
  626 | 			out = i;
  627 | 			return 0;
  628 | 		}
  629 | 	}
  630 | 	if (g_opdescCount == MAX_OPDESC) // no compatible entry and the table is full
  631 | 		return throwError("too many operand descriptors (limit is %d)\n", MAX_OPDESC);
  632 | 	g_opdescTable[g_opdescCount] = opdesc; // append a new entry
  633 | 	g_opdescMasks[g_opdescCount] = mask;
  634 | 	out = g_opdescCount++;
  635 | 	return 0;
  636 | }
 637 | 
 638 | static void swapOpdesc(u32 from, u32 to)
 639 | {
 640 | 	std::swap(g_opdescTable[from], g_opdescTable[to]);
 641 | 	std::swap(g_opdescMasks[from], g_opdescMasks[to]);
 642 | 	for (size_t i = 0; i < BUF.size(); i ++)
 643 | 	{
 644 | 		u32& opword = BUF[i];
 645 | 		u32 opcode = opword>>26;
 646 | 		if (opcode < 0x20 || (opcode&~1)==MAESTRO_CMP)
 647 | 		{
 648 | 			u32 cur_opdesc = opword & 0x7F;
 649 | 			if (cur_opdesc==from)
 650 | 				cur_opdesc=to;
 651 | 			else if (cur_opdesc==to)
 652 | 				cur_opdesc=from;
 653 | 			opword = (opword &~ 0x7F) | cur_opdesc;
 654 | 		}
 655 | 	}
 656 | }
 657 | 
 658 | static inline bool isregp(int x)
 659 | {
 660 | 	x = tolower(x);
 661 | 	return x=='o' || x=='v' || x=='r' || x=='c' || x=='i' || x=='b';
 662 | }
 663 | 
 664 | static inline int convertIdxRegName(const char* reg)
 665 | {
 666 | 	if (stricmp(reg, "a0")==0 || stricmp(reg, "a0.x")==0) return 1;
 667 | 	if (stricmp(reg, "a1")==0 || stricmp(reg, "a0.y")==0) return 2;
 668 | 	if (stricmp(reg, "a2")==0 || stricmp(reg, "lcnt")==0 || stricmp(reg, "aL")==0) return 3;
 669 | 	return 0;
 670 | }
 671 | 
 672 | static inline int parseCondOp(const char* name)
 673 | {
 674 | 	if (stricmp(name, "eq")==0) return COND_EQ;
 675 | 	if (stricmp(name, "ne")==0) return COND_NE;
 676 | 	if (stricmp(name, "lt")==0) return COND_LT;
 677 | 	if (stricmp(name, "le")==0) return COND_LE;
 678 | 	if (stricmp(name, "gt")==0) return COND_GT;
 679 | 	if (stricmp(name, "ge")==0) return COND_GE;
 680 | 	return -1;
 681 | }
 682 | 
 683 | static int parseReg(char* pos, int& outReg, int& outSw, int* idxType = NULL)
 684 | {
 685 | 	outReg = 0;
 686 | 	outSw = DEFAULT_OPSRC;
 687 | 	if (idxType) *idxType = 0;
 688 | 	if (*pos == '-')
 689 | 	{
 690 | 		pos++;
 691 | 		outSw |= 1; // negation bit
 692 | 	}
 693 | 	int regOffset = 0;
 694 | 	char* offPos = strchr(pos, '[');
 695 | 	char* dotPos = pos;
 696 | 	if (offPos)
 697 | 	{
 698 | 		dotPos = strchr(offPos, ']');
 699 | 		if (!dotPos)
 700 | 			return throwError("missing closing bracket: %s\n", pos);
 701 | 		*dotPos++ = 0;
 702 | 		*offPos++ = 0;
 703 | 		offPos = trim_whitespace(offPos);
 704 | 
 705 | 		// Check for idxreg+offset
 706 | 		int temp = convertIdxRegName(offPos);
 707 | 		if (temp>0)
 708 | 		{
 709 | 			if (!idxType)
 710 | 				return throwError("index register not allowed here: %s\n", offPos);
 711 | 			*idxType = temp;
 712 | 		} else do
 713 | 		{
 714 | 			char* plusPos = strchr(offPos, '+');
 715 | 			if (!plusPos)
 716 | 				break;
 717 | 			if (!idxType)
 718 | 				return throwError("index register not allowed here: %s\n", offPos);
 719 | 			*plusPos++ = 0;
 720 | 			char* idxRegName = trim_whitespace(offPos);
 721 | 			offPos = trim_whitespace(plusPos);
 722 | 			*idxType = convertIdxRegName(idxRegName);
 723 | 			if (!*idxType)
 724 | 				return throwError("invalid index register: %s\n", idxRegName);
 725 | 		} while (0);
 726 | 
 727 | 		regOffset = atoi(offPos);
 728 | 		if (regOffset < 0)
 729 | 			return throwError("invalid register offset: %s\n", offPos);
 730 | 	}
 731 | 	dotPos = strchr(dotPos, '.');
 732 | 	if (dotPos)
 733 | 	{
 734 | 		*dotPos++ = 0;
 735 | 		outSw = parseSwizzling(dotPos) | (outSw&1);
 736 | 		if (outSw < 0)
 737 | 			return throwError("invalid swizzling mask: %s\n", dotPos);
 738 | 	}
 739 | 	aliasTableIter it = g_aliases.find(pos);
 740 | 	if (it != g_aliases.end())
 741 | 	{
 742 | 		int x = it->second;
 743 | 		outReg = x & 0xFF;
 744 | 		outReg += regOffset;
 745 | 		outSw ^= (x>>8)&1;
 746 | 		x >>= 9;
 747 | 		// Combine swizzling
 748 | 		int temp = outSw & 1;
 749 | 		for (int j = 0; j < 4; j ++)
 750 | 		{
 751 | 			int comp = (outSw >> (7 - j*2)) & 3;
 752 | 			comp = (x >> (6 - comp*2)) & 3;
 753 | 			temp |= SWIZZLE_COMP(j, comp)<<1;
 754 | 		}
 755 | 		outSw = temp;
 756 | 		return 0;
 757 | 	}
 758 | 
 759 | 	if (!isregp(pos[0]) || !isdigit(pos[1]))
 760 | 		return throwError("invalid register: %s\n", pos);
 761 | 
 762 | 	safe_call(parseInt(pos+1, outReg, 0, 255));
 763 | 	switch (*pos)
 764 | 	{
 765 | 		case 'o': // Output registers
 766 | 			if (outReg < 0x00 || outReg >= GetDvleData()->maxOutputReg())
 767 | 				return throwError("invalid output register: %s\n", pos);
 768 | 			break;
 769 | 		case 'v': // Input attributes
 770 | 			if (outReg < 0x00 || outReg >= 0x0F)
 771 | 				return throwError("invalid input register: %s\n", pos);
 772 | 			break;
 773 | 		case 'r': // Temporary registers
 774 | 			outReg += 0x10;
 775 | 			if (outReg < 0x10 || outReg >= 0x20)
 776 | 				return throwError("invalid temporary register: %s\n", pos);
 777 | 			break;
 778 | 		case 'c': // Floating-point vector uniform registers
 779 | 			outReg += 0x20;
 780 | 			if (outReg < 0x20 || outReg >= 0x80)
 781 | 				return throwError("invalid floating-point vector uniform register: %s\n", pos);
 782 | 			break;
 783 | 		case 'i': // Integer vector uniforms
 784 | 			outReg += 0x80;
 785 | 			if (outReg < 0x80 || outReg >= 0x88)
 786 | 				return throwError("invalid integer vector uniform register: %s\n", pos);
 787 | 			break;
 788 | 		case 'b': // Boolean uniforms
 789 | 			outReg += 0x88;
 790 | 			if (outReg < 0x88 || outReg >= 0x98)
 791 | 				return throwError("invalid boolean uniform register: %s\n", pos);
 792 | 			break;
 793 | 	}
 794 | 	if (idxType && *idxType && (outReg < 0x20 || outReg >= 0x80))
 795 | 		return throwError("index register not allowed with this kind of register\n");
 796 | 	outReg += regOffset;
 797 | 	return 0;
 798 | }
 799 | 
 800 | static int parseCondExpOp(char* str, u32& outFlags, int& which)
 801 | {
 802 | 	int negation = 0;
 803 | 	for (; *str == '!'; str++) negation ^= 1;
 804 | 	if (stricmp(str, "cmp.x")==0)
 805 | 	{
 806 | 		which = 0;
 807 | 		outFlags ^= negation<<25;
 808 | 		return 0;
 809 | 	}
 810 | 	if (stricmp(str, "cmp.y")==0)
 811 | 	{
 812 | 		which = 1;
 813 | 		outFlags ^= negation<<24;
 814 | 		return 0;
 815 | 	}
 816 | 	return throwError("invalid condition register: %s\n", str);
 817 | }
 818 | 
 819 | static int parseCondExp(char* str, u32& outFlags)
 820 | {
 821 | 	outFlags = BIT(24) | BIT(25);
 822 | 	size_t len = strlen(str);
 823 | 	size_t pos = strcspn(str, "&|");
 824 | 	int op2 = -1;
 825 | 	if (pos < len)
 826 | 	{
 827 | 		char* str2 = str + pos;
 828 | 		int type = *str2;
 829 | 		*str2++ = 0;
 830 | 		if (*str2 == type)
 831 | 			str2++;
 832 | 		str = trim_whitespace(str);
 833 | 		str2 = trim_whitespace(str2);
 834 | 		if (type == '&')
 835 | 			outFlags |= 1<<22;
 836 | 		safe_call(parseCondExpOp(str2, outFlags, op2));
 837 | 	}
 838 | 	int op1 = -1;
 839 | 	safe_call(parseCondExpOp(str, outFlags, op1));
 840 | 	if (op1 == op2)
 841 | 		return throwError("condition register checked twice\n");
 842 | 	if (op2 < 0)
 843 | 		outFlags |= (op1+2)<<22;
 844 | 	return 0;
 845 | }
 846 | 
 847 | static inline bool isBadInputRegCombination(int a, int b)
 848 | {
 849 | 	return a < 0x10 && b < 0x10 && a != b;
 850 | }
 851 | 
 852 | static inline bool isBadInputRegCombination(int a, int b, int c)
 853 | {
 854 | 	return isBadInputRegCombination(a,b) || isBadInputRegCombination(b,c) || isBadInputRegCombination(c,a);
 855 | }
 856 | 
 857 | static void insertPaddingNop()
 858 | {
 859 | 	if (g_autoNop)
 860 | 		BUF.push_back(FMT_OPCODE(MAESTRO_NOP));
 861 | 	else
 862 | 		fprintf(stderr, "%s:%d: warning: a padding NOP is required here\n", curFile, curLine);
 863 | }
 864 | 
// Instruction handler for operand-less instructions: rejects any argument and
// emits a word that contains only the opcode.
// NOTE(review): `opcode` is presumably supplied by DEF_COMMAND -- confirm.
DEF_COMMAND(format0)
{
	ENSURE_NO_MORE_ARGS();

	BUF.push_back(FMT_OPCODE(opcode));
	return 0;
}
 872 | 
// Instruction handler for two-source instructions: `op dest, src1, src2`.
//
// Normally src1 is the "wide" operand (it may carry an index register) and
// src2 the "narrow" one. When the instruction has an inverted encoding
// (opcodei >= 0) and src1 is below 0x20 while src2 is 0x20 or above, the roles
// are swapped and the inverted opcode is emitted instead.
//
// Emitted word, regular form:  opdesc | src2<<7 | src1<<12 | src1Idx<<19 | dest<<21
// Emitted word, inverted form: opdesc | src2<<7 | src1<<14 | src2Idx<<19 | dest<<21
//
// NOTE(review): the ARG_TO_* macros presumably declare rX, rXSw and (for the
// REG2 variants) rXIdx -- confirm against their definitions.
DEF_COMMAND(format1)
{
	NEXT_ARG(destName);
	NEXT_ARG(src1Name);
	NEXT_ARG(src2Name);
	ENSURE_NO_MORE_ARGS();

	ARG_TO_DEST_REG(rDest, destName);
	ARG_TO_REG2(rSrc1, src1Name);
	ARG_TO_REG2(rSrc2, src2Name);

	// Use the inverted encoding only when it exists and only src2 needs the wide field
	bool inverted = opcodei >= 0 && rSrc1 < 0x20 && rSrc2 >= 0x20;

	if (!inverted)
	{
		safe_call(ensure_valid_src_wide(rSrc1, src1Name, 1));
		safe_call(ensure_valid_src_narrow(rSrc2, src2Name, 2));
		safe_call(ensure_no_idxreg(rSrc2Idx, 2));
	} else
	{
		safe_call(ensure_valid_src_narrow(rSrc1, src1Name, 1));
		safe_call(ensure_no_idxreg(rSrc1Idx, 1));
		safe_call(ensure_valid_src_wide(rSrc2, src2Name, 2));
	}

	// Rejected when both sources are registers below 0x10 and they differ.
	// NOTE(review): the wording of the message below reads as the opposite of
	// what is being checked -- consider rephrasing.
	if (isBadInputRegCombination(rSrc1, rSrc2))
		return throwError("source operands must be different input registers (v0..v15)\n");

	int opdesc = 0;
	safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, rSrc2Sw, 0), OPDESC_MASK_D12));

#ifdef DEBUG
	printf("%s:%02X d%02X, d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, rSrc2, opdesc);
#endif
	if (!inverted)
		BUF.push_back(FMT_OPCODE(opcode)  | opdesc | (rSrc2<<7) | (rSrc1<<12) | (rSrc1Idx<<19) | (rDest<<21));
	else
		BUF.push_back(FMT_OPCODE(opcodei) | opdesc | (rSrc2<<7) | (rSrc1<<14) | (rSrc2Idx<<19) | (rDest<<21));

	return 0;
}
 914 | 
// Instruction handler for single-source instructions: `op dest, src1`.
//
// Emitted word: opdesc | src1<<12 | src1Idx<<19 | dest<<21
// The operand descriptor only takes the destination mask and the src1 swizzle
// into account (OPDESC_MASK_D1).
DEF_COMMAND(format1u)
{
	NEXT_ARG(destName);
	NEXT_ARG(src1Name);
	ENSURE_NO_MORE_ARGS();

	ARG_TO_DEST_REG(rDest, destName);
	ARG_TO_SRC1_REG2(rSrc1, src1Name);

	int opdesc = 0;
	safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, 0, 0), OPDESC_MASK_D1));

#ifdef DEBUG
	printf("%s:%02X d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, opdesc);
#endif
	BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc1<<12) | (rSrc1Idx<<19) | (rDest<<21));

	return 0;
}
 934 | 
// Instruction handler for the comparison instruction:
// `op src1, cmpx, cmpy, src2`, where cmpx and cmpy are the comparison
// operators converted by ARG_TO_CONDOP.
//
// Emitted word: opdesc | src2<<7 | src1<<12 | src1Idx<<19 | cmpy<<21 | cmpx<<24
// There is no destination register, so the descriptor is built with a zero
// destination mask and matched on the two source swizzles only (OPDESC_MASK_12).
DEF_COMMAND(format1c)
{
	NEXT_ARG(src1Name);
	NEXT_ARG(cmpxName);
	NEXT_ARG(cmpyName);
	NEXT_ARG(src2Name);
	ENSURE_NO_MORE_ARGS();

	ARG_TO_SRC1_REG2(rSrc1, src1Name);
	ARG_TO_CONDOP(cmpx, cmpxName);
	ARG_TO_CONDOP(cmpy, cmpyName);
	ARG_TO_SRC2_REG(rSrc2, src2Name);

	int opdesc = 0;
	safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(0, rSrc1Sw, rSrc2Sw, 0), OPDESC_MASK_12));

#ifdef DEBUG
	printf("%s:%02X d%02X, %d, %d, d%02X (0x%X)\n", cmdName, opcode, rSrc1, cmpx, cmpy, rSrc2, opdesc);
#endif
	BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc2<<7) | (rSrc1<<12) | (rSrc1Idx<<19) | (cmpy<<21) | (cmpx<<24));

	return 0;
}
 958 | 
 959 | DEF_COMMAND(format5)
 960 | {
 961 | 	NEXT_ARG(destName);
 962 | 	NEXT_ARG(src1Name);
 963 | 	NEXT_ARG(src2Name);
 964 | 	NEXT_ARG(src3Name);
 965 | 	ENSURE_NO_MORE_ARGS();
 966 | 
 967 | 	ARG_TO_DEST_REG(rDest, destName);
 968 | 	ARG_TO_SRC2_REG(rSrc1, src1Name);
 969 | 	ARG_TO_REG2(rSrc2, src2Name);
 970 | 	ARG_TO_REG2(rSrc3, src3Name);
 971 | 
 972 | 	bool inverted = opcodei >= 0 && rSrc2 < 0x20 && (rSrc3 >= 0x20 || (rSrc3Idx && !rSrc2Idx));
 973 | 
 974 | 	if (!inverted)
 975 | 	{
 976 | 		safe_call(ensure_valid_src_wide(rSrc2, src2Name, 2));
 977 | 		safe_call(ensure_valid_src_narrow(rSrc3, src3Name, 3));
 978 | 		safe_call(ensure_no_idxreg(rSrc3Idx, 2));
 979 | 	} else
 980 | 	{
 981 | 		safe_call(ensure_valid_src_narrow(rSrc2, src2Name, 2));
 982 | 		safe_call(ensure_valid_src_wide(rSrc3, src3Name, 3));
 983 | 		safe_call(ensure_no_idxreg(rSrc2Idx, 2));
 984 | 	}
 985 | 
 986 | 	if (isBadInputRegCombination(rSrc1, rSrc2, rSrc3))
 987 | 		return throwError("source registers must be different input registers (v0..v15)\n");
 988 | 
 989 | 	int opdesc = 0;
 990 | 	safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, rSrc2Sw, rSrc3Sw), OPDESC_MASK_D123));
 991 | 
 992 | 	if (opdesc >= 32)
 993 | 	{
 994 | 		int which;
 995 | 		for (which = 0; which < 32; which ++)
 996 | 			if (!(g_opdescIsMad & BIT(which)))
 997 | 				break;
 998 | 		if (which == 32)
 999 | 			return throwError("opdesc allocation error\n");
1000 | 		swapOpdesc(which, opdesc);
1001 | 		opdesc = which;
1002 | 	}
1003 | 
1004 | 	g_opdescIsMad |= BIT(opdesc);
1005 | 
1006 | #ifdef DEBUG
1007 | 	printf("%s:%02X d%02X, d%02X, d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, rSrc2, rSrc3, opdesc);
1008 | #endif
1009 | 	if (!inverted)
1010 | 		BUF.push_back(FMT_OPCODE(opcode)  | opdesc | (rSrc3<<5) | (rSrc2<<10) | (rSrc1<<17) | (rSrc2Idx<<22) | (rDest<<24));
1011 | 	else
1012 | 		BUF.push_back(FMT_OPCODE(opcodei) | opdesc | (rSrc3<<5) | (rSrc2<<12) | (rSrc1<<17) | (rSrc3Idx<<22) | (rDest<<24));
1013 | 
1014 | 	return 0;
1015 | }
1016 | 
// Instruction handler for MOVA: `mova target, src1`.
//
// `target` selects which address register component(s) get written and is
// turned into the descriptor's destination mask:
//   a0  / a0.x  -> BIT(3)
//   a1  / a0.y  -> BIT(2)
//   a01 / a0.xy -> BIT(3) | BIT(2)
//
// Emitted word: opdesc | src1<<12 | src1Idx<<19 (no destination register field).
DEF_COMMAND(formatmova)
{
	NEXT_ARG(targetReg);
	NEXT_ARG(src1Name);
	ENSURE_NO_MORE_ARGS();

	int mask;
	if      (stricmp(targetReg, "a0")==0  || stricmp(targetReg, "a0.x")==0)  mask = BIT(3);
	else if (stricmp(targetReg, "a1")==0  || stricmp(targetReg, "a0.y")==0)  mask = BIT(2);
	else if (stricmp(targetReg, "a01")==0 || stricmp(targetReg, "a0.xy")==0) mask = BIT(3) | BIT(2);
	else return throwError("invalid destination register for mova: %s\n", targetReg);

	ARG_TO_SRC1_REG2(rSrc1, src1Name);

	int opdesc = 0;
	safe_call(findOrAddOpdesc(opcode, opdesc, OPDESC_MAKE(mask, rSrc1Sw, 0, 0), OPDESC_MASK_D1));

#ifdef DEBUG
	printf("%s:%02X d%02X (0x%X)\n", cmdName, opcode, rSrc1, opdesc);
#endif
	BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc1<<12) | (rSrc1Idx<<19));

	return 0;
}
1041 | 
1042 | static inline int parseSetEmitFlags(char* flags, bool& isPrim, bool& isInv)
1043 | {
1044 | 	isPrim = false;
1045 | 	isInv = false;
1046 | 	if (!flags)
1047 | 		return 0;
1048 | 
1049 | 	mystrtok_pos = flags;
1050 | 	while (char* flag = mystrtok_spc(NULL))
1051 | 	{
1052 | 		if (stricmp(flag, "prim")==0 || stricmp(flag, "primitive")==0)
1053 | 			isPrim = true;
1054 | 		else if (stricmp(flag, "inv")==0 || stricmp(flag, "invert")==0)
1055 | 			isInv = true;
1056 | 		else
1057 | 			throwError("unknown setemit flag: %s\n", flag);
1058 | 
1059 | 	}
1060 | 	return 0;
1061 | }
1062 | 
// Instruction handler for SETEMIT: `setemit vtxId [, flags]`.
//
// vtxId is converted with ARG_TO_INT(..., 0, 2); the optional flags are parsed
// by parseSetEmitFlags(). Using this instruction marks the current DVLE as a
// geometry shader: if it was not flagged as one yet, both isGeoShader and
// isCompatGeoShader are set.
//
// Emitted word: isInv<<22 | isPrim<<23 | vtxId<<24
DEF_COMMAND(formatsetemit)
{
	NEXT_ARG(vtxIdStr);
	NEXT_ARG_OPT(flagStr, NULL);
	ENSURE_NO_MORE_ARGS();

	ARG_TO_INT(vtxId, vtxIdStr, 0, 2);
	bool isPrim, isInv;
	safe_call(parseSetEmitFlags(flagStr, isPrim, isInv));

	DVLEData* dvle = GetDvleData();
	if (!dvle->isGeoShader)
	{
		dvle->isGeoShader = true;
		dvle->isCompatGeoShader = true;
	}

#ifdef DEBUG
	printf("%s:%02X vtx%d, %s, %s\n", cmdName, opcode, vtxId, isPrim?"true":"false", isInv?"true":"false");
#endif
	BUF.push_back(FMT_OPCODE(opcode) | ((u32)isInv<<22) | ((u32)isPrim<<23) | (vtxId<<24));

	return 0;
}
1087 | 
// Instruction handler for CALL: `call procName`.
//
// Only the opcode is emitted here; the position of the instruction and the
// procedure name are recorded in g_procRelocTable.
// NOTE(review): the target fields are presumably filled in later from that
// relocation entry -- confirm where the table is consumed.
DEF_COMMAND(formatcall)
{
	NEXT_ARG(procName);
	ENSURE_NO_MORE_ARGS();

	ARG_TARGET(procName);

	// BUF.size() is the index the instruction pushed below will get
	g_procRelocTable.push_back( std::make_pair(BUF.size(), procName) );

	BUF.push_back(FMT_OPCODE(opcode));

#ifdef DEBUG
	printf("%s:%02X %s\n", cmdName, opcode, procName);
#endif
	return 0;
}
1104 | 
// Instruction handler for FOR: `for iReg`.
//
// Opens a SE_FOR block on the block stack, remembering the position of the
// instruction so that the `.end` directive can patch it once the end of the
// loop is known. The register is encoded relative to 0x80 at bit 22.
DEF_COMMAND(formatfor)
{
	NEXT_ARG(regName);
	ENSURE_NO_MORE_ARGS();

	ARG_TO_IREG(regId, regName);

	if (NO_MORE_STACK)
		return throwError("too many nested blocks\n");

	StackEntry& elem = g_stack[g_stackPos++];
	elem.type = SE_FOR;
	elem.pos = BUF.size(); // index of the FOR instruction pushed below

	BUF.push_back(FMT_OPCODE(opcode) | ((regId-0x80) << 22));

#ifdef DEBUG
	printf("%s:%02X d%02X\n", cmdName, opcode, regId);
#endif
	return 0;
}
1126 | 
// Instruction handler for instructions driven by a condition expression
// (BREAKC, CALLC, JMPC, IFC): `op condExp [, target]`.
//
// The condition is encoded by parseCondExp() and OR-ed with the opcode. What
// else happens depends on the opcode:
//   BREAKC       - no further arguments.
//   CALLC / JMPC - takes a target name; a relocation entry is recorded in the
//                  procedure table (CALLC) or the label table (JMPC).
//   IFC          - opens a SE_IF block on the block stack.
DEF_COMMAND(format2)
{
	NEXT_ARG(condExp);

	u32 instruction = 0;
	safe_call(parseCondExp(condExp, instruction));

	switch (opcode)
	{
		case MAESTRO_BREAKC:
		{
			ENSURE_NO_MORE_ARGS();

#ifdef DEBUG
			printf("%s:%02X %s\n", cmdName, opcode, condExp);
#endif
			break;
		}

		case MAESTRO_CALLC:
		case MAESTRO_JMPC:
		{
			NEXT_ARG(targetName);
			ENSURE_NO_MORE_ARGS();

			ARG_TARGET(targetName);

			// BUF.size() is the index of the instruction pushed at the end
			relocTableType& rt = opcode==MAESTRO_CALLC ? g_procRelocTable : g_labelRelocTable;
			rt.push_back( std::make_pair(BUF.size(), targetName) );

#ifdef DEBUG
			printf("%s:%02X %s, %s\n", cmdName, opcode, condExp, targetName);
#endif
			break;
		}

		case MAESTRO_IFC:
		{
			ENSURE_NO_MORE_ARGS();

			if (NO_MORE_STACK)
				return throwError("too many nested blocks\n");

			StackEntry& elem = g_stack[g_stackPos++];
			elem.type = SE_IF;
			elem.pos = BUF.size();
			elem.uExtra = 0; // no .else seen yet

#ifdef DEBUG
			printf("%s:%02X %s\n", cmdName, opcode, condExp);
#endif
			break;
		}
	}

	BUF.push_back(FMT_OPCODE(opcode) | instruction);

	return 0;
}
1186 | 
// Instruction handler for instructions driven by a boolean uniform
// (CALLU, JMPU, IFU): `op [!]bReg [, target]`.
//
// A leading '!' is only accepted for JMPU, where it sets bit 0 of the emitted
// word; for the other opcodes it is an error. The register is encoded relative
// to 0x88 at bit 22.
//   CALLU / JMPU - takes a target name; a relocation entry is recorded in the
//                  procedure table (CALLU) or the label table (JMPU).
//   IFU          - opens a SE_IF block on the block stack.
DEF_COMMAND(format3)
{
	NEXT_ARG(regName);

	u32 negation = 0;
	if (*regName == '!')
	{
		if (opcode == MAESTRO_JMPU)
		{
			negation = 1;
			regName ++;
		} else
			return throwError("Inverting the condition is not supported by %s\n", opcode==MAESTRO_CALLU ? "CALLU" : "IFU");
	}

	ARG_TO_BREG(regId, regName);

	switch (opcode)
	{
		case MAESTRO_CALLU:
		case MAESTRO_JMPU:
		{
			NEXT_ARG(targetName);
			ENSURE_NO_MORE_ARGS();

			ARG_TARGET(targetName);

			// BUF.size() is the index of the instruction pushed at the end
			relocTableType& rt = opcode==MAESTRO_CALLU ? g_procRelocTable : g_labelRelocTable;
			rt.push_back( std::make_pair(BUF.size(), targetName) );

#ifdef DEBUG
			printf("%s:%02X d%02X, %s\n", cmdName, opcode, regId, targetName);
#endif
			break;
		}

		case MAESTRO_IFU:
		{
			ENSURE_NO_MORE_ARGS();

			if (NO_MORE_STACK)
				return throwError("too many nested blocks\n");

			StackEntry& elem = g_stack[g_stackPos++];
			elem.type = SE_IF;
			elem.pos = BUF.size();
			elem.uExtra = 0; // no .else seen yet

#ifdef DEBUG
			printf("%s:%02X d%02X\n", cmdName, opcode, regId);
#endif
			break;
		}
	}

	BUF.push_back(FMT_OPCODE(opcode) | ((regId-0x88) << 22) | negation);

	return 0;
}
1246 | 
// Instruction table: associates every mnemonic with the format handler that
// parses its operands and emits its encoding. The list is terminated by a
// { NULL, NULL } sentinel entry.
// NOTE(review): DEC_COMMAND2 presumably also registers the inverted-operand
// opcode that the handlers receive as `opcodei` -- confirm against the macro.
static const cmdTableType cmdTable[] =
{
	// No operands
	DEC_COMMAND(NOP, format0),
	DEC_COMMAND(END, format0),
	DEC_COMMAND(EMIT, format0),
	DEC_COMMAND(BREAK, format0),

	// dest, src1, src2
	DEC_COMMAND(ADD, format1),
	DEC_COMMAND(DP3, format1),
	DEC_COMMAND(DP4, format1),
	DEC_COMMAND2(DPH, format1),
	DEC_COMMAND2(DST, format1),
	DEC_COMMAND(MUL, format1),
	DEC_COMMAND2(SGE, format1),
	DEC_COMMAND2(SLT, format1),
	DEC_COMMAND(MAX, format1),
	DEC_COMMAND(MIN, format1),

	// dest, src1
	DEC_COMMAND(EX2, format1u),
	DEC_COMMAND(LG2, format1u),
	DEC_COMMAND(LITP, format1u),
	DEC_COMMAND(FLR, format1u),
	DEC_COMMAND(RCP, format1u),
	DEC_COMMAND(RSQ, format1u),
	DEC_COMMAND(MOV, format1u),

	// address register target, src1
	DEC_COMMAND(MOVA, formatmova),

	// src1, cmpx, cmpy, src2
	DEC_COMMAND(CMP, format1c),

	// procedure name
	DEC_COMMAND(CALL, formatcall),

	// integer uniform register
	DEC_COMMAND(FOR, formatfor),

	// condition expression [, target]
	DEC_COMMAND(BREAKC, format2),
	DEC_COMMAND(CALLC, format2),
	DEC_COMMAND(IFC, format2),
	DEC_COMMAND(JMPC, format2),

	// boolean uniform register [, target]
	DEC_COMMAND(CALLU, format3),
	DEC_COMMAND(IFU, format3),
	DEC_COMMAND(JMPU, format3),

	// dest, src1, src2, src3
	DEC_COMMAND2(MAD, format5),

	// vertex id [, flags]
	DEC_COMMAND(SETEMIT, formatsetemit),

	{ NULL, NULL },
};
1296 | 
1297 | // --------------------------------------------------------------------
1298 | // Directives
1299 | // --------------------------------------------------------------------
1300 | 
1301 | DEF_DIRECTIVE(proc)
1302 | {
1303 | 	NEXT_ARG(procName);
1304 | 	ENSURE_NO_MORE_ARGS();
1305 | 
1306 | 	if (NO_MORE_STACK)
1307 | 		return throwError("too many nested blocks\n");
1308 | 
1309 | 	StackEntry& elem = g_stack[g_stackPos++];
1310 | 	elem.type = SE_PROC;
1311 | 	elem.pos = BUF.size();
1312 | 	elem.strExtra = procName;
1313 | 
1314 | 	if (g_procTable.find(procName) != g_procTable.end())
1315 | 		return throwError("proc already exists: %s\n", procName);
1316 | 
1317 | #ifdef DEBUG
1318 | 	printf("Defining %s\n", procName);
1319 | #endif
1320 | 	return 0;
1321 | }
1322 | 
// `.else` directive: starts the ELSE part of the innermost IF block.
//
// Fails when the innermost open block is not an IF, or when that IF already
// has an ELSE. The position of the ELSE part is stored both in the stack entry
// (uExtra) and in bits 10..21 of the IF instruction.
DEF_DIRECTIVE(else)
{
	ENSURE_NO_MORE_ARGS();
	if (!g_stackPos)
		return throwError(".else with unmatched IF\n");

	StackEntry& elem = g_stack[g_stackPos-1];
	if (elem.type != SE_IF)
		return throwError(".else with unmatched IF\n");
	if (elem.uExtra)
		return throwError("spurious .else\n");

	// Automatically add padding NOPs when necessary
	if (lastWasEnd)
	{
		// A block was closed right before this point
		insertPaddingNop();
		lastWasEnd = false;
	} else
	{
		// Pad when the IF part ends with a jump/call instruction, or when
		// fewer than two words were emitted since the IF instruction
		// (elem.pos is the index of the IF instruction itself)
		u32 p = BUF.size();
		u32 lastOpcode = BUF[p-1] >> 26;
		if (lastOpcode == MAESTRO_JMPC || lastOpcode == MAESTRO_JMPU
			|| lastOpcode == MAESTRO_CALL || lastOpcode == MAESTRO_CALLC || lastOpcode == MAESTRO_CALLU
			|| (p - elem.pos) < 2)
			insertPaddingNop();
	}

	// Record where the ELSE part begins and patch it into the IF instruction
	u32 curPos = BUF.size();
	elem.uExtra = curPos;
	u32& inst = BUF[elem.pos];
	inst &= ~(0xFFF << 10);
	inst |= curPos << 10;

#ifdef DEBUG
	printf("ELSE\n");
#endif

	return 0;
}
1362 | 
// `.end` directive: closes the innermost open block (procedure, FOR loop, IF
// block or constant array) and finalizes it:
//   SE_PROC  - registers the procedure (start position and size) in g_procTable.
//   SE_FOR   - patches bits 10..21 of the FOR instruction with the position of
//              the last instruction of the loop.
//   SE_IF    - without ELSE: patches bits 10..21 of the IF instruction with the
//              current position; with ELSE: patches the low 10 bits with the
//              number of words emitted since the ELSE.
//   SE_ARRAY - allocates uniform space for the collected constants, copies them
//              into the DVLE constant table and defines an alias for the array.
// For code blocks a padding NOP is inserted first whenever it is required.
DEF_DIRECTIVE(end)
{
	ENSURE_NO_MORE_ARGS();
	if (!g_stackPos)
		return throwError(".end with unmatched block\n");
	
	StackEntry& elem = g_stack[--g_stackPos];

	// Automatically add padding NOPs when necessary
	if (elem.type != SE_ARRAY && lastWasEnd)
	{
		// Another block was closed right before this point
		insertPaddingNop();
		lastWasEnd = false;
	}

	else if ((elem.type == SE_PROC || elem.type == SE_FOR || elem.type == SE_IF) && BUF.size() > 0)
	{
		// Pad when the block ends with a jump/call instruction, when a FOR
		// loop ends with BREAK/BREAKC, or when the block is too short
		// (fewer than 2 words for FOR/IF, fewer than 1 for a procedure)
		u32 p = BUF.size();
		u32 lastOpcode = BUF[p-1] >> 26;
		if (lastOpcode == MAESTRO_JMPC || lastOpcode == MAESTRO_JMPU
			|| lastOpcode == MAESTRO_CALL || lastOpcode == MAESTRO_CALLC || lastOpcode == MAESTRO_CALLU
			|| (elem.type == SE_FOR && (lastOpcode == MAESTRO_BREAK || lastOpcode == MAESTRO_BREAKC))
			|| (elem.type != SE_ARRAY && (p - elem.pos) < (elem.type != SE_PROC ? 2 : 1)))
			insertPaddingNop();
	}

	// Measured after any padding NOP has been added
	u32 curPos = BUF.size();
	u32 size = curPos - elem.pos;

	switch (elem.type)
	{
		case SE_PROC:
		{
#ifdef DEBUG
			printf("proc: %s(%u, size:%u)\n", elem.strExtra, elem.pos, size);
#endif
			g_procTable.insert( std::pair<std::string, procedure>(elem.strExtra, procedure(elem.pos, size)) );
			break;
		}

		case SE_FOR:
		{
#ifdef DEBUG
			printf("ENDFOR\n");
#endif
			// Store the position of the last instruction inside the loop
			u32& inst = BUF[elem.pos];
			inst &= ~(0xFFF << 10);
			inst |= (curPos-1) << 10;
			lastWasEnd = true;
			break;
		}

		case SE_IF:
		{
#ifdef DEBUG
			printf("ENDIF\n");
#endif
			u32& inst = BUF[elem.pos];
			if (!elem.uExtra)
			{
				// IF with no ELSE
				inst &= ~(0xFFF << 10);
				inst |= curPos << 10;
			} else
			{
				// IF with an ELSE
				inst &= ~0x3FF;
				inst |= curPos - elem.uExtra;
			}
			lastWasEnd = true;
			break;
		}

		case SE_ARRAY:
		{
#ifdef DEBUG
			printf("ENDARRAY\n");
#endif
			DVLEData* dvle = GetDvleData();
			UniformAlloc& alloc = getAlloc(UTYPE_FVEC, dvle);

			if (g_aliases.find(g_constArrayName) != g_aliases.end())
				return duplicateIdentifier(g_constArrayName);

			// Note: this `size` (element count) shadows the code size above.
			// When an explicit array size was declared, the array is padded
			// with zeroed FVEC constants up to that size.
			int size = g_constArray.size();
			if (g_constArraySize >= 0) for (; size < g_constArraySize; size ++)
			{
				Constant c;
				memset(&c, 0, sizeof(c));
				c.type = UTYPE_FVEC;
				g_constArray.push_back(c);
			}

			if (size == 0)
				return throwError("no elements have been specified in array '%s'\n", g_constArrayName);

			int uniformPos = alloc.AllocLocal(size);
			if (uniformPos < 0)
				return throwError("not enough space for local constant array '%s'\n", g_constArrayName);

			if ((dvle->constantCount+size) > MAX_CONSTANT)
				return throwError("too many local constants\n");

			// Assign consecutive registers and append to the DVLE constant table
			for (int i = 0; i < size; i ++)
			{
				Constant& src = g_constArray[i];
				Constant& dst = dvle->constantTable[dvle->constantCount++];
				src.regId = uniformPos+i;
				memcpy(&dst, &src, sizeof(src));
			}

			// The array name becomes an alias of its first register
			g_aliases.insert( std::pair<std::string,int>(g_constArrayName, uniformPos | (DEFAULT_OPSRC<<8)) );

			// Reset the array-building state for the next array
			g_constArray.clear();
			g_constArraySize = -1;
			g_constArrayName = NULL;
			break;
		}
	}

	return 0;
}
1485 | 
// `.alias name reg` directive: defines `name` as another name for a register
// expression.
//
// The name has to be a valid identifier, must not look like a raw register
// (register prefix letter followed by a digit) and must not be defined yet.
// The alias value packs the register id in the low 8 bits and the operand's
// swizzle/negation word shifted left by 8.
DEF_DIRECTIVE(alias)
{
	NEXT_ARG_SPC(aliasName);
	NEXT_ARG_SPC(aliasReg);
	ENSURE_NO_MORE_ARGS();

	if (!validateIdentifier(aliasName))
		return throwError("invalid alias name: %s\n", aliasName);
	if (isregp(aliasName[0]) && isdigit(aliasName[1]))
		return throwError("cannot redefine register\n");
	ARG_TO_REG(rAlias, aliasReg);

	if (g_aliases.find(aliasName) != g_aliases.end())
		return duplicateIdentifier(aliasName);

	g_aliases.insert( std::pair<std::string,int>(aliasName, rAlias | (rAliasSw<<8)) );
	return 0;
}
1504 | 
1505 | DEF_DIRECTIVE(uniform)
1506 | {
1507 | 	DVLEData* dvle = GetDvleData();
1508 | 	UniformAlloc& alloc = getAlloc(dirParam, dvle);
1509 | 	bool useSharedSpace = !dvle->usesGshSpace();
1510 | 
1511 | 	for (;;)
1512 | 	{
1513 | 		char* argText = nextArg();
1514 | 		if (!argText) break;
1515 | 
1516 | 		int uSize = 1;
1517 | 		char* sizePos = strchr(argText, '[');
1518 | 		if (sizePos)
1519 | 		{
1520 | 			char* closePos = strchr(sizePos, ']');
1521 | 			if (!closePos)
1522 | 				return throwError("missing closing bracket: %s\n", argText);
1523 | 			*closePos = 0;
1524 | 			*sizePos++ = 0;
1525 | 			sizePos = trim_whitespace(sizePos);
1526 | 			uSize = atoi(sizePos);
1527 | 			if (uSize < 1)
1528 | 				return throwError("invalid uniform size: %s[%s]\n", argText, sizePos);
1529 | 		}
1530 | 		if (!validateIdentifier(argText))
1531 | 			return throwError("invalid uniform name: %s\n", argText);
1532 | 		if (g_aliases.find(argText) != g_aliases.end())
1533 | 			return duplicateIdentifier(argText);
1534 | 
1535 | 		int uniformPos = -1;
1536 | 
1537 | 		// Find the uniform in the table
1538 | 		int i;
1539 | 		for (i = 0; useSharedSpace && i < g_uniformCount; i ++)
1540 | 		{
1541 | 			Uniform& uniform = g_uniformTable[i];
1542 | 			if (uniform.name == argText)
1543 | 			{
1544 | 				if (uniform.type != dirParam)
1545 | 					return throwError("mismatched uniform type: %s\n", argText);
1546 | 				if (uniform.size != uSize)
1547 | 					return throwError("uniform '%s' previously declared as having size %d\n", argText, uniform.size);
1548 | 				uniformPos = uniform.pos;
1549 | 				break;
1550 | 			}
1551 | 		}
1552 | 
1553 | 		// If not found, create it
1554 | 		if (uniformPos < 0)
1555 | 		{
1556 | 			if (g_uniformCount == MAX_UNIFORM)
1557 | 				return throwError("too many global uniforms: %s\n", argText);
1558 | 
1559 | 			uniformPos = alloc.AllocGlobal(uSize);
1560 | 			if (uniformPos < 0)
1561 | 				return throwError("not enough uniform space: %s[%d]\n", argText, uSize);
1562 | 		}
1563 | 
1564 | 		if (useSharedSpace)
1565 | 			g_uniformTable[g_uniformCount++].init(argText, uniformPos, uSize, dirParam);
1566 | 
1567 | 		if (*argText != '_')
1568 | 		{
1569 | 			// Add the uniform to the table
1570 | 			if (dvle->uniformCount == MAX_UNIFORM)
1571 | 				return throwError("too many referenced uniforms: %s\n", argText);
1572 | 			dvle->uniformTable[dvle->uniformCount++].init(argText, uniformPos, uSize, dirParam);
1573 | 			dvle->symbolSize += strlen(argText)+1;
1574 | 		}
1575 | 
1576 | 		g_aliases.insert( std::pair<std::string,int>(argText, uniformPos | (DEFAULT_OPSRC<<8)) );
1577 | 
1578 | #ifdef DEBUG
1579 | 		printf("uniform %s[%d] @ d%02X:d%02X\n", argText, uSize, uniformPos, uniformPos+uSize-1);
1580 | #endif
1581 | 	}
1582 | 	return 0;
1583 | }
1584 | 
// Constant declaration directive: `name(a, b, c, d)` declares a local
// four-component constant of the type given by `dirParam`.
//
// A register is allocated for it, its value is stored in the DVLE constant
// table and an alias with its name is defined. For UTYPE_FVEC the components
// are converted with atof(); for UTYPE_IVEC with atoi(), keeping only the low
// 8 bits of each value.
DEF_DIRECTIVE(const)
{
	DVLEData* dvle = GetDvleData();
	UniformAlloc& alloc = getAlloc(dirParam, dvle);

	NEXT_ARG_CPAREN(constName);
	NEXT_ARG(arg0Text);
	NEXT_ARG(arg1Text);
	NEXT_ARG(arg2Text);
	// The last component is whatever remains up to the closing parenthesis
	char* arg3Text = mystrtok_pos;
	if (!mystrtok_pos) return missingParam();
	char* parenPos = strchr(arg3Text, ')');
	if (!parenPos) return throwError("invalid syntax\n");
	*parenPos = 0;
	arg3Text = trim_whitespace(arg3Text);

	if (g_aliases.find(constName) != g_aliases.end())
		return duplicateIdentifier(constName);

	int uniformPos = alloc.AllocLocal(1);
	if (uniformPos < 0)
		return throwError("not enough space for local constant '%s'\n", constName);

	if (dvle->constantCount == MAX_CONSTANT)
		return throwError("too many local constants\n");

	Constant& ct = dvle->constantTable[dvle->constantCount++];
	ct.regId = uniformPos;
	ct.type = dirParam;
	if (dirParam == UTYPE_FVEC)
	{
		ct.fparam[0] = atof(arg0Text);
		ct.fparam[1] = atof(arg1Text);
		ct.fparam[2] = atof(arg2Text);
		ct.fparam[3] = atof(arg3Text);
	} else if (dirParam == UTYPE_IVEC)
	{
		// Only the low byte of every component is kept
		ct.iparam[0] = atoi(arg0Text) & 0xFF;
		ct.iparam[1] = atoi(arg1Text) & 0xFF;
		ct.iparam[2] = atoi(arg2Text) & 0xFF;
		ct.iparam[3] = atoi(arg3Text) & 0xFF;
	}

	// The constant name becomes an alias of its register
	g_aliases.insert( std::pair<std::string,int>(constName, ct.regId | (DEFAULT_OPSRC<<8)) );

#ifdef DEBUG
	if (dirParam == UTYPE_FVEC)
		printf("constant %s(%f, %f, %f, %f) @ d%02X\n", constName, ct.fparam[0], ct.fparam[1], ct.fparam[2], ct.fparam[3], ct.regId);
	else if (dirParam == UTYPE_IVEC)
		printf("constant %s(%u, %u, %u, %u) @ d%02X\n", constName, ct.iparam[0], ct.iparam[1], ct.iparam[2], ct.iparam[3], ct.regId);
#endif
	return 0;
};
1638 | 
1639 | DEF_DIRECTIVE(constfa)
1640 | {
1641 | 	bool inArray = g_stackPos && g_stack[g_stackPos-1].type == SE_ARRAY;
1642 | 
1643 | 	if (!inArray)
1644 | 	{
1645 | 		NEXT_ARG(constName);
1646 | 		ENSURE_NO_MORE_ARGS();
1647 | 
1648 | 		if (NO_MORE_STACK)
1649 | 			return throwError("too many nested blocks\n");
1650 | 
1651 | 		char* sizePos = strchr(constName, '[');
1652 | 		if (!sizePos)
1653 | 			return throwError("missing opening bracket: %s\n", constName);
1654 | 
1655 | 		char* closePos = strchr(sizePos, ']');
1656 | 		if (!closePos)
1657 | 			return throwError("missing closing bracket: %s\n", constName);
1658 | 
1659 | 		*closePos++ = 0;
1660 | 		*sizePos++ = 0;
1661 | 		closePos = trim_whitespace(closePos);
1662 | 		sizePos = trim_whitespace(sizePos);
1663 | 
1664 | 		if (*closePos)
1665 | 			return throwError("garbage found: %s\n", closePos);
1666 | 
1667 | 		if (*sizePos)
1668 | 		{
1669 | 			g_constArraySize = atoi(sizePos);
1670 | 			if (g_constArraySize <= 0)
1671 | 				return throwError("invalid array size: %s[%s]\n", constName, sizePos);
1672 | 		}
1673 | 
1674 | 		if (!validateIdentifier(constName))
1675 | 			return throwError("invalid array name: %s\n", constName);
1676 | 
1677 | 		g_constArrayName = constName;
1678 | 
1679 | 		StackEntry& elem = g_stack[g_stackPos++];
1680 | 		elem.type = SE_ARRAY;
1681 | 
1682 | 	} else
1683 | 	{
1684 | 		if (g_constArraySize >= 0 && g_constArraySize == g_constArray.size())
1685 | 			return throwError("too many elements in the array, expected %d\n", g_constArraySize);
1686 | 
1687 | 		NEXT_ARG(arg0Text);
1688 | 		if (*arg0Text != '(')
1689 | 			return throwError("invalid syntax\n");
1690 | 		arg0Text++;
1691 | 
1692 | 		NEXT_ARG(arg1Text);
1693 | 		NEXT_ARG(arg2Text);
1694 | 		char* arg3Text = mystrtok_pos;
1695 | 		if (!mystrtok_pos) return missingParam();
1696 | 		char* parenPos = strchr(arg3Text, ')');
1697 | 		if (!parenPos) return throwError("invalid syntax\n");
1698 | 		*parenPos = 0;
1699 | 		arg3Text = trim_whitespace(arg3Text);
1700 | 
1701 | 		Constant ct;
1702 | 		ct.type = UTYPE_FVEC;
1703 | 		ct.fparam[0] = atof(arg0Text);
1704 | 		ct.fparam[1] = atof(arg1Text);
1705 | 		ct.fparam[2] = atof(arg2Text);
1706 | 		ct.fparam[3] = atof(arg3Text);
1707 | 		g_constArray.push_back(ct);
1708 | 	}
1709 | 
1710 | 	return 0;
1711 | }
1712 | 
1713 | DEF_DIRECTIVE(setfi)
1714 | {
1715 | 	DVLEData* dvle = GetDvleData();
1716 | 
1717 | 	NEXT_ARG_CPAREN(constName);
1718 | 	NEXT_ARG(arg0Text);
1719 | 	NEXT_ARG(arg1Text);
1720 | 	NEXT_ARG(arg2Text);
1721 | 	char* arg3Text = mystrtok_pos;
1722 | 	if (!mystrtok_pos) return missingParam();
1723 | 	char* parenPos = strchr(arg3Text, ')');
1724 | 	if (!parenPos) return throwError("invalid syntax\n");
1725 | 	*parenPos = 0;
1726 | 	arg3Text = trim_whitespace(arg3Text);
1727 | 
1728 | 	ARG_TO_REG(constReg, constName);
1729 | 	if (dirParam == UTYPE_FVEC)
1730 | 	{
1731 | 		if (constReg < 0x20 || constReg >= 0x80)
1732 | 			return throwError("invalid floating point vector uniform: %s\n", constName);
1733 | 	} else if (dirParam == UTYPE_IVEC)
1734 | 	{
1735 | 		if (constReg < 0x80 || constReg >= 0x84)
1736 | 			return throwError("invalid integer vector uniform: %s\n", constName);
1737 | 	}
1738 | 
1739 | 	if (dvle->constantCount == MAX_CONSTANT)
1740 | 		return throwError("too many local constants\n");
1741 | 
1742 | 	Constant& ct = dvle->constantTable[dvle->constantCount++];
1743 | 	ct.regId = constReg;
1744 | 	ct.type = dirParam;
1745 | 	if (dirParam == UTYPE_FVEC)
1746 | 	{
1747 | 		ct.fparam[0] = atof(arg0Text);
1748 | 		ct.fparam[1] = atof(arg1Text);
1749 | 		ct.fparam[2] = atof(arg2Text);
1750 | 		ct.fparam[3] = atof(arg3Text);
1751 | 	} else if (dirParam == UTYPE_IVEC)
1752 | 	{
1753 | 		ct.iparam[0] = atoi(arg0Text) & 0xFF;
1754 | 		ct.iparam[1] = atoi(arg1Text) & 0xFF;
1755 | 		ct.iparam[2] = atoi(arg2Text) & 0xFF;
1756 | 		ct.iparam[3] = atoi(arg3Text) & 0xFF;
1757 | 	}
1758 | 
1759 | 	return 0;
1760 | }
1761 | 
1762 | static int parseBool(bool& out, const char* text)
1763 | {
1764 | 	if (stricmp(text, "true")==0 || stricmp(text, "on")==0 || stricmp(text, "1")==0)
1765 | 	{
1766 | 		out = true;
1767 | 		return 0;
1768 | 	}
1769 | 	if (stricmp(text, "false")==0 || stricmp(text, "off")==0 || stricmp(text, "0")==0)
1770 | 	{
1771 | 		out = false;
1772 | 		return 0;
1773 | 	}
1774 | 	return throwError("invalid bool value: %s\n", text);
1775 | }
1776 | 
1777 | DEF_DIRECTIVE(setb)
1778 | {
1779 | 	DVLEData* dvle = GetDvleData();
1780 | 
1781 | 	NEXT_ARG_SPC(constName);
1782 | 	NEXT_ARG_SPC(valueText);
1783 | 	ENSURE_NO_MORE_ARGS();
1784 | 	ARG_TO_BREG(constReg, constName);
1785 | 
1786 | 	bool constVal = false;
1787 | 	safe_call(parseBool(constVal, valueText));
1788 | 
1789 | 	if (dvle->constantCount == MAX_CONSTANT)
1790 | 		return throwError("too many local constants\n");
1791 | 
1792 | 	Constant& ct = dvle->constantTable[dvle->constantCount++];
1793 | 	ct.regId = constReg;
1794 | 	ct.type = UTYPE_BOOL;
1795 | 	ct.bparam = constVal;
1796 | 
1797 | 	return 0;
1798 | }
1799 | 
1800 | static int parseOutType(const char* text)
1801 | {
1802 | 	if (stricmp(text,"pos")==0 || stricmp(text,"position")==0)
1803 | 		return OUTTYPE_POS;
1804 | 	if (stricmp(text,"nquat")==0 || stricmp(text,"normalquat")==0)
1805 | 		return OUTTYPE_NQUAT;
1806 | 	if (stricmp(text,"clr")==0 || stricmp(text,"color")==0)
1807 | 		return OUTTYPE_CLR;
1808 | 	if (stricmp(text,"tcoord0")==0 || stricmp(text,"texcoord0")==0)
1809 | 		return OUTTYPE_TCOORD0;
1810 | 	if (stricmp(text,"tcoord0w")==0 || stricmp(text,"texcoord0w")==0)
1811 | 		return OUTTYPE_TCOORD0W;
1812 | 	if (stricmp(text,"tcoord1")==0 || stricmp(text,"texcoord1")==0)
1813 | 		return OUTTYPE_TCOORD1;
1814 | 	if (stricmp(text,"tcoord2")==0 || stricmp(text,"texcoord2")==0)
1815 | 		return OUTTYPE_TCOORD2;
1816 | 	if (stricmp(text,"view")==0)
1817 | 		return OUTTYPE_VIEW;
1818 | 	if (stricmp(text,"dummy")==0)
1819 | 		return OUTTYPE_DUMMY;
1820 | 	return -1;
1821 | }
1822 | 
1823 | DEF_DIRECTIVE(in)
1824 | {
1825 | 	DVLEData* dvle = GetDvleData();
1826 | 
1827 | 	NEXT_ARG_SPC(inName);
1828 | 	char* inRegName = nextArgSpc();
1829 | 	ENSURE_NO_MORE_ARGS();
1830 | 
1831 | 	if (!validateIdentifier(inName))
1832 | 		return throwError("invalid identifier: %s\n", inName);
1833 | 	if (g_aliases.find(inName) != g_aliases.end())
1834 | 		return duplicateIdentifier(inName);
1835 | 
1836 | 	int oid = -1;
1837 | 	if (inRegName)
1838 | 	{
1839 | 		ARG_TO_REG(inReg, inRegName);
1840 | 		if (inReg < 0x00 || inReg >= 0x10)
1841 | 			return throwError("invalid input register: %s\n", inRegName);
1842 | 		oid = inReg;
1843 | 	} else
1844 | 		oid = dvle->findFreeInput();
1845 | 	if (oid < 0)
1846 | 		return throwError("too many inputs\n");
1847 | 	if (dvle->uniformCount == MAX_UNIFORM)
1848 | 		return throwError("too many uniforms in DVLE\n");
1849 | 
1850 | 	dvle->inputMask |= BIT(oid);
1851 | 	dvle->uniformTable[dvle->uniformCount++].init(inName, oid, 1, UTYPE_FVEC);
1852 | 	dvle->symbolSize += strlen(inName)+1;
1853 | 	g_aliases.insert( std::pair<std::string,int>(inName, oid | (DEFAULT_OPSRC<<8)) );
1854 | 	return 0;
1855 | }
1856 | 
1857 | DEF_DIRECTIVE(out)
1858 | {
1859 | 	DVLEData* dvle = GetDvleData();
1860 | 
1861 | 	NEXT_ARG_SPC(outName);
1862 | 	NEXT_ARG_SPC(outType);
1863 | 	char* outDestRegName = nextArgSpc();
1864 | 	ENSURE_NO_MORE_ARGS();
1865 | 
1866 | 	int oid = -1;
1867 | 	int sw = DEFAULT_OPSRC;
1868 | 
1869 | 	if (outName[0]=='-' && !outName[1])
1870 | 		outName = NULL;
1871 | 	else if (!validateIdentifier(outName))
1872 | 		return throwError("invalid identifier: %s\n", outName);
1873 | 
1874 | 	if (outDestRegName)
1875 | 	{
1876 | 		ARG_TO_REG(outDestReg, outDestRegName);
1877 | 		if (outDestReg < 0x00 || outDestReg >= dvle->maxOutputReg())
1878 | 			return throwError("invalid output register: %s\n", outDestRegName);
1879 | 		oid = outDestReg;
1880 | 		sw = outDestRegSw;
1881 | 	}
1882 | 
1883 | 	if (oid < 0)
1884 | 	{
1885 | 		char* dotPos = strchr(outType, '.');
1886 | 		if (dotPos)
1887 | 		{
1888 | 			*dotPos++ = 0;
1889 | 			sw = parseSwizzling(dotPos);
1890 | 			if (sw < 0)
1891 | 				return throwError("invalid output mask: %s\n", dotPos);
1892 | 		}
1893 | 	}
1894 | 
1895 | 	int mask = maskFromSwizzling(sw, false);
1896 | 	int type = parseOutType(outType);
1897 | 	if (type < 0)
1898 | 		return throwError("invalid output type: %s\n", outType);
1899 | 
1900 | 	if (oid < 0)
1901 | 		oid = dvle->findFreeOutput();
1902 | 	else if (dvle->outputUsedReg & (mask << (4*oid)))
1903 | 		return throwError("this output collides with another one previously defined\n");
1904 | 
1905 | 	if (oid < 0 || dvle->outputCount==MAX_OUTPUT)
1906 | 		return throwError("too many outputs\n");
1907 | 
1908 | 	if (outName && g_aliases.find(outName) != g_aliases.end())
1909 | 		return duplicateIdentifier(outName);
1910 | 
1911 | 	if (oid >= 7 && type != OUTTYPE_DUMMY)
1912 | 		return throwError("this register (o%d) can only be a dummy output\n", oid);
1913 | 
1914 | #ifdef DEBUG
1915 | 	printf("output %s <- o%d (%d:%X)\n", outName, oid, type, mask);
1916 | #endif
1917 | 
1918 | 	dvle->outputTable[dvle->outputCount++] = OUTPUT_MAKE(type, oid, mask);
1919 | 	dvle->outputMask |= BIT(oid);
1920 | 	dvle->outputUsedReg |= mask << (4*oid);
1921 | 	if (outName)
1922 | 		g_aliases.insert( std::pair<std::string,int>(outName, oid | (DEFAULT_OPSRC<<8)) );
1923 | 	if (type == OUTTYPE_DUMMY && dvle->usesGshSpace())
1924 | 		dvle->isMerge = true;
1925 | 	return 0;
1926 | }
1927 | 
1928 | DEF_DIRECTIVE(entry)
1929 | {
1930 | 	DVLEData* dvle = GetDvleData();
1931 | 
1932 | 	NEXT_ARG_SPC(entrypoint);
1933 | 	ENSURE_NO_MORE_ARGS();
1934 | 
1935 | 	if (!validateIdentifier(entrypoint))
1936 | 		return throwError("invalid identifier: %s\n", entrypoint);
1937 | 
1938 | 	dvle->entrypoint = entrypoint;
1939 | 	return 0;
1940 | }
1941 | 
1942 | DEF_DIRECTIVE(nodvle)
1943 | {
1944 | 	DVLEData* dvle = GetDvleData();
1945 | 	ENSURE_NO_MORE_ARGS();
1946 | 
1947 | 	if (!dvle->nodvle)
1948 | 	{
1949 | 		dvle->nodvle = true;
1950 | 		g_totalDvleCount --;
1951 | 	}
1952 | 
1953 | 	return 0;
1954 | }
1955 | 
1956 | static inline int parseGshType(const char* text)
1957 | {
1958 | 	if (stricmp(text,"point")==0)
1959 | 		return GSHTYPE_POINT;
1960 | 	if (stricmp(text,"variable")==0 || stricmp(text,"subdivision")==0)
1961 | 		return GSHTYPE_VARIABLE;
1962 | 	if (stricmp(text,"fixed")==0 || stricmp(text,"particle")==0)
1963 | 		return GSHTYPE_FIXED;
1964 | 	return -1;
1965 | }
1966 | 
1967 | DEF_DIRECTIVE(gsh)
1968 | {
1969 | 	DVLEData* dvle = GetDvleData();
1970 | 	char* gshMode = nextArgSpc();
1971 | 	if (!gshMode)
1972 | 	{
1973 | 		dvle->isGeoShader = true;
1974 | 		dvle->isCompatGeoShader = true;
1975 | 		return 0;
1976 | 	}
1977 | 
1978 | 	if (dvle->isGeoShader)
1979 | 		return throwError(".gsh had already been used\n");
1980 | 	if (dvle->constantCount || dvle->uniformCount || dvle->outputMask)
1981 | 		return throwError(".gsh must be used before any constant, uniform or output is declared\n");
1982 | 
1983 | 	int mode = parseGshType(gshMode);
1984 | 	if (mode < 0)
1985 | 		return throwError("invalid geometry shader mode: %s\n", gshMode);
1986 | 
1987 | 	dvle->isGeoShader = true;
1988 | 	dvle->geoShaderType = mode;
1989 | 
1990 | 	NEXT_ARG_SPC(firstFreeRegName);
1991 | 	ARG_TO_REG(firstFreeReg, firstFreeRegName);
1992 | 	if (firstFreeReg < 0x20 || firstFreeReg >= 0x80)
1993 | 		return throwError("invalid float uniform register: %s\n", firstFreeRegName);
1994 | 
1995 | 	unifAlloc[1].initForGsh(firstFreeReg);
1996 | 
1997 | 	switch (mode)
1998 | 	{
1999 | 		case GSHTYPE_POINT:
2000 | 			ENSURE_NO_MORE_ARGS();
2001 | 			break;
2002 | 		case GSHTYPE_VARIABLE:
2003 | 		{
2004 | 			NEXT_ARG_SPC(vtxNumText);
2005 | 			ENSURE_NO_MORE_ARGS();
2006 | 
2007 | 			ARG_TO_INT(vtxNum, vtxNumText, 0, 255);
2008 | 			dvle->geoShaderVariableNum = vtxNum;
2009 | 			break;
2010 | 		}
2011 | 		case GSHTYPE_FIXED:
2012 | 		{
2013 | 			NEXT_ARG_SPC(arrayStartText);
2014 | 			NEXT_ARG_SPC(vtxNumText);
2015 | 			ENSURE_NO_MORE_ARGS();
2016 | 
2017 | 			ARG_TO_REG(arrayStart, arrayStartText);
2018 | 			ARG_TO_INT(vtxNum, vtxNumText, 0, 255);
2019 | 
2020 | 			if (arrayStart < 0x20 || arrayStart >= 0x80)
2021 | 				return throwError("invalid float uniform register: %s\n", arrayStartText);
2022 | 			if (arrayStart >= firstFreeReg)
2023 | 				return throwError("specified location overlaps uniform allocation pool: %s\n", arrayStartText);
2024 | 
2025 | 			dvle->geoShaderFixedStart = arrayStart - 0x20;
2026 | 			dvle->geoShaderFixedNum = vtxNum;
2027 | 			break;
2028 | 		}
2029 | 	}
2030 | 
2031 | 	return 0;
2032 | }
2033 | 
2034 | 
// Directive dispatch table, selected by ProcessCommand() when a command
// starts with '.'. ProcessCommand() scans it linearly, comparing names
// case-insensitively, and stops at the entry whose name is NULL.
// DEC_DIRECTIVE2(name, handler, param) entries share one handler between
// several directive names; the third argument presumably reaches the handler
// as dirParam (the setfi handler compares dirParam against UTYPE_FVEC and
// UTYPE_IVEC) - the macro definitions are outside this excerpt.
static const cmdTableType dirTable[] =
{
	DEC_DIRECTIVE(proc),
	DEC_DIRECTIVE(else),
	DEC_DIRECTIVE(end),
	DEC_DIRECTIVE(alias),
	DEC_DIRECTIVE2(fvec, uniform, UTYPE_FVEC),
	DEC_DIRECTIVE2(ivec, uniform, UTYPE_IVEC),
	DEC_DIRECTIVE2(bool, uniform, UTYPE_BOOL),
	DEC_DIRECTIVE2(constf, const, UTYPE_FVEC),
	DEC_DIRECTIVE2(consti, const, UTYPE_IVEC),
	DEC_DIRECTIVE(constfa),
	DEC_DIRECTIVE(in),
	DEC_DIRECTIVE(out),
	DEC_DIRECTIVE(entry),
	DEC_DIRECTIVE(nodvle),
	DEC_DIRECTIVE(gsh),
	DEC_DIRECTIVE2(setf, setfi, UTYPE_FVEC),
	DEC_DIRECTIVE2(seti, setfi, UTYPE_IVEC),
	DEC_DIRECTIVE(setb),
	{ NULL, NULL }, // End-of-table sentinel
};
2057 | 
2058 | int ProcessCommand(const char* cmd)
2059 | {
2060 | 	const cmdTableType* table = cmdTable;
2061 | 	if (*cmd == '.')
2062 | 	{
2063 | 		cmd ++;
2064 | 		table = dirTable;
2065 | 	} else if (!g_stackPos)
2066 | 		return throwError("instruction outside block\n");
2067 | 	else
2068 | 	{
2069 | 		lastWasEnd = false;
2070 | 		if (!GetDvleData()->isGeoShader && g_outputBuf.size() > MAX_VSH_SIZE)
2071 | 			return throwError("instruction outside vertex shader code memory (max %d instructions, currently %d)\n", MAX_VSH_SIZE, g_outputBuf.size());
2072 | 	}
2073 | 
2074 | 	for (int i = 0; table[i].name; i ++)
2075 | 		if (stricmp(table[i].name, cmd) == 0)
2076 | 			return table[i].func(cmd, table[i].opcode, table[i].opcodei);
2077 | 
2078 | 	return throwError("invalid instruction: %s\n", cmd);
2079 | }
2080 | 


--------------------------------------------------------------------------------
/source/picasso_frontend.cpp:
--------------------------------------------------------------------------------
  1 | #include "picasso.h"
  2 | 
  3 | // f24 has:
  4 | //  - 1 sign bit
  5 | //  - 7 exponent bits
  6 | //  - 16 mantissa bits
  7 | uint32_t f32tof24(float f)
  8 | {
  9 | 	uint32_t i;
 10 | 	memcpy(&i, &f, sizeof(f));
 11 | 
 12 | 	uint32_t mantissa = (i << 9) >>  9;
 13 | 	int32_t  exponent = (i << 1) >> 24;
 14 | 	uint32_t sign     = (i << 0) >> 31;
 15 | 
 16 | 	// Truncate mantissa
 17 | 	mantissa >>= 7;
 18 | 
 19 | 	// Re-bias exponent
 20 | 	exponent = exponent - 127 + 63;
 21 | 	if (exponent < 0)
 22 | 	{
 23 | 		// Underflow: flush to zero
 24 | 		return sign << 23;
 25 | 	}
 26 | 	else if (exponent > 0x7F)
 27 | 	{
 28 | 		// Overflow: saturate to infinity
 29 | 		return (sign << 23) | (0x7F << 16);
 30 | 	}
 31 | 
 32 | 	return (sign << 23) | (exponent << 16) | mantissa;
 33 | }
 34 | 
 35 | #ifdef WIN32
 36 | static inline void FixMinGWPath(char* buf)
 37 | {
 38 | 	if (buf && *buf == '/')
 39 | 	{
 40 | 		buf[0] = buf[1];
 41 | 		buf[1] = ':';
 42 | 	}
 43 | }
 44 | #endif
 45 | 
 46 | int usage(const char* prog)
 47 | {
 48 | 	fprintf(stderr,
 49 | 		"Usage: %s [options] files...\n"
 50 | 		"Options:\n"
 51 | 		"  -o, --out=<file>        Specifies the name of the SHBIN file to generate\n"
 52 | 		"  -h, --header=<file>     Specifies the name of the header file to generate\n"
 53 | 		"  -n, --no-nop            Disables the automatic insertion of padding NOPs\n"
 54 | 		"  -v, --version           Displays version information\n"
 55 | 		, prog);
 56 | 	return EXIT_FAILURE;
 57 | }
 58 | 
 59 | int main(int argc, char* argv[])
 60 | {
 61 | 	char *shbinFile = NULL, *hFile = NULL;
 62 | 
 63 | 	static struct option long_options[] =
 64 | 	{
 65 | 		{ "out",    required_argument, NULL, 'o' },
 66 | 		{ "header", required_argument, NULL, 'h' },
 67 | 		{ "help",   no_argument,       NULL, '?' },
 68 | 		{ "no-nop", no_argument,       NULL, 'n' },
 69 | 		{ "version",no_argument,       NULL, 'v' },
 70 | 		{ NULL, 0, NULL, 0 }
 71 | 	};
 72 | 
 73 | 	int opt, optidx = 0;
 74 | 	while ((opt = getopt_long(argc, argv, "o:h:?nv", long_options, &optidx)) != -1)
 75 | 	{
 76 | 		switch (opt)
 77 | 		{
 78 | 			case 'o': shbinFile = optarg; break;
 79 | 			case 'h': hFile     = optarg; break;
 80 | 			case '?': usage(argv[0]); return EXIT_SUCCESS;
 81 | 			case 'n': g_autoNop = false; break;
 82 | 			case 'v': printf("%s - Built on %s %s\n", PACKAGE_STRING, __DATE__, __TIME__); return EXIT_SUCCESS;
 83 | 			default:  return usage(argv[0]);
 84 | 		}
 85 | 	}
 86 | 
 87 | #ifdef WIN32
 88 | 	FixMinGWPath(shbinFile);
 89 | 	FixMinGWPath(hFile);
 90 | #endif
 91 | 
 92 | 	if (optind == argc)
 93 | 	{
 94 | 		fprintf(stderr, "%s: no input files are specified\n", argv[0]);
 95 | 		return usage(argv[0]);
 96 | 	}
 97 | 
 98 | 	if (!shbinFile)
 99 | 	{
100 | 		fprintf(stderr, "%s: no output file is specified\n", argv[0]);
101 | 		return usage(argv[0]);
102 | 	}
103 | 
104 | 	int rc = 0;
105 | 	for (int i = optind; i < argc; i ++)
106 | 	{
107 | 		char* vshFile = argv[i];
108 | 
109 | #ifdef WIN32
110 | 		FixMinGWPath(vshFile);
111 | #endif
112 | 
113 | 		char* sourceCode = StringFromFile(vshFile);
114 | 		if (!sourceCode)
115 | 		{
116 | 			fprintf(stderr, "error: cannot open input file: %s\n", vshFile);
117 | 			return EXIT_FAILURE;
118 | 		}
119 | 
120 | 		rc = AssembleString(sourceCode, vshFile);
121 | 		free(sourceCode);
122 | 		if (rc != 0)
123 | 			return EXIT_FAILURE;
124 | 	}
125 | 
126 | 	rc = RelocateProduct();
127 | 	if (rc != 0)
128 | 		return EXIT_FAILURE;
129 | 
130 | 	FileClass f(shbinFile, "wb");
131 | 
132 | 	if (f.openerror())
133 | 	{
134 | 		fprintf(stderr, "Can't open output file!");
135 | 		return EXIT_FAILURE;
136 | 	}
137 | 
138 | 	u32 progSize = g_outputBuf.size();
139 | 	u32 dvlpSize = 10*4 + progSize*4 + g_opdescCount*8;
140 | 
141 | 	// Write DVLB header
142 | 	f.WriteWord(0x424C5644); // DVLB
143 | 	f.WriteWord(g_totalDvleCount); // Number of DVLEs
144 | 
145 | 	// Calculate and write DVLE offsets
146 | 	u32 curOff = 2*4 + g_totalDvleCount*4 + dvlpSize;
147 | 	for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end(); ++dvle)
148 | 	{
149 | 		if (dvle->nodvle) continue;
150 | 		f.WriteWord(curOff);
151 | 		curOff += 16*4; // Header
152 | 		curOff += dvle->constantCount*20;
153 | 		curOff += dvle->outputCount*8;
154 | 		curOff += dvle->uniformCount*8;
155 | 		curOff += dvle->symbolSize;
156 | 		curOff  = (curOff + 3) &~ 3; // Word alignment
157 | 	}
158 | 
159 | 	// Write DVLP header
160 | 	f.WriteWord(0x504C5644); // DVLP
161 | 	f.WriteWord(0); // version
162 | 	f.WriteWord(10*4); // offset to shader binary blob
163 | 	f.WriteWord(progSize); // size of shader binary blob
164 | 	f.WriteWord(10*4 + progSize*4); // offset to opdesc table
165 | 	f.WriteWord(g_opdescCount); // number of opdescs
166 | 	f.WriteWord(dvlpSize); // offset to symtable (TODO)
167 | 	f.WriteWord(0); // ????
168 | 	f.WriteWord(0); // ????
169 | 	f.WriteWord(0); // ????
170 | 
171 | 	// Write program
172 | 	for (outputBufIter it = g_outputBuf.begin(); it != g_outputBuf.end(); ++it)
173 | 		f.WriteWord(*it);
174 | 
175 | 	// Write opdescs
176 | 	for (int i = 0; i < g_opdescCount; i ++)
177 | 		f.WriteDword(g_opdescTable[i]);
178 | 
179 | 	// Write DVLEs
180 | 	for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end(); ++dvle)
181 | 	{
182 | 		if (dvle->nodvle) continue;
183 | 		curOff = 16*4;
184 | 
185 | 		f.WriteWord(0x454C5644); // DVLE
186 | 		f.WriteHword(0x1002); // maybe version?
187 | 		f.WriteByte(dvle->isGeoShader ? 1 : 0); // Shader type
188 | 		f.WriteByte(dvle->isMerge ? 1 : 0);
189 | 		f.WriteWord(dvle->entryStart); // offset to main
190 | 		f.WriteWord(dvle->entryEnd); // offset to end of main
191 | 		f.WriteHword(dvle->inputMask);
192 | 		f.WriteHword(dvle->outputMask);
193 | 		f.WriteByte(dvle->geoShaderType);
194 | 		f.WriteByte(dvle->geoShaderFixedStart);
195 | 		f.WriteByte(dvle->geoShaderVariableNum);
196 | 		f.WriteByte(dvle->geoShaderFixedNum);
197 | 		f.WriteWord(curOff); // offset to constant table
198 | 		f.WriteWord(dvle->constantCount); // size of constant table
199 | 		curOff += dvle->constantCount*5*4;
200 | 		f.WriteWord(curOff); // offset to label table (TODO)
201 | 		f.WriteWord(0); // size of label table (TODO)
202 | 		f.WriteWord(curOff); // offset to output table
203 | 		f.WriteWord(dvle->outputCount); // size of output table
204 | 		curOff += dvle->outputCount*8;
205 | 		f.WriteWord(curOff); // offset to uniform table
206 | 		f.WriteWord(dvle->uniformCount); // size of uniform table
207 | 		curOff += dvle->uniformCount*8;
208 | 		f.WriteWord(curOff); // offset to symbol table
209 | 		f.WriteWord(dvle->symbolSize); // size of symbol table
210 | 
211 | 		// Sort uniforms by position
212 | 		std::sort(dvle->uniformTable, dvle->uniformTable + dvle->uniformCount);
213 | 
214 | 		// Write constants
215 | 		for (int i = 0; i < dvle->constantCount; i ++)
216 | 		{
217 | 			Constant& ct = dvle->constantTable[i];
218 | 			f.WriteHword(ct.type);
219 | 			if (ct.type == UTYPE_FVEC)
220 | 			{
221 | 				f.WriteHword(ct.regId-0x20);
222 | 				for (int j = 0; j < 4; j ++)
223 | 					f.WriteWord(f32tof24(ct.fparam[j]));
224 | 			} else if (ct.type == UTYPE_IVEC)
225 | 			{
226 | 				f.WriteHword(ct.regId-0x80);
227 | 				for (int j = 0; j < 4; j ++)
228 | 					f.WriteByte(ct.iparam[j]);
229 | 			} else if (ct.type == UTYPE_BOOL)
230 | 			{
231 | 				f.WriteHword(ct.regId-0x88);
232 | 				f.WriteWord(ct.bparam ? 1 : 0);
233 | 			}
234 | 			if (ct.type != UTYPE_FVEC)
235 | 				for (int j = 0; j < 3; j ++)
236 | 					f.WriteWord(0); // Padding
237 | 		}
238 | 
239 | 		// Write outputs
240 | 		for (int i = 0; i < dvle->outputCount; i ++)
241 | 			f.WriteDword(dvle->outputTable[i]);
242 | 
243 | 		// Write uniforms
244 | 		size_t sp = 0;
245 | 		for (int i = 0; i < dvle->uniformCount; i ++)
246 | 		{
247 | 			Uniform& u = dvle->uniformTable[i];
248 | 			size_t l = u.name.length()+1;
249 | 			f.WriteWord(sp); sp += l;
250 | 			int pos = u.pos;
251 | 			if (pos >= 0x20)
252 | 				pos -= 0x10;
253 | 			f.WriteHword(pos);
254 | 			f.WriteHword(pos+u.size-1);
255 | 		}
256 | 
257 | 		// Write symbols
258 | 		for (int i = 0; i < dvle->uniformCount; i ++)
259 | 		{
260 | 			std::string u(dvle->uniformTable[i].name);
261 | 			std::replace(u.begin(), u.end(), '$', '.');
262 | 			size_t l = u.length()+1;
263 | 			f.WriteRaw(u.c_str(), l);
264 | 		}
265 | 
266 | 		// Word alignment
267 | 		int pos = f.Tell();
268 | 		int pad = ((pos+3)&~3)-pos;
269 | 		for (int i = 0; i < pad; i ++)
270 | 			f.WriteByte(0);
271 | 	}
272 | 
273 | 	if (hFile)
274 | 	{
275 | 		FILE* f2 = fopen(hFile, "w");
276 | 		if (!f2)
277 | 		{
278 | 			fprintf(stderr, "Can't open header file!\n");
279 | 			return 1;
280 | 		}
281 | 
282 | 		fprintf(f2, "// Generated by picasso\n");
283 | 		fprintf(f2, "#pragma once\n");
284 | 		const char* prefix = g_dvleTable.front().isGeoShader ? "GSH" : "VSH"; // WARNING: HORRIBLE HACK - PLEASE FIX!!!!!!!
285 | 		for (int i = 0; i < g_uniformCount; i ++)
286 | 		{
287 | 			Uniform& u = g_uniformTable[i];
288 | 			const char* name = u.name.c_str();
289 | 			if (*name == '_') continue; // Hidden uniform
290 | 			if (u.type == UTYPE_FVEC)
291 | 				fprintf(f2, "#define %s_FVEC_%s 0x%02X\n", prefix, name, u.pos-0x20);
292 | 			else if (u.type == UTYPE_IVEC)
293 | 				fprintf(f2, "#define %s_IVEC_%s 0x%02X\n", prefix, name, u.pos-0x80);
294 | 			else if (u.type == UTYPE_BOOL)
295 | 			{
296 | 				if (u.size == 1)
297 | 					fprintf(f2, "#define %s_FLAG_%s BIT(%d)\n", prefix, name, u.pos-0x88);
298 | 				else
299 | 					fprintf(f2, "#define %s_FLAG_%s(_n) BIT(%d+(_n))\n", prefix, name, u.pos-0x88);
300 | 			}
301 | 			fprintf(f2, "#define %s_ULEN_%s %d\n", prefix, name, u.size);
302 | 		}
303 | 
304 | 		fclose(f2);
305 | 	}
306 | 
307 | 	return EXIT_SUCCESS;
308 | }
309 | 


--------------------------------------------------------------------------------
/source/types.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <stdint.h>
 3 | 
// Fixed-width integer aliases.
typedef uint64_t dword_t;
typedef uint32_t word_t;
typedef uint16_t hword_t;
typedef uint8_t byte_t;
typedef int64_t dlong_t;
typedef int32_t long_t;
typedef int16_t short_t;
typedef int8_t char_t;
typedef uint64_t u64;
typedef uint32_t u32;
typedef uint16_t u16;
typedef uint8_t u8;

// Mask with only bit n set (computed in unsigned int arithmetic).
#define BIT(n) (1U << (n))

// When the compiler does not predefine __BYTE_ORDER__, derive it (and the
// two __ORDER_*__ constants) from the BYTE_ORDER macros that <sys/param.h>
// is expected to supply.
#ifndef __BYTE_ORDER__
#include <sys/param.h>
#define __BYTE_ORDER__ BYTE_ORDER
#define __ORDER_LITTLE_ENDIAN__ LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN__ BIG_ENDIAN
#endif

// Fallback byte-swap helpers, defined under the names of the compiler
// builtins used by the macros at the end of this file. They are skipped
// entirely when __llvm__ is defined.
//  - __builtin_bswap16: defined for non-GCC compilers and for GCC before 4.8
//  - __builtin_bswap32/64: defined only for GCC 4.x before 4.7
// NOTE(review): for a compiler that is neither GCC nor LLVM only the 16-bit
// fallback is provided here - confirm such compilers supply the 32/64-bit ones.
#ifndef __llvm__
#if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)

// Swaps the two bytes of a 16-bit value.
static inline uint16_t __builtin_bswap16(uint16_t x)
{
	return ((x << 8) & 0xff00) | ((x >> 8) & 0x00ff);
}

#if defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
// Reverses the four bytes of a 32-bit value.
static inline uint32_t __builtin_bswap32(uint32_t x)
{
	return ((x << 24) & 0xff000000) |
	       ((x <<  8) & 0x00ff0000) |
	       ((x >>  8) & 0x0000ff00) |
	       ((x >> 24) & 0x000000ff);
}

// Reverses the eight bytes of a 64-bit value: each 32-bit half is
// byte-swapped and the two halves trade places.
static inline uint64_t __builtin_bswap64(uint64_t x)
{
	return (uint64_t)__builtin_bswap32(x>>32) |
	      ((uint64_t)__builtin_bswap32(x&0xFFFFFFFF) << 32);
}
#endif
#endif
#endif

// Endianness conversion macros: be_*/le_* convert between host byte order and
// big/little-endian. The variant matching the host order is the identity;
// the other one byte-swaps.
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define be_dword(a)  __builtin_bswap64(a)
#define be_word(a)  __builtin_bswap32(a)
#define be_hword(a) __builtin_bswap16(a)
#define le_dword(a)  (a)
#define le_word(a)  (a)
#define le_hword(a) (a)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define be_dword(a)  (a)
#define be_word(a)  (a)
#define be_hword(a) (a)
#define le_dword(a)  __builtin_bswap64(a)
#define le_word(a)  __builtin_bswap32(a)
#define le_hword(a) __builtin_bswap16(a)
#else
#error "What's the endianness of the platform you're targeting?"
#endif
69 | 


--------------------------------------------------------------------------------