├── terrain.png
├── src
    ├── !Snowscape
    │   ├── CLib,ffa
    │   ├── !RunImage,ff8
    │   ├── CallASWI,ffa
    │   ├── assets
    │   │   ├── lookup
    │   │   └── lookup9
    │   ├── SharedULib,ffa
    │   └── !Run,feb
    ├── palette.h
    ├── mesh.h
    ├── render.h
    ├── GNUmakefile
    ├── Makefile
    ├── math3d.h
    ├── palette.c
    ├── main.c
    ├── math3d.c
    ├── mesh.c
    ├── render.c
    ├── cvector.h
    ├── poly.asm
    └── poly.s
├── Images
    ├── snowscapeA3020.adf
    └── snowscapeA5000.adf
├── Scripts
    ├── out_dithered.png
    ├── package.json
    ├── masktable.js
    ├── .gitignore
    ├── package-lock.json
    ├── palette.hex
    ├── pal.js
    └── gen_lookup.js
├── .gitignore
├── LICENSE
└── README.md


/terrain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/terrain.png


--------------------------------------------------------------------------------
/src/!Snowscape/CLib,ffa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/src/!Snowscape/CLib,ffa


--------------------------------------------------------------------------------
/Images/snowscapeA3020.adf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/Images/snowscapeA3020.adf


--------------------------------------------------------------------------------
/Images/snowscapeA5000.adf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/Images/snowscapeA5000.adf


--------------------------------------------------------------------------------
/Scripts/out_dithered.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/Scripts/out_dithered.png


--------------------------------------------------------------------------------
/src/!Snowscape/!RunImage,ff8:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/src/!Snowscape/!RunImage,ff8


--------------------------------------------------------------------------------
/src/!Snowscape/CallASWI,ffa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/src/!Snowscape/CallASWI,ffa


--------------------------------------------------------------------------------
/src/!Snowscape/assets/lookup:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/src/!Snowscape/assets/lookup


--------------------------------------------------------------------------------
/src/!Snowscape/SharedULib,ffa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/src/!Snowscape/SharedULib,ffa


--------------------------------------------------------------------------------
/src/!Snowscape/assets/lookup9:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arkiruthis/snowscape/HEAD/src/!Snowscape/assets/lookup9


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # C Object files
2 | */o/*,ffd
3 | src/obj
4 | /\!Snowscape
5 | 
6 | # Visual Studio Code
7 | .vscode
8 | 


--------------------------------------------------------------------------------
/Scripts/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "dependencies": {
3 |     "chroma-js": "^3.1.2",
4 |     "color-convert": "^2.0.1",
5 |     "pngjs": "^7.0.0"
6 |   }
7 | }
8 | 


--------------------------------------------------------------------------------
/src/!Snowscape/!Run,feb:
--------------------------------------------------------------------------------
1 | Set Game$Dir <Obey$Dir>
2 | WimpSlot -min 2048K -max 2048K
3 | | RMLoad <Game$Dir>.CLib
4 | | RMEnsure CallASWI 0.19 RMLoad <Game$Dir>.CallASWI
5 | | RMEnsure SharedUnixLibrary 1.07 RMLoad <Game$Dir>.SharedULib
6 | <Game$Dir>.!RunImage
7 | 


--------------------------------------------------------------------------------
/src/palette.h:
--------------------------------------------------------------------------------
 1 | #ifndef PALETTE_H
 2 | #define PALETTE_H // Palette programming and fog lookup-table interface.
 3 | 
 4 | extern char *gBaseDirectoryPath; // Defined outside this header; presumably the application's base directory -- confirm at the definition.
 5 | 
 6 | extern unsigned int *g_fogTable; // Lookup buffer reserved and freed by SetupPaletteLookup().
 7 | 
 8 | void SetupPaletteLookup(int allocating); // Non-zero: reserve g_fogTable and publish its address; zero: free it.
 9 | void SetPalette(void);                   // Sends 16 palette entries (VDU 19 sequences) through OS_WriteN.
10 | void Save256(void);                      // Reads palette entries with OS_ReadPalette and writes hex lines to "colors_txt".
11 | int LoadFogLookup(void);                 // NOTE(review): implementation not visible here; return convention unconfirmed.
12 | 
13 | #endif // PALETTE_H
14 | 


--------------------------------------------------------------------------------
/src/mesh.h:
--------------------------------------------------------------------------------
 1 | #ifndef MESH_H
 2 | #define MESH_H // Terrain mesh storage and map-index helpers.
 3 | 
 4 | #include "math3d.h"
 5 | #include "cvector.h"
 6 | 
 7 | #define MAPW 128   // Map dimension used by IX(); a power of two.
 8 | #define _MAPW 127  // MAPW - 1, used as a wrap-around bit mask.
 9 | #define MAPSHIFT 7 // log2(MAPW): (z << MAPSHIFT) == z * MAPW.
10 | 
11 | #define TILESHIFT 4 // 16x16 tiles
12 | #define IX(x, z) (((x) & _MAPW) + (((z) & _MAPW) << (MAPSHIFT))) // Wraps x and z into 0..MAPW-1 and returns the flat index x + z * MAPW.
13 | 
14 | typedef struct Mesh
15 | {
16 |     cvector_vector_type(V3D) verts;             // cvector of V3D vertices.
17 |     cvector_vector_type(TRI) faces;             // cvector of TRI faces.
18 |     cvector_vector_type(V3D) verts_transformed; // cvector of V3D; presumably verts after transformation -- confirm in render.c.
19 | } Mesh;
20 | 
21 | extern Mesh g_Mesh; // Single global mesh instance, defined outside this header.
22 | 
23 | void GenerateTerrain(void);   // NOTE(review): implementations are not visible here; presumably fills g_Mesh -- confirm in mesh.c.
24 | void DeAllocateTerrain(void); // NOTE(review): presumably releases what GenerateTerrain() allocated -- confirm in mesh.c.
25 | fix GetHeight(V3D *eyePos);   // Returns a fix value derived from eyePos; exact sampling rule not visible here.
26 | 
27 | #endif // MESH_H
28 | 
29 | 


--------------------------------------------------------------------------------
/Scripts/masktable.js:
--------------------------------------------------------------------------------
 1 | function generateMask(xLeft, xRight) {
 2 |     // Start with all bits set (but handle as unsigned)
 3 |     let maskL = 0xFFFFFFFF >>> (xLeft * 4);
 4 |     let maskR = 0xFFFFFFFF << (xRight * 4);
 5 |     let mask = ~(maskL & maskR);
 6 |     
 7 |     // Convert to hex string correctly even for negative values
 8 |     return mask >>> 0; // Convert to unsigned 32-bit integer
 9 | }
10 | 
/**
 * Print the raster mask table as assembler source on stdout.
 *
 * Emits a comment line and the `RasterLookup` label, then eight groups
 * (xRight = 0..7) of eight `DCD &XXXXXXXX` rows (xLeft = 7 down to 0).
 * Each group is followed by an empty line.
 */
function generateLookupTable() {
    const STEPS = 8;
    // Format one table word as an 8-digit upper-case hex DCD directive.
    const toDcd = (value) =>
        `        DCD &${value.toString(16).toUpperCase().padStart(8, '0')}`;

    console.log('; Raster mask lookup table for all xLeft/xRight combinations');
    console.log('RasterLookup');

    for (let right = 0; right < STEPS; right++) {
        let left = STEPS;
        while (left-- > 0) {
            console.log(toDcd(generateMask(left, right)));
        }
        console.log(''); // Empty line between groups
    }
}
25 | 
26 | generateLookupTable();


--------------------------------------------------------------------------------
/Scripts/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | 
 5 | # Runtime data
 6 | pids
 7 | *.pid
 8 | *.seed
 9 | *.pid.lock
10 | 
11 | # Directory for instrumented libs generated by jscoverage/JSCover
12 | lib-cov
13 | 
14 | # Coverage directory used by tools like istanbul
15 | coverage
16 | 
17 | # nyc test coverage
18 | .nyc_output
19 | 
20 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
21 | .grunt
22 | 
23 | # node-waf configuration
24 | .lock-wscript
25 | 
26 | # Compiled binary addons (http://nodejs.org/api/addons.html)
27 | build/Release
28 | 
29 | # Dependency directories
30 | node_modules/
31 | jspm_packages/
32 | 
33 | # Typescript v1 declaration files
34 | typings/
35 | 
36 | # Optional npm cache directory
37 | .npm
38 | 
39 | # Optional eslint cache
40 | .eslintcache
41 | 
42 | # Optional REPL history
43 | .node_repl_history
44 | 
45 | # Output of 'npm pack'
46 | *.tgz
47 | 
48 | # Yarn Integrity file
49 | .yarn-integrity
50 | 
51 | # dotenv environment variables file
52 | .env
53 | .env.test
54 | .env.production
55 | 
56 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Nick Anderson
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/src/render.h:
--------------------------------------------------------------------------------
 1 | #ifndef RENDER_H
 2 | #define RENDER_H // Renderer entry points, screen constants and optional timing log.
 3 | 
 4 | #define SCREEN_W 320
 5 | #define SCREEN_H 256
 6 | #define MAXDEPTH 256
 7 | 
 8 | #include "math3d.h"
 9 | 
10 | void SetupRender(void);
11 | void RenderModel(MAT43 *mv, V3D *eyePos, int yaw);
12 | void MultV3DProj(V3D *v, int *clipflags); // NOTE(review): clipflags presumably receives CLIP_* bits from math3d.h -- confirm in render.c.
13 | 
14 | // SCANRANGE: number of tiles scanned on each side of the look center,
15 | // i.e. double it for the maximum tiles ahead. Larger values render more tiles.
16 | 
17 | #ifdef A5000 // Defined by the makefiles (-DA5000) when TARGET_A5000=yes.
18 |     #define SCANRANGE 10
19 | #else // A3000/A30X0
20 |     #define SCANRANGE 6
21 | #endif
22 | 
23 | #define SUBRANGE ((SCANRANGE - 1) << 8) // (SCANRANGE - 1) scaled by 256.
24 | 
25 | // Uncomment the following to enable the timing log.
26 | // #define TIMING_LOG 1
27 | 
28 | #ifdef TIMING_LOG
29 | typedef struct TimerLog // Per-stage counters; units depend on how the renderer fills them -- confirm in render.c.
30 | {
31 |     int transformTiles;
32 |     int submitRenderTriangles;
33 |     int clippingQueue;
34 |     int project3D;
35 |     int sceneRender;
36 |     int biggestVertex;
37 |     int clippedCount;
38 | } TimerLog;
39 | 
40 | extern TimerLog gTimerLog;
41 | 
42 | // SWI numbers for David Ruck's TimerMod,
43 | // which can be found at https://armclub.org.uk/free/
44 | #define SWI_Timer_Start 0x000490C0
45 | #define SWI_Timer_Stop 0x000490C1
46 | #define SWI_Timer_Value 0x000490C2
47 | 
48 | int GetRenderDelta(void);
49 | 
50 | #endif // TIMING_LOG
51 | 
52 | #endif // RENDER_H
53 | 


--------------------------------------------------------------------------------
/Scripts/package-lock.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Scripts",
 3 |   "lockfileVersion": 3,
 4 |   "requires": true,
 5 |   "packages": {
 6 |     "": {
 7 |       "dependencies": {
 8 |         "chroma-js": "^3.1.2",
 9 |         "color-convert": "^2.0.1",
10 |         "pngjs": "^7.0.0"
11 |       }
12 |     },
13 |     "node_modules/chroma-js": {
14 |       "version": "3.1.2",
15 |       "resolved": "https://registry.npmjs.org/chroma-js/-/chroma-js-3.1.2.tgz",
16 |       "integrity": "sha512-IJnETTalXbsLx1eKEgx19d5L6SRM7cH4vINw/99p/M11HCuXGRWL+6YmCm7FWFGIo6dtWuQoQi1dc5yQ7ESIHg=="
17 |     },
18 |     "node_modules/color-convert": {
19 |       "version": "2.0.1",
20 |       "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz",
21 |       "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==",
22 |       "dependencies": {
23 |         "color-name": "~1.1.4"
24 |       },
25 |       "engines": {
26 |         "node": ">=7.0.0"
27 |       }
28 |     },
29 |     "node_modules/color-name": {
30 |       "version": "1.1.4",
31 |       "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz",
32 |       "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="
33 |     },
34 |     "node_modules/pngjs": {
35 |       "version": "7.0.0",
36 |       "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-7.0.0.tgz",
37 |       "integrity": "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow==",
38 |       "engines": {
39 |         "node": ">=14.19.0"
40 |       }
41 |     }
42 |   }
43 | }
44 | 


--------------------------------------------------------------------------------
/Scripts/palette.hex:
--------------------------------------------------------------------------------
  1 | 000000
  2 | 333333
  3 | 773377
  4 | 111111
  5 | 440000
  6 | 000000
  7 | 662266
  8 | 151270
  9 | 223110
 10 | 400020
 11 | 461035
 12 | 452152
 13 | 543260
 14 | 000060
 15 | 202060
 16 | 602060
 17 | 880000
 18 | BB3333
 19 | FF3377
 20 | 991111
 21 | CC0000
 22 | 880000
 23 | EE2266
 24 | 9D1270
 25 | AA3110
 26 | C80020
 27 | CE1035
 28 | CD2152
 29 | DC3260
 30 | 880060
 31 | A82060
 32 | E82060
 33 | 004400
 34 | 337733
 35 | 777777
 36 | 115511
 37 | 444400
 38 | 004400
 39 | 666666
 40 | 155670
 41 | 227510
 42 | 404420
 43 | 465435
 44 | 456552
 45 | 547660
 46 | 004460
 47 | 206460
 48 | 606460
 49 | 884400
 50 | BB7733
 51 | FF7777
 52 | 995511
 53 | CC4400
 54 | 884400
 55 | EE6666
 56 | 9D5670
 57 | AA7510
 58 | C84420
 59 | CE5435
 60 | CD6552
 61 | DC7660
 62 | 884460
 63 | A86460
 64 | E86460
 65 | 008800
 66 | 33BB33
 67 | 77BB77
 68 | 119911
 69 | 448800
 70 | 008800
 71 | 66AA66
 72 | 159A70
 73 | 22B910
 74 | 408820
 75 | 469835
 76 | 45A952
 77 | 54BA60
 78 | 008860
 79 | 20A860
 80 | 60A860
 81 | 888800
 82 | BBBB33
 83 | FFBB77
 84 | 999911
 85 | CC8800
 86 | 888800
 87 | EEAA66
 88 | 9D9A70
 89 | AAB910
 90 | C88820
 91 | CE9835
 92 | CDA952
 93 | DCBA60
 94 | 888860
 95 | A8A860
 96 | E8A860
 97 | 00CC00
 98 | 33FF33
 99 | 77FF77
100 | 11DD11
101 | 44CC00
102 | 00CC00
103 | 66EE66
104 | 15DE70
105 | 22FD10
106 | 40CC20
107 | 46DC35
108 | 45ED52
109 | 54FE60
110 | 00CC60
111 | 20EC60
112 | 60EC60
113 | 88CC00
114 | BBFF33
115 | FFFF77
116 | 99DD11
117 | CCCC00
118 | 88CC00
119 | EEEE66
120 | 9DDE70
121 | AAFD10
122 | C8CC20
123 | CEDC35
124 | CDED52
125 | DCFE60
126 | 88CC60
127 | A8EC60
128 | E8EC60
129 | 000088
130 | 3333BB
131 | 7733FF
132 | 111199
133 | 440088
134 | 000088
135 | 6622EE
136 | 1512F8
137 | 223198
138 | 4000A8
139 | 4610BD
140 | 4521DA
141 | 5432E8
142 | 0000E8
143 | 2020E8
144 | 6020E8
145 | 880088
146 | BB33BB
147 | FF33FF
148 | 991199
149 | CC0088
150 | 880088
151 | EE22EE
152 | 9D12F8
153 | AA3198
154 | C800A8
155 | CE10BD
156 | CD21DA
157 | DC32E8
158 | 8800E8
159 | A820E8
160 | E820E8
161 | 004488
162 | 3377BB
163 | 7777FF
164 | 115599
165 | 444488
166 | 004488
167 | 6666EE
168 | 1556F8
169 | 227598
170 | 4044A8
171 | 4654BD
172 | 4565DA
173 | 5476E8
174 | 0044E8
175 | 2064E8
176 | 6064E8
177 | 884488
178 | BB77BB
179 | FF77FF
180 | 995599
181 | CC4488
182 | 884488
183 | EE66EE
184 | 9D56F8
185 | AA7598
186 | C844A8
187 | CE54BD
188 | CD65DA
189 | DC76E8
190 | 8844E8
191 | A864E8
192 | E864E8
193 | 008888
194 | 33BBBB
195 | 77BBFF
196 | 119999
197 | 448888
198 | 008888
199 | 66AAEE
200 | 159AF8
201 | 22B998
202 | 4088A8
203 | 4698BD
204 | 45A9DA
205 | 54BAE8
206 | 0088E8
207 | 20A8E8
208 | 60A8E8
209 | 888888
210 | BBBBBB
211 | FFBBFF
212 | 999999
213 | CC8888
214 | 888888
215 | EEAAEE
216 | 9D9AF8
217 | AAB998
218 | C888A8
219 | CE98BD
220 | CDA9DA
221 | DCBAE8
222 | 8888E8
223 | A8A8E8
224 | E8A8E8
225 | 00CC88
226 | 33FFBB
227 | 77FFFF
228 | 11DD99
229 | 44CC88
230 | 00CC88
231 | 66EEEE
232 | 15DEF8
233 | 22FD98
234 | 40CCA8
235 | 46DCBD
236 | 45EDDA
237 | 54FEE8
238 | 00CCE8
239 | 20ECE8
240 | 60ECE8
241 | 88CC88
242 | BBFFBB
243 | FFFFFF
244 | 99DD99
245 | CCCC88
246 | 88CC88
247 | EEEEEE
248 | 9DDEF8
249 | AAFD98
250 | C8CCA8
251 | CEDCBD
252 | CDEDDA
253 | DCFEE8
254 | 88CCE8
255 | A8ECE8
256 | E8ECE8
257 | 


--------------------------------------------------------------------------------
/src/GNUmakefile:
--------------------------------------------------------------------------------
  1 | # The name of the program
  2 | PROJNAME = Snowscape
  3 | 
  4 | # Build configuration
  5 | export ELFOBJECTTYPE ?= HARDFPU
  6 | export OPTIMIZE ?= yes
  7 | 
  8 | # Set compiler flags based on ELFOBJECTTYPE (matching OSLib build system)
  9 | ifeq ($(OPTIMIZE),yes)
 10 | OPTFLAGS = -O2
 11 | else
 12 | OPTFLAGS = -g
 13 | endif
 14 | 
 15 | ifeq ($(ELFOBJECTTYPE),SOFTFPU)
 16 | CFLAGS = $(OPTFLAGS) -mfloat-abi=soft
 17 | else ifeq ($(ELFOBJECTTYPE),HARDFPU)
 18 | CFLAGS = $(OPTFLAGS) -mlibscl
 19 | else ifeq ($(ELFOBJECTTYPE),MODULE)
 20 | CFLAGS = $(OPTFLAGS) -mmodule
 21 | else
 22 | $(error Unknown ELFOBJECTTYPE value, possible values are SOFTFPU, HARDFPU and MODULE)
 23 | endif
 24 | 
 25 | # Feature flags - default to no if not specified
 26 | USE_256_COLORS ?= no
 27 | TARGET_A5000 ?= no
 28 | 
 29 | ifeq ($(TARGET_A5000),yes)
 30 | CFLAGS += -DA5000
 31 | endif
 32 | 
 33 | ifeq ($(USE_256_COLORS),yes)
 34 | CFLAGS += -DPAL_256
 35 | ASFLAGS = -Wa,--defsym,PAL_256=1
 36 | else
 37 | ASFLAGS = 
 38 | endif
 39 | 
 40 | #------------------------------------------------------------------------
 41 | # Paths & directories
 42 | SRCDIR := ./
 43 | OBJDIR := ./obj
 44 | DEPDIR := $(OBJDIR)/.deps
 45 | 
 46 | # Include paths
 47 | INCLUDES := -I$(GCCSDK_INSTALL_ENV)/include
 48 | 
 49 | # Libraries and linking
 50 | LIBS := -lm
 51 | LDFLAGS := -L$(GCCSDK_INSTALL_ENV)/lib -static
 52 | 
 53 | # Source files (.c or .s)
 54 | SRCFILES = main.c math3d.c mesh.c palette.c render.c poly.s
 55 | 
 56 | # Generate object and dependency file names
 57 | OBJFILES = $(patsubst %.c,$(OBJDIR)/%.o,$(patsubst %.s,$(OBJDIR)/%.o,$(SRCFILES)))
 58 | DEPFILES = $(patsubst %.c,$(DEPDIR)/%.d,$(filter %.c,$(SRCFILES)))
 59 | 
 60 | # Header files for dependency tracking
 61 | HEADERS = $(wildcard *.h)
 62 | 
 63 | #------------------------------------------------------------------------
 64 | # Tools
 65 | GCC = $(wildcard $(GCCSDK_INSTALL_CROSSBIN)/*gcc)
 66 | RM = rm -f
 67 | MKDIR_P = mkdir -p
 68 | 
 69 | #------------------------------------------------------------------------
 70 | # Targets
 71 | 
 72 | # Default target
 73 | all: !$(PROJNAME)/!RunImage,ff8
 74 | 
 75 | # Debug build target
 76 | debug: OPTIMIZE = no
 77 | debug: clean all
 78 | 
 79 | # Release build target  
 80 | release: OPTIMIZE = yes
 81 | release: clean all
 82 | 
 83 | # Create directories if they don't exist
 84 | $(OBJDIR) $(DEPDIR):
 85 | 	$(MKDIR_P) $@
 86 | 
 87 | # Link the ELF object files into an AIF executable
 88 | !$(PROJNAME)/!RunImage,ff8: $(OBJFILES)
 89 | 	$(GCC) $(CFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS)
 90 | 
 91 | # Pattern rule for C sources with dependency generation
 92 | $(OBJDIR)/%.o: $(SRCDIR)/%.c | $(OBJDIR) $(DEPDIR)
 93 | 	$(GCC) -c $(CFLAGS) $(INCLUDES) -MMD -MP -MF $(DEPDIR)/$*.d $< -o $@
 94 | 
 95 | # Pattern rule for assembly sources
 96 | $(OBJDIR)/%.o: $(SRCDIR)/%.s | $(OBJDIR)
 97 | 	$(GCC) -c $(CFLAGS) $(ASFLAGS) $(INCLUDES) $< -o $@
 98 | 
 99 | # Include dependency files if they exist
100 | -include $(DEPFILES)
101 | 
102 | # Clean target
103 | clean:
104 | 	$(RM) $(OBJFILES) $(DEPFILES) !$(PROJNAME)/!RunImage,ff8
105 | 	$(RM) -d $(DEPDIR) $(OBJDIR) 2>/dev/null || true
106 | 
107 | # Force rebuild of all targets
108 | rebuild: clean all
109 | 
110 | # Show build configuration
111 | show-config:
112 | 	@echo "Project: $(PROJNAME)"
113 | 	@echo "ELFOBJECTTYPE: $(ELFOBJECTTYPE)"
114 | 	@echo "OPTIMIZE: $(OPTIMIZE)" 
115 | 	@echo "USE_256_COLORS: $(USE_256_COLORS)"
116 | 	@echo "TARGET_A5000: $(TARGET_A5000)"
117 | 	@echo "CFLAGS: $(CFLAGS)"
118 | 	@echo "ASFLAGS: $(ASFLAGS)"
119 | 	@echo "Compiler: $(GCC)"
120 | 
121 | .PHONY: all debug release clean rebuild show-config


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
  1 | ###############################################################################
  2 | # Setting up ArchieSDK
  3 | ifeq ($(strip $(ARCHIESDK)),)
  4 | $(error "ARCHIESDK was not found in your environment. please export ARCHIESDK")
  5 | endif
  6 | include $(ARCHIESDK)/config.mk
  7 | ###############################################################################
  8 | 
  9 | # Project configuration
 10 | APPNAME = !RunImage
 11 | PROJNAME = Snowscape
 12 | 
 13 | # Directories
 14 | SRCDIR := ./
 15 | OBJDIR := ./obj
 16 | DEPDIR := $(OBJDIR)/.deps
 17 | TARGETDIR := ./!$(PROJNAME)
 18 | 
 19 | # Source files (.c or .s)
 20 | SRCFILES = main.c math3d.c mesh.c palette.c render.c poly.s
 21 | 
 22 | # Libraries
 23 | LIBS = -lm
 24 | 
 25 | # Build optimization - default to release
 26 | OPTIMIZE ?= yes
 27 | 
 28 | # Feature flags - default to no if not specified
 29 | USE_256_COLORS ?= no
 30 | TARGET_A5000 ?= no
 31 | 
 32 | # Set optimization flags
 33 | ifeq ($(OPTIMIZE),yes)
 34 | CFLAGS += -O2
 35 | else
 36 | CFLAGS += -g
 37 | endif
 38 | 
 39 | # Apply feature flags
 40 | ifeq ($(TARGET_A5000),yes)
 41 | CFLAGS += -DA5000
 42 | endif
 43 | 
 44 | ifeq ($(USE_256_COLORS),yes)
 45 | CFLAGS += -DPAL_256
 46 | ASFLAGS += -Wa,--defsym,PAL_256=1
 47 | else
 48 | ASFLAGS += 
 49 | endif
 50 | 
 51 | # Object and dependency files
 52 | OBJFILES = $(patsubst %.c,$(OBJDIR)/%.o,$(patsubst %.s,$(OBJDIR)/%.o,$(SRCFILES)))
 53 | DEPFILES = $(patsubst %.c,$(DEPDIR)/%.d,$(filter %.c,$(SRCFILES)))
 54 | 
 55 | # Tools
 56 | RM = rm -f
 57 | MKDIR_P = mkdir -p
 58 | 
 59 | #------------------------------------------------------------------------
 60 | # Targets
 61 | 
 62 | # Default target
 63 | all: $(TARGETDIR)/$(APPNAME),ff8
 64 | 
 65 | # Debug build target
 66 | debug: OPTIMIZE = no
 67 | debug: clean all
 68 | 
 69 | # Release build target  
 70 | release: OPTIMIZE = yes
 71 | release: clean all
 72 | 
 73 | # Create directories if they don't exist
 74 | $(OBJDIR) $(DEPDIR) $(TARGETDIR):
 75 | 	$(MKDIR_P) $@
 76 | 
 77 | # Build with separate compilation and linking for better dependency handling
 78 | $(TARGETDIR)/$(APPNAME),ff8: $(OBJFILES) | $(TARGETDIR)
 79 | 	$(ARCHIECC) $(CFLAGS) -o $(TARGETDIR)/tmpProg $(OBJFILES) $(LIBS)
 80 | 	$(ARCHIEOBJCOPY) -O binary $(TARGETDIR)/tmpProg $@
 81 | 	$(RM) $(TARGETDIR)/tmpProg
 82 | 
 83 | # Pattern rule for C sources with dependency generation
 84 | $(OBJDIR)/%.o: $(SRCDIR)/%.c | $(OBJDIR) $(DEPDIR)
 85 | 	$(ARCHIECC) -c $(CFLAGS) -MMD -MP -MF $(DEPDIR)/$*.d $< -o $@
 86 | 
 87 | # Pattern rule for assembly sources
 88 | $(OBJDIR)/%.o: $(SRCDIR)/%.s | $(OBJDIR)
 89 | 	$(ARCHIECC) -c $(CFLAGS) $(ASFLAGS) $< -o $@
 90 | 
 91 | # Include dependency files if they exist
 92 | -include $(DEPFILES)
 93 | 
 94 | # Generate ASM from C files with better organization
 95 | asm: | $(OBJDIR)
 96 | 	$(ARCHIECC) $(CFLAGS) $(ASFLAGS) -S $(filter %.c,$(SRCFILES)) -o $(OBJDIR)/
 97 | 
 98 | # Clean target
 99 | clean:
100 | 	$(RM) $(OBJFILES) $(DEPFILES) $(TARGETDIR)/$(APPNAME),ff8 $(TARGETDIR)/tmpProg
101 | 	$(RM) -d $(DEPDIR) $(OBJDIR) 2>/dev/null || true
102 | 
103 | # Force rebuild of all targets
104 | rebuild: clean all
105 | 
106 | # Show build configuration
107 | show-config:
108 | 	@echo "Project: $(PROJNAME)"
109 | 	@echo "App Name: $(APPNAME)"
110 | 	@echo "ArchieSDK: $(ARCHIESDK)"
111 | 	@echo "OPTIMIZE: $(OPTIMIZE)" 
112 | 	@echo "USE_256_COLORS: $(USE_256_COLORS)"
113 | 	@echo "TARGET_A5000: $(TARGET_A5000)"
114 | 	@echo "CFLAGS: $(CFLAGS)"
115 | 	@echo "ASFLAGS: $(ASFLAGS)"
116 | 	@echo "Compiler: $(ARCHIECC)"
117 | 	@echo "Object Copy: $(ARCHIEOBJCOPY)"
118 | 
119 | # Install/copy target (matches build.sh behavior)
120 | install: all
121 | 	@if [ -n "$(TARGETCOPY)" ]; then \
122 | 		echo "Copying to $(TARGETCOPY)/!$(PROJNAME)"; \
123 | 		$(MKDIR_P) "$(TARGETCOPY)/!$(PROJNAME)"; \
124 | 		cp -rf $(TARGETDIR)/* "$(TARGETCOPY)/!$(PROJNAME)/"; \
125 | 	else \
126 | 		echo "TARGETCOPY not set - skipping install"; \
127 | 	fi
128 | 
129 | .PHONY: all debug release clean rebuild asm show-config install


--------------------------------------------------------------------------------
/src/math3d.h:
--------------------------------------------------------------------------------
  1 | #ifndef MATH3D_H
  2 | #define MATH3D_H
  3 | 
  4 | #ifndef PI
  5 | #define PI 3.14159265 // Fallback used only when PI is not already defined.
  6 | #endif // PI
  7 | 
  8 | #ifndef M_PI
  9 | #define M_PI 3.1415926535 // Fallback used only when M_PI is not already defined.
 10 | #endif // M_PI
 11 | 
 12 | #define MAX_CLIPPING_VERTS 10 // Capacity of POLYGON.verts.
 13 | #define MAX_CLIPPING_TRIS 10
 14 | #define CLIP_LEFT 0x01   // CLIP_* are distinct bits, so they can be OR-ed into one flags word.
 15 | #define CLIP_RIGHT 0x02
 16 | #define CLIP_TOP 0x04
 17 | #define CLIP_BOTTOM 0x08
 18 | 
 19 | // fix is a signed 32-bit fixed-point value split halfway: 1 sign bit,
 20 | // 15 integer bits and 16 fraction bits (16.16), so 1.0 is stored as 65536.
 21 | typedef signed int fix;
 22 | 
 23 | // Fixed point utilities
 24 | #define int2fix(x) ((x) << 16) // Integer -> fix.
 25 | #define fix2int(x) ((x) >> 16) // fix -> integer; the 16 fraction bits are shifted out.
 26 | #define float2fix(a) (fix)((a) * 65536.f)
 27 | #define fix2float(a) (float)((a) / 65536.f)
 28 | #define fixmult(a, b) (fix)(((a) >> 8) * ((b) >> 8)) // Shifts the low 8 bits out of each operand before multiplying.
 29 | #define fixmultINTL(a, b) (fix)(((a) >> 16) * (b)) // Integer part of a, multiplied by b.
 30 | #define fixdiv(a, b) (fix)(((a) << 8) / ((b) >> 8)) // NOTE - slow; divides by zero when 0 <= b < 256 (raw), because (b) >> 8 is then 0.
 31 | 
 32 | #ifndef min
 33 | #define min(x, y) ((x) < (y) ? (x) : (y)) // Arguments are evaluated more than once.
 34 | #endif // min
 35 | 
 36 | #ifndef max
 37 | #define max(x, y) ((x) > (y) ? (x) : (y)) // Arguments are evaluated more than once.
 38 | #endif // max
 39 | 
 40 | #ifndef clamp
 41 | #define clamp(x, min, max) ((x) < (min) ? (min) : ((x) > (max) ? (max) : (x))) // Limits x to [min, max]; x may be evaluated up to three times.
 42 | #endif // clamp
 43 | 
 44 | // SIN/COS lookups
 45 | #define SINETABLE_SIZE 1024
 46 | #define _SINETABLE_SIZE 1023
 47 | #define fixsin(a) g_SineTable[(a) & _SINETABLE_SIZE]
 48 | #define fixcos(a) g_SineTable[((a) + 256) & _SINETABLE_SIZE]
 49 | extern fix *g_SineTable;
 50 | 
 51 | // Reciprocal table e.g., 1/a
 52 | #define ONEOVERTABLE_SIZE 1024
 53 | #define oneover(a) g_oneOver[(a)]
 54 | #define multOneOver(a, b) (fix)((a >> 16) * (oneover(b))) // Essentially INT * FIX, useful for reciprocal
 55 | extern fix *g_oneOver;
 56 | 
 57 | // Edge list buffers
 58 | #define EDGELIST_SIZE 256 // 256 * 4 = 1024 bytes, max screen height of 256
 59 | // extern fix *g_edgeList;
 60 | // extern fix *g_edgeListRight;
 61 | 
 62 | // General utilities
 63 | #define largest(x, y, z) ((x) * (x > y & x > z) + (y) * (y > x & y > z) + (z) * (z > x & z > y))
 64 | #define smin(a, b) (b + ((a - b) & ((a - b) >> (sizeof(int) * 8 - 1))))
 65 | #define smallest(x, y, z) smin(x, smin(y, z))
 66 | #define orient2d(a, b, c) fixmult((b).x - (a).x, (c).y - (a).y) - fixmult((b).y - (a).y, (c).x - (a).x)
 67 | #define orient2dint(a, b, c) (((b).x - (a).x) * ((c).y - (a).y)) - (((b).y - (a).y) * ((c).x - (a).x))
 68 | 
 69 | #define subtractV3D(a, b, c) \
 70 |     (c).x = (a).x - (b).x;   \
 71 |     (c).y = (a).y - (b).y;   \
 72 |     (c).z = (a).z - (b).z;
 73 | 
 74 | typedef struct V2D // 2D vector with fix components.
 75 | {
 76 |     fix x;
 77 |     fix y;
 78 | } V2D;
 79 | 
 80 | typedef struct V3D // 3D vector with fix components.
 81 | {
 82 |     fix x;
 83 |     fix y;
 84 |     fix z;
 85 | } V3D;
 86 | 
 87 | typedef struct V4D // 4D vector with fix components.
 88 | {
 89 |     fix x;
 90 |     fix y;
 91 |     fix z;
 92 |     fix w;
 93 | } V4D;
 94 | 
 95 | #define TRI_INTENSITY_MASK 0xFF // Low 8 bits; presumably selects the intensity stored in TRI.flags -- confirm in render.c.
 96 | #define TRI_CLIPPED_BIT 0x200   // Bit 9; presumably marks a clipped triangle in TRI.flags -- confirm in render.c.
 97 | 
 98 | #define SET_BIT(a, b) ((a) |= (b))    // Modifies a in place: sets the bits given in b.
 99 | #define CLEAR_BIT(a, b) ((a) &= ~(b)) // Modifies a in place: clears the bits given in b.
100 | #define TEST_BIT(a, b) ((a) & (b))    // Non-zero when any bit of b is set in a.
101 | 
102 | typedef struct TRI
103 | {
104 |     unsigned short a, b, c, flags; // a, b, c: presumably vertex indices; flags: see the TRI_* masks -- confirm in mesh.c/render.c.
105 |     V2D centerpoint;
106 |     fix depth;
107 |     void *next; // Untyped link to another object; presumably the next TRI in a render list -- confirm in render.c.
108 | } TRI;
109 | 
110 | typedef struct POLYGON
111 | {
112 |     V3D verts[MAX_CLIPPING_VERTS]; // Fixed-capacity vertex storage.
113 |     int numVerts;                  // Presumably the number of verts[] entries in use -- confirm in render.c.
114 | } POLYGON;
115 | 
116 | typedef struct MAT43 // Nine m11..m33 elements plus tx, ty, tz, all fix.
117 | {
118 |     fix m11, m12, m13;
119 |     fix m21, m22, m23;
120 |     fix m31, m32, m33;
121 |     fix tx, ty, tz;
122 | } MAT43;
123 | 
124 | typedef struct MAT44 // Sixteen m11..m44 elements, all fix.
125 | {
126 |     fix m11, m12, m13, m14;
127 |     fix m21, m22, m23, m24;
128 |     fix m31, m32, m33, m34;
129 |     fix m41, m42, m43, m44;
130 | } MAT44;
131 | 
132 | void SetupMathsGlobals(int isAllocating); // NOTE(review): presumably allocates/frees g_SineTable and g_oneOver -- confirm in math3d.c.
133 | 
134 | void SetIdentity(MAT43 *mat);
135 | void SetScale(MAT43 *mat, fix sx, fix sy, fix sz);
136 | void SetScaleUniversal(MAT43 *mat, fix s);
137 | void MultMatMat(MAT43 *dest, MAT43 *a, MAT43 *b);
138 | void MultV3DMat(V3D *v, V3D *dest, MAT43 *mat); // Note the argument order: source vector first, destination second.
139 | void MultV4DMat(V4D *v, V4D *dest, MAT44 *mat); // Same order as MultV3DMat: source, destination, matrix.
140 | void RotateX(MAT43 *mat, int angle); // NOTE(review): angle is an int; presumably a sine-table index as used by fixsin/fixcos -- confirm in math3d.c.
141 | void RotateY(MAT43 *mat, int angle);
142 | void RotateAxis(MAT43 *mat, V3D *axis, int angle);
143 | void EulerToMat(MAT43 *mat, int heading, int pitch, int bank);
144 | void Normal(V3D *a, V3D *b, V3D *c, V3D *n); // NOTE(review): presumably writes the normal of triangle a, b, c into n -- confirm in math3d.c.
145 | void Normalize(V3D *v);
146 | fix DotProduct(const V3D *v1, const V3D *v2);
147 | V3D SubV3D(const V3D *a, const V3D *b); // Returns its V3D result by value.
148 | V3D CrossProductV3D(const V3D *a, const V3D *b); // Returns its V3D result by value.
149 | void LookAt(const V3D *eyePos, const V3D *forward, MAT43 *mat);
150 | void PerspectiveProjection(MAT44 *mat, float fov, float aspect, float znear, float zfar); // Takes float parameters, unlike the fix-based functions above.
151 | 
152 | #endif // MATH3D_H
153 | 
154 | 


--------------------------------------------------------------------------------
/src/palette.c:
--------------------------------------------------------------------------------
  1 | #include "palette.h"
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | 
  6 | #include <kernel.h>
  7 | #include <swis.h>
  8 | 
  9 | #include "cvector.h"
 10 | 
 11 | static _kernel_oserror *err; // Result of the most recent _kernel_swi call; assigned but not inspected in the visible code.
 12 | static _kernel_swi_regs rin, rout; // Shared input/output register blocks passed to _kernel_swi.
 13 | 
 14 | unsigned int *g_fogTable; // Lookup buffer managed by SetupPaletteLookup().
 15 | extern unsigned int FogTable; // Defined outside this file; receives g_fogTable's address as an integer.
 16 | 
 17 | #define HEX_VAL(x) (((x) >> 16) & 0xFF), (((x) >> 8) & 0xFF), ((x) & 0xFF) // Expands a 0xRRGGBB value to three comma-separated bytes: R, G, B.
 18 | 
 19 | static unsigned int inputPalette[16] = { // 16 colours as 0xRRGGBB; SetPalette() sends them to palette entries 0-15.
 20 | #ifdef PAL_256 // Set by the makefiles (-DPAL_256) when USE_256_COLORS=yes.
 21 |     (0x004488), // low blue
 22 |     (0x3377bb), // mid blue
 23 |     (0x77bbff), // high blue
 24 |     (0x115511), // low green
 25 |     (0x448800), // mid green
 26 |     (0x88cc00), // high green
 27 |     (0x66aaee), // sky color
 28 |     (0x155A78),
 29 |     (0x2A7190),
 30 |     (0x4088A8),
 31 |     (0x4698BD),
 32 |     (0x4DA9D2),
 33 |     (0x54BAE8),
 34 |     (0x88CCE8),
 35 |     (0xA8ECE8),
 36 |     (0xE8ECE8),
 37 | #else  // Original ST/Amiga Midwinter Palette
 38 |     (0x000000),
 39 |     (0x004060),
 40 |     (0x206080),
 41 |     (0x4080a0),
 42 |     (0x80c0e0),
 43 |     (0xe0e0e0),
 44 |     (0x402000),
 45 |     (0x602000),
 46 |     (0x804020),
 47 |     (0xa06000),
 48 |     (0xc08060),
 49 |     (0x004000),
 50 |     (0x006000),
 51 |     (0xa02000),
 52 |     (0xc0c000),
 53 |     (0x20a0e0),
 54 | #endif // PAL_256
 55 | };
 56 | 
 57 | void SetupPaletteLookup(int allocating)
 58 | {
 59 |     if (allocating)
 60 |     {
 61 |         printf("Allocating tables required for 2x2 bayer lookup table...\n");
 62 | #ifdef PAL_256
 63 |         cvector_reserve(g_fogTable, 64 * 2 * 256 * 4);
 64 | #else
 65 |         cvector_reserve(g_fogTable, 4 * 16 * 32 * 4);
 66 | #endif // PAL_256
 67 |         FogTable = (unsigned int)(g_fogTable);
 68 |         printf("Done.\n");
 69 |     }
 70 |     else
 71 |     {
 72 |         printf("Freeing tables required for Palette...\n");
 73 |         cvector_free(g_fogTable);
 74 |         printf("Done.\n");
 75 |     }
 76 | }
 77 | 
 78 | // The Archimedes 256-color palette is only slightly tweakable. We cant set
 79 | // 16 colors to (mostly) any value we want, with the rest being a sort of
 80 | // 'house mix' of other colors and tints.
 81 | void SetPalette(void)
 82 | {
 83 |     typedef struct PalEntry
 84 |     {
 85 |         unsigned char VDU, INDEX, MODE, R, G, B;
 86 |     } PalEntry;
 87 | 
 88 |     PalEntry pal;
 89 |     int i = 0;
 90 | 
 91 |     pal.VDU = 19;
 92 |     pal.MODE = 16;
 93 | 
 94 |     rin.r[0] = (unsigned int)&pal;
 95 |     rin.r[1] = 6;
 96 | 
 97 |     for (i = 0; i < 16; ++i)
 98 |     {
 99 |         pal.R = (inputPalette[i] >> 16) & 0xFF;
100 |         pal.G = (inputPalette[i] >> 8) & 0xFF;
101 |         pal.B = (inputPalette[i]) & 0xFF;
102 |         pal.INDEX = i;
103 | 
104 |         err = _kernel_swi(OS_WriteN, &rin, &rout);
105 |     }
106 | }
107 | 
108 | void Save256()
109 | {
110 |     unsigned int i, j, h;
111 |     char hex[200];
112 |     FILE *file;
113 | 
114 |     unsigned char originalFound[16] = {0, 0, 0, 0, 0, 0, 0, 0,
115 |                                        0, 0, 0, 0, 0, 0, 0, 0};
116 | 
117 |     file = fopen("colors_txt", "w");
118 |     if (!file)
119 |         return;
120 | 
121 |     for (i = 0; i < 256; i++)
122 |     {
123 |         rin.r[0] = i;
124 |         rin.r[1] = 16;
125 |         err = _kernel_swi(OS_ReadPalette, &rin, &rout);
126 |         h = ((rout.r[2] >> 8) & 0xFF) << 16 | ((rout.r[2] >> 16) & 0xFF) << 8 |
127 |             ((rout.r[2] >> 24) & 0xFF);
128 | 
129 |         for (j = 0; j < 16; ++j)
130 |         {
131 |             if (h == inputPalette[j])
132 |             {
133 |                 ++originalFound[j];
134 |                 break;
135 |             }
136 |         }
137 | 
138 |         sprintf(hex, "%06X\n", h);
139 |         fputs(hex, file);
140 |     }
141 | 
142 |     for (i = 0; i < 16; ++i)
143 |     {
144 |         if (originalFound[i] > 0)
145 |             sprintf(hex, "%d: %d times\n", i, originalFound[i]);
146 |         else
147 |             sprintf(hex, "%d: not found\n", i);
148 | 
149 |         fputs(hex, file);
150 |     }
151 | 
152 |     fclose(file);
153 | }
154 | 
155 | /**
156 |  * Loads a lookup table for the bayer dithering effect.
157 |  */
158 | int LoadFogLookup(void)
159 | {
160 |     FILE *file;
161 |     char buf[256];
162 |     char *ptr;
163 |     const char *filename =
164 | #ifdef PAL_256
165 |         "assets.lookup";
166 | #else
167 |         "assets.lookup9";
168 | #endif // PAL_256
169 | 
170 |     sprintf(&buf[0], "%s.%s", gBaseDirectoryPath, filename);
171 |     ptr = &buf[0];
172 | 
173 |     file = fopen(ptr, "r");
174 |     if (file == NULL)
175 |     {
176 |         printf("Failed to open file: %s\n", ptr);
177 |         return 1;
178 |     }
179 | 
180 | #ifdef PAL_256
181 |     fread((void *)g_fogTable, sizeof(unsigned int), (64 * 2 * 256), file);
182 | #else
183 |     fread((void *)g_fogTable, sizeof(unsigned int), (4 * 16 * 32), file);
184 | #endif // PAL_256
185 | 
186 |     fclose(file);
187 | 
188 |     return 0;
189 | }
190 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Snowscape
 2 | 
 3 | ### Description
 4 | A small terrain engine for Acorn Archimedes inspired by the Midwinter series of games by Mike Singleton and Maelstrom. These games never came out on the Acorn series of machines, so this was just a fun project to imagine what it might have looked like on the Archimedes utilizing its slightly quirky 256-color palette. Written in C89 and ARM assembly. Utilizes the [C89 dynamic array](https://github.com/eteran/c-vector) by Evan Teran.
 5 | 
 6 | I haven't looked fully at either the 68k or x86 DOS disassembly for Midwinter, and aside from the articles in magazines of the midpoint displacement method for terrain generation, **assume this to have little or no connection to how Maelstrom originally implemented the engine in Midwinter**.
 7 | 
 8 | As per the license this software is released **AS IS**. I don't have the time to look through pull requests, etc., but please feel free to fork the project and play with it as you will. :)
 9 | 
10 | Click on the image for a YouTube recording in Arculator\
11 | [![Click on the image for a YouTube recording in Arculator](terrain.png)](http://www.youtube.com/watch?v=fiJSxuPCPes "YouTube Video")
12 | 
13 | ### QuickStart and Controls
14 | Thanks to the amazing [Archimedes Live!](https://archi.medes.live/) you can run a prebuilt version directly in the browser.
15 | 
16 | [SnowScape A5000 Version](https://archi.medes.live#preset=a5000&ff=14400&disc=https://raw.githubusercontent.com/arkiruthis/snowscape/25f89f3b0dee86dc6675f090872a5919808afcab/Images/snowscapeA5000.adf&autoboot=desktop%20filer_run%20adfs::0.$.!Snowscape) (further draw distance)\
17 | [SnowScape A3020 Version](https://archi.medes.live#preset=a3020&ff=14400&disc=https://raw.githubusercontent.com/arkiruthis/snowscape/25f89f3b0dee86dc6675f090872a5919808afcab/Images/snowscapeA3020.adf&autoboot=desktop%20filer_run%20adfs::0.$.!Snowscape) 
18 | 
19 | 
20 | #### Controls
21 | LEFT CLICK - Move forwards\
22 | RIGHT CLICK - Move backwards\
23 | MOUSE - Move view\
24 | ESCAPE - Return to RISCOS
25 | 
26 | ### Known Issues
27 | - When you reach the edge of the terrain, you'll be abruptly reset to the starting position. TODO - like the original Midwinter, we could reset the camera and generate the terrain to match the next section you're in. 
28 | - Performance could probably be improved by moving more C into ARM assembly... but it's worth playing the original Midwinter on a stock STFM or A500 as a reminder of the original framerate... 😜 
29 | - Why is there a blue line at the bottom? Because when I draw there I get a stack heap corruption on exit. 😱 It'll be something trivial, it always is... 
30 | - Needs a 4MB setup. Could probably be fixed to run on 2MB machines.
31 | 
32 | ### Building Prerequisites
33 | Either of the following GCC cross-compiler build systems can be used to create the RISCOS binary. Both can be built and used using Linux, WSL in Windows, and Docker with Ubuntu in MacOS (ArchieSDK has a MacOS-native build now).
34 | - [GCCSDK](https://www.stevefryatt.org.uk/risc-os/build-tools/environment) - An older GCC 4 build SDK commonly used for RISCOS Open development
35 | - [ArchieSDK](https://gitlab.com/_targz/archiesdk) - A more recent GCC 8 build by Tara Colin and various contributors, particularly of interest to the Acorn demoscene.
36 | 
37 | ### Building
38 | 1. Open `build.sh` and ensure the output directory matches where you'd like the resultant App to go. (probably your Arculator hostfs folder)
39 | 2. Choose the toolchain you want `GCCSDK` or `ARCHIESDK`.
40 | 3. Choose 256 color and/or A5000 mode. The A5000 mode has a further draw distance and will perform much slower on A30X0 machines.
41 | 4. Run `build.sh` - if no errors, it should have copied the `!Snowscape` app to your hostfs and you can double-click the App to start it
42 | 
43 | ### Running on Original Hardware
44 | Use an emulator (Arculator, RPCEmu, ArchiEmu, etc.) to copy the Projects folder onto an ADF and either use that in a Gotek, or use it to prepare a floppy disk. This is to preserve the file types that are set up on HostFS so that they run correctly on native RISCOS. 
45 | 
46 | ### Enabling Profiling
47 | 1. Uncomment `// #define TIMING_LOG 1` in `/Projects/h/Render`
48 | 2. Uncomment `| Run <Obey$Dir>.TimerMod` in the `/Projects/!Run` script.
49 | 3. Download [TimerMod](https://armclub.org.uk/free/) and unpack it into the `/Projects` folder.
50 | 4. Rebuild and Run.
51 | 
52 | ### Scripts
53 | - The scripts folder contains a NodeJS script which takes the exported Archimedes palette (a list of RGB hex numbers) and generates a lookup table. It writes the table as a PNG first: because of the limited options, it doesn't always get the gradients looking nice, so the PNG can be tweaked a bit before it is turned into the binary lookup (in the /assets folder) which is used by the engine. 
54 | 
55 | ### Grateful Thanks and Shoutouts
56 | The Maelstrom team (particularly in memory of [Mike Singleton](https://en.wikipedia.org/wiki/Mike_Singleton)), whose incredible games made a little me believe whole worlds could exist in a small home computer!
57 | 
58 | David Ruck for his superb [TimerMod](https://armclub.org.uk/free/) utility, available from his site, which made profiling many of the routines far easier.
59 | 
60 | The amazing [Bitshifters](https://bitshifters.github.io/index.html) team for always being generous with their knowledge and pushing the Archimedes and BBC Micro to their limits with their amazing demos!
61 | 
62 | [Tara Colin](https://gitlab.com/_targz/archiesdk), and others who have contributed to the ArchieSDK tools, which have opened up an exciting new chapter in demoscene development for early RISCOS machines. 
63 | 
64 | Tom Seddon for helping me fix the CORS issue which means we can link to Archimedes Live! direct from here. Also check out `b2`, his amazing [BBC Micro Emulator](https://github.com/tom-seddon/b2)! 
65 | 


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <math.h>
  4 | #include <time.h>
  5 | #include <kernel.h>
  6 | #include <swis.h>
  7 | 
  8 | #include "mesh.h"
  9 | #include "palette.h"
 10 | #include "render.h"
 11 | 
 12 | // ASM Routines
 13 | extern void VDUSetup(void);
 14 | extern void UpdateMemAddress(int screenStart, int screenMax);
 15 | extern void ReserveScreenBanks(void);
 16 | extern void SwitchScreenBank(void);
 17 | extern void ClearScreen(int color, int fullclear);
 18 | extern int KeyPress(int keyCode);
 19 | extern void FillEdgeLists(int triList, int color);
 20 | 
 21 | // SWI access
 22 | _kernel_oserror *err;
 23 | _kernel_swi_regs rin, rout;
 24 | 
 25 | char *gBaseDirectoryPath = NULL;
 26 | int *gEdgeList = NULL;
 27 | extern unsigned int EdgeList;
 28 | 
 29 | #define STRINGIFY(x) #x
 30 | #define TOSTRING(x) STRINGIFY(x)
 31 | 
 32 | int main(int argc, char *argv[])
 33 | {
 34 |     int i, swi_data[10], isRunning = 1;
 35 |     int heading = 498, pitch = -53, angle = 0;
 36 |     V3D eyePos, direction;
 37 |     V3D verts[3];
 38 |     MAT43 mat;
 39 |     int mouseX, mouseY;
 40 |     unsigned char block[9];
 41 | 
 42 |     SetupMathsGlobals(1);
 43 |     for (i = 0; i < 1024; ++i)
 44 |     {
 45 |         g_SineTable[i] = float2fix(sinf((i * M_PI * 2.f) / 1024.f));
 46 |         g_oneOver[i] = (i == 0) ? float2fix(1.f) : float2fix(1.f / i);
 47 |     }
 48 | 
 49 |     cvector_reserve(gEdgeList, 256);
 50 |     EdgeList = (unsigned int)(gEdgeList); // For ASM access
 51 | 
 52 |     SetupPaletteLookup(1);
 53 |     SetupRender();
 54 |     GenerateTerrain();
 55 | 
 56 |     gBaseDirectoryPath = getenv("Game$Dir");
 57 |     if (LoadFogLookup() != 0)
 58 |     {
 59 |         printf("ERROR: Failed to load fog lookup table.\n");
 60 |         return 1;
 61 |     }
 62 | 
 63 |     // (void)getchar(); // Uncomment to pause here and read data output
 64 | 
 65 |     // Disable the default escape handler
 66 |     rin.r[0] = 229;
 67 |     rin.r[1] = 0xFFFFFFFF;
 68 |     err = _kernel_swi(OS_Byte, &rin, &rout);
 69 | 
 70 |     VDUSetup();
 71 |     ReserveScreenBanks();
 72 |     SwitchScreenBank();
 73 |     SetPalette();
 74 |     // Save256(); // Uncomment to save the VIDC generated palette to a file
 75 | 
 76 |     // Obtain details about the current screen mode
 77 |     swi_data[0] = (int)148;         // screen base address
 78 |     swi_data[1] = (int)-1;          // terminate query
 79 |     rin.r[0] = (int)(&swi_data[0]); // Start of query
 80 |     rin.r[1] = (int)(&swi_data[3]); // Results
 81 |     err = _kernel_swi(OS_ReadVduVariables, &rin, &rout);
 82 |     UpdateMemAddress(swi_data[3], swi_data[4]);
 83 | 
 84 |     for (i = 0; i < 2; ++i)
 85 |     {
 86 |         SwitchScreenBank();             // Swap draw buffer with display buffer
 87 |         rin.r[0] = (int)(&swi_data[0]); // Get the new screen start address
 88 |         rin.r[1] = (int)(&swi_data[3]); // Results
 89 |         err = _kernel_swi(OS_ReadVduVariables, &rin, &rout);
 90 |         UpdateMemAddress(swi_data[3], swi_data[4]); // Pass these to the ASM side
 91 |         ClearScreen(0, 1);                          // Clear the new draw buffer
 92 |     }
 93 | 
 94 |     eyePos.x = float2fix((MAPW << TILESHIFT) + (.5f)) >> 1;
 95 |     eyePos.y = GetHeight(&eyePos);
 96 |     eyePos.z = float2fix((MAPW << TILESHIFT) + (.5f)) >> 1;
 97 | 
 98 | #ifdef TIMING_LOG
 99 |     gTimerLog.biggestVertex = 0;
100 | #endif // TIMING_LOG
101 | 
102 |     // Set an infinite screen box so the mouse doesn't stop at the screen edge.
103 |     block[0] = 0x01; // Reason code
104 |     block[1] = 0x00; // Left LSB
105 |     block[2] = 0x80; // Left MSB
106 |     block[3] = 0x00; // Bottom LSB
107 |     block[4] = 0x80; // Bottom MSB
108 |     block[5] = 0xFF; // Right LSB
109 |     block[6] = 0x7F; // Right MSB
110 |     block[7] = 0xFF; // Top LSB
111 |     block[8] = 0x7F; // Top MSB
112 |     rin.r[0] = 21;
113 |     rin.r[1] = (int)(&block[0]);
114 |     err = _kernel_swi(OS_Word, &rin, &rout);
115 | 
116 |     // Get initial mouse position
117 |     err = _kernel_swi(OS_Mouse, &rin, &rout);
118 |     mouseX = rout.r[0];
119 |     mouseY = rout.r[1];
120 |     eyePos.y = GetHeight(&eyePos); // Start at the correct height
121 | 
122 |     // Triangle mainly in the center of the 320x256 screen
123 |     verts[0].x = (164);
124 |     verts[0].y = (10);
125 |     verts[0].z = (0);
126 |     verts[1].x = (150);
127 |     verts[1].y = (200);
128 |     verts[1].z = (0);
129 |     verts[2].x = (170);
130 |     verts[2].y = (200);
131 |     verts[2].z = (0);
132 | 
133 |     if (err == NULL)
134 |     {
135 |         while (isRunning)
136 |         {
137 |             err = _kernel_swi(OS_Mouse, &rin, &rout); // Get the mouse position
138 |             heading += clamp((rout.r[0] - mouseX) >> 7, -32, 32);
139 |             pitch += clamp((rout.r[1] - mouseY) >> 7, -32, 32);
140 |             pitch = clamp(pitch, -100, 100);
141 | 
142 |             if (KeyPress(112)) // Escape
143 |                 isRunning = 0;
144 | 
145 |             if (rout.r[2] & 4) // Left mouse button - Walk forward
146 |             {
147 |                 // --angle;
148 |                 eyePos.x += (fixcos(heading)) << 1;
149 |                 eyePos.z -= (fixsin(heading)) << 1;
150 |                 eyePos.y = GetHeight(&eyePos);
151 |             }
152 |             if (rout.r[2] & 1) // Right mouse button - Walk backward
153 |             {
154 |                 // ++angle;
155 |                 eyePos.x -= (fixcos(heading));
156 |                 eyePos.z += (fixsin(heading));
157 |                 eyePos.y = GetHeight(&eyePos);
158 |             }
159 | 
160 |             SwitchScreenBank();             // Swap draw buffer with display buffer
161 |             rin.r[0] = (int)(&swi_data[0]); // Get the new screen start address
162 |             rin.r[1] = (int)(&swi_data[3]); // Results
163 |             err = _kernel_swi(OS_ReadVduVariables, &rin, &rout);
164 |             UpdateMemAddress(swi_data[3], swi_data[4]); // Pass these to the ASM side
165 | 
166 |             direction.x = fixcos(heading);
167 |             direction.y = fixsin(pitch);
168 |             direction.z = -fixsin(heading);
169 | 
170 |             LookAt(&eyePos, &direction, &mat); // TODO - SLOWWWWWW - 2 cross products in here
171 | 
172 | #ifdef PAL_256
173 |             ClearScreen(0xC6C6C6C6, 1); // Clear the new draw buffer
174 | #else
175 |             ClearScreen(0xFFFFFFFF, 1); // Clear the new draw buffer
176 | #endif // PAL_256
177 | 
178 |             RenderModel(&mat, &eyePos, heading); // Main render
179 | 
180 | #ifdef TIMING_LOG
181 |             {
182 |                 rin.r[0] = 30;
183 |                 err = _kernel_swi(OS_WriteC, &rin, &rout);
184 | 
185 |                 // printf("DISt     :  %d", dist);
186 |                 printf("\nTRANSFORM TILES : %d", gTimerLog.transformTiles);
187 |                 printf("\nSUBMIT TRIANGLES: %d", gTimerLog.submitRenderTriangles);
188 |                 printf("\nCLIPPING QUEUE  : %d", gTimerLog.clippingQueue);
189 |                 printf("\n3D PROJECTION   : %d", gTimerLog.project3D);
190 |                 printf("\nSCENE RENDER    : %d", gTimerLog.sceneRender);
191 |                 printf("\nBIGGEST VERTEX  : %d", gTimerLog.biggestVertex);
192 |                 printf("\nCLIPPED COUNT   : %d", gTimerLog.clippedCount);
193 |             }
194 | #endif // TIMING_LOG
195 |         }
196 |     }
197 |     else
198 |     {
199 |         printf("ERROR: %s", err->errmess);
200 |     }
201 | 
202 |     // Return to text mode
203 |     rin.r[0] = 22;
204 |     err = _kernel_swi(OS_WriteC, &rin, &rout);
205 |     rin.r[0] = 0;
206 |     err = _kernel_swi(OS_WriteC, &rin, &rout);
207 | 
208 |     // Re-enable the default escape handler
209 |     rin.r[0] = 229;
210 |     rin.r[1] = 0xFFFFFFFF;
211 |     err = _kernel_swi(OS_Byte, &rin, &rout);
212 | 
213 |     // Free up memory that was allocated
214 |     cvector_free(gEdgeList);
215 |     DeAllocateTerrain();
216 |     SetupMathsGlobals(0);
217 |     SetupPaletteLookup(0);
218 |     printf("Heading: %d, Pitch: %d\n", heading, pitch);
219 |     printf("Eyepos: %d, %d, %d\n", eyePos.x, eyePos.y, eyePos.z);
220 | 
221 |     // (void)getchar(); // Uncomment to pause here and read data output
222 | 
223 |     return 0;
224 | }
225 | 


--------------------------------------------------------------------------------
/src/math3d.c:
--------------------------------------------------------------------------------
  1 | #include "math3d.h"
  2 | 
  3 | #include <math.h>
  4 | #include <stdlib.h>
  5 | #include <stdio.h>
  6 | 
  7 | #include "cvector.h"
  8 | 
  9 | fix *g_oneOver;   // Reciprocal table (for max screen height of 256 in Mode 13)
 10 | extern unsigned int OneOver; // Address of the above table for ASM access
 11 | fix *g_SineTable; // SIN table. Offset used for COS.
 12 | 
 13 | void SetupMathsGlobals(int isAllocating)
 14 | {
 15 |     if (isAllocating)
 16 |     {
 17 |         printf("Allocating tables required for Math3D...\n");
 18 |         cvector_reserve(g_SineTable, SINETABLE_SIZE);
 19 |         cvector_reserve(g_oneOver, ONEOVERTABLE_SIZE);
 20 |         OneOver = (unsigned int)(g_oneOver);
 21 |         printf("Done.\n");
 22 |     }
 23 |     else
 24 |     {
 25 |         printf("Freeing tables required for Math3D...\n");
 26 |         cvector_free(g_SineTable);
 27 |         cvector_free(g_oneOver);
 28 |         printf("Done.\n");
 29 |     }
 30 | }
 31 | 
 32 | void SetIdentity(MAT43 *mat)
 33 | {
 34 |     mat->m11 = 65536;
 35 |     mat->m12 = 0;
 36 |     mat->m13 = 0;
 37 |     mat->m21 = 0;
 38 |     mat->m22 = 65536;
 39 |     mat->m23 = 0;
 40 |     mat->m31 = 0;
 41 |     mat->m32 = 0;
 42 |     mat->m33 = 65536;
 43 |     mat->tx = 0;
 44 |     mat->ty = 0;
 45 |     mat->tz = 0;
 46 | }
 47 | 
 48 | void SetScale(MAT43 *mat, fix sx, fix sy, fix sz)
 49 | {
 50 |     mat->m11 = fixmult(mat->m11, sx);
 51 |     mat->m22 = fixmult(mat->m22, sy);
 52 |     mat->m33 = fixmult(mat->m33, sz);
 53 | }
 54 | 
 55 | void SetScaleUniversal(MAT43 *mat, fix s)
 56 | {
 57 |     SetScale(mat, s, s, s);
 58 | }
 59 | 
 60 | void EulerToMat(MAT43 *mat, int heading, int pitch, int bank)
 61 | {
 62 |     fix sh, ch, sp, cp, sb, cb;
 63 |     sh = fixsin(heading);
 64 |     ch = fixcos(heading);
 65 |     sp = fixsin(pitch);
 66 |     cp = fixcos(pitch);
 67 |     sb = fixsin(bank);
 68 |     cb = fixcos(bank);
 69 | 
 70 |     mat->m11 = fixmult(ch, cb) + fixmult(fixmult(sh, sp), sb);
 71 |     mat->m12 = fixmult(-ch, sb) + fixmult(fixmult(sh, sp), cb);
 72 |     mat->m13 = fixmult(sh, cp);
 73 | 
 74 |     mat->m21 = fixmult(sb, cp);
 75 |     mat->m22 = fixmult(cb, cp);
 76 |     mat->m23 = -sp;
 77 | 
 78 |     mat->m31 = fixmult(-sh, cb) + fixmult(fixmult(ch, sp), sb);
 79 |     mat->m32 = fixmult(sb, sh) + fixmult(fixmult(ch, sp), cb);
 80 |     mat->m33 = fixmult(ch, cp);
 81 | 
 82 |     mat->tx = mat->ty = mat->tz = 0;
 83 | }
 84 | 
 85 | void RotateAxis(MAT43 *mat, V3D *axis, int angle)
 86 | {
 87 |     fix s, c, a, ax, ay, az;
 88 |     s = fixsin(angle);
 89 |     c = fixcos(angle);
 90 |     a = 65536 - c;
 91 |     ax = fixmult(a, axis->x);
 92 |     ay = fixmult(a, axis->y);
 93 |     az = fixmult(a, axis->z);
 94 | 
 95 |     mat->m11 = fixmult(ax, axis->x) + c;
 96 |     mat->m12 = fixmult(ax, axis->y) + fixmult(s, axis->z);
 97 |     mat->m13 = fixmult(ax, axis->z) - fixmult(s, axis->y);
 98 |     mat->m21 = fixmult(ay, axis->x) - fixmult(s, axis->z);
 99 |     mat->m22 = fixmult(ay, axis->y) + c;
100 |     mat->m23 = fixmult(ay, axis->z) + fixmult(s, axis->x);
101 |     mat->m31 = fixmult(az, axis->x) + fixmult(s, axis->y);
102 |     mat->m32 = fixmult(az, axis->y) - fixmult(s, axis->x);
103 |     mat->m33 = fixmult(az, axis->z) + c;
104 | 
105 |     mat->tx = mat->ty = mat->tz = 0;
106 | }
107 | 
108 | void RotateX(MAT43 *mat, int angle)
109 | {
110 |     fix s, c;
111 |     s = fixsin(angle);
112 |     c = fixcos(angle);
113 | 
114 |     mat->m11 = 65536;
115 |     mat->m12 = 0;
116 |     mat->m13 = 0;
117 |     mat->m21 = 0;
118 |     mat->m22 = c;
119 |     mat->m23 = s;
120 |     mat->m31 = 0;
121 |     mat->m32 = -s;
122 |     mat->m33 = c;
123 |     mat->tx = mat->ty = mat->tz = 0;
124 | }
125 | 
126 | void RotateY(MAT43 *mat, int angle)
127 | {
128 |     fix s, c;
129 |     s = fixsin(angle);
130 |     c = fixcos(angle);
131 | 
132 |     mat->m11 = c;
133 |     mat->m12 = 0;
134 |     mat->m13 = -s;
135 |     mat->m21 = 0;
136 |     mat->m22 = 65536;
137 |     mat->m23 = 0;
138 |     mat->m31 = s;
139 |     mat->m32 = 0;
140 |     mat->m33 = c;
141 |     mat->tx = mat->ty = mat->tz = 0;
142 | }
143 | 
144 | void Normal(V3D *a, V3D *b, V3D *c, V3D *n)
145 | {
146 |     V3D v1, v2;
147 |     v1.x = a->x - b->x;
148 |     v1.y = a->y - b->y;
149 |     v1.z = a->z - b->z;
150 |     v2.x = a->x - c->x;
151 |     v2.y = a->y - c->y;
152 |     v2.z = a->z - c->z;
153 | 
154 |     n->x = fixmult(v1.y, v2.z) - fixmult(v1.z, v2.y);
155 |     n->y = fixmult(v1.z, v2.x) - fixmult(v1.x, v2.z);
156 |     n->z = fixmult(v1.x, v2.y) - fixmult(v1.y, v2.x);
157 | }
158 | 
159 | void Normalize(V3D *v)
160 | {
161 |     double len, x, y, z;
162 |     x = fix2float(v->x);
163 |     y = fix2float(v->y);
164 |     z = fix2float(v->z);
165 |     len = sqrt(x * x + y * y + z * z);
166 | 
167 |     v->x = float2fix((float)(x / len));
168 |     v->y = float2fix((float)(y / len));
169 |     v->z = float2fix((float)(z / len));
170 | }
171 | 
172 | fix DotProduct(const V3D *v1, const V3D *v2)
173 | {
174 |     return fixmult(v1->x, v2->x) + fixmult(v1->y, v2->y) + fixmult(v1->z, v2->z);
175 | }
176 | 
177 | void MultMatMat(MAT43 *dest, MAT43 *a, MAT43 *b)
178 | {
179 |     MAT43 tmp;
180 |     tmp.m11 = fixmult(a->m11, b->m11) + fixmult(a->m12, b->m21) + fixmult(a->m13, b->m31);
181 |     tmp.m12 = fixmult(a->m11, b->m12) + fixmult(a->m12, b->m22) + fixmult(a->m13, b->m32);
182 |     tmp.m13 = fixmult(a->m11, b->m13) + fixmult(a->m12, b->m23) + fixmult(a->m13, b->m33);
183 | 
184 |     tmp.m21 = fixmult(a->m21, b->m11) + fixmult(a->m22, b->m21) + fixmult(a->m23, b->m31);
185 |     tmp.m22 = fixmult(a->m21, b->m12) + fixmult(a->m22, b->m22) + fixmult(a->m23, b->m32);
186 |     tmp.m23 = fixmult(a->m21, b->m13) + fixmult(a->m22, b->m23) + fixmult(a->m23, b->m33);
187 | 
188 |     tmp.m31 = fixmult(a->m31, b->m11) + fixmult(a->m32, b->m21) + fixmult(a->m33, b->m31);
189 |     tmp.m32 = fixmult(a->m31, b->m12) + fixmult(a->m32, b->m22) + fixmult(a->m33, b->m32);
190 |     tmp.m33 = fixmult(a->m31, b->m13) + fixmult(a->m32, b->m23) + fixmult(a->m33, b->m33);
191 | 
192 |     // Translation
193 |     tmp.tx = fixmult(a->tx, b->m11) + fixmult(a->ty, b->m21) + fixmult(a->tz, b->m31) + b->tx;
194 |     tmp.ty = fixmult(a->tx, b->m12) + fixmult(a->ty, b->m22) + fixmult(a->tz, b->m32) + b->ty;
195 |     tmp.tz = fixmult(a->tx, b->m13) + fixmult(a->ty, b->m23) + fixmult(a->tz, b->m33) + b->tz;
196 | 
197 |     *dest = tmp;
198 | }
199 | 
200 | void MultV3DMat(V3D *v, V3D *dest, MAT43 *mat)
201 | {
202 |     dest->x = fixmult(v->x, mat->m11) + fixmult(v->y, mat->m12) + fixmult(v->z, mat->m13) + mat->tx;
203 |     dest->y = fixmult(v->x, mat->m21) + fixmult(v->y, mat->m22) + fixmult(v->z, mat->m23) + mat->ty;
204 |     dest->z = fixmult(v->x, mat->m31) + fixmult(v->y, mat->m32) + fixmult(v->z, mat->m33) + mat->tz;
205 | }
206 | 
207 | void MultV4DMat(V4D *v, V4D *dest, MAT44 *mat)
208 | {
209 |     dest->x = fixmult(v->x, mat->m11) + fixmult(v->y, mat->m12) + fixmult(v->z, mat->m13) + fixmult(v->w, mat->m14);
210 |     dest->y = fixmult(v->x, mat->m21) + fixmult(v->y, mat->m22) + fixmult(v->z, mat->m23) + fixmult(v->w, mat->m24);
211 |     dest->z = fixmult(v->x, mat->m31) + fixmult(v->y, mat->m32) + fixmult(v->z, mat->m33) + fixmult(v->w, mat->m34);
212 |     dest->w = fixmult(v->x, mat->m41) + fixmult(v->y, mat->m42) + fixmult(v->z, mat->m43) + fixmult(v->w, mat->m44);
213 | }
214 | 
215 | V3D SubV3D(const V3D *a, const V3D *b)
216 | {
217 |     V3D result;
218 |     result.x = a->x - b->x;
219 |     result.y = a->y - b->y;
220 |     result.z = a->z - b->z;
221 |     return result;
222 | }
223 | 
224 | V3D CrossProductV3D(const V3D *a, const V3D *b)
225 | {
226 |     V3D result;
227 |     result.x = fixmult(a->y, b->z) - fixmult(a->z, b->y);
228 |     result.y = fixmult(a->z, b->x) - fixmult(a->x, b->z);
229 |     result.z = fixmult(a->x, b->y) - fixmult(a->y, b->x);
230 |     return result;
231 | }
232 | 
233 | void LookAt(const V3D *eyePos, const V3D *forward, MAT43 *mat)
234 | {
235 |     // Calculate the forward vector (direction from eye to target)
236 |     V3D up, right;
237 | 
238 |     // Define the up vector (world's up)
239 |     up.x = 0;
240 |     up.y = 65536;
241 |     up.z = 0;
242 | 
243 |     // Calculate the right vector (perpendicular to forward and up)
244 |     right = CrossProductV3D(&up, forward);
245 |     up = CrossProductV3D(forward, &right);
246 | 
247 |     mat->tx = -DotProduct(&right, eyePos);
248 |     mat->ty = -DotProduct(&up, eyePos);
249 |     mat->tz = -DotProduct(forward, eyePos);
250 | 
251 |     // Fill in the matrix values
252 |     mat->m11 = right.x;
253 |     mat->m12 = right.y;
254 |     mat->m13 = right.z;
255 | 
256 |     mat->m21 = up.x;
257 |     mat->m22 = up.y;
258 |     mat->m23 = up.z;
259 | 
260 |     mat->m31 = forward->x;
261 |     mat->m32 = forward->y;
262 |     mat->m33 = forward->z;
263 | }
264 | 
265 | void PerspectiveProjection(MAT44 *mat, float fov, float aspect, float znear, float zfar)
266 | {
267 |     float yScale = 1.f / tanf(fov / 2.f);
268 |     float xScale = yScale * aspect;
269 | 
270 |     mat->m11 = float2fix(xScale);
271 |     mat->m12 = 0;
272 |     mat->m13 = 0;
273 |     mat->m14 = 0;
274 | 
275 |     mat->m21 = 0;
276 |     mat->m22 = float2fix(yScale);
277 |     mat->m23 = 0;
278 |     mat->m24 = 0;
279 | 
280 |     mat->m31 = 0;
281 |     mat->m32 = 0;
282 |     mat->m33 = float2fix(zfar / (zfar - znear));
283 |     mat->m34 = 65535;
284 | 
285 |     mat->m41 = 0;
286 |     mat->m42 = 0;
287 |     mat->m43 = float2fix(znear * zfar / (zfar - znear));
288 |     mat->m44 = 0;
289 | }
290 | 


--------------------------------------------------------------------------------
/Scripts/pal.js:
--------------------------------------------------------------------------------
  1 | // This script reads a palette file and generates a dithered image and a binary file with the dithered palette
  2 | // USAGE:
  3 | // node pal.js
  4 | 
  5 | const fs = require('fs');
  6 | const path = require('path');
  7 | const PNG = require('pngjs').PNG;
  8 | const colorConvert = require('color-convert');
  9 | 
 10 | const pngFileName = 'out_dithered.png';
 11 | const lookupFileName = 'lookup';
 12 | 
 13 | const bayerSize = 2;
 14 | 
 15 | const png = new PNG({
 16 |   width: 128 * bayerSize,
 17 |   height: 256 * bayerSize,
 18 |   filterType: -1,
 19 | });
 20 | 
 21 | const filePath = path.join(__dirname, 'palette.hex');
 22 | 
 23 | let writeLookupTable = process.argv.includes("lookup");
 24 | let loadExistingPng = !process.argv.includes("png");
 25 | 
 26 | // Always load the palette.
 27 | fs.readFile(filePath, 'utf8', (err, data) => {
 28 |   if (err) {
 29 |     console.error('Error reading the file:', err);
 30 |     return;
 31 |   }
 32 | 
 33 |   const lines = data.split('\n');
 34 |   const palette = lines.filter((line) => line.match(/^[0-9A-Fa-f]{6}\r?$/)).map(hexToRgb);
 35 | 
 36 |   if (palette.length !== 256) {
 37 |     console.error('The file does not contain 256 valid hex colors');
 38 |     return;
 39 |   }
 40 | 
 41 |   if (loadExistingPng) {
 42 |     console.log('Loading existing PNG file');
 43 |     // Load the existing PNG file
 44 |     const existingPng = PNG.sync.read(fs.readFileSync(pngFileName));
 45 |     for (let y = 0; y < existingPng.height; y++) {
 46 |       for (let x = 0; x < existingPng.width; x++) {
 47 |         const idx = (existingPng.width * y + x) << 2;
 48 |         png.data[idx] = existingPng.data[idx];
 49 |         png.data[idx + 1] = existingPng.data[idx + 1];
 50 |         png.data[idx + 2] = existingPng.data[idx + 2];
 51 |         png.data[idx + 3] = existingPng.data[idx + 3];
 52 |       }
 53 |     }
 54 |   }
 55 |   else { // Proceed to generate the image
 56 |     console.log('Generating new PNG file');
 57 | 
 58 |     const numSteps = 64;
 59 |     const closestColors = [];
 60 |     const targetColor = hexToRgb("#66aaee");
 61 | 
 62 |     // Create the color steps towards the target for each color in the palette
 63 |     palette.forEach((color) => {
 64 |       const colorSteps = [];
 65 | 
 66 |       for (let i = 0; i < numSteps; i++) {
 67 |         const lerpColor = {
 68 |           r: parseInt(Math.round((1 - i / (numSteps - 1)) * (color.r - targetColor.r) + targetColor.r)),
 69 |           g: parseInt(Math.round((1 - i / (numSteps - 1)) * (color.g - targetColor.g) + targetColor.g)),
 70 |           b: parseInt(Math.round((1 - i / (numSteps - 1)) * (color.b - targetColor.b) + targetColor.b)),
 71 |         };
 72 | 
 73 |         const closestColor = findNearestColor(lerpColor, palette);
 74 |         const withDetails = { r: closestColor.r, g: closestColor.g, b: closestColor.b, bayerThreshold: 0, nextColor: targetColor };
 75 |         colorSteps.push(withDetails);
 76 |       }
 77 | 
 78 |       closestColors.push(colorSteps);
 79 |     });
 80 | 
 81 |     for (let y = 0; y < closestColors.length; y++) {
 82 |       let colorSet = closestColors[y];
 83 |       let startIndex = 0;
 84 |       let currentValue = colorSet[0];
 85 | 
 86 |       for (let i = 0; i < colorSet.length; i++) {
 87 |         if (isColorDifferent(currentValue, colorSet[i])) {
 88 |           // find the halway point of the first section
 89 |           let halwayPoint1stSection = Math.floor((i - startIndex) / 2) + startIndex;
 90 | 
 91 |           // find the halway point of the second section
 92 |           let oldValue = currentValue;
 93 |           currentValue = colorSet[i];
 94 | 
 95 |           // Traverse the next section until we either find a color that is different or we reach the end
 96 |           let j = i;
 97 |           while (j < colorSet.length && !isColorDifferent(currentValue, colorSet[j])) {
 98 |             j++;
 99 |           }
100 |           let halwayPoint2ndSection = Math.floor((j - i) / 2) + i;
101 | 
102 |           process.stdout.write(`i: ${i}, between ${startIndex} and ${i}, halwayPoint1stSection: ${halwayPoint1stSection}, halwayPoint2ndSection: ${halwayPoint2ndSection}\n`);
103 | 
104 |           // Go between the two halway points and set the bayer threshold across that range
105 |           for (let k = halwayPoint1stSection; k <= halwayPoint2ndSection; k++) {
106 |             const threshold = Math.round(((k - halwayPoint1stSection + 1) / (halwayPoint2ndSection - halwayPoint1stSection)) * 3);
107 |             colorSet[k] = {
108 |               r: oldValue.r,
109 |               g: oldValue.g,
110 |               b: oldValue.b,
111 |               bayerThreshold: threshold,
112 |               nextColor: currentValue,
113 |             };
114 |             process.stdout.write(`${colorSet[k].bayerThreshold}, `);
115 |           }
116 |           process.stdout.write(`\n--\n`);
117 |           i = halwayPoint2ndSection;
118 |           startIndex = halwayPoint2ndSection;
119 |         }
120 |       }
121 |     }
122 | 
123 |     const bayerMatrix = [
124 |       [0, 2],
125 |       [3, 1],
126 |       // [ 0,  8,  2, 10],
127 |       // [12,  4, 14,  6],
128 |       // [ 3, 11,  1,  9],
129 |       // [15,  7, 13,  5]    
130 |     ];
131 | 
132 |     console.assert(closestColors.length === 256, 'The closestColors array does not contain 256 colors');
133 | 
134 |     // Iterate over each row
135 |     for (let y = 0; y < closestColors.length * bayerSize; y++) {
136 |       const colorIndex = Math.floor(y / bayerSize) % palette.length;
137 |       const spread = closestColors[colorIndex];
138 | 
139 |       // Iterate over each pixel in the row
140 |       //for (let x = 0; x < spread.length * bayerSize; x++) {
141 |       for (let x = 0; x < spread.length * bayerSize * 2; x++) {
142 |         const newColor = closestColors[colorIndex][x >> 2];
143 |         const nextColor = newColor.nextColor;
144 |         const fraction = newColor.bayerThreshold;///(x * 5) / png.width;
145 |         const idx = (png.width * y + x) << 2;
146 | 
147 |         // Map the x position to the range [0, 3] for the Bayer matrix
148 |         const xBayer = x % 2;
149 |         const yBayer = y % 2;
150 |         const bayerValue = bayerMatrix[yBayer][xBayer];
151 | 
152 |         // Use the Bayer value to lerp between the baseColor and targetColor
153 |         const lerpColor = fraction > bayerValue ? nextColor : newColor;
154 | 
155 |         png.data[idx] = lerpColor.r;
156 |         png.data[idx + 1] = lerpColor.g;
157 |         png.data[idx + 2] = lerpColor.b;
158 |         png.data[idx + 3] = 255;
159 |       }
160 |     }
161 | 
162 |     png.pack().pipe(fs.createWriteStream(pngFileName));
163 |   } 
164 | 
165 |   if (writeLookupTable) {
166 |     console.log('Writing lookup table');
167 |     pngToLookupTable(png, palette, lookupFileName);
168 |   }
169 | });
170 | 
/**
 * Converts every pixel of `png` to the index of the identical palette colour
 * and writes those indices to `outputFilePath`, one byte per pixel, in
 * row-major order (left to right, top to bottom).
 *
 * The whole table is assembled in memory and written with a single call, so a
 * failure never leaves an open stream or a half-written file behind: if any
 * pixel has no exact match in `palette`, the error is logged and nothing is
 * written.
 *
 * @param {{width: number, height: number, data: ArrayLike<number>}} png
 *   Image with 4 bytes (R, G, B, A) per pixel; alpha is ignored.
 * @param {Array<{r: number, g: number, b: number}>} palette
 *   Colours to match against; the first matching entry wins.
 * @param {string} outputFilePath File to create or overwrite.
 */
function pngToLookupTable(png, palette, outputFilePath) {
  // Exact-match lookup built once instead of scanning the palette per pixel.
  // Only the first occurrence of a colour is kept, matching findIndex().
  const indexByColor = new Map();
  palette.forEach((color, i) => {
    const key = `${color.r},${color.g},${color.b}`;
    if (!indexByColor.has(key)) {
      indexByColor.set(key, i);
    }
  });

  const table = Buffer.alloc(png.width * png.height);
  let offset = 0;

  for (let y = 0; y < png.height; y++) {
    for (let x = 0; x < png.width; x++) {
      const idx = (png.width * y + x) << 2;
      const key = `${png.data[idx]},${png.data[idx + 1]},${png.data[idx + 2]}`;
      const index = indexByColor.get(key);
      if (index === undefined) {
        console.error(`Could not find the color at (${x}, ${y})`);
        return;
      }
      // writeUInt8 range-checks the index and returns the next offset.
      offset = table.writeUInt8(index, offset);
    }
  }

  fs.writeFileSync(outputFilePath, table);
}
194 | 
/**
 * Like a plain palette-index dump, but emits the pixels in 4x2 tiles: the
 * image is walked in bands of two rows and groups of four columns, and for
 * each tile the four pixels of its first row are written, then the four
 * pixels of its second row. Pixels outside the image (odd height, or width
 * not a multiple of four) are skipped.
 *
 * The table is assembled in memory and written with a single call, so a
 * failure never leaves an open stream or a half-written file behind: if any
 * pixel has no exact match in `palette`, the error is logged and nothing is
 * written.
 *
 * @param {{width: number, height: number, data: ArrayLike<number>}} png
 *   Image with 4 bytes (R, G, B, A) per pixel; alpha is ignored.
 * @param {Array<{r: number, g: number, b: number}>} palette
 *   Colours to match against; the first matching entry wins.
 * @param {string} outputFilePath File to create or overwrite.
 */
function pngToLookupTablePackedOrder(png, palette, outputFilePath) {
  const TILE_WIDTH = 4;
  const TILE_HEIGHT = 2;

  // Exact-match lookup built once instead of scanning the palette per pixel.
  // Only the first occurrence of a colour is kept, matching findIndex().
  const indexByColor = new Map();
  palette.forEach((color, i) => {
    const key = `${color.r},${color.g},${color.b}`;
    if (!indexByColor.has(key)) {
      indexByColor.set(key, i);
    }
  });

  // Every in-range pixel is visited exactly once, so one byte per pixel.
  const table = Buffer.alloc(png.width * png.height);
  let offset = 0;

  for (let y = 0; y < png.height; y += TILE_HEIGHT) {
    for (let x = 0; x < png.width; x += TILE_WIDTH) {
      const rowEnd = Math.min(y + TILE_HEIGHT, png.height);
      const colEnd = Math.min(x + TILE_WIDTH, png.width);

      for (let py = y; py < rowEnd; py++) {
        for (let px = x; px < colEnd; px++) {
          const idx = (png.width * py + px) << 2;
          const key = `${png.data[idx]},${png.data[idx + 1]},${png.data[idx + 2]}`;
          const index = indexByColor.get(key);
          if (index === undefined) {
            console.error(`Could not find the color at (${px}, ${py})`);
            return;
          }
          // writeUInt8 range-checks the index and returns the next offset.
          offset = table.writeUInt8(index, offset);
        }
      }
    }
  }

  fs.writeFileSync(outputFilePath, table);
}
232 | 
/**
 * Parses a hex colour string such as "#66aaee" (the '#' is optional) into its
 * red, green and blue components.
 *
 * @param {string} hex Colour as six hex digits, optionally prefixed with '#'.
 * @returns {{r: number, g: number, b: number}} Channel values.
 */
function hexToRgb(hex) {
  const digits = hex.replace('#', '');
  const channels = [];

  // Split into groups of up to two hex digits and parse each group.
  for (const pair of digits.match(/.{1,2}/g)) {
    channels.push(parseInt(pair, 16));
  }

  return { r: channels[0], g: channels[1], b: channels[2] };
}
237 | 
/**
 * Reports whether two colours differ in at least one of their r, g or b
 * channels (strict comparison).
 *
 * @param {{r: number, g: number, b: number}} color1
 * @param {{r: number, g: number, b: number}} color2
 * @returns {boolean} true when any channel differs.
 */
function isColorDifferent(color1, color2) {
  const sameRed = color1.r === color2.r;
  const sameGreen = color1.g === color2.g;
  const sameBlue = color1.b === color2.b;
  return !(sameRed && sameGreen && sameBlue);
}
/**
 * Distance between two RGB colours, measured in Lab space as the sum of the
 * absolute per-axis differences, with the lightness axis weighted three times
 * as heavily as the a and b axes.
 *
 * @param {{r: number, g: number, b: number}} color1
 * @param {{r: number, g: number, b: number}} color2
 * @returns {number} Weighted distance; 0 for identical colours.
 */
function colorDifference(color1, color2) {
  const [l1, a1, b1] = colorConvert.rgb.lab.raw(color1.r, color1.g, color1.b);
  const [l2, a2, b2] = colorConvert.rgb.lab.raw(color2.r, color2.g, color2.b);

  const lightnessTerm = Math.abs(l1 - l2) * 3;
  const aTerm = Math.abs(a1 - a2);
  const bTerm = Math.abs(b1 - b2);

  return lightnessTerm + aTerm + bTerm;
}
251 | 
/**
 * Returns the palette entry with the smallest colorDifference() to `color`.
 * Ties keep the earliest entry; an empty palette yields null.
 *
 * @param {{r: number, g: number, b: number}} color Colour to match.
 * @param {Array<{r: number, g: number, b: number}>} palette Candidates.
 * @returns {{r: number, g: number, b: number}|null} The closest entry.
 */
function findNearestColor(color, palette) {
  let best = null;
  let bestScore = Infinity;

  for (let i = 0; i < palette.length; i++) {
    const candidate = palette[i];
    const score = colorDifference(color, candidate);
    if (score < bestScore) {
      best = candidate;
      bestScore = score;
    }
  }

  return best;
}
266 | 


--------------------------------------------------------------------------------
/Scripts/gen_lookup.js:
--------------------------------------------------------------------------------
// The 16-entry colour palette, one 0xRRGGBB value per entry. The position of
// a colour in this array is the palette index written to the lookup table.
// prettier-ignore
const inputPalette = [
  0x000000,
  0x004060,
  0x206080,
  0x4080a0,
  0x80c0e0,
  0xe0e0e0,
  0x402000,
  0x602000,
  0x804020,
  0xa06000,
  0xc08060,
  0x004000,
  0x006000,
  0xa02000,
  0xc0c000,
  0x20a0e0,
];

// Number of gradient steps generated for each palette colour (one dither
// block per step).
const rowLength = 16;
const bayerSize = 2; // Bayer pattern size (2x2)
 23 | 
 24 | const findNearestColor = (color, palette) => {
 25 |   const chroma = require("chroma-js");
 26 | 
 27 |   // Convert hex number to hex string for chroma-js
 28 |   const hexString = "#" + color.toString(16).padStart(6, "0");
 29 |   const inputColor = chroma(hexString);
 30 | 
 31 |   let closestColorIndex = 0;
 32 |   let minDistance = Infinity;
 33 | 
 34 |   for (let i = 0; i < palette.length; i++) {
 35 |     const paletteColor = palette[i];
 36 |     // Convert palette hex number to hex string
 37 |     const paletteHex = "#" + paletteColor.toString(16).padStart(6, "0");
 38 | 
 39 |     // Calculate color difference in the LAB color space (perceptually uniform)
 40 |     //const distance = chroma.distance(inputColor, chroma(paletteHex), "lab");
 41 |     const distance = chroma.deltaE(inputColor, chroma(paletteHex));
 42 | 
 43 |     if (distance < minDistance) {
 44 |       minDistance = distance;
 45 |       closestColorIndex = i;
 46 |     }
 47 |   }
 48 | 
 49 |   return closestColorIndex;
 50 | };
 51 | 
// Colour every gradient fades towards (it is passed to
// generateLookupTablePNG below). It is also the last palette entry, so the
// lookup here doubles as a quick sanity check of findNearestColor.
const testColor = 0x20a0e0;
const nearestColorIndex = findNearestColor(testColor, inputPalette);
console.log(
  `Nearest color index for ${testColor.toString(16)}: ${nearestColorIndex}`
);
console.log(`Nearest color: ${inputPalette[nearestColorIndex].toString(16)}`);

// Modules used for PNG generation and for writing the output files
const fs = require("fs");
const PNG = require("pngjs").PNG;
 62 | 
 63 | // Generate a perceptually uniform gradient between two colors
 64 | const generateGradient = (startColor, endColor, steps) => {
 65 |   const chroma = require("chroma-js");
 66 | 
 67 |   // Convert hex numbers to hex strings for chroma-js
 68 |   const startHex = "#" + startColor.toString(16).padStart(6, "0");
 69 |   const endHex = "#" + endColor.toString(16).padStart(6, "0");
 70 | 
 71 |   // Create a perceptually uniform scale in LAB color space
 72 |   const scale = chroma
 73 |     .scale([startHex, endHex])
 74 |     .mode("lab") // Use LAB color space for perceptual uniformity
 75 |     .colors(steps, "hex");
 76 | 
 77 |   // Convert the hex strings back to numbers
 78 |   const gradient = scale.map((hex) => parseInt(hex.substring(1), 16));
 79 | 
 80 |   return gradient;
 81 | };
 82 | 
 83 | // Generate a row of the lookup table
 84 | const generateLookupRow = (row, palette, testColor) => {
 85 |   const paletteColor = palette[row % palette.length];
 86 |   // Generate a gradient from palette color to test color
 87 |   const gradientColors = generateGradient(paletteColor, testColor, rowLength);
 88 |   const rowIndices = [];
 89 | 
 90 |   // Fill the row with the gradient, using nearest palette colors
 91 |   for (let x = 0; x < rowLength; x++) {
 92 |     const gradientColor = gradientColors[x];
 93 |     const nearestIndex = findNearestColor(gradientColor, palette);
 94 |     rowIndices.push(nearestIndex);
 95 |   }
 96 | 
 97 |   return rowIndices;
 98 | };
 99 | 
// Ordered-dither threshold tables, indexed [y][x] by dither() below. Every
// threshold lies in [0, 1): multiples of 1/4 for the 2x2 table and of 1/16
// for the 4x4 table.
const dither2x2 = [
  [0.0, 0.75],
  [0.5, 0.25],
];

const dither4x4 = [
  [0.0 / 16.0, 8.0 / 16.0, 2.0 / 16.0, 10.0 / 16.0],
  [12.0 / 16.0, 4.0 / 16.0, 14.0 / 16.0, 6.0 / 16.0],
  [3.0 / 16.0, 11.0 / 16.0, 1.0 / 16.0, 9.0 / 16.0],
  [15.0 / 16.0, 7.0 / 16.0, 13.0 / 16.0, 5.0 / 16.0],
];
111 | 
/**
 * Looks up the dither threshold at position (x, y) of the size x size table.
 *
 * @param {number} x Column inside the pattern.
 * @param {number} y Row inside the pattern.
 * @param {number} size Pattern size; only 2 and 4 are supported.
 * @returns {number} The threshold stored in the matching table.
 * @throws {Error} If `size` is neither 2 nor 4.
 */
const dither = (x, y, size) => {
  if (size === 2) {
    return dither2x2[y][x];
  }
  if (size === 4) {
    return dither4x4[y][x];
  }
  throw new Error("Unsupported dither size");
};
122 | 
/**
 * Fills the bayerSize x bayerSize pixel block at block coordinates (x, y)
 * with a dithered mix of two colours. A pixel gets `color1` when `intensity`
 * is less than or equal to that pixel's dither threshold, otherwise `color2`.
 *
 * @param {number} x Block column.
 * @param {number} y Block row.
 * @param {number} intensity Mix amount compared against the thresholds.
 * @param {number} color1 0xRRGGBB colour used at or below the threshold.
 * @param {number} color2 0xRRGGBB colour used above the threshold.
 * @param {{width: number, data: ArrayLike<number>}} png RGBA target image.
 */
const drawBayerPattern = (x, y, intensity, color1, color2, png) => {
  const left = x * bayerSize;
  const top = y * bayerSize;

  for (let dy = 0; dy < bayerSize; dy++) {
    for (let dx = 0; dx < bayerSize; dx++) {
      const threshold = dither(dx, dy, bayerSize);
      const chosen = intensity <= threshold ? color1 : color2;

      // Byte offset of this pixel's RGBA quadruple.
      const base = ((top + dy) * png.width + (left + dx)) * 4;

      png.data[base] = (chosen >> 16) & 0xff; // Red
      png.data[base + 1] = (chosen >> 8) & 0xff; // Green
      png.data[base + 2] = chosen & 0xff; // Blue
      png.data[base + 3] = 0xff; // Alpha (fully opaque)
    }
  }
};
143 | 
/**
 * Copies the main-diagonal pixels of the block at block coordinates (x, y)
 * onto the same positions of the block directly above it (block row y - 1).
 * All four RGBA bytes of each diagonal pixel are copied.
 *
 * @param {number} x Block column.
 * @param {number} y Block row of the source block; the target is row y - 1.
 * @param {{width: number, data: ArrayLike<number>}} png RGBA image, edited in place.
 */
const fill2x2BlockDiagonal = (x, y, png) => {
  for (let step = 0; step < bayerSize; step++) {
    const column = x * bayerSize + step;
    const sourceRow = y * bayerSize + step;
    const targetRow = (y - 1) * bayerSize + step;

    const source = (sourceRow * png.width + column) * 4;
    const target = (targetRow * png.width + column) * 4;

    // Red, green, blue, alpha - in that order.
    for (let channel = 0; channel < 4; channel++) {
      png.data[target + channel] = png.data[source + channel];
    }
  }
};
159 | 
/**
 * Renders the colour lookup table as a PNG. Every palette colour gets a band
 * of two block rows; along each band the colour fades towards `testColor` in
 * `rowLength` steps, and the transition between two neighbouring palette
 * colours of the gradient is spread out with the ordered dither pattern.
 *
 * The image height is derived from the `palette` argument, so the function
 * sizes the image correctly for any palette that is passed in.
 *
 * @param {number[]} palette Palette as 0xRRGGBB numbers.
 * @param {number} testColor Colour the gradients fade towards (0xRRGGBB).
 * @returns {PNG} The rendered image.
 */
const generateLookupTablePNG = (palette, testColor) => {
  // Two block rows per palette colour, each bayerSize pixels tall.
  const height = palette.length * bayerSize * 2; // Number of rows in the lookup table
  const width = rowLength * bayerSize; // Number of columns in the lookup table
  const png = new PNG({ width, height });

  // NOTE(review): each y draws block rows 2y and 2y + 1, so iterations with
  // y >= palette.length address pixel rows at or past `height`. In those
  // iterations fill2x2BlockDiagonal also copies values read from beyond the
  // image into the last in-range block row. The bound is kept as it was so
  // the generated table does not change - confirm whether this is intended.
  for (let y = 0; y < height / 2; y++) {
    const rowIndices = generateLookupRow(y, palette, testColor);

    // The row is walked right to left. `nextColor` is the right end of the
    // run being processed, `prevColor` its left end.
    let prevColor = rowIndices.length - 1;
    let nextColor = rowIndices.length - 1;
    let col = rowIndices.length - 1;
    let firstBlock = true;

    while (col >= 0) {
      col--;

      // The color changed or we reached the start of the row
      if (col <= 0 || rowIndices[col] !== rowIndices[nextColor]) {
        prevColor = col;
        if (prevColor < 0) {
          prevColor = 0;
        }

        let spread = nextColor - prevColor;

        const color1 = rowIndices[prevColor];
        const color2 = rowIndices[nextColor];

        for (let x = prevColor; x <= nextColor; x++) {
          let intensity = (x - prevColor) / spread;

          // In the right-most run the intensity is capped just below 1.
          if (firstBlock) {
            intensity = Math.min(
              intensity,
              1.0 - 1.0 / (bayerSize * bayerSize)
            );
          }

          drawBayerPattern(
            x,
            y * 2,
            intensity,
            palette[color1],
            palette[color2],
            png
          );

          // Carry this block's diagonal into the block row above it.
          if (y > 0) {
            fill2x2BlockDiagonal(
              x,
              y * 2,
              png
            );
          }

          // Fill out the second row of the Bayer pattern
          drawBayerPattern(
            x,
            y * 2 + 1,
            intensity,
            palette[color1],
            palette[color2],
            png
          );
        }

        if (firstBlock) {
          firstBlock = false;
        }

        nextColor = prevColor;
      }
    }
  }

  return png;
};
238 | 
/**
 * Exports PNG data to a binary lookup table format.
 * Each pixel is stored as a 4-bit index to the palette.
 * Two indices are packed into each byte (high nibble, low nibble).
 * Each byte is repeated 4 times for 32-bit register loading.
 *
 * Pixels are read in row-major order. A pixel whose colour is not an exact
 * entry of `inputPalette` is mapped to the nearest entry instead. If the
 * image holds an odd number of pixels, the last pixel is emitted as a byte
 * with a zero low nibble rather than being dropped.
 *
 * @param {{width: number, height: number, data: ArrayLike<number>}} png
 *   RGBA image to export.
 * @returns {Buffer} The packed table, 4 bytes for every pair of pixels.
 */
const exportPNGtoBinaryLookup = (png) => {
  const width = png.width;
  const height = png.height;

  // Calculate original size (2 pixels per byte)
  const originalSize = Math.ceil((width * height) / 2);
  // Create buffer to hold binary data (each byte repeated 4 times)
  const buffer = Buffer.alloc(originalSize * 4);

  let currentByte = 0;
  let bufferIndex = 0;
  let isHighNibble = true;

  // Write the byte 4 times consecutively for 32-bit register loading
  const emitByte = (byte) => {
    for (let i = 0; i < 4; i++) {
      buffer[bufferIndex++] = byte;
    }
  };

  for (let y = 0; y < height; y++) {
    for (let x = 0; x < width; x++) {
      const idx = (y * width + x) << 2;
      const pixelColor =
        (png.data[idx] << 16) | (png.data[idx + 1] << 8) | png.data[idx + 2];

      // Try an exact match first (faster), then fall back to the nearest colour
      let paletteIndex = inputPalette.indexOf(pixelColor);
      if (paletteIndex === -1) {
        paletteIndex = findNearestColor(pixelColor, inputPalette);
      }

      if (isHighNibble) {
        // First index goes in high nibble (most significant 4 bits)
        currentByte = paletteIndex << 4;
        isHighNibble = false;
      } else {
        // Second index goes in low nibble (least significant 4 bits)
        currentByte |= paletteIndex;
        isHighNibble = true;
        emitByte(currentByte);
      }
    }
  }

  // Odd pixel count: flush the pending high nibble so the last pixel is kept.
  if (!isHighNibble) {
    emitByte(currentByte);
  }

  return buffer;
};
306 | 
// Generate the lookup table, then save it twice: as the packed binary table
// ("lookup9") and as a PNG ("color_lookup.png") for visual inspection.
console.log("Generating color lookup table...");
const lookupTable = generateLookupTablePNG(inputPalette, testColor);
console.log("Exporting lookup table to binary format...");
const lookupTableBuffer = exportPNGtoBinaryLookup(lookupTable);
fs.writeFileSync("lookup9", lookupTableBuffer);
// Report the name of the file that was actually written.
console.log("Lookup table saved as lookup9");
const pngBuffer = PNG.sync.write(lookupTable);
fs.writeFileSync("color_lookup.png", pngBuffer);
console.log("Lookup table saved as color_lookup.png");
317 | 


--------------------------------------------------------------------------------
/src/mesh.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <string.h>
  3 | #include <time.h>
  4 | #include "mesh.h"
  5 | 
/*
 * rand32(max): combines two rand() results - one shifted left by 16 bits,
 * OR-ed with the other - and reduces the value modulo max.
 * rand32balanced(max): the same value with (max >> 1) subtracted, which
 * centres the result around zero.
 *
 * Both macros evaluate `max` more than once, and C does not specify which of
 * the two rand() calls runs first.
 * NOTE(review): if rand() can return more than 0x7FFF the shift may overflow
 * int - confirm RAND_MAX for the target C library.
 */
#define rand32(max) (((rand() << 16) | rand()) % (max))
#define rand32balanced(max) ((((rand() << 16) | rand()) % (max)) - ((max) >> 1))

/* The terrain mesh: filled by GenerateTerrain(), released by DeAllocateTerrain(). */
Mesh g_Mesh;
 10 | 
 11 | #define SNOWLEVEL -655360
 12 | #define SANDLEVEL -3276800
 13 | 
 14 | void GenerateTerrain()
 15 | {
 16 |     int i, j, k, bsize, csize, rnd;
 17 |     V3D vertex, light;
 18 |     V3D _verts[4];
 19 |     V3D *ptrV3D = 0;
 20 |     TRI face;
 21 |     int facecounter = 0;
 22 |     fix tl, tr, bl, br;
 23 | 
 24 | #ifdef PAL_256
 25 |     unsigned char range[48] = {49, 49, 49, 49, 89, 89, 89, 89, 90, 90, 91, 91, 92, 92, 118, 118,
 26 |                                32, 32, 32, 32, 35, 35, 40, 40, 68, 68, 68, 112, 112, 115, 115, 113,
 27 |                                160, 160, 160, 160, 161, 161, 161, 161, 194, 194, 194, 194, 247, 247, 255, 255};
 28 | #endif // PAL_256
 29 | 
 30 |     // srand(time(NULL));
 31 |     srand(55555);
 32 | 
 33 |     printf("Reserving %d bytes for Terrain Vertices...\n", MAPW * MAPW * sizeof(V3D));
 34 |     cvector_reserve(g_Mesh.verts, MAPW * MAPW);
 35 |     printf("Reserving %d bytes for Terrain Faces...\n", (MAPW * MAPW) * 2 * sizeof(TRI));
 36 |     cvector_reserve(g_Mesh.faces, (MAPW * MAPW) * 2);
 37 |     printf("Reserving %d bytes for transformed Terrain Vertices...\n", MAPW * MAPW * sizeof(V3D));
 38 |     cvector_reserve(g_Mesh.verts_transformed, MAPW * MAPW);
 39 | 
 40 |     for (j = 0; j < MAPW; ++j)
 41 |         for (i = 0; i < MAPW; ++i)
 42 |         {
 43 |             g_Mesh.verts[IX(i, j)].x = int2fix(i << TILESHIFT);
 44 |             g_Mesh.verts[IX(i, j)].z = int2fix(j << TILESHIFT);
 45 |             g_Mesh.verts[IX(i, j)].y = 0;
 46 |         }
 47 | 
 48 |     bsize = MAPW / 4;
 49 |     csize = bsize / 2;
 50 |     rnd = int2fix(MAPW);
 51 | 
 52 |     while (bsize > 1)
 53 |     {
 54 |         // Edges
 55 |         for (i = 0; i < MAPW; i += bsize)
 56 |             for (j = 0; j < MAPW; j += bsize)
 57 |             {
 58 |                 k = g_Mesh.verts[IX(i, j)].y + g_Mesh.verts[IX(i + bsize, j)].y;
 59 |                 k >>= 1;
 60 |                 k += rand32balanced(rnd);
 61 | 
 62 |                 g_Mesh.verts[IX(i + csize, j)].y = k;
 63 | 
 64 |                 k = g_Mesh.verts[IX(i, j)].y + g_Mesh.verts[IX(i, j + bsize)].y;
 65 |                 k >>= 1;
 66 |                 k += rand32balanced(rnd);
 67 | 
 68 |                 g_Mesh.verts[IX(i, j + csize)].y = k;
 69 |             }
 70 | 
 71 |         // Center
 72 |         for (i = 0; i < MAPW; i += bsize)
 73 |             for (j = 0; j < MAPW; j += bsize)
 74 |             {
 75 |                 k = g_Mesh.verts[IX(i + csize, j)].y +
 76 |                     g_Mesh.verts[IX(i, j + csize)].y +
 77 |                     g_Mesh.verts[IX(i + bsize, j + csize)].y +
 78 |                     g_Mesh.verts[IX(i + csize, j + bsize)].y;
 79 |                 k >>= 2;
 80 |                 k += rand32balanced(rnd);
 81 | 
 82 |                 g_Mesh.verts[IX(i + csize, j + csize)].y = k;
 83 |             }
 84 | 
 85 |         rnd >>= 1;
 86 | 
 87 |         bsize >>= 1;
 88 |         csize >>= 1;
 89 |     };
 90 | 
 91 |     for (i = 0; i < MAPW * MAPW; ++i)
 92 |     {
 93 |         if (g_Mesh.verts[i].y < 0)
 94 |             g_Mesh.verts[i].y >>= 1;
 95 |     }
 96 | 
 97 |     for (j = 0; j < MAPW; ++j)
 98 |         for (i = 0; i < MAPW; ++i)
 99 |         {
100 |             tl = g_Mesh.verts[IX(i, j)].y;
101 |             tr = g_Mesh.verts[IX(i + 1, j)].y;
102 |             bl = g_Mesh.verts[IX(i, j + 1)].y;
103 |             br = g_Mesh.verts[IX(i + 1, j + 1)].y;
104 | 
105 |             // Midwinter used a |/|\| pattern for the terrain.
106 |             //                  |\|/|
107 |             if ((i + j) & 1)
108 |             {
109 |                 /* TL
110 |                     ___
111 |                    |  /
112 |                    | /
113 |                    |/
114 |                 */
115 | 
116 |                 face.a = i + j * (MAPW);
117 |                 face.b = (i + 1) + j * (MAPW);
118 |                 face.c = i + (j + 1) * (MAPW);
119 |                 face.next = NULL;
120 |                 face.centerpoint.x = int2fix(i << TILESHIFT) + int2fix(4);
121 |                 face.centerpoint.y = int2fix(j << TILESHIFT) + int2fix(4);
122 | #ifdef PAL_256
123 |                 k = clamp(12 + (((tl - tr) + (tl - bl)) >> 16), 0, 15);
124 |                 if (tl > SNOWLEVEL)
125 |                     k += 32;
126 |                 else if (tl > SANDLEVEL)
127 |                     k += 16;
128 |                 face.flags = range[k];
129 | #else
130 |                 k = clamp(6 + (((tl - tr) + (tl - bl)) >> 18), 2, 10);
131 |                 face.flags = k;
132 | #endif // PAL_256
133 | 
134 |                 g_Mesh.faces[facecounter++] = face;
135 | 
136 |                 /* BR
137 | 
138 |                      /|
139 |                     / |
140 |                    /__|
141 |                 */
142 | 
143 |                 face.a = (i + 1) + (j + 1) * (MAPW);
144 |                 face.b = i + (j + 1) * (MAPW);
145 |                 face.c = (i + 1) + j * (MAPW);
146 |                 face.next = NULL;
147 |                 face.centerpoint.x = int2fix(i << TILESHIFT) + int2fix(12);
148 |                 face.centerpoint.y = int2fix(j << TILESHIFT) + int2fix(12);
149 | 
150 | #ifdef PAL_256
151 |                 k = clamp(12 + (((bl - br) + (tr - br)) >> 16), 0, 15);
152 |                 if (tl > SNOWLEVEL)
153 |                     k += 32;
154 |                 else if (tl > SANDLEVEL)
155 |                     k += 16;
156 |                 face.flags = range[k];
157 | #else
158 |                 k = clamp(6 + (((bl - br) + (tr - br)) >> 18), 2, 10);
159 |                 face.flags = k;
160 | #endif // PAL_256
161 | 
162 |                 g_Mesh.faces[facecounter++] = face;
163 |             }
164 |             else
165 |             {
166 | 
167 |                 /* TR
168 |                    ___
169 |                    \  |
170 |                     \ |
171 |                      \|
172 |                 */
173 | 
174 |                 face.a = i + j * (MAPW);
175 |                 face.b = (i + 1) + j * (MAPW);
176 |                 face.c = (i + 1) + (j + 1) * (MAPW);
177 |                 face.next = NULL;
178 |                 face.centerpoint.x = int2fix(i << TILESHIFT) + int2fix(12);
179 |                 face.centerpoint.y = int2fix(j << TILESHIFT) + int2fix(4);
180 | 
181 | #ifdef PAL_256
182 |                 k = clamp(12 + (((tl - tr) + (tr - br)) >> 16), 0, 15);
183 |                 if (tl > SNOWLEVEL)
184 |                     k += 32;
185 |                 else if (tl > SANDLEVEL)
186 |                     k += 16;
187 |                 face.flags = range[k];
188 | #else
189 |                 k = clamp(6 + (((tl - tr) + (tr - br)) >> 17), 2, 10);
190 |                 face.flags = k;
191 | #endif // PAL_256
192 | 
193 |                 g_Mesh.faces[facecounter++] = face;
194 | 
195 |                 /* BL
196 | 
197 |                    |\
198 |                    | \
199 |                    |__\
200 |                 */
201 | 
202 |                 face.a = i + j * (MAPW);
203 |                 face.b = (i + 1) + (j + 1) * (MAPW);
204 |                 face.c = i + (j + 1) * (MAPW);
205 |                 face.next = NULL;
206 |                 face.centerpoint.x = int2fix(i << TILESHIFT) + int2fix(4);
207 |                 face.centerpoint.y = int2fix(j << TILESHIFT) + int2fix(12);
208 | 
209 | #ifdef PAL_256
210 |                 k = clamp(12 + (((bl - br) + (tl - bl)) >> 16), 0, 15);
211 |                 if (tl > SNOWLEVEL)
212 |                     k += 32;
213 |                 else if (tl > SANDLEVEL)
214 |                     k += 16;
215 |                 face.flags = range[k];
216 | #else
217 |                 k = clamp(6 + (((bl - br) + (tl - bl)) >> 17), 2, 10);
218 |                 face.flags = k;
219 | #endif // PAL_256
220 | 
221 |                 g_Mesh.faces[facecounter++] = face;
222 |             }
223 |         }
224 | 
225 |     // printf("Preparing Lighting pass...\n");
226 |     // light.x = int2fix(-120);
227 |     // light.y = int2fix(420);
228 |     // light.z = int2fix(120);
229 |     // Normalize(&light);
230 | 
231 |     // for (i = 0; i < (_MAPW * _MAPW) * 2; ++i)
232 |     // {
233 |     //     // _verts[0] = g_Mesh.verts[g_Mesh.faces[i].a];
234 |     //     // _verts[1] = g_Mesh.verts[g_Mesh.faces[i].b];
235 |     //     // _verts[2] = g_Mesh.verts[g_Mesh.faces[i].c];
236 |     //     // Normal(&_verts[0], &_verts[2], &_verts[1], &g_Mesh.faces[i].normal);
237 |     //     // Normalize(&g_Mesh.faces[i].normal);
238 | 
239 |     //     g_Mesh.faces[i].flags = ; //  min(max(0, (int)(fix2float(DotProduct(&g_Mesh.faces[i].normal, &light)) * 16.f)),15);
240 |     // }
241 |     // printf("Done.\n");
242 | }
243 | 
244 | void DeAllocateTerrain(void)
245 | {
246 |     cvector_free(g_Mesh.verts);
247 |     cvector_free(g_Mesh.faces);
248 |     cvector_free(g_Mesh.verts_transformed);
249 | }
250 | 
251 | fix GetHeight(V3D *eyePos)
252 | {
253 |     fix A, B, C;
254 |     fix mapX, mapZ;
255 |     fix localX, localZ;
256 | 
257 |     // Get the fixed normalized location
258 |     mapX = eyePos->x >> TILESHIFT;
259 |     mapZ = eyePos->z >> TILESHIFT;
260 | 
261 |     // Obtain the fractional part.
262 |     localX = mapX & 0xFFFF;
263 |     localZ = mapZ & 0xFFFF;
264 | 
265 |     // Now get map coords as integer.
266 |     mapX >>= 16;
267 |     mapZ >>= 16;
268 | 
269 |     if ((mapX + mapZ) & 1) // Top Left / Bottom Right
270 |     {
271 |         //  ___
272 |         // |  /|
273 |         // | / |
274 |         // |/__|
275 | 
276 |         // 2 shared corners
277 |         B = g_Mesh.verts[IX(mapX + 1, mapZ)].y;
278 |         C = g_Mesh.verts[IX(mapX, mapZ + 1)].y;
279 | 
280 |         if ((localX + localZ) < 65536)
281 |         {
282 |             // Top Left Triangle
283 |             A = g_Mesh.verts[IX(mapX, mapZ)].y;
284 |             B = fixmultINTL(B, localX);
285 |             C = fixmultINTL(C, localZ);
286 |         }
287 |         else
288 |         {
289 |             // Bottom Right Triangle
290 |             // Flip the local coords
291 |             localX = 65536 - localX;
292 |             localZ = 65536 - localZ;
293 |             A = g_Mesh.verts[IX(mapX + 1, mapZ + 1)].y;
294 |             B = fixmultINTL(B, localZ);
295 |             C = fixmultINTL(C, localX);
296 |         }
297 | 
298 |         A = fixmultINTL(A, (65536 - localX - localZ));
299 |     }
300 |     else // Top Right / Bottom Left
301 |     {
302 |         //  ___
303 |         // |\  |
304 |         // | \ |
305 |         // |__\|
306 |         // 2 shared corners
307 | 
308 |         B = g_Mesh.verts[IX(mapX, mapZ)].y;
309 |         C = g_Mesh.verts[IX(mapX + 1, mapZ + 1)].y;
310 | 
311 |         if (localX > localZ)
312 |         {
313 |             // Top Right Triangle
314 |             localX = 65536 - localX;
315 |             A = g_Mesh.verts[IX(mapX + 1, mapZ)].y;
316 |             B = fixmultINTL(B, localX);
317 |             C = fixmultINTL(C, localZ);
318 |         }
319 |         else
320 |         {
321 |             // Bottom Left Triangle
322 |             // Flip the local coords
323 |             localZ = 65536 - localZ;
324 |             A = g_Mesh.verts[IX(mapX, mapZ + 1)].y;
325 |             B = fixmultINTL(B, localZ);
326 |             C = fixmultINTL(C, localX);
327 |         }
328 | 
329 |         A = fixmultINTL(A, (65536 - localX - localZ));
330 |     }
331 | 
332 |     return A + B + C + 262144;
333 | }
334 | 


--------------------------------------------------------------------------------
/src/render.c:
--------------------------------------------------------------------------------
  1 | #include <kernel.h>
  2 | 
  3 | #include "render.h"
  4 | #include "math3d.h"
  5 | #include "mesh.h"
  6 | #include "palette.h"
  7 | 
/* Routines defined outside this file - presumably in the assembly sources
   (poly.s / poly.asm); confirm. Note that pointers are passed as int. */
extern void SortVertices(int triList);
extern void DrawTriangle(int triList, int color);
extern void FillEdgeLists(int triList, int color);
extern void ProjectVertex(int vertexPtr);

/* One TRI pointer per depth slot; every slot is cleared by SetupRender(). */
TRI *g_RenderQueue[MAXDEPTH];

int gDebug = 0;

/* Capacities of the scratch arrays below. */
#define MAX_EXTRA_VERTS 192
#define MAX_EXTRA_TRIS 64

/* Scratch storage with running fill indices - judging by the names, for
   vertices and triangles created by near-plane clipping; confirm. */
TRI *clippedQueue1 = 0;
V3D clippedNearVerts[MAX_EXTRA_VERTS];
int clippedNearVertIndex = 0;
TRI clippedNearTris[MAX_EXTRA_TRIS];
int clippedNearTrisIndex = 0;
 25 | 
 26 | #ifdef TIMING_LOG
 27 | TimerLog gTimerLog;
 28 | 
 29 | int GetRenderDelta(void)
 30 | {
 31 |     _kernel_oserror *err;
 32 |     _kernel_swi_regs rin, rout;
 33 |     int deltaTime = 0;
 34 | 
 35 |     err = _kernel_swi(SWI_Timer_Stop, &rin, &rout);
 36 |     deltaTime = rout.r[1];
 37 |     err = _kernel_swi(SWI_Timer_Start, &rin, &rout);
 38 | 
 39 |     return deltaTime;
 40 | }
 41 | #endif // TIMING_LOG
 42 | 
 43 | void SetupRender(void)
 44 | {
 45 |     memset(g_RenderQueue, 0, MAXDEPTH * sizeof(TRI *));
 46 | }
 47 | 
 48 | #define interpolate(a, b)                                         \
 49 |     do                                                            \
 50 |     {                                                             \
 51 |         t = ((a)->z - (b)->z) >> 8;                               \
 52 |         if (t != 0)                                               \
 53 |         {                                                         \
 54 |             t = ((a)->z << 8) / t;                                \
 55 |             tmp.x = (((b)->x - (a)->x) >> 8) * (t >> 8) + (a)->x; \
 56 |             tmp.y = (((b)->y - (a)->y) >> 8) * (t >> 8) + (a)->y; \
 57 |             tmp.z = 0;                                            \
 58 |             inside.verts[inside.numVerts++] = tmp;                \
 59 |         }                                                         \
 60 |         else                                                      \
 61 |         {                                                         \
 62 |             (a)->z = 0;                                           \
 63 |         }                                                         \
 64 |     } while (0);
 65 | 
 66 | void ClipPolyonListToNearPlane(POLYGON *p)
 67 | {
 68 |     static POLYGON inside;
 69 |     static V3D *previous, *current;
 70 |     static V3D tmp;
 71 |     register int i, t;
 72 | 
 73 |     inside.numVerts = 0;
 74 |     previous = &p->verts[p->numVerts - 1];
 75 | 
 76 |     for (i = 0; i < p->numVerts; ++i)
 77 |     {
 78 |         current = &p->verts[i];
 79 | 
 80 |         // Have we crossed into near plane?
 81 |         if ((previous->z ^ current->z) & 0x80000000)
 82 |         {
 83 |             if (previous->z < 0)
 84 |             {
 85 |                 interpolate(current, previous);
 86 |             }
 87 |             else
 88 |             {
 89 |                 interpolate(previous, current);
 90 |             }
 91 |         }
 92 | 
 93 |         if (current->z >= 0)
 94 |             inside.verts[inside.numVerts++] = *current;
 95 | 
 96 |         previous = current;
 97 |     }
 98 | 
 99 |     *p = inside;
100 | }
101 | 
102 | void RenderTriangle(TRI *tri)
103 | {
104 |     int i, j, k;
105 |     V3D vscreen[3];
106 |     static int clipflags;
107 | 
108 |     vscreen[0] = g_Mesh.verts_transformed[tri->a];
109 |     vscreen[1] = g_Mesh.verts_transformed[tri->b];
110 |     vscreen[2] = g_Mesh.verts_transformed[tri->c];
111 | 
112 |     i = vscreen[0].z;
113 |     j = vscreen[1].z;
114 |     k = vscreen[2].z;
115 | 
116 |     // Clipping of triangles outside the view frustum
117 | 
118 |     if ((vscreen[0].x < -i) &&
119 |         (vscreen[1].x < -j) &&
120 |         (vscreen[2].x < -k))
121 |         return;
122 | 
123 |     if ((vscreen[0].x > i) &&
124 |         (vscreen[1].x > j) &&
125 |         (vscreen[2].x > k))
126 |         return;
127 | 
128 |     // Clipping of triangles based on Z depth
129 |     clipflags = (i < 0) + (j < 0) + (k < 0);
130 | 
131 |     // All verts are behind the camera, skip
132 |     if (clipflags == 3)
133 |         return;
134 | 
135 |     // Turn the average depth into an index that fits into our render queue range
136 |     k += (j + k);
137 |     k >>= 18;
138 |     k += 4;
139 | 
140 |     // Triangles that cross the near plane need to be clipped
141 |     if (clipflags && (clippedNearTrisIndex < MAX_EXTRA_TRIS))
142 |     {
143 |         tri->depth = max(k, 0);
144 |         tri->next = clippedQueue1;
145 |         clippedQueue1 = tri;
146 |     }
147 |     else if (k < MAXDEPTH)
148 |     {
149 |         // All verts are in front of the camera, no need to clip
150 |         tri->depth = k;
151 |         tri->next = g_RenderQueue[k];
152 |         g_RenderQueue[k] = tri;
153 |     }
154 | }
155 | 
156 | void RenderModel(MAT43 *mv, V3D *eyePos, int yaw)
157 | {
158 |     int i, j, k, x, z, mx, mz;
159 | 
160 | #ifdef TIMING_LOG
161 |     int largest;
162 | #endif // TIMING_LOG
163 | 
164 |     TRI **tri;
165 |     V3D _verts[3];
166 |     static POLYGON poly;
167 | 
168 |     clippedNearTrisIndex = clippedNearVertIndex = 0;
169 | 
170 |     // Get the fixed normalized location
171 |     mx = eyePos->x >> 20;
172 |     mz = eyePos->z >> 20;
173 | 
174 |     // mx += (fixcos(yaw) >> 13);
175 |     // mz -= (fixsin(yaw) >> 13);
176 | 
177 |     mx += ((fixcos(yaw) >> 8) * (SUBRANGE)) >> 16;
178 |     mz -= ((fixsin(yaw) >> 8) * (SUBRANGE)) >> 16;
179 | 
180 |     // We have a limit of SCANRANGE tiles to render, so if mx and mz get
181 |     // too lose to the limit of the map (MAPW) then move the player back
182 |     // to the center of the map.
183 |     if (mx <= SCANRANGE || mx >= MAPW - SCANRANGE || mz <= SCANRANGE || mz >= MAPW - SCANRANGE)
184 |     {
185 |         eyePos->x = int2fix(MAPW << TILESHIFT) / 2;
186 |         eyePos->z = int2fix(MAPW << TILESHIFT) / 2;
187 |     }
188 | 
189 | #ifdef TIMING_LOG
190 |     (void)GetRenderDelta();
191 | #endif // TIMING_LOG
192 | 
193 |     for (x = mx - SCANRANGE; x <= mx + SCANRANGE; ++x)
194 |     {
195 |         for (z = mz - SCANRANGE; z <= mz + SCANRANGE; ++z)
196 |         {
197 |             k = x + (z << MAPSHIFT);
198 |             MultV3DMat(&g_Mesh.verts[k], &g_Mesh.verts_transformed[k], mv);
199 |         }
200 |     }
201 | 
202 | #ifdef TIMING_LOG
203 |     gTimerLog.transformTiles = GetRenderDelta();
204 | #endif // TIMING_LOG
205 | 
206 |     for (x = mx - SCANRANGE; x < mx + SCANRANGE; ++x)
207 |     {
208 |         for (z = mz - SCANRANGE; z < mz + SCANRANGE; ++z)
209 |         {
210 |             k = x + (z << MAPSHIFT);
211 |             k <<= 1;
212 |             RenderTriangle(&g_Mesh.faces[k]);
213 |             ++k;
214 |             RenderTriangle(&g_Mesh.faces[k]);
215 |         }
216 |     }
217 | 
218 | #ifdef TIMING_LOG
219 |     gTimerLog.submitRenderTriangles = GetRenderDelta();
220 | #endif // TIMING_LOG
221 | 
222 |     // Clip triangles against near plane
223 |     while (clippedQueue1)
224 |     {
225 |         k = clippedQueue1->depth;
226 | 
227 |         poly.numVerts = 3;
228 |         poly.verts[0] = g_Mesh.verts_transformed[clippedQueue1->a];
229 |         poly.verts[1] = g_Mesh.verts_transformed[clippedQueue1->b];
230 |         poly.verts[2] = g_Mesh.verts_transformed[clippedQueue1->c];
231 | 
232 |         // Clip against near plane
233 |         ClipPolyonListToNearPlane(&poly);
234 | 
235 |         for (i = 0; i < poly.numVerts; ++i)
236 |         {
237 |             ProjectVertex((int)(&poly.verts[i]));
238 |         }
239 | 
240 |         // Render clipped triangles
241 |         for (i = 0; i < poly.numVerts - 2; ++i)
242 |         {
243 |             if (clippedNearTrisIndex >= MAX_EXTRA_TRIS)
244 |                 break;
245 | 
246 |             clippedNearVerts[clippedNearVertIndex] = poly.verts[0];
247 |             clippedNearTris[clippedNearTrisIndex].a = clippedNearVertIndex++;
248 |             clippedNearVerts[clippedNearVertIndex] = poly.verts[i + 1];
249 |             clippedNearTris[clippedNearTrisIndex].b = clippedNearVertIndex++;
250 |             clippedNearVerts[clippedNearVertIndex] = poly.verts[i + 2];
251 |             clippedNearTris[clippedNearTrisIndex].c = clippedNearVertIndex++;
252 | 
253 |             clippedNearTris[clippedNearTrisIndex].flags = clippedQueue1->flags | TRI_CLIPPED_BIT;
254 | 
255 |             // Stick these at the front of the render queue
256 |             clippedNearTris[clippedNearTrisIndex].depth = clippedQueue1->depth;
257 |             clippedNearTris[clippedNearTrisIndex].next = g_RenderQueue[k];
258 |             g_RenderQueue[k] = &clippedNearTris[clippedNearTrisIndex];
259 | 
260 |             ++clippedNearTrisIndex;
261 |         }
262 | 
263 |         clippedQueue1 = clippedQueue1->next;
264 |     }
265 | 
266 | #ifdef TIMING_LOG
267 |     gTimerLog.clippingQueue = GetRenderDelta();
268 | #endif // TIMING_LOG
269 | 
270 |     for (x = mx - SCANRANGE; x <= mx + SCANRANGE; ++x)
271 |     {
272 |         for (z = mz - SCANRANGE; z <= mz + SCANRANGE; ++z)
273 |         {
274 |             i = x + (z << MAPSHIFT);
275 |             // MultV3DProj(&g_Mesh.verts_transformed[i], &k);
276 |             ProjectVertex((int)&g_Mesh.verts_transformed[i]);
277 |         }
278 |     }
279 | 
280 | #ifdef TIMING_LOG
281 |     gTimerLog.project3D = GetRenderDelta();
282 |     largest = 0;
283 | #endif // TIMING_LOG
284 | 
285 |     // Painter's algorithm. Proceed from furthest to nearest.
286 |     for (i = MAXDEPTH - 1; i >= 0; i--)
287 |     {
288 |         tri = &g_RenderQueue[i];
289 |         while (*tri)
290 |         { // Render faces with current depth
291 | 
292 |             if ((*tri)->flags & TRI_CLIPPED_BIT)
293 |             {
294 |                 _verts[0] = clippedNearVerts[(*tri)->a];
295 |                 _verts[1] = clippedNearVerts[(*tri)->b];
296 |                 _verts[2] = clippedNearVerts[(*tri)->c];
297 |                 j = 0;
298 |             }
299 |             else
300 |             {
301 |                 _verts[0] = g_Mesh.verts_transformed[(*tri)->a];
302 |                 _verts[1] = g_Mesh.verts_transformed[(*tri)->b];
303 |                 _verts[2] = g_Mesh.verts_transformed[(*tri)->c];
304 | 
305 |                 // The following is a cheap approximation to a hypotenuse
306 |                 // If we use the projected depth value then it causes the dithering
307 |                 // to change as you rotate the camera.
308 |                 j = abs((*tri)->centerpoint.x - eyePos->x);
309 |                 k = abs((*tri)->centerpoint.y - eyePos->z);
310 |                 j = (j > k) ? (j + (k >> 1)) : (k + (j >> 1));
311 | 
312 | #ifdef PAL_256
313 | #ifdef A5000
314 |                 j >>= 18;
315 |                 j = max(0, min(j - 16, 63));
316 | #else
317 |                 j >>= 17;
318 |                 j = max(0, min(j - 16, 63));
319 | #endif // A5000
320 | #else  // 16 COLOR
321 | #ifdef A5000
322 |                 j >>= 20;
323 |                 j = max(0, j - 4);
324 | #else
325 |                 j >>= 19;
326 |                 j = max(0, j - 6);
327 | #endif // A5000
328 | #endif // PAL_256
329 |             }
330 | 
331 | #ifdef PAL_256
332 |             k = (((*tri)->flags & 255) << 7) + j;
333 |             if (orient2dint(_verts[0], _verts[1], _verts[2]) < 0)
334 |                 FillEdgeLists((int)&_verts[0], k);
335 | #else
336 |             k = (((*tri)->flags & 31) << 5) + j;
337 |             if (j < 16 && orient2dint(_verts[0], _verts[1], _verts[2]) < 0)
338 |                 FillEdgeLists((int)&_verts[0], k);
339 | #endif // PAL_256
340 | 
341 | #ifdef TIMING_LOG
342 |             largest = max(largest, abs(_verts[0].x));
343 |             largest = max(largest, abs(_verts[1].x));
344 |             largest = max(largest, abs(_verts[2].x));
345 |             largest = max(largest, abs(_verts[0].y));
346 |             largest = max(largest, abs(_verts[1].y));
347 |             largest = max(largest, abs(_verts[2].y));
348 | #endif // TIMING_LOG
349 | 
350 |             *tri = (*tri)->next; // Next face
351 |         }
352 |     }
353 | 
354 | #ifdef TIMING_LOG
355 |     gTimerLog.sceneRender = GetRenderDelta();
356 |     gTimerLog.biggestVertex = max(largest, gTimerLog.biggestVertex);
357 |     gTimerLog.clippedCount = clippedNearTrisIndex;
358 | #endif // TIMING_LOG
359 | }
360 | 


--------------------------------------------------------------------------------
/src/cvector.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2015 Evan Teran
  3 |  *
  4 |  * License: The MIT License (MIT)
  5 |  */
  6 | 
  7 | #ifndef CVECTOR_H_
  8 | #define CVECTOR_H_
  9 | 
 10 | #include <assert.h> /* for assert */
 11 | #include <stdlib.h> /* for malloc/realloc/free */
 12 | #include <string.h> /* for memcpy/memmove */
 13 | 
 14 | /* cvector heap implemented using C library malloc() */
 15 | 
 16 | /* in case C library malloc() needs extra protection,
 17 |  * allow these defines to be overridden.
 18 |  */
 19 | #ifndef cvector_clib_free
 20 | #define cvector_clib_free free
 21 | #endif
 22 | #ifndef cvector_clib_malloc
 23 | #define cvector_clib_malloc malloc
 24 | #endif
 25 | #ifndef cvector_clib_calloc
 26 | #define cvector_clib_calloc calloc
 27 | #endif
 28 | #ifndef cvector_clib_realloc
 29 | #define cvector_clib_realloc realloc
 30 | #endif
 31 | 
 32 | typedef void (*cvector_elem_destructor_t)(void *elem);
 33 | 
 34 | typedef struct cvector_metadata_t {
 35 |     size_t size;
 36 |     size_t capacity;
 37 |     cvector_elem_destructor_t elem_destructor;
 38 | } cvector_metadata_t;
 39 | 
 40 | /**
 41 |  * @brief cvector_vector_type - The vector type used in this library
 42 |  */
 43 | #define cvector_vector_type(type) type *
 44 | 
 45 | /**
 46 |  * @brief cvector_vec_to_base - For internal use, converts a vector pointer to a metadata pointer
 47 |  * @param vec - the vector
 48 |  * @return the metadata pointer of the vector
 49 |  */
 50 | #define cvector_vec_to_base(vec) \
 51 |     (&((cvector_metadata_t *)(vec))[-1])
 52 | 
 53 | /**
 54 |  * @brief cvector_base_to_vec - For internal use, converts a metadata pointer to a vector pointer
 55 |  * @param ptr - pointer to the metadata
 56 |  * @return the vector
 57 |  */
 58 | #define cvector_base_to_vec(ptr) \
 59 |     ((void *)&((cvector_metadata_t *)(ptr))[1])
 60 | 
 61 | /**
 62 |  * @brief cvector_capacity - gets the current capacity of the vector
 63 |  * @param vec - the vector
 64 |  * @return the capacity as a size_t
 65 |  */
 66 | #define cvector_capacity(vec) \
 67 |     ((vec) ? cvector_vec_to_base(vec)->capacity : (size_t)0)
 68 | 
 69 | /**
 70 |  * @brief cvector_size - gets the current size of the vector
 71 |  * @param vec - the vector
 72 |  * @return the size as a size_t
 73 |  */
 74 | #define cvector_size(vec) \
 75 |     ((vec) ? cvector_vec_to_base(vec)->size : (size_t)0)
 76 | 
 77 | /**
 78 |  * @brief cvector_elem_destructor - get the element destructor function used
 79 |  * to clean up elements
 80 |  * @param vec - the vector
 81 |  * @return the function pointer as cvector_elem_destructor_t
 82 |  */
 83 | #define cvector_elem_destructor(vec) \
 84 |     ((vec) ? cvector_vec_to_base(vec)->elem_destructor : NULL)
 85 | 
 86 | /**
 87 |  * @brief cvector_empty - returns non-zero if the vector is empty
 88 |  * @param vec - the vector
 89 |  * @return non-zero if empty, zero if non-empty
 90 |  */
 91 | #define cvector_empty(vec) \
 92 |     (cvector_size(vec) == 0)
 93 | 
 94 | /**
 95 |  * @brief cvector_reserve - Requests that the vector capacity be at least enough
 96 |  * to contain n elements. If n is greater than the current vector capacity, the
 97 |  * function causes the container to reallocate its storage increasing its
 98 |  * capacity to n (or greater).
 99 |  * @param vec - the vector
100 |  * @param n - Minimum capacity for the vector.
101 |  * @return void
102 |  */
103 | #define cvector_reserve(vec, capacity)           \
104 |     do {                                         \
105 |         size_t cv_cap__ = cvector_capacity(vec); \
106 |         if (cv_cap__ < (capacity)) {             \
107 |             cvector_grow((vec), (capacity));     \
108 |         }                                        \
109 |     } while (0)
110 | 
111 | /*
112 |  * @brief cvector_init - Initialize a vector.  The vector must be NULL for this to do anything.
113 |  * @param vec - the vector
114 |  * @param capacity - vector capacity to reserve
115 |  * @param elem_destructor_fn - element destructor function
116 |  * @return void
117 |  */
118 | #define cvector_init(vec, capacity, elem_destructor_fn)               \
119 |     do {                                                              \
120 |         if (!(vec)) {                                                 \
121 |             cvector_reserve((vec), capacity);                         \
122 |             cvector_set_elem_destructor((vec), (elem_destructor_fn)); \
123 |         }                                                             \
124 |     } while (0)
125 | 
126 | /**
127 |  * @brief cvector_erase - removes the element at index i from the vector
128 |  * @param vec - the vector
129 |  * @param i - index of element to remove
130 |  * @return void
131 |  */
132 | #define cvector_erase(vec, i)                                                               \
133 |     do {                                                                                    \
134 |         if (vec) {                                                                          \
135 |             const size_t cv_sz__ = cvector_size(vec);                                       \
136 |             if ((i) < cv_sz__) {                                                            \
137 |                 cvector_elem_destructor_t elem_destructor__ = cvector_elem_destructor(vec); \
138 |                 if (elem_destructor__) {                                                    \
139 |                     elem_destructor__(&vec[i]);                                             \
140 |                 }                                                                           \
141 |                 cvector_set_size((vec), cv_sz__ - 1);                                       \
142 |                 memmove(                                                                    \
143 |                     (vec) + (i),                                                            \
144 |                     (vec) + (i) + 1,                                                        \
145 |                     sizeof(*(vec)) * (cv_sz__ - 1 - (i)));                                  \
146 |             }                                                                               \
147 |         }                                                                                   \
148 |     } while (0)
149 | 
150 | /**
151 |  * @brief cvector_clear - erase all of the elements in the vector
152 |  * @param vec - the vector
153 |  * @return void
154 |  */
155 | #define cvector_clear(vec)                                                              \
156 |     do {                                                                                \
157 |         if (vec) {                                                                      \
158 |             cvector_elem_destructor_t elem_destructor__ = cvector_elem_destructor(vec); \
159 |             if (elem_destructor__) {                                                    \
160 |                 size_t i__;                                                             \
161 |                 for (i__ = 0; i__ < cvector_size(vec); ++i__) {                         \
162 |                     elem_destructor__(&vec[i__]);                                       \
163 |                 }                                                                       \
164 |             }                                                                           \
165 |             cvector_set_size(vec, 0);                                                   \
166 |         }                                                                               \
167 |     } while (0)
168 | 
169 | /**
170 |  * @brief cvector_free - frees all memory associated with the vector
171 |  * @param vec - the vector
172 |  * @return void
173 |  */
174 | #define cvector_free(vec)                                                               \
175 |     do {                                                                                \
176 |         if (vec) {                                                                      \
177 |             void *p1__                                  = cvector_vec_to_base(vec);     \
178 |             cvector_elem_destructor_t elem_destructor__ = cvector_elem_destructor(vec); \
179 |             if (elem_destructor__) {                                                    \
180 |                 size_t i__;                                                             \
181 |                 for (i__ = 0; i__ < cvector_size(vec); ++i__) {                         \
182 |                     elem_destructor__(&vec[i__]);                                       \
183 |                 }                                                                       \
184 |             }                                                                           \
185 |             cvector_clib_free(p1__);                                                    \
186 |         }                                                                               \
187 |     } while (0)
188 | 
189 | /**
190 |  * @brief cvector_begin - returns an iterator to first element of the vector
191 |  * @param vec - the vector
192 |  * @return a pointer to the first element (or NULL)
193 |  */
194 | #define cvector_begin(vec) \
195 |     (vec)
196 | 
197 | /**
198 |  * @brief cvector_end - returns an iterator to one past the last element of the vector
199 |  * @param vec - the vector
200 |  * @return a pointer to one past the last element (or NULL)
201 |  */
202 | #define cvector_end(vec) \
203 |     ((vec) ? &((vec)[cvector_size(vec)]) : NULL)
204 | 
205 | /* user request to use logarithmic growth algorithm */
206 | #ifdef CVECTOR_LOGARITHMIC_GROWTH
207 | 
208 | /**
209 |  * @brief cvector_compute_next_grow - returns an the computed size in next vector grow
210 |  * size is increased by multiplication of 2
211 |  * @param size - current size
212 |  * @return size after next vector grow
213 |  */
214 | #define cvector_compute_next_grow(size) \
215 |     ((size) ? ((size) << 1) : 1)
216 | 
217 | #else
218 | 
219 | /**
220 |  * @brief cvector_compute_next_grow - returns an the computed size in next vector grow
221 |  * size is increased by 1
222 |  * @param size - current size
223 |  * @return size after next vector grow
224 |  */
225 | #define cvector_compute_next_grow(size) \
226 |     ((size) + 1)
227 | 
228 | #endif /* CVECTOR_LOGARITHMIC_GROWTH */
229 | 
230 | /**
231 |  * @brief cvector_push_back - adds an element to the end of the vector
232 |  * @param vec - the vector
233 |  * @param value - the value to add
234 |  * @return void
235 |  */
236 | #define cvector_push_back(vec, value)                                 \
237 |     do {                                                              \
238 |         size_t cv_cap__ = cvector_capacity(vec);                      \
239 |         if (cv_cap__ <= cvector_size(vec)) {                          \
240 |             cvector_grow((vec), cvector_compute_next_grow(cv_cap__)); \
241 |         }                                                             \
242 |         (vec)[cvector_size(vec)] = (value);                           \
243 |         cvector_set_size((vec), cvector_size(vec) + 1);               \
244 |     } while (0)
245 | 
246 | /**
247 |  * @brief cvector_insert - insert element at position pos to the vector
248 |  * @param vec - the vector
249 |  * @param pos - position in the vector where the new elements are inserted.
250 |  * @param val - value to be copied (or moved) to the inserted elements.
251 |  * @return void
252 |  */
253 | #define cvector_insert(vec, pos, val)                                 \
254 |     do {                                                              \
255 |         size_t cv_cap__ = cvector_capacity(vec);                      \
256 |         if (cv_cap__ <= cvector_size(vec)) {                          \
257 |             cvector_grow((vec), cvector_compute_next_grow(cv_cap__)); \
258 |         }                                                             \
259 |         if ((pos) < cvector_size(vec)) {                              \
260 |             memmove(                                                  \
261 |                 (vec) + (pos) + 1,                                    \
262 |                 (vec) + (pos),                                        \
263 |                 sizeof(*(vec)) * ((cvector_size(vec)) - (pos)));      \
264 |         }                                                             \
265 |         (vec)[(pos)] = (val);                                         \
266 |         cvector_set_size((vec), cvector_size(vec) + 1);               \
267 |     } while (0)
268 | 
269 | /**
270 |  * @brief cvector_pop_back - removes the last element from the vector
271 |  * @param vec - the vector
272 |  * @return void
273 |  */
274 | #define cvector_pop_back(vec)                                                       \
275 |     do {                                                                            \
276 |         cvector_elem_destructor_t elem_destructor__ = cvector_elem_destructor(vec); \
277 |         if (elem_destructor__) {                                                    \
278 |             elem_destructor__(&(vec)[cvector_size(vec) - 1]);                       \
279 |         }                                                                           \
280 |         cvector_set_size((vec), cvector_size(vec) - 1);                             \
281 |     } while (0)
282 | 
283 | /**
284 |  * @brief cvector_copy - copy a vector
285 |  * @param from - the original vector
286 |  * @param to - destination to which the function copy to
287 |  * @return void
288 |  */
289 | #define cvector_copy(from, to)                                          \
290 |     do {                                                                \
291 |         if ((from)) {                                                   \
292 |             cvector_grow(to, cvector_size(from));                       \
293 |             cvector_set_size(to, cvector_size(from));                   \
294 |             memcpy((to), (from), cvector_size(from) * sizeof(*(from))); \
295 |         }                                                               \
296 |     } while (0)
297 | 
298 | /**
299 |  * @brief cvector_set_capacity - For internal use, sets the capacity variable of the vector
300 |  * @param vec - the vector
301 |  * @param size - the new capacity to set
302 |  * @return void
303 |  */
304 | #define cvector_set_capacity(vec, size)                  \
305 |     do {                                                 \
306 |         if (vec) {                                       \
307 |             cvector_vec_to_base(vec)->capacity = (size); \
308 |         }                                                \
309 |     } while (0)
310 | 
311 | /**
312 |  * @brief cvector_set_size - For internal use, sets the size variable of the vector
313 |  * @param vec - the vector
314 |  * @param size - the new capacity to set
315 |  * @return void
316 |  */
317 | #define cvector_set_size(vec, _size)                  \
318 |     do {                                              \
319 |         if (vec) {                                    \
320 |             cvector_vec_to_base(vec)->size = (_size); \
321 |         }                                             \
322 |     } while (0)
323 | 
324 | /**
325 |  * @brief cvector_set_elem_destructor - set the element destructor function
326 |  * used to clean up removed elements
327 |  * @param vec - the vector
328 |  * @param elem_destructor_fn - function pointer of type cvector_elem_destructor_t used to destroy elements
329 |  * @return void
330 |  */
331 | #define cvector_set_elem_destructor(vec, elem_destructor_fn)                  \
332 |     do {                                                                      \
333 |         if (vec) {                                                            \
334 |             cvector_vec_to_base(vec)->elem_destructor = (elem_destructor_fn); \
335 |         }                                                                     \
336 |     } while (0)
337 | 
338 | /**
339 |  * @brief cvector_grow - For internal use, ensures that the vector is at least <count> elements big
340 |  * @param vec - the vector
341 |  * @param count - the new capacity to set
342 |  * @return void
343 |  */
344 | #define cvector_grow(vec, count)                                                      \
345 |     do {                                                                              \
346 |         const size_t cv_sz__ = (count) * sizeof(*(vec)) + sizeof(cvector_metadata_t); \
347 |         if (vec) {                                                                    \
348 |             void *cv_p1__ = cvector_vec_to_base(vec);                                 \
349 |             void *cv_p2__ = cvector_clib_realloc(cv_p1__, cv_sz__);                   \
350 |             assert(cv_p2__);                                                          \
351 |             (vec) = cvector_base_to_vec(cv_p2__);                                     \
352 |         } else {                                                                      \
353 |             void *cv_p__ = cvector_clib_malloc(cv_sz__);                              \
354 |             assert(cv_p__);                                                           \
355 |             (vec) = cvector_base_to_vec(cv_p__);                                      \
356 |             cvector_set_size((vec), 0);                                               \
357 |             cvector_set_elem_destructor((vec), NULL);                                 \
358 |         }                                                                             \
359 |         cvector_set_capacity((vec), (count));                                         \
360 |     } while (0)
361 | 
362 | #endif /* CVECTOR_H_ */
363 | 


--------------------------------------------------------------------------------
/src/poly.asm:
--------------------------------------------------------------------------------
; SWI numbers used by the routines in this file
OS_WriteC               * &00
OS_Byte                 * &06
OS_ChangeDynamicArea    * &2A
OS_ReadModeVariable     * &35
OS_RemoveCursors        * &36
OS_ReadDynamicArea      * &5C

        AREA |C$$code|, CODE, READONLY

; NOTE(review): not referenced in the part of the file visible here;
; presumably an upper bound / mask for screen rows -- confirm at its uses.
ScreenHeightLimit       * &FF

        ; Data labels made visible to other object files
        EXPORT EdgeList
        EXPORT FogTable  
        EXPORT OneOver
        EXPORT ScreenBank
        EXPORT ScreenStart
        EXPORT ScreenMax
        EXPORT ScreenPartial
 19 | 
; ====== VDU SETUP =====
; Select the screen mode (13 when PAL_256 is defined, otherwise 9) and disable the screen cursor.
 22 | 
        EXPORT  VDUSetup
; VDUSetup: takes no arguments. Restricts output streams, selects the screen
; mode with VDU 22 and removes the cursors. r0 and r1 are overwritten.
VDUSetup
        ; Only enable OS_WriteC VDU output through OS_Byte_3
        MOV r0,#3
        MOV r1,#84
        SWI OS_Byte

        ; Set the screen mode: write VDU 22 followed by the mode number
        ; (13 when assembled with PAL_256, otherwise 9)
        MOV r0,#22 ; VDU 22
        SWI OS_WriteC
        IF :DEF: PAL_256
        MOV r0,#13 ; 256 color mode
        ELSE
        MOV r0,#9 ; 16 color mode
        ENDIF
        SWI OS_WriteC
        SWI OS_RemoveCursors
        MOVS pc,lr ; Return to caller
 41 | 
        EXPORT  UpdateMemAddress ; (a1: screenStart; a2 would be screenMax, currently unused)
; UpdateMemAddress: stores the first argument (a1) in the ScreenStart word.
; The matching store of a2 to ScreenMax is commented out, so ScreenMax is
; left untouched by this routine.
; NOTE(review): ScreenStart is written here although the AREA is declared
; READONLY -- confirm where the word is actually placed.
UpdateMemAddress
        STR a1,ScreenStart
        ;STR a2,ScreenMax
        MOVS pc,lr ; Return to caller
 47 | 
 48 | ; ====== RESERVE SCREEN BANKS =====
 49 | ; Reserve 2 banks of screen memory
 50 | 
        EXPORT  ReserveScreenBanks
; ReserveScreenBanks: takes no arguments. Resizes dynamic area 2 (screen
; memory) by the difference between two screens' worth of bytes and the
; size currently reported for the area. r0-r3 are overwritten.
ReserveScreenBanks
        MVN r0,#0 ; -1 to get current screen mode
        MOV r1,#7 ; Number of bytes for entire screen
        SWI OS_ReadModeVariable ; Result is taken from r2 below

        MOV r3,#2   ; Double buffered (2 banks)
        MUL r1,r2,r3 ; Double the number of bytes for 1 screen
        MOV r2,r1    ; Keep the wanted size in r2 across the next SWI

        MOV r0,#2 ; Read area 2 (aka screen area)
        SWI OS_ReadDynamicArea ; r1 is used below as the area's current size

        SUB r1,r2,r1 ; Change needed = wanted size (2 screens) - current size
        MOV r0,#2
        SWI OS_ChangeDynamicArea ; Apply the change in r1 to area 2
;        MOV r0,r1 ; Return the amount the area has changed (in bytes)
        MOVS pc,lr ; Return to caller
 69 | ; ====== SWITCH SCREEN BANK =====
 70 | ; Waits for refresh, displays the bank held in ScreenBank, then advances ScreenBank (1 <-> 2) and makes it the drawing bank.
 71 | ; The ScreenStart word is not changed by this routine. r0 and r1 are overwritten.
 72 |         EXPORT  SwitchScreenBank
 73 | SwitchScreenBank
 74 |         MOV r0,#19 ; Wait for refresh
 75 |         SWI OS_Byte
 76 | 
 77 |         LDR r1,ScreenBank ; Load the current drawing bank and make it the visible bank
 78 |         MOV r0,#113 ; Select the visible bank
 79 |         SWI OS_Byte
 80 | 
 81 |         LDR r1,ScreenBank ; Reload, as 113 may corrupt r1
 82 |         ADD r1,r1,#1 ; Increment current bank
 83 |         CMP r1,#2    ; If greater than 2
 84 |         MOVGT r1,#1  ; Reset back to 1
 85 |         STR r1,ScreenBank ; Remember the new drawing bank
 86 | 
 87 |         MOV r0,#112 ; Set the buffer bank for drawing
 88 |         SWI OS_Byte
 89 | 
 90 |         MOV pc,lr ; Return to the caller
 91 | 
 92 | ; ====== CLEAR SCREEN =====
 93 | ; Fills the buffer at ScreenStart with a word pattern: 40 bytes per store, 32 stores (1280 bytes) per loop pass.
 94 | ; In: r0 = 32-bit fill word; r1 = non-zero to fill ScreenMax bytes, zero to fill ScreenPartial/2 bytes.
 95 |         EXPORT ClearScreen ; ClearScreen(int color); a second argument in r1 selects the fill size
 96 | ClearScreen
 97 |         STMFD sp!,{r4-r11} ; Preserve the registers that will hold copies of the fill word
 98 |         MOV r3, r0
 99 |         MOV r4, r0
100 |         MOV r5, r0
101 |         MOV r6, r0
102 |         MOV r7, r0
103 |         MOV r8, r0
104 |         MOV r9, r0
105 |         MOV r10, r0
106 |         MOV r11, r0
107 | 
108 |         CMP r1,#0 ; These flags drive the three conditional instructions below
109 |         LDRNE r2,ScreenMax ; r1 != 0: full byte count
110 |         LDREQ r2,ScreenPartial ; r1 == 0: partial byte count...
111 |         LDR r1,ScreenStart ; r1 becomes the write pointer
112 |         MOVEQ r2,r2,LSR#1 ; ...which is halved before use
113 |         ADD r12, r1, r2 ; r12 = end address
114 |         MOV r2,r0 ; r2-r11 now all hold the fill word
115 |         ; r1 = write pointer, r12 = end address
116 | CSloop  ; 32 stores of 10 words per pass; the end test only runs after a whole pass
117 |         STMEA r1!,{r2-r11}
118 |         STMEA r1!,{r2-r11}
119 |         STMEA r1!,{r2-r11}
120 |         STMEA r1!,{r2-r11}
121 |         STMEA r1!,{r2-r11}
122 |         STMEA r1!,{r2-r11}
123 |         STMEA r1!,{r2-r11}
124 |         STMEA r1!,{r2-r11}
125 |         STMEA r1!,{r2-r11}
126 |         STMEA r1!,{r2-r11}
127 |         STMEA r1!,{r2-r11}
128 |         STMEA r1!,{r2-r11}
129 |         STMEA r1!,{r2-r11}
130 |         STMEA r1!,{r2-r11}
131 |         STMEA r1!,{r2-r11}
132 |         STMEA r1!,{r2-r11}
133 |         STMEA r1!,{r2-r11}
134 |         STMEA r1!,{r2-r11}
135 |         STMEA r1!,{r2-r11}
136 |         STMEA r1!,{r2-r11}
137 |         STMEA r1!,{r2-r11}
138 |         STMEA r1!,{r2-r11}
139 |         STMEA r1!,{r2-r11}
140 |         STMEA r1!,{r2-r11}
141 |         STMEA r1!,{r2-r11}
142 |         STMEA r1!,{r2-r11}
143 |         STMEA r1!,{r2-r11}
144 |         STMEA r1!,{r2-r11}
145 |         STMEA r1!,{r2-r11}
146 |         STMEA r1!,{r2-r11}
147 |         STMEA r1!,{r2-r11}
148 |         STMEA r1!,{r2-r11}
149 |         CMP r1,r12 ; Reached the end address yet?
150 |         BLT CSloop
151 | 
152 |         LDMFD sp!,{r4-r11} ; Restore the saved registers
153 |         MOV pc,lr ; Return to the caller
154 | 
155 | r0_LongGradient    RN r0   ; Gradient of the long edge
156 | r1_ShortGradient   RN r1   ; Gradient of a short edge
157 | r2_EdgeList        RN r2   ; Edge list
158 | v1_X               RN r7   ; V1 X
159 | v1_Y               RN r8   ; V1 Y
160 | v2_X               RN r9   ; V2 X
161 | v2_Y               RN r10  ; V2 Y
162 | v3_X               RN r11  ; V3 X
163 | v3_Y               RN r12  ; V3 Y
164 | 
165 |         EXPORT FillEdgeLists ; FillEdgeLists(int triList, int color);
166 | FillEdgeLists ROUT
167 |         STMFD sp!,{r1,r4-r12,r14} ; Save registers; r1 (color) lands at [sp] and is reloaded from there before drawing
168 |         ; r0 points at three vertices of three words each (x, y, z); only x and y are used here.
169 |         LDMFD r0!,{v1_X,v1_Y}       ; Load 3 2D un-sorted coords
170 |         ADD   r0,r0,#4            ; Skip z
171 |         LDMFD r0!,{v2_X,v2_Y}
172 |         ADD   r0,r0,#4            ; Skip z
173 |         LDMFD r0,{v3_X,v3_Y}
174 |         ; Unsigned (LO) comparisons: a negative coordinate looks like a huge value and fails the test.
175 |         CMP   v1_X,#320
176 |         CMPLO v2_X,#320
177 |         CMPLO v3_X,#320
178 |         CMPLO v1_Y,#ScreenHeightLimit
179 |         CMPLO v2_Y,#ScreenHeightLimit
180 |         CMPLO v3_Y,#ScreenHeightLimit
181 |         BLO TrivialTriangleRoutine ; Every x is below 320 and every y below the limit: no clipping needed
182 | 
183 | ; ==========================================
184 | ; ========= CLIPPED TRIANGLE ===============
185 | ; ==========================================
186 | ; Reached by falling through when at least one coordinate failed the on-screen test at entry.
187 | ClippedTriangleRoutine
188 | 
189 |         CMP   v1_Y, #ScreenHeightLimit
190 |         CMPGE v2_Y, #ScreenHeightLimit
191 |         CMPGE v3_Y, #ScreenHeightLimit
192 |         BGE   EdgeListEnd ; All Y coords are off screen bottom
193 | 
194 |         CMP   v1_X, #320
195 |         CMPGE v2_X, #320
196 |         CMPGE v3_X, #320
197 |         BGE   EdgeListEnd ; All X coords are off screen right
198 | 
199 |         CMP   v1_Y, #0
200 |         CMPLE v2_Y, #0
201 |         CMPLE v3_Y, #0
202 |         BLE   EdgeListEnd ; All Y coords are off screen top (<= 0)
203 | 
204 |         CMP   v1_X, #0
205 |         CMPLE v2_X, #0
206 |         CMPLE v3_X, #0
207 |         BLE   EdgeListEnd ; All X coords are off screen left (<= 0)
208 | 
209 |         ; Sort the vertices by Y so that v1 is the top and v3 the bottom; r2 is the swap temporary.
210 |         ; v1 and v2
211 |         CMP v1_Y,v2_Y
212 |         MOVGT r2,v1_X
213 |         MOVGT v1_X,v2_X
214 |         MOVGT v2_X,r2
215 |         MOVGT r2,v1_Y
216 |         MOVGT v1_Y,v2_Y
217 |         MOVGT v2_Y,r2
218 | 
219 |         ; v1 and v3
220 |         CMP v1_Y,v3_Y
221 |         MOVGT r2,v1_X
222 |         MOVGT v1_X,v3_X
223 |         MOVGT v3_X,r2
224 |         MOVGT r2,v1_Y
225 |         MOVGT v1_Y,v3_Y
226 |         MOVGT v3_Y,r2
227 | 
228 |         ; v2 and v3
229 |         CMP v2_Y,v3_Y
230 |         MOVGT r2,v2_X
231 |         MOVGT v2_X,v3_X
232 |         MOVGT v3_X,r2
233 |         MOVGT r2,v2_Y
234 |         MOVGT v2_Y,v3_Y
235 |         MOVGT v3_Y,r2
236 | 
237 |         ; LONG DELTA CALCULATION
238 |         ; We always calculate the long edge first as even if we jump to the bottom half, we still need
239 |         ; to step along the long edge to find the correct x starting position.
240 | CalcLongSide
241 |         ; Calculate the gradient m between v1 and v3
242 |         SUB r14,v3_Y,v1_Y          ; y3 - y1
243 |         SUB r1,v3_X,v1_X           ; x3 - x1
244 | 
245 |         CMP r14,#&400           ; Due to reciprocal limit, we need to limit the gradient inputs
246 |         BLT LongSideTableLookup ; Below &400: use the OneOver table, otherwise divide
247 | 
248 |         STMFD sp!,{r1,r14} ; BL overwrites r14 and the divide alters r1, so save both
249 |         MOV r0,r1 ; Dividend = x delta
250 |         MOV r1,r14 ; Divisor = y delta
251 |         BL GenericDivide ; Gradient comes back in r0
252 |         LDMFD sp!,{r1,r14} ; Restore the current registers
253 |         B LongSideGradientSafe
254 | 
255 | LongSideTableLookup
256 |         LDR r6,OneOver          ; start of oneOver block
257 |         LDR r3,[r6,r14,LSL#2]    ; Table entry for the y delta (4 bytes per entry)
258 |         MUL r0_LongGradient,r3,r1  ; Store m in r0, r3 is available
259 | LongSideGradientSafe
260 | 
261 |         ; CLIPPING
262 |         ; If our middle vertex is above the screen we need the longside to catch up
263 |         ; and go straight to drawing the bottom half. By proxy, v1_Y is also offscreen.
264 |         CMP   v2_Y,#0
265 |         MOVLT r4,v1_X,ASL#16                 ; x1 to fixed point
266 |         SUBLT r14,v2_Y,v1_Y                  ; y2 - y1 = number of rows in the skipped top half
267 |         MLALT r4,r0_LongGradient,r14,r4      ; r4 += m * (y2 - y1): long-edge x at row y2
268 |         MOVLT v1_Y,#0                        ; Drawing will start at row 0
269 |         LDRLT r2_EdgeList,EdgeList           ; Edge list needs to be set up for the jump to the bottom half
270 |         BLT CalcBottomShortSide               ; Middle vertex is above the screen, skip the top part
271 | 
272 | CalcTopShortSide
273 |         SUB r2,v2_X,v1_X                    ; x2 - x1
274 |         SUBS r14,v2_Y,v1_Y                  ; y2 - y1, if zero or less, we have a flat top triangle
275 |         MOVLE r4,v1_X,ASL#16                    ; x1 to fixed point
276 |         LDRLE r2_EdgeList,EdgeList              ; Bottom half assumes edge list is already assigned to r2
277 |         ADDLE r2_EdgeList,r2_EdgeList,v1_Y,LSL#2        ; Add the Y coord to the list address
278 |         BLE CalcBottomShortSide                         ; Flat top triangle, skip the top part
279 | 
280 |         CMP r14,#&400 ; Due to reciprocal limit, we need to limit the gradient inputs
281 |         BLT TopTableLookup
282 | 
283 |         STMFD sp!,{r0,r2,r4-r6,r14} ; Save the long gradient, x delta, r4-r6 (altered by the divide) and r14
284 |         MOV r0,r2 ; Dividend = x delta
285 |         MOV r1,r14 ; Divisor = y delta
286 |         BL GenericDivide
287 |         MOV r1,r0 ; Short gradient belongs in r1
288 |         LDMFD sp!,{r0,r2,r4-r6, r14} ; Restore the current registers
289 |         B TopGradientSafe
290 | 
291 | TopTableLookup
292 |         LDR r6,OneOver          ; start of oneOver block
293 |         LDR r4,[r6,r14,LSL#2]   ; Table entry for the y delta
294 |         MUL r1_ShortGradient,r4,r2 ; Short gradient = entry * x delta
295 | 
296 | TopGradientSafe
297 |         LDR r2_EdgeList,EdgeList
298 | 
299 |         SUB r14,v2_Y,v1_Y       ; Need to reset the y delta
300 | 
301 |         MOV r4,v1_X,ASL#16 ; x1 to fixed point
302 |         MOV r5,r4          ; Both edges start at x1
303 |         ; r7 is free at this point, use it as a temp
304 | 
305 |         ; CLIPPING
306 |         ; If y1 < 0, we need to adjust the starting position
307 |         CMP v1_Y,#0
308 |         ADDLT r14,r14,v1_Y              ; Drop the rows above the screen from the count (v1_Y is negative)
309 |         RSBLT r3,v1_Y,#0                ; r3 = positive v1_Y
310 |         MOVLT v1_Y,#0                   ; v1_Y = 0
311 |         MLALT r4,r0_LongGradient,r3,r4  ; r4 += long m * (+v1_Y)
312 |         MLALT r5,r1_ShortGradient,r3,r5 ; r5 += short m * (+v1_Y)
313 |         ADDGE r2_EdgeList,r2_EdgeList,v1_Y,LSL#2 ; Add the Y coord to the list address
314 |         ; If y2 > ScreenHeightLimit, we need to trim the top part and reduce the y delta
315 |         CMP v2_Y,#ScreenHeightLimit
316 |         SUBGE r7,v2_Y,#ScreenHeightLimit        ; r7 = positive y delta over ScreenHeightLimit
317 |         SUBGE r14,r14,r7                        ; Reduce the y delta accordingly
318 | 
319 | 12      ; Fill the edge list for the top part of the triangle
320 |         MOV r7,r4            ; Clamp a copy of the long-edge x to 0..&1400000 (320 in 16.16)
321 |         CMP r7,#&0
322 |         MOVLT r7,#&0
323 |         CMP r7,#&1400000
324 |         MOVGE r7,#&1400000
325 |         MOV r3,r7,LSR#16     ; r3 = clamped long-edge x as an integer
326 | 
327 |         MOV r7,r5            ; Clamp a copy of the short-edge x the same way
328 |         CMP r7,#&0
329 |         MOVLT r7,#&0
330 |         CMP r7,#&1400000
331 |         MOVGE r7,#&1400000
332 | 
333 |         MOV r3,r3,LSL#16     ; Long-edge x goes in the top 16 bits
334 |         ORR r3,r3,r7,LSR#16  ; Short-edge x goes in the bottom 16 bits
335 | 
336 |         STR r3,[r2_EdgeList],#4     ; Store the packed x pair for this row
337 |         ADD r4,r4,r0_LongGradient   ; Add the gradient to the current x value
338 |         ADD r5,r5,r1_ShortGradient  ; Add the gradient to the current x value
339 |         SUBS r14,r14,#1          ; Decrement the y counter
340 |         BGT %BT12                ; Loop until we reach y2
341 | 
342 | CalcBottomShortSide
343 |         SUB r14,v3_Y,v2_Y         ; y3 - y2
344 |         SUB r1_ShortGradient,v3_X,v2_X ; x3 - x2
345 | 
346 |         CMP r14,#&400            ; Due to reciprocal limit, we need to limit the gradient inputs
347 |         BLT BottomTableLookup
348 | 
349 |         STMFD sp!,{r0,r2,r4-r6,r14} ; Save the long gradient, list pointer, r4-r6 (altered by the divide) and r14
350 |         MOV r0,r1 ; Dividend = x delta
351 |         MOV r1,r14 ; Divisor = y delta
352 |         BL GenericDivide
353 |         MOV r1,r0 ; Short gradient belongs in r1
354 |         LDMFD sp!,{r0,r2,r4-r6, r14} ; Restore the current registers
355 |         B BottomGradientSafe
356 | 
357 | BottomTableLookup
358 |         LDR r6,OneOver          ; start of oneOver block
359 |         LDR r5,[r6,r14,LSL#2]    ; Table entry for the y delta (4 bytes per entry)
360 |         MUL r1_ShortGradient,r5,r1_ShortGradient ; Short gradient = entry * x delta
361 | 
362 | BottomGradientSafe
363 |         MOV r5,v2_X,ASL#16         ; x2 to fixed point
364 | 
365 |         SUB r14,v3_Y,v2_Y      ; Need to reset the y delta
366 | 
367 |         ; CLIPPING
368 |         ; If y2 < 0, we need to adjust the starting position
369 |         CMP   v2_Y,#0
370 |         ADDLT r14,r14,v2_Y              ; Drop the rows above the screen from the count (v2_Y is negative)
371 |         RSBLT r3,v2_Y,#0                ; r3 = positive v2_Y
372 |         MLALT r4,r0_LongGradient,r3,r4  ; r4 += long m * (+v2_Y)
373 |         MLALT r5,r1_ShortGradient,r3,r5 ; r5 += short m * (+v2_Y)
374 |         ; If y3 > ScreenHeightLimit, we need to adjust the ending position
375 |         CMP   v3_Y,#ScreenHeightLimit
376 |         SUBGE r7,v3_Y,#ScreenHeightLimit        ; r7 = positive y delta over ScreenHeightLimit
377 |         SUBGE r14,r14,r7                        ; Reduce the y delta accordingly
378 | 
379 | 14      ; Fill the edge list for the bottom part of the triangle
380 |         MOV r7,r4            ; Clamp a copy of the long-edge x to 0..&1400000 (320 in 16.16)
381 |         CMP r7,#&0
382 |         MOVLT r7,#&0
383 |         CMP r7,#&1400000
384 |         MOVGE r7,#&1400000
385 |         MOV r3,r7,LSR#16     ; r3 = clamped long-edge x as an integer
386 | 
387 |         MOV r7,r5            ; Clamp a copy of the short-edge x the same way
388 |         CMP r7,#&0
389 |         MOVLT r7,#&0
390 |         CMP r7,#&1400000
391 |         MOVGE r7,#&1400000
392 | 
393 |         MOV r3,r3,LSL#16     ; Long-edge x goes in the top 16 bits
394 |         ORR r3,r3,r7,LSR#16  ; Short-edge x goes in the bottom 16 bits
395 | 
396 |         STR r3,[r2_EdgeList],#4     ; Store the packed x pair for this row
397 |         ADD r4,r4,r0_LongGradient   ; Add the gradient to the current x value
398 |         ADD r5,r5,r1_ShortGradient  ; Add the gradient to the current x value
399 |         SUBS r14,r14,#1          ; Decrement the y counter
400 |         BGT %BT14                ; Loop until we reach y3
401 | 
402 |         ; CLIPPING
403 |         ; If y3 > ScreenHeightLimit, we need to adjust the ending position
404 |         CMP v3_Y,#ScreenHeightLimit
405 |         MOVGE v3_Y,#ScreenHeightLimit
406 |         CMPGE v1_Y,v3_Y          ; After clamping, is the top row at or past the bottom row?
407 |         BGE EdgeListEnd          ; Yes: nothing to draw
408 | 
409 |         B Triv_DrawEdges         ; Rasterise with the (clamped) v1_Y and v3_Y
410 | 
411 | ; ==========================================
412 | ; ========= TRIVIAL TRIANGLE ===============
413 | ; ==========================================
414 | ; Reached when every coordinate passed the on-screen test at entry, so no clamping or clipping is done.
415 | TrivialTriangleRoutine
416 | 
417 |         ; Sort the vertices by Y so that v1 is the top and v3 the bottom; r2 is the swap temporary.
418 |         ; v1 and v2
419 |         CMP v1_Y,v2_Y
420 |         MOVGT r2,v1_X
421 |         MOVGT v1_X,v2_X
422 |         MOVGT v2_X,r2
423 |         MOVGT r2,v1_Y
424 |         MOVGT v1_Y,v2_Y
425 |         MOVGT v2_Y,r2
426 | 
427 |         ; v1 and v3
428 |         CMP v1_Y,v3_Y
429 |         MOVGT r2,v1_X
430 |         MOVGT v1_X,v3_X
431 |         MOVGT v3_X,r2
432 |         MOVGT r2,v1_Y
433 |         MOVGT v1_Y,v3_Y
434 |         MOVGT v3_Y,r2
435 | 
436 |         ; v2 and v3
437 |         CMP v2_Y,v3_Y
438 |         MOVGT r2,v2_X
439 |         MOVGT v2_X,v3_X
440 |         MOVGT v3_X,r2
441 |         MOVGT r2,v2_Y
442 |         MOVGT v2_Y,v3_Y
443 |         MOVGT v3_Y,r2
444 | 
445 |         ; LONG DELTA CALCULATION
446 |         ; We always calculate the long edge first as even if we jump to the bottom half, we still need
447 |         ; to step along the long edge to find the correct x starting position.
448 | Triv_CalcLongSide
449 |         ; Calculate the gradient m between v1 and v3
450 |         SUB r14,v3_Y,v1_Y          ; y3 - y1
451 |         SUB r1,v3_X,v1_X           ; x3 - x1
452 | 
453 |         LDR r6,OneOver          ; start of oneOver block (r6 is reused for both short edges below)
454 |         LDR r3,[r6,r14,LSL#2]    ; Table entry for the y delta (4 bytes per entry)
455 |         MUL r0_LongGradient,r3,r1  ; Store m in r0, r3 is available
456 | 
457 | Triv_CalcTopShortSide
458 |         SUB r2,v2_X,v1_X                    ; x2 - x1
459 |         SUBS r14,v2_Y,v1_Y                  ; y2 - y1, if zero or less, we have a flat top triangle
460 |         MOVLE r4,v1_X,ASL#16                    ; x1 to fixed point
461 |         LDRLE r2_EdgeList,EdgeList              ; Bottom half assumes edge list is already assigned to r2
462 |         ADDLE r2_EdgeList,r2_EdgeList,v1_Y,LSL#2        ; Add the Y coord to the list address
463 |         BLE Triv_CalcBottomShortSide                         ; Flat top triangle, skip the top part
464 | 
465 |         LDR r4,[r6,r14,LSL#2]   ; Table entry for y2 - y1
466 |         MUL r1_ShortGradient,r4,r2 ; Short gradient = entry * x delta
467 |         LDR r2_EdgeList,EdgeList
468 | 
469 |         MOV r4,v1_X,ASL#16 ; x1 to fixed point
470 |         MOV r5,r4          ; Both edges start at x1
471 |         ; r7 is free at this point, use it as a temp
472 | 
473 |         ADD r2_EdgeList,r2_EdgeList,v1_Y,LSL#2 ; Add the Y coord to the list address
474 | 
475 | Triv_TopEdgeList       ; Fill the edge list for the top part of the triangle
476 |         MOV r3,r4,LSR#16     ; Long-edge x as an integer...
477 |         MOV r3,r3,LSL#16     ; ...placed in the top 16 bits
478 |         ORR r3,r3,r5,LSR#16  ; Short-edge x goes in the bottom 16 bits
479 | 
480 |         STR r3,[r2_EdgeList],#4     ; Store the packed x pair for this row
481 |         ADD r4,r4,r0_LongGradient   ; Add the gradient to the current x value
482 |         ADD r5,r5,r1_ShortGradient  ; Add the gradient to the current x value
483 |         SUBS r14,r14,#1          ; Decrement the y counter
484 |         BGT Triv_TopEdgeList                ; Loop until we reach y2
485 | 
486 | Triv_CalcBottomShortSide
487 |         SUB r14,v3_Y,v2_Y         ; y3 - y2
488 |         SUB r1_ShortGradient,v3_X,v2_X ; x3 - x2
489 | 
490 |         LDR r5,[r6,r14,LSL#2]    ; Table entry for y3 - y2 (r6 still holds the table address)
491 |         MUL r1_ShortGradient,r5,r1_ShortGradient ; Short gradient = entry * x delta
492 |         MOV r5,v2_X,ASL#16         ; x2 to fixed point
493 | 
494 | Triv_BottomEdgeList       ; Fill the edge list for the bottom part of the triangle
495 |         MOV r3,r4,LSR#16     ; Long-edge x as an integer...
496 |         MOV r3,r3,LSL#16     ; ...placed in the top 16 bits
497 |         ORR r3,r3,r5,LSR#16  ; Short-edge x goes in the bottom 16 bits
498 | 
499 |         STR r3,[r2_EdgeList],#4     ; Store the packed x pair for this row
500 |         ADD r4,r4,r0_LongGradient   ; Add the gradient to the current x value
501 |         ADD r5,r5,r1_ShortGradient  ; Add the gradient to the current x value
502 |         SUBS r14,r14,#1          ; Decrement the y counter
503 |         BGT Triv_BottomEdgeList                ; Loop until we reach y3
504 | 
505 | Triv_DrawEdges
506 |         SUB r14,v3_Y,v1_Y          ; y3 - y1 (i.e., the number of lines to draw)
507 |         LDR r12,EdgeList        ; Edge table address (replaces v3_Y, which is no longer needed)
508 |         ADD r12,r12,v1_Y,LSL#2      ; Add the top Y coord to the list address
509 |         LDR r11,ScreenStart     ; Load the screen mem start location
510 |         MOV r3,v1_Y               ; Initial Y position
511 |         IF :DEF: PAL_256
512 |         MOV r2,r3,LSL#8         ; Multiply by 320 in 2 stages (<< 8) + (<< 6)
513 |         ADD r2,r2,r3,LSL#6      ; Total Y offset * 320
514 |         ELSE
515 |         MOV r2,r3,LSL#7         ; Multiply by 160 in 2 stages (<< 7) + (<< 5)
516 |         ADD r2,r2,r3,LSL#5      ; Total Y offset * 160
517 |         ENDIF
518 |         ADD r11,r11,r2          ; Add Y offset to screen offset start location
519 | 
520 |         LDR r0,FogTable         ; Fog table address
521 |         LDR r7,[sp]             ; Load the color (the r1 value saved at entry)
522 |         ADD r0,r0,r7,LSL#2      ; Address of this color's entry (4 bytes per entry)
523 |         TST v1_Y,#1             ; Does the triangle start on an odd line?
524 |         IF :DEF: PAL_256
525 |         LDRNE r7,[r0]             ; Odd start: first row uses the entry itself...
526 |         LDRNE r8,[r0,#&100]       ; ...and the next row the entry &100 bytes further on
527 |         LDREQ r8,[r0]             ; Even start: the two words are assigned the other way round
528 |         LDREQ r7,[r0,#&100]       ; r7 is always the word for the first row drawn
529 |         ELSE
530 |         LDRNE r7,[r0]             ; Odd start: first row uses the entry itself...
531 |         LDRNE r8,[r0,#64]       ; ...and the next row the entry 64 bytes (16 words) further on
532 |         LDREQ r8,[r0]             ; Even start: the two words are assigned the other way round
533 |         LDREQ r7,[r0,#64]       ; r7 is always the word for the first row drawn
534 |         ENDIF
535 | 
536 | ; ==========================================
537 | ; ========= RASTERIZE THE EDGE LIST ========
538 | ; ==========================================
539 | ; In: r12 = edge list cursor, r11 = screen address of the row, r14 = rows left, r7/r8 = fill words for this/next row.
540 | RasterScanlineLoop
541 |         LDR r2,[r12],#4         ; Load the packed x pair for this row
542 |         MOV r3,r2,LSR#16        ; Top 16 bits (long-edge x) as an integer
543 |         MOV r2,r2,LSL#16        ; Shift out the top half, leaving the short-edge x
544 |         MOV r2,r2,LSR#16        ; Move back to integer
545 | 
546 |         CMP r3,r2       ; Order the pair
547 |         BEQ Continue      ; Equal x values: empty span, nothing to draw
548 |         EORMI r3,r3,r2    ; r3 < r2: XOR-swap so that r2 = left x and r3 = right x
549 |         EORMI r2,r3,r2    ; (swap, second step)
550 |         EORMI r3,r3,r2    ; (swap, third step)
551 | 
552 |         IF :DEF: PAL_256
553 |         ADD r9,r11,r2           ; Add the left edge x coord to the screen offset
554 |         ADD r10,r11,r3          ; Add the right edge x coord to the screen offset (end of span, not written)
555 | 
556 |         MOV r0,r7       ; Fill word for this row
557 |         MOV r1,r0       ; r1 is the copy used for single-byte stores
558 |         ANDS r2,r9,#1   ; Used to rotate the color
559 |         MOVNE r1,r1,ROR#8  ; Odd start address: rotate the byte pattern by one byte
560 | 
561 |         SUB r4,r10,r9           ; Get the number of pixels left
562 |         CMP r4,#4
563 |         BLT SpinLastBytes            ; Fewer than 4 pixels: byte stores only
564 | 
565 |         ; Store up to 3 single bytes until r9 is word aligned; once it is, the remaining NE steps are skipped
566 |         TST r9,#3
567 |         STRNEB r1,[r9],#1 ; Store the color
568 |         MOVNE r1,r1,ROR#8  ; Rotate the color
569 |         SUBNE r4,r4,#1
570 |         TSTNE r9,#3
571 |         STRNEB r1,[r9],#1 ; Store the color
572 |         MOVNE r1,r1,ROR#8  ; Rotate the color
573 |         SUBNE r4,r4,#1
574 |         TSTNE r9,#3
575 |         STRNEB r1,[r9],#1 ; Store the color
576 |         MOVNE r1,r1,ROR#8  ; Rotate the color
577 |         SUBNE r4,r4,#1
578 |         ELSE
579 | 
580 |         ; Our first check is to see if we have an xL and xR within the same quad boundary
581 |         EOR r9, r2, r3 
582 |         TST r9, #&1F8 ; EQ if xL and xR fall in the same 8-pixel word; these flags are used up to the BEQ below
583 | 
584 |         ; If not, we have an easy job as we just mask 0xFFFFFFFF and shift it by our required pixels
585 |         AND r0, r2, #7 ; How far in we are
586 |         MOV r0, r0, LSL #2 ; Shift in nibbles
587 |         MVN r1, #0 ; 0xFFFFFFFF
588 |         MOV r0, r1, LSL r0 ; Mask covering pixel xL and everything after it in the word
589 | 
590 |         ; If xL and xR are on the same boundary, shift r1 right and clear from the other side
591 |         ANDEQ r10, r3, #7
592 |         MOVEQ r10, r10, LSL #2 ; Shift in nibbles
593 |         BICEQ r0, r0, r1, LSL r10 ; Remove pixel xR and everything after it from the mask
594 | 
595 |         ; Load existing screen color, mask and write back
596 |         ADD r9, r11, r2, LSR #1 ; Add the left edge x coord to the screen offset
597 |         BIC r9, r9, #3 ; Move screen buffer back to boundary
598 |         LDR r1, [r9] ; Load existing color
599 |         BIC r1, r1, r0 ; Mask out existing color
600 |         AND r0, r7, r0 ; Keep only the masked part of the fill word (the mask itself is no longer required)
601 |         ORR r0, r0, r1 ; Combine masked data
602 |         STR r0, [r9], #4 ; Write it back in again
603 |         BEQ Continue ; If this was a short raster, we can just jump ahead. 
604 | 
605 |         ; Otherwise, let's get the end sorted
606 |         ANDS r0, r3, #7 ; How far in we are on the xR side
607 |         MOV r0, r0, LSL #2 ; Shift in nibbles
608 |         MVN r1, #0 ; 0xFFFFFFFF
609 |         MOV r0, r1, LSL r0 ; Mask covering pixel xR and everything after it in the word
610 | 
611 |         ADD r10, r11, r3, LSR #1 ; Add the right edge x coord to the screen offset
612 |         BICNE r10, r10, #3 ; Move screen buffer back to boundary
613 |         LDR r1, [r10] ; Load existing color
614 |         AND r1, r1, r0 ; Keep the existing pixels from xR onwards
615 |         BIC r0, r7, r0 ; Keep the fill word only for the pixels before xR
616 |         ORR r0, r0, r1 ; Combine masked data
617 |         STR r0, [r10], #4 ; Write it back in again
618 |         ADD r10, r11, r3, LSR #1 ; Add the right edge x coord to the screen offset
619 |         ENDIF
620 | 
621 | QuadBlit
622 |         IF :DEF: PAL_256
623 |         MOV r4,r4,LSR#4 ; Number of whole 16-byte groups left
624 |         CMP r4,#32
625 |         BGE RotateColor ; More groups than the table below holds: skip the block stores
626 |         RSB r4,r4,#32 ; reverse order
627 |         MOV r1,r0
628 |         MOV r2,r0
629 |         MOV r3,r0
630 |         ADD pc,pc,r4,LSL#2 ; Computed jump: skip r4 of the STMIAs so one runs per remaining 16-byte group
631 |         MOV r0,r0 ; No-op padding so the jump is counted from the first STMIA
632 |         STMIA r9!,{r0-r3}
633 |         STMIA r9!,{r0-r3}
634 |         STMIA r9!,{r0-r3}
635 |         STMIA r9!,{r0-r3}
636 |         STMIA r9!,{r0-r3}
637 |         STMIA r9!,{r0-r3}
638 |         STMIA r9!,{r0-r3}
639 |         STMIA r9!,{r0-r3}
640 |         STMIA r9!,{r0-r3}
641 |         STMIA r9!,{r0-r3}
642 |         STMIA r9!,{r0-r3}
643 |         STMIA r9!,{r0-r3}
644 |         STMIA r9!,{r0-r3}
645 |         STMIA r9!,{r0-r3}
646 |         STMIA r9!,{r0-r3}
647 |         STMIA r9!,{r0-r3}
648 |         STMIA r9!,{r0-r3}
649 |         STMIA r9!,{r0-r3}
650 |         STMIA r9!,{r0-r3}
651 |         STMIA r9!,{r0-r3}
652 |         STMIA r9!,{r0-r3}
653 |         STMIA r9!,{r0-r3}
654 |         STMIA r9!,{r0-r3}
655 |         STMIA r9!,{r0-r3}
656 |         STMIA r9!,{r0-r3}
657 |         STMIA r9!,{r0-r3}
658 |         STMIA r9!,{r0-r3}
659 |         STMIA r9!,{r0-r3}
660 |         STMIA r9!,{r0-r3}
661 |         STMIA r9!,{r0-r3}
662 |         STMIA r9!,{r0-r3}
663 |         STMIA r9!,{r0-r3}
664 | 
665 |         ; ; In theory, there should be less than 16 bytes left, so attempt STR with quads now
666 |         SUB r4,r10,r9           ; Get the number of pixels left
667 |         MOV r4,r4,LSR#2 ; Whole words left
668 |         RSB r4,r4,#4 ; reverse order
669 |         ADD pc,pc,r4,LSL#2 ; Computed jump: skip r4 of the STRs so one runs per remaining whole word
670 |         MOV r0,r0 ; No-op padding so the jump is counted from the first STR
671 |         STR r0,[r9],#4
672 |         STR r0,[r9],#4
673 |         STR r0,[r9],#4
674 |         STR r0,[r9],#4
675 | 
676 | RotateColor
677 |         MOV r1,r0
678 |         ANDS r2,r9,#1   ; Used to rotate the color
679 |         MOVNE r1,r1,ROR#8  ; Rotate the color
680 | 
681 | SpinLastBytes
682 |         CMP r9,r10 ; Store up to 4 single bytes while r9 is below the end address
683 |         STRLTB r1,[r9],#1
684 |         MOVLT r1,r1,ROR#8
685 |         CMPLT r9,r10
686 |         STRLTB r1,[r9],#1
687 |         MOVLT r1,r1,ROR#8
688 |         CMPLT r9,r10
689 |         STRLTB r1,[r9],#1
690 |         MOVLT r1,r1,ROR#8
691 |         CMPLT r9,r10
692 |         STRLTB r1,[r9],#1
693 | 
694 |         ELSE
695 | 
696 |         MOV r0, r7 ; Fill word for this row
697 |         SUB r4, r10, r9 ; Bytes between the word after the left edge and the right edge
698 |         MOV r4, r4, LSR #4 ; Number of whole 16-byte groups
699 |         CMP r4, #16 ; Reduced from 32 to 16 since bytes are halved
700 |         BGE Continue ; More groups than the table below holds: skip the rest of the row
701 |         RSB r4, r4, #16 ; Reverse order, reduced from 32 to 16
702 |         ; MOV r0, #&DD
703 |         ; EOR r0, r0, r0, LSL #8
704 |         ; EOR r0, r0, r0, LSL #16
705 |         MOV r1, r0
706 |         MOV r2, r0
707 |         MOV r3, r0
708 |         ADD pc, pc, r4, LSL #2 ; Computed jump: skip r4 of the STMIAs so one runs per 16-byte group
709 |         MOV r0, r0 ; No-op padding so the jump is counted from the first STMIA
710 |         STMIA r9!, {r0-r3} ; Reduced number of STMIA instructions by half
711 |         STMIA r9!, {r0-r3}
712 |         STMIA r9!, {r0-r3}
713 |         STMIA r9!, {r0-r3}
714 |         STMIA r9!, {r0-r3}
715 |         STMIA r9!, {r0-r3}
716 |         STMIA r9!, {r0-r3}
717 |         STMIA r9!, {r0-r3}
718 |         STMIA r9!, {r0-r3}
719 |         STMIA r9!, {r0-r3}
720 |         STMIA r9!, {r0-r3}
721 |         STMIA r9!, {r0-r3}
722 |         STMIA r9!, {r0-r3}
723 |         STMIA r9!, {r0-r3}
724 |         STMIA r9!, {r0-r3}
725 |         STMIA r9!, {r0-r3}
726 | 
727 |         ; ; In theory, there should be less than 16 bytes left, so attempt STR with quads now
728 |         SUB r4, r10, r9 ; Bytes still to fill before the right-edge word
729 |         MOV r4, r4, LSR #2 ; Whole words left
730 |         RSB r4, r4, #4 ; reverse order
731 |         ADD pc, pc, r4, LSL #2 ; Computed jump: skip r4 of the STRs so one runs per remaining whole word
732 |         MOV r0, r0 ; No-op padding so the jump is counted from the first STR
733 |         STR r0, [r9], #4
734 |         STR r0, [r9], #4
735 |         STR r0, [r9], #4
736 |         STR r0, [r9], #4
737 |         ENDIF
738 | 
739 | Continue
740 |         EOR r7, r7, r8 ; Swap dither pattern
741 |         EOR r8, r7, r8 ;
742 |         EOR r7, r7, r8 ;
743 | 
744 |         IF :DEF: PAL_256
745 |         ADD r11,r11,#320        ; Add 320 to the screen offset
746 |         SUBS r14,r14,#1         ; Decrement the y counter
747 |         ELSE        
748 |         ADD r11, r11, #160 ; Changed from 320 to 160 bytes per scanline
749 |         SUBS r14, r14, #1 ; Decrement the y counter
750 |         ENDIF
751 |         BGT RasterScanlineLoop ; Loop until every row has been drawn
752 | 
753 | EdgeListEnd
754 |         LDMFD sp!,{r1,r4-r12,r14}  ; Restore registers before returning
755 |         MOV pc,lr
756 | 
757 | EdgeList        DCD &0          ; Address of the edge list table (zero until another module stores it)
758 | FogTable        DCD &0          ; Address of the table of fog values (zero until another module stores it)
759 | OneOver         DCD &0          ; Address of the table of reciprocal 1/X values (zero until another module stores it)
760 | ScreenBank      DCD &1          ; Initial screen bank index
761 | ScreenStart     DCD &0          ; Start address of the buffer being drawn to (set by UpdateMemAddress)
762 | 
763 |         IF :DEF: PAL_256
764 | ScreenMax       DCD &00014000   ; 81920 bytes = 320 bytes x 256 rows
765 | ScreenPartial   DCD &0000FA00   ; 64000 bytes; ClearScreen halves this value before using it
766 |         ELSE
767 | ScreenMax       DCD &0000A000   ; Changed from 14000 to A000 (halved for 4-bit mode)
768 | ScreenPartial   DCD &0000FA00   ; 64000 bytes; ClearScreen halves this value (to 160 bytes x 200 rows)
769 |         ENDIF
770 | 
771 |         ALIGN                   ; Indented so it is read as the directive, not as a label called ALIGN
772 | 
773 |         EXPORT KeyPress ; KeyPress(int keycode);
774 | KeyPress
775 |         EOR r1,r0,#&FF ; r1 = keycode EOR &FF, the form this OS_Byte call is given for a single-key test
776 |         MOV r0,#129 ; OS_Byte 129
777 |         MOV r2,#255
778 |         SWI OS_Byte
779 |         MOV r0,r1 ; r0 contains either 0xFF or 0x00
780 |         MOV pc,lr ; Return to the caller; r1 and r2 have been overwritten
781 | 
782 |         ; EXPORT GenericDivide
783 | GenericDivide ROUT
784 |         ; Enter with dividend in R0, divisor in R1.
785 |         ; Trashes R4 - R6, and R1 (left holding the divisor shifted up by 16 bits).
786 |         ; Returns with quotient in R0, built as 32 fraction bits of |R0| / (R1 << 16), with the dividend's sign.
787 |         ; The divisor must not be zero. The dividend can be negative.
788 |         CMP     R0, #0
789 |         MOVEQ   pc, lr          ; Zero dividend: return 0 straight away
790 | 
791 |         MOVS    R4, R0          ; Store, as we need to check sign
792 |         RSBMI   R0, R0, #0      ; Make positive
793 | 
794 |         MOV     R1, R1, LSL#16  ; Int to Fix
795 |         MOV     R6, #0          ; Result in R6
796 |         MOV     R5, #&80000000  ; Used as a counter until bit is pushed off end
797 | 10      MOVS    R0, R0, LSL#1   ; Double R0 and store status
798 |         CMPCC   R0, R1          ; No bit shifted out: compare the remainder with the divisor
799 |         SUBCS   R0, R0, R1      ; Remainder >= divisor (or a bit was shifted out): subtract
800 |         ORRCS   R6, R6, R5      ; ...and set the current result bit
801 |         MOVS    R5, R5, LSR #1  ; Next lower result bit; carry is set once the bit falls off the end
802 |         BCC     %BT10           ; 32 passes in total
803 | 
804 |         CMP     R4, #0          ; Check the original dividend's sign
805 |         RSBMI   R0, R6, #0      ; Negative dividend: negate the result
806 |         MOVPL   R0, R6          ; Otherwise return it unchanged
807 | 
808 |         MOV pc,lr
809 | 
810 |         EXPORT ProjectVertex ; ProjectVertex(int vertexPtr);
811 | ProjectVertex ROUT
812 |         LDMFD r0!,{r1-r3}  ; Load X, Y, Z from the vertex (r0 now points just past the three words)
813 |         MOVS r3,r3,ASR#8   ; Divide Z by 256
814 |         ADDS r3,r3,#64     ; Push forward on the Z plane a little
815 |         BLE NoDivide       ; Adjusted Z is zero or negative: skip both divisions
816 | 
817 |         STMFD sp!,{r4-r6}  ; Save some registers
818 | 
819 |         ; Dividend (X) is copied from R1 into R4, divisor (Z) is in R3.
820 |         ; The divisor must not be zero. The dividend can be negative.
821 |         MOVS    R4, R1          ; Preserve original X for sign checking
822 |         RSBMI   R4, R4, #0      ; If negative, negate R4 to make it positive
823 | 
824 |         MOV     R5, R3          ; Put the divisor in R5.
825 |         CMP     R5, R4, LSR #1  ; Then double it until
826 | 10      MOVLS   R5, R5, LSL #1  ; 2 * R5 > R4.
827 |         CMP     R5, R4, LSR #1
828 |         BLS     %BT10           ; Loop until 2 * R5 > R4
829 |         MOV     R6, #0          ; Initialise the quotient
830 | 20      CMP     R4, R5          ; Can we subtract R5?
831 |         SUBCS   R4, R4, R5      ; If we can, do so
832 |         ADC     R6, R6, R6      ; Double quotient and add new bit
833 |         MOV     R5, R5, LSR #1  ; Halve R5.
834 |         CMP     R5, R3          ; And loop until we've gone
835 |         BHS     %BT20           ; past the original divisor,
836 | 
837 |         CMP     R1, #0          ; Check original X's sign again
838 |         RSBMI   R1, R6, #0      ; If it was negative, negate the quotient
839 |         MOVPL   R1, R6          ; Move the quotient to R1
840 | 
841 |         ; Dividend (Y) is copied from R2 into R4, divisor (Z) is in R3.
842 |         ; The divisor must not be zero. The dividend can be negative.
843 |         MOVS    R4, R2          ; Preserve original Y for sign checking
844 |         RSBMI   R4, R4, #0      ; If negative, negate R4 to make it positive
845 | 
846 |         MOV     R5, R3          ; Put the divisor in R5.
847 |         CMP     R5, R4, LSR #1  ; Then double it until
848 | 30      MOVLS   R5, R5, LSL #1  ; 2 * R5 > R4.
849 |         CMP     R5, R4, LSR #1
850 |         BLS     %BT30           ; Loop until 2 * R5 > R4
851 |         MOV     R6, #0          ; Initialise the quotient
852 | 40      CMP     R4, R5          ; Can we subtract R5?
853 |         SUBCS   R4, R4, R5      ; If we can, do so
854 |         ADC     R6, R6, R6      ; Double quotient and add new bit
855 |         MOV     R5, R5, LSR #1  ; Halve R5.
856 |         CMP     R5, R3          ; And loop until we've gone
857 |         BHS     %BT40            ; past the original divisor,
858 | 
859 |         CMP     R2, #0          ; Check original Y's sign again
860 |         RSBPL   R2, R6, #0      ; If it was positive, negate the quotient (the sign of Y is flipped, unlike X)
861 |         MOVMI   R2, R6          ; Move the quotient to R2
862 | 
863 |         LDMFD sp!,{r4-r6}  ; Restore some registers
864 | 
865 | NoDivide
866 |         ADD r1,r1,#160     ; Offset X by 160
867 |         ADD r2,r2,#128     ; Offset Y by 128
868 | 
869 |         STMFD r0!,{r1-r3}  ; Store X, Y and the adjusted Z back over the vertex (descending store from the advanced r0)
870 |         MOV pc,lr
871 | 
872 |         END
873 | 


--------------------------------------------------------------------------------
/src/poly.s:
--------------------------------------------------------------------------------
  1 | .set OS_WriteC, 0               // SWI numbers used by the routines in this file
  2 | .set OS_Byte, 6
  3 | .set OS_ChangeDynamicArea, 42
  4 | .set OS_ReadModeVariable, 53
  5 | .set OS_RemoveCursors, 54
  6 | .set OS_ReadDynamicArea, 92
  7 | 
  8 |         .section .text, "ax"
  9 | 
 10 | .set ScreenHeightLimit, 255     // NOTE(review): the visible code compares against a literal #255 rather than this symbol
 11 | 
 12 |         .global EdgeList        // Export the data words that are defined after FillEdgeLists
 13 |         .global FogTable  
 14 |         .global OneOver
 15 |         .global ScreenBank
 16 |         .global ScreenStart
 17 |         .global ScreenMax
 18 |         .global ScreenPartial
 19 | 
 20 | // ====== VDU SETUP =====
 21 | // Select the screen mode (13 when PAL_256 is defined, otherwise 9) and remove the screen cursors.
 22 | 
 23 |         .global  VDUSetup
 24 | VDUSetup:
 25 |         // OS_Byte 3 with r1 = 84; intent per the original note: only enable OS_WriteC VDU output
 26 |         MOV r0,#3
 27 |         MOV r1,#84
 28 |         SWI OS_Byte
 29 | 
 30 |         // Set the screen mode by writing VDU 22 followed by the mode number
 31 |         MOV r0,#22 // VDU 22
 32 |         SWI OS_WriteC
 33 |         .ifdef PAL_256
 34 |         MOV r0,#13 // 256 color mode
 35 |         .else
 36 |         MOV r0,#9 // 16 color mode
 37 |         .endif
 38 |         SWI OS_WriteC
 39 |         SWI OS_RemoveCursors
 40 |         MOVS pc,lr // Return to the caller
 41 | 
 42 |         .global  UpdateMemAddress // (a1/r0: screenStart); a2 (screenMax) is currently ignored, see the disabled store below
 43 | UpdateMemAddress:
 44 |         STR a1,ScreenStart // Record the new screen base address in the ScreenStart data word
 45 |         //STR a2,ScreenMax
 46 |         MOVS pc,lr
 47 | 
 48 | // ====== RESERVE SCREEN BANKS =====
 49 | // Resize the screen dynamic area so that it holds 2 banks of screen memory
 50 | 
 51 |         .global  ReserveScreenBanks
 52 | ReserveScreenBanks:
 53 |         MVN r0,#0 // -1 to get current screen mode
 54 |         MOV r1,#7 // Mode variable 7: number of bytes for entire screen (result is read from r2 below)
 55 |         SWI OS_ReadModeVariable
 56 | 
 57 |         MOV r3,#2   // Double buffered (2 banks)
 58 |         MUL r1,r2,r3 // Double the number of bytes for 1 screen
 59 |         MOV r2,r1    // Copy the required size to r2; it is read back after the SWI below
 60 | 
 61 |         MOV r0,#2 // Read area 2 (aka screen area)
 62 |         SWI OS_ReadDynamicArea // NOTE(review): expected to return the current area size in r1 and leave r2 alone - confirm against the PRM
 63 | 
 64 |         SUB r1,r2,r1 // r1 = required size (2 screens) - value returned in r1, i.e. the size change to request
 65 |         MOV r0,#2
 66 |         SWI OS_ChangeDynamicArea
 67 | //        MOV r0,r1 ; Return the amount the area has changed (in bytes)
 68 |         MOVS pc,lr
 69 | // ====== SWITCH SCREEN BANK =====
 70 | // Shows the bank held in ScreenBank, then advances ScreenBank (1 <-> 2) and selects that bank for drawing.
 71 | 
 72 |         .global  SwitchScreenBank
 73 | SwitchScreenBank:
 74 |         MOV r0,#19 // Wait for refresh
 75 |         SWI OS_Byte
 76 | 
 77 |         LDR r1,ScreenBank // Load the current drawing bank and make it the visible bank
 78 |         MOV r0,#113 // Select the visible bank
 79 |         SWI OS_Byte
 80 | 
 81 |         LDR r1,ScreenBank // Reload, as 113 may corrupt r1
 82 |         ADD r1,r1,#1 // Increment current bank
 83 |         CMP r1,#2    // If greater than 2
 84 |         MOVGT r1,#1  // Reset back to 1
 85 |         STR r1,ScreenBank // Remember the new drawing bank for the next call
 86 | 
 87 |         MOV r0,#112 // Set the buffer bank for drawing
 88 |         SWI OS_Byte
 89 | 
 90 |         MOV pc,lr
 91 | 
 92 | // ====== CLEAR SCREEN =====
 93 | // Fills the screen buffer at ScreenStart with the word in r0, 1280 bytes (32 stores of 10 words) per loop pass.
 94 | 
 95 |         .global ClearScreen // ClearScreen(r0: fill word, r1: non-zero = ScreenMax bytes, zero = ScreenPartial/2 bytes)
 96 | ClearScreen:
 97 |         STMFD sp!,{r4-r11}
 98 |         MOV r3, r0 // Replicate the fill word into r3-r11 (r2 is filled in just before the loop)
 99 |         MOV r4, r0
100 |         MOV r5, r0
101 |         MOV r6, r0
102 |         MOV r7, r0
103 |         MOV r8, r0
104 |         MOV r9, r0
105 |         MOV r10, r0
106 |         MOV r11, r0
107 | 
108 |         CMP r1,#0 // Choose the byte count from the flag in r1
109 |         LDRNE r2,ScreenMax
110 |         LDREQ r2,ScreenPartial
111 |         LDR r1,ScreenStart
112 |         MOVEQ r2,r2,LSR#1 // Flags still come from the CMP above: the partial size is halved
113 |         ADD r12, r1, r2 // r12 = end address
114 |         MOV r2,r0
115 |         // r1 is the write pointer, r12 is the end address
116 | CSloop:
117 |         // Each STMEA writes 10 words (40 bytes) and advances r1
118 |         STMEA r1!,{r2-r11}
119 |         STMEA r1!,{r2-r11}
120 |         STMEA r1!,{r2-r11}
121 |         STMEA r1!,{r2-r11}
122 |         STMEA r1!,{r2-r11}
123 |         STMEA r1!,{r2-r11}
124 |         STMEA r1!,{r2-r11}
125 |         STMEA r1!,{r2-r11}
126 |         STMEA r1!,{r2-r11}
127 |         STMEA r1!,{r2-r11}
128 |         STMEA r1!,{r2-r11}
129 |         STMEA r1!,{r2-r11}
130 |         STMEA r1!,{r2-r11}
131 |         STMEA r1!,{r2-r11}
132 |         STMEA r1!,{r2-r11}
133 |         STMEA r1!,{r2-r11}
134 |         STMEA r1!,{r2-r11}
135 |         STMEA r1!,{r2-r11}
136 |         STMEA r1!,{r2-r11}
137 |         STMEA r1!,{r2-r11}
138 |         STMEA r1!,{r2-r11}
139 |         STMEA r1!,{r2-r11}
140 |         STMEA r1!,{r2-r11}
141 |         STMEA r1!,{r2-r11}
142 |         STMEA r1!,{r2-r11}
143 |         STMEA r1!,{r2-r11}
144 |         STMEA r1!,{r2-r11}
145 |         STMEA r1!,{r2-r11}
146 |         STMEA r1!,{r2-r11}
147 |         STMEA r1!,{r2-r11}
148 |         STMEA r1!,{r2-r11}
149 |         STMEA r1!,{r2-r11}
150 |         CMP r1,r12
151 |         BLT CSloop // The end is only tested once per 1280-byte pass, so a size that is not a multiple of 1280 overruns
152 | 
153 |         LDMFD sp!,{r4-r11}
154 |         MOV pc,lr
155 | 
156 | r0_LongGradient    .req r0   // Gradient of the long edge
157 | r1_ShortGradient   .req r1   // Gradient of a short edge
158 | r2_EdgeList        .req r2   // Edge list
159 | v1_X               .req r7   // V1 X
160 | v1_Y               .req r8   // V1 Y
161 | v2_X               .req r9   // V2 X
162 | v2_Y               .req r10  // V2 Y
163 | v3_X               .req r11  // V3 X
164 | v3_Y               .req r12  // V3 Y
165 | 
166 |         .global FillEdgeLists // FillEdgeLists(r0: pointer to 3 vertices of x,y,z words, r1: color index into the fog table)
167 | FillEdgeLists:
168 |         STMFD sp!,{r1,r4-r12,r14} // Save registers; r1 (color) lands at [sp] and is reloaded from there in Triv_DrawEdges
169 | 
170 |         LDMFD r0!,{v1_X,v1_Y}       // Load 3 2D un-sorted coords
171 |         ADD   r0,r0,#4            // Skip z
172 |         LDMFD r0!,{v2_X,v2_Y}
173 |         ADD   r0,r0,#4            // Skip z
174 |         LDMFD r0,{v3_X,v3_Y}
175 | 
176 |         CMP   v1_X,#320           // Unsigned (LO) chain: negative coords compare as huge values and fail the test
177 |         CMPLO v2_X,#320
178 |         CMPLO v3_X,#320
179 |         CMPLO v1_Y,#255
180 |         CMPLO v2_Y,#255
181 |         CMPLO v3_Y,#255
182 |         BLO TrivialTriangleRoutine // Every x < 320 and every y < 255: no clipping needed
183 | 
184 | // ==========================================
185 | // ========= CLIPPED TRIANGLE ===============
186 | // ==========================================
187 | 
188 | ClippedTriangleRoutine:
189 | 
190 |         CMP   v1_Y, #255
191 |         CMPGE v2_Y, #255
192 |         CMPGE v3_Y, #255
193 |         BGE   EdgeListEnd // All Y coords are off screen bottom
194 | 
195 |         CMP   v1_X, #320
196 |         CMPGE v2_X, #320
197 |         CMPGE v3_X, #320
198 |         BGE   EdgeListEnd // All X coords are off screen right
199 | 
200 |         CMP   v1_Y, #0
201 |         CMPLE v2_Y, #0
202 |         CMPLE v3_Y, #0
203 |         BLE   EdgeListEnd // All Y coords are off screen top
204 | 
205 |         CMP   v1_X, #0
206 |         CMPLE v2_X, #0
207 |         CMPLE v3_X, #0
208 |         BLE   EdgeListEnd // All X coords are off screen left
209 | 
210 |         // Sort V0-V2 by Y, swap where necessary.
211 |         // V0 and V1
212 |         CMP v1_Y,v2_Y
213 |         MOVGT r2,v1_X
214 |         MOVGT v1_X,v2_X
215 |         MOVGT v2_X,r2
216 |         MOVGT r2,v1_Y
217 |         MOVGT v1_Y,v2_Y
218 |         MOVGT v2_Y,r2
219 | 
220 |         // V0 and V2
221 |         CMP v1_Y,v3_Y
222 |         MOVGT r2,v1_X
223 |         MOVGT v1_X,v3_X
224 |         MOVGT v3_X,r2
225 |         MOVGT r2,v1_Y
226 |         MOVGT v1_Y,v3_Y
227 |         MOVGT v3_Y,r2
228 | 
229 |         // V1 and V2
230 |         CMP v2_Y,v3_Y
231 |         MOVGT r2,v2_X
232 |         MOVGT v2_X,v3_X
233 |         MOVGT v3_X,r2
234 |         MOVGT r2,v2_Y
235 |         MOVGT v2_Y,v3_Y
236 |         MOVGT v3_Y,r2
237 | 
238 |         // LONG DELTA CALCULATION
239 |         // We always calculate the long edge first as even if we jump to the bottom half, we still need
240 |         // to step along the long edge to find the correct x starting position.
241 | CalcLongSide:
242 |         // Calculate m between V0 and V2
243 |         SUB r14,v3_Y,v1_Y          // y3 - y1
244 |         SUB r1,v3_X,v1_X           // x3 - x1
245 | 
246 |         CMP r14,#0x0400           // Due to reciprocal limit, we need to limit the gradient inputs
247 |         BLT LongSideTableLookup
248 | 
249 |         STMFD sp!,{r1,r14} // Save r1 and r14: BL overwrites r14 and the divisor goes in r1
250 |         MOV r0,r1          // Dividend = x delta
251 |         MOV r1,r14         // Divisor = y delta
252 |         BL GenericDivide   // Result (long gradient) is returned in r0
253 |         LDMFD sp!,{r1,r14} // Restore the current registers
254 |         B LongSideGradientSafe
255 | 
256 | LongSideTableLookup:
257 |         LDR r6,OneOver          // start of oneOver block
258 |         LDR r3,[r6,r14,LSL#2]    // Load OneOver[y delta] (4 bytes per entry)
259 |         MUL r0_LongGradient,r3,r1  // Store m in r0, r3 is available
260 | LongSideGradientSafe:
261 | 
262 |         // CLIPPING
263 |         // If our middle vertex is above the screen we need the longside to catch up
264 |         // and go straight to drawing the bottom half. By proxy, v1_Y is also offscreen.
265 |         CMP   v2_Y,#0
266 |         MOVLT r4,v1_X,ASL#16                 // x1 to fixed point
267 |         SUBLT r14,v2_Y,v1_Y                  // y2 - y1: rows the long edge must advance to reach v2
268 |         MLALT r4,r0_LongGradient,r14,r4      // r4 = x1 + m * (y2 - y1)
269 |         MOVLT v1_Y,#0                        // Drawing will start from row 0
270 |         LDRLT r2_EdgeList,EdgeList           // Edge list needs to be set up for the jump to the bottom half
271 |         BLT CalcBottomShortSide               // Top half is entirely above the screen, skip it
272 | 
273 | CalcTopShortSide:
274 |         SUB r2,v2_X,v1_X                    // x2 - x1
275 |         SUBS r14,v2_Y,v1_Y                  // y2 - y1, if not greater than 0, we have a flat top triangle
276 |         MOVLE r4,v1_X,ASL#16                    // x1 to fixed point
277 |         LDRLE r2_EdgeList,EdgeList              // Bottom half assumes edge list is already assigned to r2
278 |         ADDLE r2_EdgeList,r2_EdgeList,v1_Y,LSL#2        // Add the Y coord to the list address
279 |         BLE CalcBottomShortSide                         // Flat top triangle, skip the top part
280 | 
281 |         CMP r14,#0x0400 // Due to reciprocal limit, we need to limit the gradient inputs
282 |         BLT TopTableLookup
283 | 
284 |         STMFD sp!,{r0,r2,r4-r6,r14} // Store the current registers
285 |         MOV r0,r2          // Dividend = x delta
286 |         MOV r1,r14         // Divisor = y delta
287 |         BL GenericDivide
288 |         MOV r1,r0          // Short gradient goes in r1 before r0 (long gradient) is restored
289 |         LDMFD sp!,{r0,r2,r4-r6, r14} // Restore the current registers
290 |         B TopGradientSafe
291 | 
292 | TopTableLookup:
293 |         LDR r6,OneOver          // start of oneOver block
294 |         LDR r4,[r6,r14,LSL#2]   // Load OneOver[y delta]
295 |         MUL r1_ShortGradient,r4,r2
296 | 
297 | TopGradientSafe:
298 |         LDR r2_EdgeList,EdgeList
299 | 
300 |         SUB r14,v2_Y,v1_Y       // Need to reset the y delta
301 | 
302 |         MOV r4,v1_X,ASL#16 // x1 to fixed point
303 |         MOV r5,r4
304 |         // r7 is free at this point, use it as a temp
305 | 
306 |         // CLIPPING
307 |         // If y1 < 0, we need to adjust the starting position
308 |         CMP v1_Y,#0
309 |         ADDLT r14,r14,v1_Y              // r14 = y delta minus the rows clipped off the top
310 |         RSBLT r3,v1_Y,#0                // r3 = positive v1_Y (number of clipped rows)
311 |         MOVLT v1_Y,#0                   // v1_Y = 0
312 |         MLALT r4,r0_LongGradient,r3,r4  // r4 += long m * clipped rows
313 |         MLALT r5,r1_ShortGradient,r3,r5 // r5 += short m * clipped rows
314 |         ADDGE r2_EdgeList,r2_EdgeList,v1_Y,LSL#2 // Add the Y coord to the list address
315 |         // If y2 >= 255, we need to trim the top part and reduce the y delta
316 |         CMP v2_Y,#255
317 |         SUBGE r7,v2_Y,#255        // r7 = positive y delta over 255
318 |         SUBGE r14,r14,r7                        // Reduce the y delta accordingly
319 | 
320 | LFillEdgeLists__local_12_2:
321 |                            // Fill the edge list for the top part of the triangle
322 |         MOV r7,r4
323 |         CMP r7,#0
324 |         MOVLT r7,#0               // Clamp the long-edge x to 0 ...
325 |         CMP r7,#0x01400000
326 |         MOVGE r7,#0x01400000      // ... and to 320 (0x0140 in 16.16 fixed point)
327 |         MOV r3,r7,LSR#16
328 | 
329 |         MOV r7,r5
330 |         CMP r7,#0
331 |         MOVLT r7,#0               // Same clamp for the short-edge x
332 |         CMP r7,#0x01400000
333 |         MOVGE r7,#0x01400000
334 | 
335 |         MOV r3,r3,LSL#16          // Pack: long-edge x in the high halfword,
336 |         ORR r3,r3,r7,LSR#16       // short-edge x in the low halfword
337 | 
338 |         STR r3,[r2_EdgeList],#4     // Store the packed x pair for this row
339 |         ADD r4,r4,r0_LongGradient   // Add the gradient to the current x value
340 |         ADD r5,r5,r1_ShortGradient  // Add the gradient to the current x value
341 |         SUBS r14,r14,#1          // Decrement the y counter
342 |         BGT LFillEdgeLists__local_12_2                // Loop until we reach y2. NOTE(review): one entry is stored even when r14 <= 0 on entry
343 | 
344 | CalcBottomShortSide:
345 |         SUB r14,v3_Y,v2_Y         // y3 - y2
346 |         SUB r1_ShortGradient,v3_X,v2_X // x3 - x2
347 | 
348 |         CMP r14,#0x0400            // Due to reciprocal limit, we need to limit the gradient inputs
349 |         BLT BottomTableLookup
350 | 
351 |         STMFD sp!,{r0,r2,r4-r6,r14} // Store the current registers
352 |         MOV r0,r1          // Dividend = x delta
353 |         MOV r1,r14         // Divisor = y delta
354 |         BL GenericDivide
355 |         MOV r1,r0          // Short gradient goes in r1 before r0 (long gradient) is restored
356 |         LDMFD sp!,{r0,r2,r4-r6, r14} // Restore the current registers
357 |         B BottomGradientSafe
358 | 
359 | BottomTableLookup:
360 |         LDR r6,OneOver          // start of oneOver block
361 |         LDR r5,[r6,r14,LSL#2]    // Load OneOver[y delta] (4 bytes per entry)
362 |         MUL r1_ShortGradient,r5,r1_ShortGradient
363 | 
364 | BottomGradientSafe:
365 |         MOV r5,v2_X,ASL#16         // x2 to fixed point
366 | 
367 |         SUB r14,v3_Y,v2_Y      // Need to reset the y delta
368 | 
369 |         // CLIPPING
370 |         // If y2 < 0, we need to adjust the starting position
371 |         CMP   v2_Y,#0
372 |         ADDLT r14,r14,v2_Y              // r14 = y delta minus the rows clipped off the top
373 |         RSBLT r3,v2_Y,#0                // r3 = positive v2_Y (number of clipped rows)
374 |         MLALT r4,r0_LongGradient,r3,r4  // r4 += long m * clipped rows
375 |         MLALT r5,r1_ShortGradient,r3,r5 // r5 += short m * clipped rows
376 |         // If y3 >= 255, we need to adjust the ending position
377 |         CMP   v3_Y,#255
378 |         SUBGE r7,v3_Y,#255        // r7 = positive y delta over 255
379 |         SUBGE r14,r14,r7                        // Reduce the y delta accordingly
380 | 
381 | LFillEdgeLists__local_14_2:
382 |                            // Fill the edge list for the bottom part of the triangle
383 |         MOV r7,r4
384 |         CMP r7,#0
385 |         MOVLT r7,#0               // Clamp the long-edge x to 0 ...
386 |         CMP r7,#0x01400000
387 |         MOVGE r7,#0x01400000      // ... and to 320 (0x0140 in 16.16 fixed point)
388 |         MOV r3,r7,LSR#16
389 | 
390 |         MOV r7,r5
391 |         CMP r7,#0
392 |         MOVLT r7,#0               // Same clamp for the short-edge x
393 |         CMP r7,#0x01400000
394 |         MOVGE r7,#0x01400000
395 | 
396 |         MOV r3,r3,LSL#16          // Pack: long-edge x in the high halfword,
397 |         ORR r3,r3,r7,LSR#16       // short-edge x in the low halfword
398 | 
399 |         STR r3,[r2_EdgeList],#4     // Store the packed x pair for this row
400 |         ADD r4,r4,r0_LongGradient   // Add the gradient to the current x value
401 |         ADD r5,r5,r1_ShortGradient  // Add the gradient to the current x value
402 |         SUBS r14,r14,#1          // Decrement the y counter
403 |         BGT LFillEdgeLists__local_14_2                // Loop until we reach y3. NOTE(review): one entry is stored even when r14 <= 0 on entry
404 | 
405 |         // CLIPPING
406 |         // If y3 >= 255, clamp it to 255 and skip drawing when no rows remain (v1_Y >= clamped v3_Y)
407 |         CMP v3_Y,#255
408 |         MOVGE v3_Y,#255
409 |         CMPGE v1_Y,v3_Y
410 |         BGE EdgeListEnd
411 | 
412 |         B Triv_DrawEdges
413 | 
414 | // ==========================================
415 | // ========= TRIVIAL TRIANGLE ===============
416 | // ==========================================
417 | 
418 | TrivialTriangleRoutine:
419 | 
420 |         // Sort V0-V2 by Y, swap where necessary.
421 |         // V0 and V1
422 |         CMP v1_Y,v2_Y
423 |         MOVGT r2,v1_X
424 |         MOVGT v1_X,v2_X
425 |         MOVGT v2_X,r2
426 |         MOVGT r2,v1_Y
427 |         MOVGT v1_Y,v2_Y
428 |         MOVGT v2_Y,r2
429 | 
430 |         // V0 and V2
431 |         CMP v1_Y,v3_Y
432 |         MOVGT r2,v1_X
433 |         MOVGT v1_X,v3_X
434 |         MOVGT v3_X,r2
435 |         MOVGT r2,v1_Y
436 |         MOVGT v1_Y,v3_Y
437 |         MOVGT v3_Y,r2
438 | 
439 |         // V1 and V2
440 |         CMP v2_Y,v3_Y
441 |         MOVGT r2,v2_X
442 |         MOVGT v2_X,v3_X
443 |         MOVGT v3_X,r2
444 |         MOVGT r2,v2_Y
445 |         MOVGT v2_Y,v3_Y
446 |         MOVGT v3_Y,r2
447 | 
448 |         // LONG DELTA CALCULATION
449 |         // We always calculate the long edge first as even if we jump to the bottom half, we still need
450 |         // to step along the long edge to find the correct x starting position.
451 | Triv_CalcLongSide:
452 |         // Calculate m between V0 and V2
453 |         SUB r14,v3_Y,v1_Y          // y3 - y1
454 |         SUB r1,v3_X,v1_X           // x3 - x1
455 | 
456 |         LDR r6,OneOver          // start of oneOver block; r6 is reused by both short-edge lookups below
457 |         LDR r3,[r6,r14,LSL#2]    // Load OneOver[y delta] (4 bytes per entry)
458 |         MUL r0_LongGradient,r3,r1  // Store m in r0, r3 is available
459 | 
460 | Triv_CalcTopShortSide:
461 |         SUB r2,v2_X,v1_X                    // x2 - x1
462 |         SUBS r14,v2_Y,v1_Y                  // y2 - y1, if not greater than 0, we have a flat top triangle
463 |         MOVLE r4,v1_X,ASL#16                    // x1 to fixed point
464 |         LDRLE r2_EdgeList,EdgeList              // Bottom half assumes edge list is already assigned to r2
465 |         ADDLE r2_EdgeList,r2_EdgeList,v1_Y,LSL#2        // Add the Y coord to the list address
466 |         BLE Triv_CalcBottomShortSide                         // Flat top triangle, skip the top part
467 | 
468 |         LDR r4,[r6,r14,LSL#2]   // Load OneOver[y delta]
469 |         MUL r1_ShortGradient,r4,r2
470 |         LDR r2_EdgeList,EdgeList
471 | 
472 |         MOV r4,v1_X,ASL#16 // x1 to fixed point
473 |         MOV r5,r4          // Both edges start from x1
474 |         // r7 is free at this point, use it as a temp
475 | 
476 |         ADD r2_EdgeList,r2_EdgeList,v1_Y,LSL#2 // Add the Y coord to the list address
477 | 
478 | Triv_TopEdgeList:
479 |                        // Fill the edge list for the top part of the triangle
480 |         MOV r3,r4,LSR#16          // Pack: long-edge x in the high halfword,
481 |         MOV r3,r3,LSL#16
482 |         ORR r3,r3,r5,LSR#16       // short-edge x in the low halfword
483 | 
484 |         STR r3,[r2_EdgeList],#4     // Store the packed x pair for this row
485 |         ADD r4,r4,r0_LongGradient   // Add the gradient to the current x value
486 |         ADD r5,r5,r1_ShortGradient  // Add the gradient to the current x value
487 |         SUBS r14,r14,#1          // Decrement the y counter
488 |         BGT Triv_TopEdgeList                // Loop until we reach y2
489 | 
490 | Triv_CalcBottomShortSide:
491 |         SUB r14,v3_Y,v2_Y         // y3 - y2
492 |         SUB r1_ShortGradient,v3_X,v2_X // x3 - x2
493 | 
494 |         LDR r5,[r6,r14,LSL#2]    // Load OneOver[y delta] (4 bytes per entry)
495 |         MUL r1_ShortGradient,r5,r1_ShortGradient
496 |         MOV r5,v2_X,ASL#16         // x2 to fixed point
497 | 
498 | Triv_BottomEdgeList:
499 |                           // Fill the edge list for the bottom part of the triangle
500 |         MOV r3,r4,LSR#16          // Pack: long-edge x in the high halfword,
501 |         MOV r3,r3,LSL#16
502 |         ORR r3,r3,r5,LSR#16       // short-edge x in the low halfword
503 | 
504 |         STR r3,[r2_EdgeList],#4     // Store the packed x pair for this row
505 |         ADD r4,r4,r0_LongGradient   // Add the gradient to the current x value
506 |         ADD r5,r5,r1_ShortGradient  // Add the gradient to the current x value
507 |         SUBS r14,r14,#1          // Decrement the y counter
508 |         BGT Triv_BottomEdgeList                // Loop until we reach y3
509 | 
510 | Triv_DrawEdges:
511 |         SUB r14,v3_Y,v1_Y          // y3 - y1 (i.e., the number of lines to draw)
512 |         LDR r12,EdgeList
513 |         ADD r12,r12,v1_Y,LSL#2      // Add the top Y coord to the list address
514 |         LDR r11,ScreenStart     // Load the screen mem start location
515 |         MOV r3,v1_Y               // Initial Y position
516 |         .ifdef PAL_256
517 |         MOV r2,r3,LSL#8         // Multiply by 320 in 2 stages (<< 8) + (<< 6)
518 |         ADD r2,r2,r3,LSL#6      // Total Y offset * 320
519 |         .else
520 |         MOV r2,r3,LSL#7         // Multiply by 160 in 2 stages (<< 7) + (<< 5)
521 |         ADD r2,r2,r3,LSL#5      // Total Y offset * 160
522 |         .endif
523 |         ADD r11,r11,r2          // Add Y offset to screen offset start location
524 | 
525 |         LDR r0,FogTable
526 |         LDR r7,[sp]             // Load the color (r1 as pushed on entry to FillEdgeLists)
527 |         ADD r0,r0,r7,LSL#2      // r0 = address of the fog table word for this color
528 |         TST v1_Y,#1             // Does the triangle start on an odd line?
529 |         .ifdef PAL_256
530 |         LDRNE r7,[r0]             // Odd start: r7 = first pattern word,
531 |         LDRNE r8,[r0,#256]       // r8 = the word 256 bytes further on
532 |         LDREQ r8,[r0]             // Even start: the two pattern words are exchanged
533 |         LDREQ r7,[r0,#256]       //
534 |         .else
535 |         LDRNE r7,[r0]             // Odd start: r7 = first pattern word,
536 |         LDRNE r8,[r0,#64]       // r8 = the word 64 bytes (16 words) further on
537 |         LDREQ r8,[r0]             // Even start: the two pattern words are exchanged
538 |         LDREQ r7,[r0,#64]       //
539 |         .endif
540 | 
541 | // ==========================================
542 | // ========= RASTERIZE THE EDGE LIST ========
543 | // ==========================================
544 | 
545 | RasterScanlineLoop:
546 |         LDR r2,[r12],#4         // Load the packed x pair for this row
547 |         MOV r3,r2,LSR#16        // r3 = high halfword (long-edge x)
548 |         MOV r2,r2,LSL#16        // Clear out the high halfword leaving the short-edge x
549 |         MOV r2,r2,LSR#16        // Move back to integer
550 | 
551 |         CMP r3,r2       // Compare the two x values
552 |         BEQ Continue      // Equal: zero-width span, nothing to draw on this row
553 |         EORMI r3,r3,r2    // If r3 < r2, swap them so that r2 is the left x and r3 the right x
554 |         EORMI r2,r3,r2    //
555 |         EORMI r3,r3,r2    //
556 | 
557 |         .ifdef PAL_256
558 |         ADD r9,r11,r2           // Add the left edge x coord to the screen offset
559 |         ADD r10,r11,r3          // Add the right edge x coord to the screen offset
560 | 
561 |         MOV r0,r7       // Load the fog value
562 |         MOV r1,r0
563 |         ANDS r2,r9,#1   // Used to rotate the color
564 |         MOVNE r1,r1,ROR#8  // Rotate the color
565 | 
566 |         SUB r4,r10,r9           // Get the number of pixels left
567 |         CMP r4,#4
568 |         BLT SpinLastBytes            // Fewer than 4 pixels: byte stores only
569 | 
570 |         // Store up to 3 single bytes with STRB until r9 is word aligned
571 |         TST r9,#3
572 |         STRNEB r1,[r9],#1 // Store the color
573 |         MOVNE r1,r1,ROR#8  // Rotate the color
574 |         SUBNE r4,r4,#1
575 |         TSTNE r9,#3
576 |         STRNEB r1,[r9],#1 // Store the color
577 |         MOVNE r1,r1,ROR#8  // Rotate the color
578 |         SUBNE r4,r4,#1
579 |         TSTNE r9,#3
580 |         STRNEB r1,[r9],#1 // Store the color
581 |         MOVNE r1,r1,ROR#8  // Rotate the color
582 |         SUBNE r4,r4,#1
583 |         .else
584 | 
585 |         // Our first check is to see if we have an xL and xR within the same 8-pixel word
586 |         EOR r9, r2, r3 // r9 = bits that differ between xL and xR
587 |         TST r9, #504 // Are xL and xR on the same boundary? (ie, anything outside of 0b111)
588 | 
589 |         // If not, we have an easy job as we just mask 0xFFFFFFFF and shift it by our required pixels
590 |         AND r0, r2, #7 // How far in we are
591 |         MOV r0, r0, LSL #2 // Shift in nibbles
592 |         MVN r1, #0 // 0xFFFFFFFF
593 |         MOV r0, r1, LSL r0 // r0 = mask of the nibbles from xL to the top of the word
594 | 
595 |         // If xL and xR are on the same boundary, clear the mask from xR upwards
596 |         ANDEQ r10, r3, #7
597 |         MOVEQ r10, r10, LSL #2 // Shift in nibbles
598 |         BICEQ r0, r0, r1, LSL r10
599 | 
600 |         // Load existing screen color, mask and write back
601 |         ADD r9, r11, r2, LSR #1 // Add the left edge x coord to the screen offset
602 |         BIC r9, r9, #3 // Move screen buffer back to boundary
603 |         LDR r1, [r9] // Load existing color
604 |         BIC r1, r1, r0 // Mask out existing color
605 |         AND r0, r7, r0 // Keep the new color only inside the mask (the mask is no longer required)
606 |         ORR r0, r0, r1 // Combine masked data
607 |         STR r0, [r9], #4 // Write it back in again
608 |         BEQ Continue // Flags are still from the TST above: a span inside one word is finished
609 | 
610 |         // Otherwise, let's get the end sorted
611 |         ANDS r0, r3, #7 // How far in we are on the xR side
612 |         MOV r0, r0, LSL #2 // Shift in nibbles
613 |         MVN r1, #0 // 0xFFFFFFFF
614 |         MOV r0, r1, LSL r0 // r0 = mask of the nibbles from xR upwards (these keep the existing color)
615 | 
616 |         ADD r10, r11, r3, LSR #1 // Add the right edge x coord to the screen offset
617 |         BICNE r10, r10, #3 // Move screen buffer back to boundary
618 |         LDR r1, [r10] // Load existing color
619 |         AND r1, r1, r0 // Keep the existing color from xR upwards
620 |         BIC r0, r7, r0 // Keep the new color below xR (the mask is no longer required)
621 |         ORR r0, r0, r1 // Combine masked data
622 |         STR r0, [r10], #4 // Write it back in again
623 |         ADD r10, r11, r3, LSR #1 // Recompute the right edge address for the fill below
624 |         .endif
625 | 
626 | QuadBlit:
627 |         .ifdef PAL_256
628 |         MOV r4,r4,LSR#4 // Number of whole 16-byte groups left
629 |         CMP r4,#32
630 |         BGE RotateColor // More groups than the table holds: skip the table
631 |         RSB r4,r4,#32 // reverse order
632 |         MOV r1,r0
633 |         MOV r2,r0
634 |         MOV r3,r0
635 |         ADD pc,pc,r4,LSL#2 // Jump into the table below, skipping r4 of the 32 stores (pc reads as the first STMIA)
636 |         MOV r0,r0 // Padding so that the pc offset above lines up with the first STMIA
637 |         STMIA r9!,{r0-r3}
638 |         STMIA r9!,{r0-r3}
639 |         STMIA r9!,{r0-r3}
640 |         STMIA r9!,{r0-r3}
641 |         STMIA r9!,{r0-r3}
642 |         STMIA r9!,{r0-r3}
643 |         STMIA r9!,{r0-r3}
644 |         STMIA r9!,{r0-r3}
645 |         STMIA r9!,{r0-r3}
646 |         STMIA r9!,{r0-r3}
647 |         STMIA r9!,{r0-r3}
648 |         STMIA r9!,{r0-r3}
649 |         STMIA r9!,{r0-r3}
650 |         STMIA r9!,{r0-r3}
651 |         STMIA r9!,{r0-r3}
652 |         STMIA r9!,{r0-r3}
653 |         STMIA r9!,{r0-r3}
654 |         STMIA r9!,{r0-r3}
655 |         STMIA r9!,{r0-r3}
656 |         STMIA r9!,{r0-r3}
657 |         STMIA r9!,{r0-r3}
658 |         STMIA r9!,{r0-r3}
659 |         STMIA r9!,{r0-r3}
660 |         STMIA r9!,{r0-r3}
661 |         STMIA r9!,{r0-r3}
662 |         STMIA r9!,{r0-r3}
663 |         STMIA r9!,{r0-r3}
664 |         STMIA r9!,{r0-r3}
665 |         STMIA r9!,{r0-r3}
666 |         STMIA r9!,{r0-r3}
667 |         STMIA r9!,{r0-r3}
668 |         STMIA r9!,{r0-r3}
669 | 
670 |         // In theory, there should be less than 16 bytes left, so attempt STR with quads now
671 |         SUB r4,r10,r9           // Get the number of pixels left
672 |         MOV r4,r4,LSR#2 // Number of whole words left
673 |         RSB r4,r4,#4 // reverse order
674 |         ADD pc,pc,r4,LSL#2 // Jump into the table below, skipping r4 of the 4 stores
675 |         MOV r0,r0 // Padding so that the pc offset above lines up with the first STR
676 |         STR r0,[r9],#4
677 |         STR r0,[r9],#4
678 |         STR r0,[r9],#4
679 |         STR r0,[r9],#4
680 | 
681 | RotateColor:
682 |         MOV r1,r0
683 |         ANDS r2,r9,#1   // Used to rotate the color
684 |         MOVNE r1,r1,ROR#8  // Rotate the color
685 | 
686 | SpinLastBytes:
687 |         CMP r9,r10 // Store up to 4 trailing bytes, stopping once r9 reaches the right edge
688 |         STRLTB r1,[r9],#1
689 |         MOVLT r1,r1,ROR#8
690 |         CMPLT r9,r10
691 |         STRLTB r1,[r9],#1
692 |         MOVLT r1,r1,ROR#8
693 |         CMPLT r9,r10
694 |         STRLTB r1,[r9],#1
695 |         MOVLT r1,r1,ROR#8
696 |         CMPLT r9,r10
697 |         STRLTB r1,[r9],#1
698 | 
699 |         .else
700 | 
701 |         MOV r0, r7
702 |         SUB r4, r10, r9 // Get the number of bytes between the left word boundary and the right edge
703 |         MOV r4, r4, LSR #4 // Number of whole 16-byte groups left
704 |         CMP r4, #16 // Reduced from 32 to 16 since bytes are halved
705 |         BGE Continue // More groups than the table holds: skip this row
706 |         RSB r4, r4, #16 // Reverse order, reduced from 32 to 16
707 |         // MOV r0, #&DD
708 |         // EOR r0, r0, r0, LSL #8
709 |         // EOR r0, r0, r0, LSL #16
710 |         MOV r1, r0
711 |         MOV r2, r0
712 |         MOV r3, r0
713 |         ADD pc, pc, r4, LSL #2 // Jump into the table below, skipping r4 of the 16 stores (pc reads as the first STMIA)
714 |         MOV r0, r0 // Padding so that the pc offset above lines up with the first STMIA
715 |         STMIA r9!, {r0-r3} // Reduced number of STMIA instructions by half
716 |         STMIA r9!, {r0-r3}
717 |         STMIA r9!, {r0-r3}
718 |         STMIA r9!, {r0-r3}
719 |         STMIA r9!, {r0-r3}
720 |         STMIA r9!, {r0-r3}
721 |         STMIA r9!, {r0-r3}
722 |         STMIA r9!, {r0-r3}
723 |         STMIA r9!, {r0-r3}
724 |         STMIA r9!, {r0-r3}
725 |         STMIA r9!, {r0-r3}
726 |         STMIA r9!, {r0-r3}
727 |         STMIA r9!, {r0-r3}
728 |         STMIA r9!, {r0-r3}
729 |         STMIA r9!, {r0-r3}
730 |         STMIA r9!, {r0-r3}
731 | 
732 |         // In theory, there should be less than 16 bytes left, so attempt STR with quads now
733 |         SUB r4, r10, r9 // Get the number of bytes left
734 |         MOV r4, r4, LSR #2 // Number of whole words left
735 |         RSB r4, r4, #4 // reverse order
736 |         ADD pc, pc, r4, LSL #2 // Jump into the table below, skipping r4 of the 4 stores
737 |         MOV r0, r0 // Padding so that the pc offset above lines up with the first STR
738 |         STR r0, [r9], #4
739 |         STR r0, [r9], #4
740 |         STR r0, [r9], #4
741 |         STR r0, [r9], #4
742 |         .endif
743 | 
744 | Continue:
745 |         EOR r7, r7, r8 // Swap dither pattern
746 |         EOR r8, r7, r8 //
747 |         EOR r7, r7, r8 //
748 | 
749 |         .ifdef PAL_256
750 |         ADD r11,r11,#320        // Add 320 to the screen offset
751 |         SUBS r14,r14,#1         // Decrement the y counter
752 |         .else
753 |         ADD r11, r11, #160 // Changed from 320 to 160 bytes per scanline
754 |         SUBS r14, r14, #1 // Decrement the y counter
755 |         .endif
756 |         BGT RasterScanlineLoop // Loop until we reach y3
757 | 
758 | EdgeListEnd:
759 |         LDMFD sp!,{r1,r4-r12,r14}  // Restore registers before returning
760 |         MOV pc,lr
761 | 
762 | EdgeList:
763 |                 .word 0          // Address of the edge list: one word per screen row (long-edge x << 16 | short-edge x)
764 | FogTable:
765 |                 .word 0          // Address of the fog table, indexed by color * 4
766 | OneOver:
767 |                 .word 0          // Address of the reciprocal table, indexed by y delta * 4
768 | ScreenBank:
769 |                 .word 1          // Initial screen bank index
770 | ScreenStart:
771 |                 .word 0          // Screen base address, written by UpdateMemAddress
772 | 
773 |         .ifdef PAL_256
774 | ScreenMax:
775 |                 .word 0x014000 // 81920 bytes = 320 * 256
776 | ScreenPartial:
777 |                 .word 0xfa00   // 64000 bytes; ClearScreen halves this value before use
778 |         .else
779 | ScreenMax:
780 |                 .word 0xa000   // Changed from 14000 to A000 (halved for 4-bit mode)
781 | ScreenPartial:
782 |                 .word 0xfa00   // 64000 bytes; ClearScreen halves this value before use
783 |         .endif
784 | 
785 |         .balign 4        // Keep the code that follows word aligned (was a stray "ALIGN:" label, which aligns nothing)
786 | 
787 |         .global KeyPress // KeyPress(int keycode); returns the r1 result of OS_Byte 129 for that key
788 | KeyPress:
789 |         EOR r1,r0,#255 // r1 = keycode with its low 8 bits inverted
790 |         MOV r0,#129
791 |         MOV r2,#255
792 |         SWI OS_Byte
793 |         MOV r0,r1 // r0 contains either 0xFF or 0x00
794 |         MOV pc,lr
795 | 
796 |         // EXPORT GenericDivide (export left commented out: no .global is emitted for this label here)
797 | GenericDivide:
798 |         // Enter with dividend in R0, divisor in R1. Shift-and-subtract division producing a 32-bit scaled quotient.
799 |         // Trashes R4 - R6 (and R1, which is shifted left by 16 in place). Nothing is saved on the stack.
800 |         // Returns with quotient in R0. As written the loop yields floor(|R0| * 65536 / R1) with the sign of the dividend, valid while |R0| < (R1 << 16).
801 |         // The divisor must not be zero. The dividend can be negative. Only the dividend sign is examined, so the divisor is assumed positive.
802 |         CMP     R0, #0          // Zero dividend?
803 |         MOVEQ   pc, lr          // Yes: return straight away with R0 still 0
804 | 
805 |         MOVS    R4, R0          // Store, as we need to check sign
806 |         RSBMI   R0, R0, #0      // Make positive
807 | 
808 |         MOV     R1, R1, LSL#16  // Int to Fix
809 |         MOV     R6, #0          // Result in R6
810 |         MOV     R5, #-0xffff80000000  // Used as a counter until bit is pushed off end. NOTE(review): the literal looks mangled by a conversion step; its low 32 bits are 0x80000000, presumably the intended value (#0x80000000) -- confirm the assembler truncates it that way
811 | LGenericDivide__local_10_3:
812 |                            MOVS    R0, R0, LSL#1   // Double R0 and store status
813 |         CMPCC   R0, R1          // No bit shifted out: compare the doubled value with the scaled divisor
814 |         SUBCS   R0, R0, R1      // Carry set (bit shifted out, or R0 >= R1): subtract the divisor
815 |         ORRCS   R6, R6, R5      // ...and set the current quotient bit
816 |         MOVS    R5, R5, LSR #1  // Move the bit mask down one place; C becomes set when the bit drops off the bottom
817 |         BCC     LGenericDivide__local_10_3  // Keep going until the mask bit has been shifted out (32 passes if R5 started at 0x80000000)
818 | 
819 |         CMP     R4, #0          // Re-test the sign of the original dividend
820 |         RSBMI   R0, R6, #0      // Negative dividend: return the negated quotient
821 |         MOVPL   R0, R6          // Otherwise return the quotient unchanged
822 | 
823 |         MOV pc,lr               // Return to caller
824 | 
825 |         .global ProjectVertex // ProjectVertex(int vertexPtr); divides the X and Y of the three-word vertex at vertexPtr by its biased Z and writes X, Y, Z back in place
826 | ProjectVertex:
827 |         LDMFD r0!,{r1-r3}  // Load X, Y, Z from the vertex (r0 is advanced past the three words)
828 |         MOVS r3,r3,ASR#8   // Divide Z by 256
829 |         ADDS r3,r3,#64     // Push forward on the Z plane a little
830 |         BLE NoDivide       // Biased Z is zero or negative: skip both divisions, so X and Y are only offset below
831 | 
832 |         STMFD sp!,{r4-r6}  // Save some registers
833 | 
834 |         // Enter with dividend (X) in R4, divisor (Z) in R3. Unsigned shift-and-subtract division on |X|.
835 |         // The divisor must not be zero. The dividend can be negative. (Z is known to be > 0 here because of the BLE above.)
836 |         MOVS    R4, R1          // Working copy of X in R4; R1 keeps the original for the later sign check
837 |         RSBMI   R4, R4, #0      // If negative, negate R4 to make it positive
838 | 
839 |         MOV     R5, R3          // Put the divisor in R5.
840 |         CMP     R5, R4, LSR #1  // Then double it until
841 | LProjectVertex__local_10_4:
842 |                            MOVLS   R5, R5, LSL #1  // 2 * R5 > R4.
843 |         CMP     R5, R4, LSR #1
844 |         BLS     LProjectVertex__local_10_4           // Loop until 2 * R5 > R4
845 |         MOV     R6, #0          // Initialise the quotient
846 | LProjectVertex__local_20_4:
847 |                            CMP     R4, R5          // Can we subtract R5?
848 |         SUBCS   R4, R4, R5      // If we can, do so
849 |         ADC     R6, R6, R6      // Double quotient and add new bit
850 |         MOV     R5, R5, LSR #1  // Halve R5.
851 |         CMP     R5, R3          // And loop until R5 has dropped
852 |         BHS     LProjectVertex__local_20_4           // below the original divisor.
853 | 
854 |         CMP     R1, #0          // Check the sign of the original X again
855 |         RSBMI   R1, R6, #0      // If it was negative, negate the quotient
856 |         MOVPL   R1, R6          // Move the quotient to R1
857 | 
858 |         // Enter with dividend (Y) in R4, divisor (Z) in R3. Same division as for X above.
859 |         // The divisor must not be zero. The dividend can be negative.
860 |         MOVS    R4, R2          // Working copy of Y in R4; R2 keeps the original for the later sign check
861 |         RSBMI   R4, R4, #0      // If negative, negate R4 to make it positive
862 | 
863 |         MOV     R5, R3          // Put the divisor in R5.
864 |         CMP     R5, R4, LSR #1  // Then double it until
865 | LProjectVertex__local_30_4:
866 |                            MOVLS   R5, R5, LSL #1  // 2 * R5 > R4.
867 |         CMP     R5, R4, LSR #1
868 |         BLS     LProjectVertex__local_30_4           // Loop until 2 * R5 > R4
869 |         MOV     R6, #0          // Initialise the quotient
870 | LProjectVertex__local_40_4:
871 |                            CMP     R4, R5          // Can we subtract R5?
872 |         SUBCS   R4, R4, R5      // If we can, do so
873 |         ADC     R6, R6, R6      // Double quotient and add new bit
874 |         MOV     R5, R5, LSR #1  // Halve R5.
875 |         CMP     R5, R3          // And loop until R5 has dropped
876 |         BHS     LProjectVertex__local_40_4            // below the original divisor.
877 | 
878 |         CMP     R2, #0          // Check the sign of the original Y again
879 |         RSBPL   R2, R6, #0      // If it was positive, negate the quotient (opposite of the X case, so Y is flipped; presumably because screen Y grows downward -- confirm)
880 |         MOVMI   R2, R6          // If it was negative, move the quotient to R2 unchanged
881 | 
882 |         LDMFD sp!,{r4-r6}  // Restore some registers
883 | 
884 | NoDivide:
885 |         ADD r1,r1,#160     // Offset X by 160 (presumably the horizontal screen centre -- confirm)
886 |         ADD r2,r2,#128     // Offset Y by 128 (presumably the vertical screen centre -- confirm)
887 | 
888 |         STMFD r0!,{r1-r3}  // Store X, Y, Z back to the vertex: the decrementing store overwrites the three words just loaded, and Z is stored in its shifted-and-biased form
889 |         MOV pc,lr          // Return to caller
890 | 
891 | 
892 | 


--------------------------------------------------------------------------------