├── http_shader
    ├── cpp
    │   ├── run.sh
    │   ├── README.md
    │   ├── cppRunner.cpp
    │   └── httpd_ivec4.glsl
    ├── ispc_ivec4
    │   ├── run.sh
    │   ├── README.md
    │   ├── runner_ivec4.ispc
    │   ├── httpd.ispc.h
    │   ├── httpd_ispc_ivec4.cpp
    │   └── httpd_ivec4.glsl
    ├── vulkan
    │   ├── run.sh
    │   ├── README.md
    │   └── httpd_ivec4.glsl
    ├── ispc_int
    │   ├── run.sh
    │   ├── request.txt
    │   ├── README.md
    │   ├── runner.ispc
    │   ├── httpd.ispc.h
    │   ├── httpd_ispc.cpp
    │   └── httpd_int.glsl
    ├── ispc_char
    │   ├── run.sh
    │   ├── request.txt
    │   ├── README.md
    │   ├── runner.ispc
    │   ├── httpd.ispc.h
    │   ├── httpd_ispc.cpp
    │   └── httpd.glsl
    ├── build.sh
    ├── .gitignore
    ├── build_vulkan.sh
    ├── build_cpp.sh
    ├── build_ispc_char.sh
    ├── build_ispc_int.sh
    ├── build_ispc_ivec4.sh
    ├── README.md
    ├── preprocess.js
    └── chr.glsl
├── spirv-io
    ├── test
    │   ├── .gitignore
    │   ├── test_data
    │   │   └── hello.txt
    │   ├── test_file.glsl
    │   ├── test_hashtable.glsl
    │   └── test_array.glsl
    ├── .gitignore
    ├── examples
    │   ├── hello.glsl
    │   ├── hello_1.glsl
    │   ├── rerun.glsl
    │   ├── template_literal.glsl
    │   ├── discard.glsl
    │   ├── bm.glsl
    │   ├── clock.glsl
    │   ├── hello_dlopen_gh.glsl
    │   ├── memalloc.glsl
    │   ├── wait_for_stdin.glsl
    │   ├── cat.glsl
    │   ├── hello_dlopen.glsl
    │   ├── http_client.glsl
    │   ├── listen3.glsl
    │   ├── grep_cpu.glsl
    │   └── grep.glsl
    ├── lib
    │   ├── assert.glsl
    │   ├── thread_id.glsl
    │   ├── dlopen.glsl
    │   ├── stat.glsl
    │   ├── statemachine.glsl
    │   ├── binary_data.glsl
    │   ├── chr.glsl
    │   ├── malloc.glsl
    │   ├── errno.glsl
    │   └── hashtable.glsl
    ├── Makefile
    ├── bin
    │   ├── glsl2spv
    │   ├── gls_generate_tests.js
    │   └── gls_resolve_includes.js
    └── src
    │   ├── test_string.cpp
    │   ├── test_file.cpp
    │   ├── gls.cpp
    │   ├── gls_cpu.cpp
    │   └── parse_spv.hpp
├── docker
    ├── .gitignore
    ├── spirv-cross-linux-x86-64
    ├── glsl2wasm.sh
    ├── ispc2wasm.sh
    ├── spirv-runner
    │   ├── fix_ispc_input_output_order
    │   ├── Makefile
    │   ├── runner.ispc
    │   ├── program.h
    │   └── ispcRunner.cpp
    ├── Dockerfile
    ├── README.md
    └── mandelbrot.ispc
├── .gitignore
├── Makefile
├── include
    └── spirv_cross
    │   ├── image.hpp
    │   ├── barrier.hpp
    │   ├── thread_group.hpp
    │   ├── sampler.hpp
    │   └── external_interface.h
├── src
    ├── mandel.comp
    └── mandel.cpp
└── README.md


/http_shader/cpp/run.sh:
--------------------------------------------------------------------------------
1 | time ./cppRunner
2 | 


--------------------------------------------------------------------------------
/spirv-io/test/.gitignore:
--------------------------------------------------------------------------------
1 | test_data/
2 | 


--------------------------------------------------------------------------------
/http_shader/ispc_ivec4/run.sh:
--------------------------------------------------------------------------------
1 | time ./httpd_ivec4
2 | 


--------------------------------------------------------------------------------
/http_shader/vulkan/run.sh:
--------------------------------------------------------------------------------
1 | time ./vulkanRunner
2 | 


--------------------------------------------------------------------------------
/spirv-io/test/test_data/hello.txt:
--------------------------------------------------------------------------------
1 | Hello, world!
2 | 


--------------------------------------------------------------------------------
/docker/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.wasm
3 | *.js
4 | *.html
5 | 


--------------------------------------------------------------------------------
/http_shader/ispc_int/run.sh:
--------------------------------------------------------------------------------
1 | time ./httpd < request.txt
2 | 


--------------------------------------------------------------------------------
/http_shader/ispc_char/run.sh:
--------------------------------------------------------------------------------
1 | time ./httpd < request.txt
2 | 


--------------------------------------------------------------------------------
/http_shader/build.sh:
--------------------------------------------------------------------------------
1 | # Build every ISPC variant, then the Vulkan runner.
2 | # There is no build_ispc.sh in this directory; the per-variant scripts are called instead.
3 | ./build_ispc_char.sh
4 | ./build_ispc_int.sh
5 | ./build_ispc_ivec4.sh
6 | ./build_vulkan.sh
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | src/*.js
2 | src/*.html
3 | src/*.wasm
4 | src/*.spv
5 | src/*.spv.cpp
6 | src/*.o
7 | 


--------------------------------------------------------------------------------
/spirv-io/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | test_file
3 | test_string
4 | grep
5 | gls
6 | *.comp
7 | *.full*
8 | 


--------------------------------------------------------------------------------
/docker/spirv-cross-linux-x86-64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kig/spirv-wasm/HEAD/docker/spirv-cross-linux-x86-64


--------------------------------------------------------------------------------
/spirv-io/examples/hello.glsl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env gls
2 | 
3 | #include <file.glsl>
4 | 
5 | println("Hello, world!");
6 | 


--------------------------------------------------------------------------------
/http_shader/.gitignore:
--------------------------------------------------------------------------------
 1 | *.comp
 2 | *.spv
 3 | *.swp
 4 | *.o
 5 | vulkanRunner
 6 | httpd_i
 7 | httpd
 8 | httpd_ivec4
 9 | cppRunner
10 | 


--------------------------------------------------------------------------------
/spirv-io/examples/hello_1.glsl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env gls
2 | #include "file.glsl"
3 | ThreadLocalCount = 1;
4 | ThreadGroupCount = 1;
5 | 
6 | println("Hello, world!");
7 | 


--------------------------------------------------------------------------------
/spirv-io/lib/assert.glsl:
--------------------------------------------------------------------------------
1 | #include <file.glsl>
2 | 
3 | #define assert(f) { if (!(f)) { FREE_ALL(eprintln(concat(__FILE__, ":", str(__LINE__), " Assertion failed: ", #f ))); } }
4 | 


--------------------------------------------------------------------------------
/http_shader/build_vulkan.sh:
--------------------------------------------------------------------------------
1 | cd vulkan &&
2 | cpp httpd_ivec4.glsl | node ../preprocess.js > out.comp &&
3 | glslangValidator -V -o httpd.spv out.comp &&
4 | clang++ -lvulkan -lpthread -m64 -O2 -o vulkanRunner vulkanRunner.cpp -std=c++11
5 | 


--------------------------------------------------------------------------------
/http_shader/build_cpp.sh:
--------------------------------------------------------------------------------
1 | cd cpp &&
2 | cpp httpd_ivec4.glsl | node ../preprocess.js > out.comp &&
3 | glslangValidator -V -o httpd.spv out.comp &&
4 | spirv-cross --cpp --output httpd.cpp httpd.spv &&
5 | clang++ -lpthread -I../../include -lm -O3 -o cppRunner cppRunner.cpp -std=c++11
6 | 


--------------------------------------------------------------------------------
/spirv-io/examples/rerun.glsl:
--------------------------------------------------------------------------------
 1 | #include <file.glsl>
 2 | 
 3 | void main() { // invocation 0 prints a greeting and requests another run while runCount is below 10
 4 |     if (ThreadId == 0) {
 5 |         if (runCount < 10) {
 6 |             println(concat("Hello from run ", str(runCount)));
 7 |             rerunProgram = RERUN_NOW; // NOTE(review): presumably asks the host to relaunch the shader right away - confirm in file.glsl
 8 |         }
 9 |     }
10 | }
11 | 


--------------------------------------------------------------------------------
/spirv-io/examples/template_literal.glsl:
--------------------------------------------------------------------------------
 1 | #include <file.glsl>
 2 | 
 3 | ThreadLocalCount = 1;
 4 | ThreadGroupCount = 1;
 5 | 
 6 | void main() {
 7 |     float x = 1.2345;
 8 |     string s = `x = ${x}
 9 | x * x = ${
10 |     x * x
11 | }
12 | Hello template literal!`;
13 | 
14 |     println(s);
15 | }
16 | 


--------------------------------------------------------------------------------
/spirv-io/examples/discard.glsl:
--------------------------------------------------------------------------------
 1 | #include <file.glsl>
 2 | ThreadLocalCount = 8;
 3 | ThreadGroupCount = 1;
 4 | 
 5 | void main() {
 6 |     for (int i = 0; i < 10; i++) {
 7 |         if (i > 3 && ThreadId > 2) stopIO = 1;
 8 |         if (ThreadId == 0 && i > 6) exitSync(0);
 9 |         println(str(ThreadId), ": ", str(i));
10 |     }
11 | }
12 | 
13 | 


--------------------------------------------------------------------------------
/spirv-io/lib/thread_id.glsl:
--------------------------------------------------------------------------------
1 | int32_t ThreadGroupCount = int(gl_NumWorkGroups.x); // signed aliases for the compute built-ins; only the .x components are read
2 | int32_t ThreadLocalCount = int(gl_WorkGroupSize.x); // work-group size along x
3 | int32_t ThreadCount = ThreadGroupCount * ThreadLocalCount; // group count times group size
4 | int32_t ThreadId = int(gl_GlobalInvocationID.x); // global invocation index along x
5 | int32_t ThreadGroupId = int(gl_WorkGroupID.x); // index of this work group along x
6 | int32_t ThreadLocalId = int(gl_LocalInvocationID.x); // index of this invocation inside its work group along x
7 | 


--------------------------------------------------------------------------------
/http_shader/build_ispc_char.sh:
--------------------------------------------------------------------------------
1 | cd ispc_char &&
2 | cpp httpd.glsl | node ../preprocess.js > out.comp &&
3 | glslangValidator -V -o httpd.spv out.comp &&
4 | spirv-cross-ispc --ispc --output httpd.ispc httpd.spv &&
5 | 
6 | ispc -O3 -o httpd.ispc.o runner.ispc &&
7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp
8 | 


--------------------------------------------------------------------------------
/http_shader/build_ispc_int.sh:
--------------------------------------------------------------------------------
1 | cd ispc_int &&
2 | cpp httpd_int.glsl | node ../preprocess.js > out.comp &&
3 | glslangValidator -V -o httpd.spv out.comp &&
4 | spirv-cross-linux-x86-64 --ispc --output httpd.ispc httpd.spv &&
5 | 
6 | ispc -O3 -o httpd.ispc.o runner.ispc &&
7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp
8 | 


--------------------------------------------------------------------------------
/http_shader/ispc_int/request.txt:
--------------------------------------------------------------------------------
 1 | GET /gl HTTP/1.1
 2 | Host: localhost:9000
 3 | User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0
 4 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
 5 | Accept-Language: en-US,en;q=0.5
 6 | Accept-Encoding: gzip, deflate
 7 | Connection: keep-alive
 8 | Upgrade-Insecure-Requests: 1
 9 | 
10 | 


--------------------------------------------------------------------------------
/http_shader/build_ispc_ivec4.sh:
--------------------------------------------------------------------------------
1 | cd ispc_ivec4 &&
2 | cpp httpd_ivec4.glsl | node ../preprocess.js > out_int.comp &&
3 | glslangValidator -V -o httpd.spv out_int.comp &&
4 | spirv-cross-ispc --ispc --output httpd.ispc httpd.spv &&
5 | ispc -O3 --target=avx2-i64x4 -o httpd.ispc.o runner_ivec4.ispc &&
6 | clang++ -pthread -std=c++11 -lm -pthread -O3 -o httpd_ivec4 httpd.ispc.o ../tasksys.cpp httpd_ispc_ivec4.cpp
7 | 


--------------------------------------------------------------------------------
/http_shader/ispc_char/request.txt:
--------------------------------------------------------------------------------
 1 | GET /gl HTTP/1.1
 2 | Host: localhost:9000
 3 | User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0
 4 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
 5 | Accept-Language: en-US,en;q=0.5
 6 | Accept-Encoding: gzip, deflate
 7 | Connection: keep-alive
 8 | Upgrade-Insecure-Requests: 1
 9 | 
10 | 


--------------------------------------------------------------------------------
/docker/glsl2wasm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Compiles a GLSL source to WebAssembly inside the ispc-wasm Docker image and copies the outputs back.
 3 | if [[ -z $1 ]]
 4 | then
 5 |  echo
 6 |  echo USAGE $0 source.glsl
 7 |  echo Generates source.glsl.html, source.glsl.worker.js, source.glsl.js, and source.glsl.wasm
 8 |  echo
 9 |  exit 1
10 | fi
11 | # The file name is passed to the inner shell as its own $1, so it is never re-parsed as shell code.
12 | docker run -i -v "$(pwd)":/tmp --rm ispc-wasm:latest bash -c 'cd /usr/local/src/spirv-wasm && cp "/tmp/$1" program.comp.glsl && make TARGET="$1" build && cp "$1".{html,wasm,worker.js,js} /tmp/' _ "$1"
13 | 
14 | 


--------------------------------------------------------------------------------
/docker/ispc2wasm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Compiles an ISPC source to a WebAssembly object and module inside the ispc-wasm Docker image.
 3 | if [[ -z $1 ]]
 4 | then
 5 |  echo
 6 |  echo USAGE $0 source.ispc
 7 |  echo Generates source.ispc.o and source.ispc.wasm
 8 |  echo
 9 |  exit 1
10 | fi
11 | # The file name is passed to the inner shell as its own $1, so it is never re-parsed as shell code.
12 | docker run -i -v "$(pwd)":/tmp --rm ispc-wasm:latest bash -c 'ispc --target=wasm-i32x4 --nostdlib --emit-llvm-text -o - "/tmp/$1" | llc -O3 -filetype=obj - -o "/tmp/$1.o" && wasm-ld --no-entry --export-all --allow-undefined -o "/tmp/$1.wasm" "/tmp/$1.o"' _ "$1"
13 | 
14 | 


--------------------------------------------------------------------------------
/spirv-io/examples/bm.glsl:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "file.glsl"
 3 | 
 4 | layout ( local_size_x = 32, local_size_y = 1, local_size_z = 1 ) in;
 5 | 
 6 | #define TEST(testFn) FREE(FREE_IO(printTest(testFn(), #testFn)))
 7 | 
 8 | void main() { // benchmark entry point: one IO ping-pong per invocation, then a summary line
 9 |     initGlobals();
10 | 
11 |     alloc_t s = malloc(2048); // 2048 bytes per invocation, matching the total reported below
12 |     awaitIO(_ioPingPong(s));
13 | 
14 |     if (ThreadId == 0) { // spelled ThreadId, as declared in lib/thread_id.glsl; only invocation 0 reports
15 |         println(concat("IO pingpong on ", str(ThreadCount), " threads, total bytes ", str(ThreadCount * 2048)));
16 |     }
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/spirv-io/Makefile:
--------------------------------------------------------------------------------
 1 | # Builds bin/gls and bin/gls_cpu from src/ and installs the contents of bin/ and lib/.
 2 | CPP := clang++
 3 | CFLAGS := -m64 -march=native -mtune=native -std=c++17 -I../include -O2
 4 | # Link libraries. They are placed after the source file on the command line because
 5 | # linkers that resolve symbols left to right can discard libraries listed first.
 6 | LDFLAGS := -ldl -llz4 -lzstd -lvulkan -lpthread
 7 | 
 8 | # No target below is named after the file it produces, so declare them phony;
 9 | # a stray file called gls, gls_cpu, install or all must not block the build.
10 | .PHONY: gls gls_cpu install all
11 | 
12 | gls:
13 | 	$(CPP) $(CFLAGS) -o bin/gls src/gls.cpp $(LDFLAGS)
14 | 
15 | gls_cpu:
16 | 	$(CPP) $(CFLAGS) -o bin/gls_cpu src/gls_cpu.cpp $(LDFLAGS)
17 | 
18 | # PREFIX and DESTDIR are not set in this file; pass them on the command line.
19 | install: gls
20 | 	install -d $(DESTDIR)$(PREFIX)/lib/
21 | 	install -m 644 lib/* $(DESTDIR)$(PREFIX)/lib/
22 | 	install -d $(DESTDIR)$(PREFIX)/bin/
23 | 	install -m 755 bin/* $(DESTDIR)$(PREFIX)/bin/
24 | 
25 | all: gls
26 | 


--------------------------------------------------------------------------------
/docker/spirv-runner/fix_ispc_input_output_order:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env ruby
 2 | 
 3 | STDIN.read.sub(
 4 | 	/program_ispc_main\(.*uniform struct outputs&\s+([^,]+),\s*uniform struct inputs&\s+([^\)]+)/
 5 | ) {|m| 
 6 | 	"program_ispc_main(uniform struct inputs& #{$2}, uniform struct outputs\& #{$1}" 
 7 | }.sub(
 8 | 	/program_ispc_main\(.*uniform struct inputs/
 9 | ) {|m| 
10 | 	"program_ispc_main(uniform int3 gl_NumWorkGroups, uniform int3 gl_WorkGroupID, varying int3 gl_LocalInvocationID, varying int3 gl_GlobalInvocationID, varying int gl_LocalInvocationIndex, uniform struct inputs" 
11 | }.split(/^export void /)[0].display
12 | 


--------------------------------------------------------------------------------
/spirv-io/bin/glsl2spv:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 4 | 
 5 | if [[ -z $1 ]]
 6 | then
 7 | 	echo "USAGE: glsl2spv program.glsl [program.spv]"
 8 | 	exit 1
 9 | fi
10 | 
11 | target=$2
12 | 
13 | if [[ -z $target ]]
14 | then
15 | 	target=`echo "$1" | sed -e 's/glsl$/spv/i'`
16 | fi
17 | 
18 | tmp_dir=$(mktemp -d -t gls-XXXXXXXXXX)
19 | tmp=${tmp_dir}/$(basename "$1")
20 | 
21 | grep -v '^#!' "$1" | node "$DIR"/gls_resolve_includes.js |
22 | cpp -nostdinc -x c -I/usr/local/lib/glsl -o "${tmp}.full" &&
23 | node "$DIR"/gls_preprocess.js "${tmp}.full" &&
24 | glslangValidator -V -o "${target}" "${tmp}.full.comp" | grep -v "^${tmp}.full.comp$"
25 | if [[ -f "${target}" ]]
26 | then
27 | 	cat "${tmp}.full.defs.spv" >> "${target}" #&&
28 | 	#rm -r "${tmp_dir}"
29 | fi
30 | 


--------------------------------------------------------------------------------
/http_shader/ispc_char/README.md:
--------------------------------------------------------------------------------
 1 | Change the buffer type from int to int8 in the generated ISPC files.
 2 | 
 3 | Easiest way to run this is without rebuilding the ISPC files.
 4 | 
 5 | ```bash
 6 | ispc -O3 -o httpd.ispc.o runner.ispc &&
 7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp &&
 8 | sh run.sh
 9 | # 59
10 | # HTTP/1.1 200 OK
11 | # Content-Type: text/plain
12 | #
13 | # Hello, World!
14 | # Elapsed: 25451 ms
15 | # Million requests per second: 10.300
16 | # 423.24user 2.08system 0:25.56elapsed 1663%CPU (0avgtext+0avgdata 790060maxresident)k
17 | # 0inputs+0outputs (0major+262349minor)pagefaults 0swaps
18 | ```
19 | 
20 | If you want to do a full build:
21 | 
22 | ```bash
23 | (cd .. && sh build_ispc_char.sh)
24 | # Do the type change in httpd.ispc, changing int to int8 in the buffers.
25 | sh run.sh
26 | ```
27 | 


--------------------------------------------------------------------------------
/spirv-io/examples/clock.glsl:
--------------------------------------------------------------------------------
 1 | #include <file.glsl>
 2 | 
 3 | ThreadGroupCount = 1;
 4 | ThreadLocalCount = 1;
 5 | 
 6 | void main() {
 7 |     uint64_t ct = clockARB();
 8 |     uint64_t rt = clockRealtimeEXT();
 9 |     int64_t t0 = microTimeSync();
10 |     int64_t t1 = microTimeSync();
11 |     int64_t t2 = microTimeSync();
12 |     int64_t t3 = microTimeSync();
13 |     int64_t t4 = microTimeSync();
14 |     uint64_t ct2 = clockARB();
15 |     uint64_t rt2 = clockRealtimeEXT();
16 | 
17 |     println(concat("Wallclock time: ", str(t0)));
18 |     println(concat("Wallclock time: ", str(t1)));
19 |     println(concat("Wallclock time: ", str(t2)));
20 |     println(concat("Wallclock time: ", str(t3)));
21 |     println(concat("Wallclock time: ", str(t4)));
22 |     println(concat("clock: ", str(ct), " elapsed ", str(ct2-ct)));
23 |     println(concat("clockRealtime: ", str(rt), " elapsed ", str(rt2-rt)));
24 | }
25 | 


--------------------------------------------------------------------------------
/spirv-io/lib/dlopen.glsl:
--------------------------------------------------------------------------------
 1 | io dlcall(uint64_t lib, string symbol, alloc_t args, alloc_t result) {
 2 |     return requestIO(ioRequest(IO_DLCALL, IO_START, int64_t(lib), strLen(args), symbol, args, 0,0,result,0,0));
 3 | }
 4 | 
 5 | io dlopen(string path, alloc_t dstBuffer) {
 6 |     return requestIO(ioRequest(IO_DLOPEN, IO_START, 0, 0, path, dstBuffer, 0,0,string(0,0),0,0));
 7 | }
 8 | 
 9 | uint64_t dlopenSync(string path) {
10 |     uint64_t lib;
11 |     FREE(FREE_IO(
12 |         string res = awaitIO(dlopen(path, malloc(8)), true);
13 |         lib = readU64fromIO(res.x);
14 |     ))
15 |     return lib;
16 | }
17 | 
18 | string dlcallSync(uint64_t lib, string symbol, alloc_t args, alloc_t result) {
19 |     string res;
20 |     FREE_IO( res = awaitIO(dlcall(lib, symbol, args, result)); )
21 |     return res;
22 | }
23 | 
24 | void dlcallSync(uint64_t lib, string symbol, alloc_t args) {
25 |     dlcallSync(lib, symbol, args, string(-4,-4));
26 | }
27 | 


--------------------------------------------------------------------------------
/spirv-io/examples/hello_dlopen_gh.glsl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env gls
 2 | 
 3 | #include <file.glsl>
 4 | #include <https://raw.githubusercontent.com/kig/spirv-wasm/master/spirv-io/lib/dlopen.glsl> @ dbc62e1bd6df8765f90b9f54e72bb644e20489ad17fefae51450cdf5321ca769
 5 | 
 6 | ThreadLocalCount = 1;
 7 | ThreadGroupCount = 1;
 8 | 
 9 | writeSync("hello.c", "#include <stdio.h>\nvoid hello(char* s){printf(\"Hello, %s!\\n\",s);}\nvoid sub(int* v, unsigned int vlen, int* res, unsigned int reslen) { res[0] = v[0]-v[1]; }");
10 | awaitIO(runCmd("cc --shared -o hello.so hello.c"));
11 | uint64_t lib = dlopenSync("./hello.so");
12 | dlcallSync(lib, "hello", "GLSL\u0000", string(-4,-4));
13 | alloc_t params = malloc(8);
14 | i32heap[params.x/4] = 7;
15 | i32heap[params.x/4+1] = 12;
16 | alloc_t res = dlcallSync(lib, "sub", params, malloc(4));
17 | int32_t subResult = readI32heap(res.x);
18 | println(concat(str(i32heap[params.x/4]), " - ", str(i32heap[params.x/4+1]), " = ", str(subResult)));
19 | 


--------------------------------------------------------------------------------
/http_shader/README.md:
--------------------------------------------------------------------------------
 1 | # http_shader
 2 | 
 3 | GLSL shaders that parse HTTP requests and write out HTTP responses. 
 4 | 
 5 | There are three different approaches here:
 6 | 
 7 | * `httpd.glsl` - Turn an inputBuffer of 8-bit ASCII characters into a buffer of ints, parse requests in the int buffer, create responses in another buffer of ints, convert the response buffer into an 8-bit char outputBuffer.
 8 | * `httpd_int.glsl` - Same thing, but when converting the 8-bit chars to ints, turn them into SoA (structure-of-arrays) format (`[req_0_0, req_1_0, req_2_0, req_3_0, ... req_31_0, req_0_1, ...]`) to make things go faster.
 9 | * `httpd_ivec4.glsl` - Just deal with the inputBuffer & outputBuffer directly. Using ivec4s because doesn't that sound painful? The ISPC version does >100 million "Hello, world!" requests per second on a TR2950X 16-core.
10 | 
11 | I ... found out that SPIR-V in Vulkan 1.2 supports 8-bit ints as a native type. I want to use those instead of writing helper functions to get/set individual bytes in ivec4s.
12 | 
13 | In a shocking turn of events, running these on the CPU via SPIR-V to ISPC performs better than the GPU. Even after removing the buffer uploads and downloads.
14 | 


--------------------------------------------------------------------------------
/http_shader/preprocess.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs');
 2 | 
 3 | const source = fs.readFileSync(0);
 4 | 
 5 | const segments = source.toString().replace(/^# .*/mg, '').split(/("|')/g);
 6 | 
 7 | let inString = false;
 8 | let inChar = false;
 9 | let lastSegment = '';
10 | let stringSegments = [];
11 | 
12 | const output = [];
13 | 
14 | for (segment of segments) {
15 | 	if (segment === '"' && lastSegment[lastSegment.length-1] !== '\\') {
16 | 		inString = !inString;
17 | 		if (!inString) {
18 | 			const str = stringSegments.join('');
19 | 			output.push(`{${Buffer.from(JSON.parse('"'+str+'"')).join(",")}}`);
20 | 		}
21 | 		stringSegments = [];
22 | 	} else if (inString) {
23 | 		stringSegments.push(segment);
24 | 	} else if (segment === "'" && lastSegment[lastSegment.length-1] !== '\\') {
25 | 		inChar = !inChar;
26 | 		if (!inChar) {
27 | 			const str = stringSegments.join('');
28 | 			output.push(`${Buffer.from(eval("'"+str+"'")).readInt32LE(0)}`);
29 | 		}
30 | 		stringSegments = [];
31 | 	} else if (inChar) {
32 | 		stringSegments.push(segment);
33 | 	} else {
34 | 		output.push(segment);
35 | 	}
36 | 	lastSegment = segment;
37 | }
38 | 
39 | console.log(output.join(''));
40 | 


--------------------------------------------------------------------------------
/http_shader/ispc_int/README.md:
--------------------------------------------------------------------------------
 1 | Change the request and response buffer types from int to int8 in the generated ISPC files for ~2x perf.
 2 | 
 3 | Easiest way to run this is without rebuilding the ISPC files.
 4 | 
 5 | ```bash
 6 | ispc -O3 -o httpd.ispc.o runner.ispc &&
 7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp &&
 8 | sh run.sh
 9 | # 367
10 | # HTTP/1.1 200 OK
11 | # Content-Type: text/plain
12 | #
13 | # Hello, World!
14 | # Host: localhost:9000
15 | # User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0
16 | # Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
17 | # Accept-Language: en-US,en;q=0.5
18 | # Accept-Encoding: gzip, deflate
19 | # Connection: keep-alive
20 | # Upgrade-Insecure-Requests:
21 | #
22 | # Elapsed: 2848 ms
23 | # Million requests per second: 11.506
24 | #
25 | # 84.56user 0.44system 0:02.86elapsed 2965%CPU (0avgtext+0avgdata 98164maxresident)k
26 | # 0inputs+0outputs (0major+23821minor)pagefaults 0swaps
27 | ```
28 | 
29 | If you want to do a full build:
30 | 
31 | ```bash
32 | (cd .. && sh build_ispc_int.sh)
33 | # Do the type change in httpd.ispc, changing int to int8 in the request and response buffers.
34 | sh run.sh
35 | ```
36 | 


--------------------------------------------------------------------------------
/docker/spirv-runner/Makefile:
--------------------------------------------------------------------------------
 1 | TARGET := TEST
 2 | 
 3 | builtins.o: /usr/local/src/ispc-wasm/builtins/builtins.c
 4 | 	emcc -DWASM_IMPLEMENTATION /usr/local/src/ispc-wasm/builtins/builtins.c -O3 -s EXPORTED_FUNCTIONS='["___wasm_do_print", "___wasm_clock"]' -c -o builtins.o
 5 | 
 6 | tasksys.o: /usr/local/src/ispc-wasm/examples/tasksys.cpp
 7 | 	emcc -DWASM -DISPC_USE_PTHREADS -s USE_PTHREADS=1 /usr/local/src/ispc-wasm/examples/tasksys.cpp -I./ -O2 -c -o tasksys.o
 8 | 
 9 | ispcRunner.o: ispcRunner.cpp
10 | 	emcc -DWASM ispcRunner.cpp -I./ -O3 -s EXPORTED_FUNCTIONS='["_main", "_run"]' -c -o ispcRunner.o -msimd128 -s SIMD=1
11 | 
12 | runner.ispc.o: program.comp.glsl
13 | 	glslangValidator -V -o program.spv program.comp.glsl
14 | 	spirv-cross-ispc --ispc --output program.ispc.raw program.spv
15 | 	ruby fix_ispc_input_output_order < program.ispc.raw > program.ispc
16 | 	ispc runner.ispc -O2 --target=wasm-i32x4 -o runner.ispc.o
17 | 
18 | build: builtins.o tasksys.o ispcRunner.o runner.ispc.o
19 | 	emcc -O3 ispcRunner.o builtins.o tasksys.o runner.ispc.o -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=64 -s TOTAL_MEMORY=268435456 -o $(TARGET).html -msimd128 -s SIMD=1 -s EXPORTED_FUNCTIONS='["_main", "_run"]' -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]'
20 | 
21 | all: builtins.o tasksys.o ispcRunner.o runner.ispc.o build
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Turns every src/*.comp shader into src/*.html: GLSL -> SPIR-V (glslangValidator)
 2 | # -> C++ (spirv-cross), compiled and linked with the matching src/*.cpp driver.
 3 | # NOTE(review): the -s WASM=1 style flags look like Emscripten settings, so CXX is
 4 | # presumably expected to be em++ - confirm how this Makefile is invoked.
 5 | SOURCES := $(wildcard src/*.comp)
 6 | SPIRV := $(SOURCES:.comp=.spv)
 7 | CPP_INTERFACE := $(SOURCES:.comp=.spv.cpp)
 8 | CPP_DRIVER := $(SOURCES:.comp=.cpp)
 9 | EXECUTABLES := $(SOURCES:.comp=.html)
10 | OBJECTS := $(CPP_DRIVER:.cpp=.o) $(CPP_INTERFACE:.cpp=.o)
11 | 
12 | TOTAL_MEMORY := 67108864
13 | TOTAL_THREADS := 16
14 | USE_THREADS := 1
15 | USE_SIMD := 0
16 | 
17 | # Assignments inside the conditionals are indented with spaces: a tab-indented
18 | # line is parsed as a recipe line as soon as it follows a rule.
19 | ifeq ($(USE_THREADS), 1)
20 |     THREAD_FLAGS := -pthread -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=$(TOTAL_THREADS)
21 | endif
22 | ifeq ($(USE_SIMD), 1)
23 |     SIMD_FLAGS := -msimd128 -s SIMD=1
24 | endif
25 | 
26 | CXXFLAGS += -std=c++11 -Iinclude -Isrc -I/usr/local/include -O3 -s WASM=1 $(THREAD_FLAGS) -s TOTAL_MEMORY=$(TOTAL_MEMORY) -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]' $(SIMD_FLAGS)
27 | LDFLAGS += -lm -O3 -s WASM=1 $(THREAD_FLAGS) -s TOTAL_MEMORY=$(TOTAL_MEMORY) -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]' $(SIMD_FLAGS)
28 | 
29 | all: $(EXECUTABLES)
30 | 
31 | %.spv: %.comp
32 | 	glslangValidator -V -o $@ $<
33 | 
34 | %.spv.cpp: %.spv
35 | 	spirv-cross --cpp --output $@ $<
36 | 
37 | %.o: %.cpp
38 | 	$(CXX) -c -o $@ $< $(CXXFLAGS)
39 | 
40 | %.html: %.o %.spv.o
41 | 	$(CXX) -o $@ $^ $(LDFLAGS)
42 | 
43 | clean:
44 | 	$(RM) -f $(EXECUTABLES) $(SPIRV) $(CPP_INTERFACE) $(OBJECTS)
45 | 
46 | # all and clean do not name files, so both are phony.
47 | .PHONY: all clean
48 | # An empty .SECONDARY keeps intermediate files (.spv, .spv.cpp, .o) from being deleted.
49 | .SECONDARY:
50 | 
51 | 


--------------------------------------------------------------------------------
/spirv-io/examples/memalloc.glsl:
--------------------------------------------------------------------------------
 1 | #include <file.glsl>
 2 | 
 3 | ThreadLocalCount = 4;
 4 | ThreadGroupCount = 4;
 5 | 
 6 | void main() {
 7 |     int64_t ptr;
 8 |     alloc_t res = malloc(8,8);
 9 | 
10 |     awaitIO(memAlloc(100, res));
11 |     ptr = i64heap[res.x/8];
12 |     string hello = `Thread ${ThreadId} says: Hello, CPU!`;
13 |     awaitIO(memWrite(ptr, hello));
14 |     alloc_t buf = malloc(strLen(hello));
15 |     string s = awaitIO(memRead(ptr, buf));
16 |     println(s);
17 |     awaitIO(memFree(ptr));
18 | 
19 |     if (ThreadId == 0) {
20 |         // Allocate a 30 GB buffer
21 |         awaitIO(memAlloc(30000000000L, res));
22 |         ptr = i64heap[res.x/8];
23 |         // Write something every 1 MB
24 |         for (int64_t i = 0; i < 30000000000L; i+=1000000L) {
25 |             FREE_ALL( awaitIO(memWrite(ptr + i, str(i))) );
26 |         }
27 |         // Test that the writes succeeded
28 |         for (int64_t i = 0; i < 30000000000L; i+=1000000L) {
29 |             FREE_ALL(
30 |                 string num = str(i);
31 |                 string rd = awaitIO(memRead( ptr + i, malloc(strLen(num)) ));
32 |                 if (!strEq(num, rd)) println(`Roundtrip failed at ${i}: ${num} != ${rd}`);
33 |             )
34 |         }
35 |         println("Read-write roundtrips successful to a 30 GB buffer");
36 |         awaitIO(memFree(ptr));
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/http_shader/cpp/README.md:
--------------------------------------------------------------------------------
 1 | C++ version of the key-value store shader. Compiled to C++ with spirv-cross.
 2 | 
 3 | The atomics implementation seems to not be working too well.
 4 | 
 5 | Build and run:
 6 | 
 7 | ```bash
 8 | (cd .. && sh build_cpp.sh)
 9 | sh run.sh
10 | # 200 OK HTTP/1.1
11 | # content-type: text/plain
12 | # 
13 | # OK.
14 | # 200 OK HTTP/1.1
15 | # content-type:  text/html
16 | # 
17 | # <html><body>This is document number 1.</body></html>
18 | # 200 OK HTTP/1.1
19 | # content-type: text/plain
20 | # 
21 | # BLK
22 | # 200 OK HTTP/1.1
23 | # content-type:  text/html
24 | # 
25 | # <html><body>This is document number 3.</body></html>
26 | # 200 OK HTTP/1.1
27 | # content-type: text/plain
28 | # 
29 | # OK.
30 | # 200 OK HTTP/1.1
31 | # content-type:  text/html
32 | # 
33 | # <html><body>This is document number 5.</body></html>
34 | # 200 OK HTTP/1.1
35 | # content-type: text/plain
36 | # 
37 | # OK.
38 | # 200 OK HTTP/1.1
39 | # content-type:  text/html
40 | # 
41 | # <html><body>This is 2067 spam-post 2067 number 2067.</body></html>
42 | # 200 OK HTTP/1.1
43 | # content-type: text/plain
44 | # 
45 | # BLK
46 | # 200 OK HTTP/1.1
47 | # content-type:  text/html
48 | # 
49 | # <html><body>This is document number 9.</body></html>
50 | # 
51 | # Elapsed: 3180 ms
52 | # Million requests per second: 16.487
53 | # 
54 | # 40.38user 0.91system 0:03.65elapsed 1129%CPU (0avgtext+0avgdata 1576176maxresident)k
55 | # 0inputs+0outputs (0major+393371minor)pagefaults 0swaps
56 | ```
57 | 


--------------------------------------------------------------------------------
/spirv-io/examples/wait_for_stdin.glsl:
--------------------------------------------------------------------------------
 1 | #include <file.glsl>
 2 | #include <statemachine.glsl>
 3 | 
 4 | ThreadGroupCount = 1;
 5 | ThreadLocalCount = 1;
 6 | 
 7 | const int s_Init = 0;
 8 | const int s_Reading = 1;
 9 | 
10 | const int a_Read = 0;
11 | 
12 | void main() {
13 |     /*
14 |     A blocking version would be three lines:
15 | 
16 |         println("What's your name?");
17 |         string name = awaitIO(readLine(stdin, malloc(256)));
18 |         println(concat("Hello, ", name, "!"));
19 | 
20 |     but GPUs hang while waiting for IO and the driver kills the program
21 |     after a few seconds. So the read is only started here, its handle is kept
22 |     in the state machine, and completion is polled on a later pass.
23 | 
24 |     Wishlist: compile awaitIO into "exit program with RERUN_ON_IO" and
25 |     store/load IOs automatically, so the blocking version Just Works.
26 |     */
27 | 
28 |     stateMachine m = loadStateMachine(s_Init);
29 |     rerunProgram = RERUN_ON_IO;
30 |     int state = getState(m);
31 | 
32 |     if (state == s_Init) {
33 |         // Prompt, start the read, and remember the pending IO handle.
34 |         println("What's your name?");
35 |         setAttr(m, a_Read, readLine(stdin, malloc(256)));
36 |         setState(m, s_Reading);
37 |     } else if (state == s_Reading) {
38 |         // Greet and stop only once the pending read reports completion.
39 |         io pending = getIOAttr(m, a_Read);
40 |         if (pollIO(pending)) {
41 |             rerunProgram = NO_RERUN;
42 |             println(concat("Hello, ", awaitIO(pending), "!"));
43 |             return; // Done, exit program.
44 |         }
45 |     }
46 |     saveStateMachine(m);
47 | }
48 | 


--------------------------------------------------------------------------------
/spirv-io/examples/cat.glsl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env gls
 2 | 
 3 | ThreadLocalCount = 32;
 4 | ThreadGroupCount = 1;
 5 | 
 6 | HeapSize = 65536;
 7 | FromIOSize = 65536;
 8 | ToIOSize = 65536;
 9 | 
10 | #include <file.glsl>
11 | 
12 | shared int eof;
13 | 
14 | void main() {
15 |     // Prints every file named in argv[1..] by reading it in HeapSize-byte blocks, one block per thread per pass.
16 |     int64_t blockSize = int64_t(HeapSize);
17 | 
18 |     int argc = arrLen(argv);
19 |     for (int i = 1; i < argc; i++) {
20 |         eof = 0;
21 |         string filename = aGet(argv, i);
22 |         int64_t block = 0;
23 |         barrier();
24 |         while (eof == 0) {
25 |             FREE(
26 |                 string res;
27 |                 FREE_IO(
28 |                     int64_t off = (block * int64_t(ThreadCount) + int64_t(ThreadId)) * blockSize;
29 | 
30 |                     io r = read(filename, off, size_t(blockSize), malloc(size_t(blockSize)));
31 |                     barrier();
32 | 
33 |                     res = awaitIO(r);
34 |                     barrier();
35 |                 )
36 |                 FREE_IO(
37 |                     for (int i = 0; i < ThreadCount; i++) {
38 |                         barrier();
39 |                         if (i == ThreadId && strLen(res) > 0) {
40 |                             print(res);
41 |                         }
42 |                     }
43 | 
44 |                     if (strLen(res) < size_t(blockSize)) atomicAdd(eof, 1);
45 | 
46 |                     block++;
47 |                     barrier();
48 |                 )
49 |             )
50 |         }
51 |         barrier(); // Every thread must have seen eof != 0 before any thread resets it for the next file.
52 |     }
53 | }
54 | 
55 | 


--------------------------------------------------------------------------------
/spirv-io/lib/stat.glsl:
--------------------------------------------------------------------------------
 1 | struct Nanotime {
 2 |     uint64_t tv_sec;
 3 |     uint64_t tv_nsec;
 4 | };
 5 | const int32_t NanotimeSize = 16;
 6 | 
 7 | struct Stat {
 8 |     Nanotime st_atim;
 9 |     Nanotime st_mtim;
10 |     Nanotime st_ctim;
11 | 
12 |     uint64_t st_ino;
13 |     uint64_t st_size;
14 |     uint64_t st_blocks;
15 | 
16 |     uint32_t st_dev;
17 |     uint32_t st_mode;
18 |     uint32_t st_nlink;
19 |     uint32_t st_uid;
20 |     uint32_t st_gid;
21 |     uint32_t st_rdev;
22 |     uint32_t st_blksize;
23 | 
24 |     int32_t error;
25 | };
26 | const int32_t StatSize = 3 * NanotimeSize + 3 * 8 + 8 * 4; // 104
27 | 
28 | Stat initStat(string s) {
29 |     // Decodes the StatSize-byte record at the start of s; a shorter s yields error = -1.
30 |     Stat result;
31 |     if (strLen(s) >= StatSize) {
32 |         ptr_t base = s.x;
33 |         // Three Nanotime values, each stored as seconds followed by nanoseconds.
34 |         result.st_atim.tv_sec = readU64heap(base);
35 |         result.st_atim.tv_nsec = readU64heap(base + 8);
36 |         result.st_mtim.tv_sec = readU64heap(base + 16);
37 |         result.st_mtim.tv_nsec = readU64heap(base + 24);
38 |         result.st_ctim.tv_sec = readU64heap(base + 32);
39 |         result.st_ctim.tv_nsec = readU64heap(base + 40);
40 |         result.st_ino = readU64heap(base + 48);
41 |         result.st_size = readU64heap(base + 56);
42 |         result.st_blocks = readU64heap(base + 64);
43 |         // The 32-bit fields start at byte 72; the signed error code is the last word.
44 |         result.st_dev = readU32heap(base + 72);
45 |         result.st_mode = readU32heap(base + 76);
46 |         result.st_nlink = readU32heap(base + 80);
47 |         result.st_uid = readU32heap(base + 84);
48 |         result.st_gid = readU32heap(base + 88);
49 |         result.st_rdev = readU32heap(base + 92);
50 |         result.st_blksize = readU32heap(base + 96);
51 |         result.error = readI32heap(base + 100);
52 |     } else {
53 |         // Not enough bytes for a full record: only the error field is set.
54 |         result.error = -1;
55 |     }
56 |     return result;
57 | }
58 | 


--------------------------------------------------------------------------------
/include/spirv_cross/image.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2015-2017 ARM Limited
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #ifndef SPIRV_CROSS_IMAGE_HPP
18 | #define SPIRV_CROSS_IMAGE_HPP
19 | 
20 | #ifndef GLM_SWIZZLE
21 | #define GLM_SWIZZLE
22 | #endif
23 | 
24 | #ifndef GLM_FORCE_RADIANS
25 | #define GLM_FORCE_RADIANS
26 | #endif
27 | 
28 | #include <glm/glm.hpp>
29 | 
30 | namespace spirv_cross
31 | {
32 | template <typename T>
33 | struct image2DBase
34 | {
35 | 	virtual ~image2DBase() = default;
36 | 	inline virtual T load(glm::ivec2 coord) const
37 | 	{
38 | 		return T(0, 0, 0, 1);
39 | 	}
40 | 	inline virtual void store(glm::ivec2 coord, const T &v)
41 | 	{
42 | 	}
43 | };
44 | 
45 | typedef image2DBase<glm::vec4> image2D;
46 | typedef image2DBase<glm::ivec4> iimage2D;
47 | typedef image2DBase<glm::uvec4> uimage2D;
48 | 
49 | template <typename T>
50 | inline T imageLoad(const image2DBase<T> &image, glm::ivec2 coord)
51 | {
52 | 	return image.load(coord);
53 | }
54 | 
55 | template <typename T>
56 | void imageStore(image2DBase<T> &image, glm::ivec2 coord, const T &value)
57 | {
58 | 	image.store(coord, value);
59 | }
60 | }
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/docker/spirv-runner/runner.ispc:
--------------------------------------------------------------------------------
 1 | #include "program.ispc"
 2 | 
 3 | export void runner_workgroup(uniform int numWorkGroups[3], uniform int workGroupID[3], uniform struct inputs& input, uniform struct outputs& output) // Runs program_ispc_main once for every local invocation of the workgroup identified by workGroupID.
 4 | {
 5 |     uniform int3 gl_NumWorkGroups = int3(numWorkGroups[0], numWorkGroups[1], numWorkGroups[2]);
 6 |     uniform int3 gl_WorkGroupID = int3(workGroupID[0], workGroupID[1], workGroupID[2]);
 7 | 
 8 |     // Vectorise the workgroup
 9 |     foreach(lz = 0 ... gl_WorkGroupSize.z, ly = 0 ... gl_WorkGroupSize.y, lx = 0 ... gl_WorkGroupSize.x)
10 |     {
11 |         varying int3 gl_LocalInvocationID = int3(lx, ly, lz); // position of this invocation inside the workgroup
12 |         varying int3 gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID; // workgroup origin plus local position
13 |         varying int gl_LocalInvocationIndex = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x; // local position flattened with x varying fastest
14 |         
15 |         program_ispc_main(gl_NumWorkGroups, gl_WorkGroupID, gl_LocalInvocationID, gl_GlobalInvocationID, gl_LocalInvocationIndex, input, output);
16 |     }
17 | }
18 | 
19 | task
20 | void runner_task(uniform int work_groups[3], uniform struct inputs& input, uniform struct outputs& output)
21 | {
22 |     uniform int workGroupID[3];
23 |     workGroupID[0] = taskIndex0;
24 |     workGroupID[1] = taskIndex1;
25 |     workGroupID[2] = taskIndex2;
26 |     runner_workgroup(work_groups, workGroupID, input, output);
27 | }
28 | 
29 | export void runner_main(uniform int work_groups[3], uniform struct inputs& input, uniform struct outputs& output)
30 | {
31 |     launch[work_groups[0], work_groups[1], work_groups[2]]
32 |     runner_task(work_groups, input, output);
33 | }
34 | 
35 | export static int stdout = 1;
36 | export static int stderr = 2;


--------------------------------------------------------------------------------
/spirv-io/bin/gls_generate_tests.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs');
 2 | 
 3 | const source = fs.readFileSync(process.argv[2]).toString();
 4 | 
 5 | const testFuncs = {};
 6 | let i = 0;
 7 | 
 8 | const segs = source.split(/(\/\*T)|(\*\/)/y);
 9 | 
10 | let inTest = false;
11 | let testBody = '';
12 | for (let i = 0; i < segs.length; i++) {
13 |     const seg = segs[i];
14 |     if (seg === '\/*T') {
15 |         inTest = true;
16 |         testBody = '';
17 |     } else if (seg === '*\/' && inTest) {
18 |         inTest = false;
19 |         const nextSeg = segs[i+1];
20 |         if (nextSeg) {
21 |             const m = nextSeg.match(/^\s*\S+\s+([^(]+)/m);
22 |             if (m) {
23 |                 let name = 'test_'+m[1].trim();
24 |                 while (testFuncs[name]) {
25 |                     name += '_';
26 |                 }
27 |                 testBody = testBody.replace(
28 |                     /^\s*(\S+)\s*(<=?|>=?|==|!=)\s*([^;]+);\s*$/mg,
29 |                     (m, lv, cmp, rv) =>
30 |                         `    assert(${lv} ${cmp} ${rv});`
31 |                 );
32 |                 testFuncs[name] = `void ${name}() {\n${testBody}\n}`;
33 |             }
34 |         }
35 |     } else if (inTest && seg) {
36 |         testBody = seg;
37 |     }
38 | }
39 | 
40 | let testSource = [`
41 | #include <assert.glsl>
42 | #include "${process.argv[2]}"
43 | 
44 | ThreadLocalCount = 1;
45 | ThreadGroupCount = 1;
46 | 
47 | HeapSize = 16777216;
48 | ToIOSize = 16777216;
49 | FromIOSize = 16777216;
50 | `];
51 | let testMain = `void main() {
52 | `;
53 | for (const funcName in testFuncs) {
54 |     testMain += `    FREE_ALL(${funcName}());\n`
55 |     testSource.push(testFuncs[funcName]);
56 | }
57 | testMain += '}';
58 | testSource.push(testMain);
59 | 
60 | console.log(testSource.join("\n\n"));
61 | 


--------------------------------------------------------------------------------
/spirv-io/examples/hello_dlopen.glsl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env gls
 2 | 
 3 | #include <file.glsl>
 4 | #include <dlopen.glsl>
 5 | 
 6 | ThreadLocalCount = 1;
 7 | ThreadGroupCount = 1;
 8 | 
 9 | uint64_t lib = 0;
10 | 
11 | int32_t dlcallSync_i32(uint64_t lib, string func, int32_t p0, int32_t p1) { // Calls func in lib with p0 and p1 packed into an 8-byte heap buffer and returns the int32 read from the result buffer.
12 |     int32_t r = 0; // Declared outside FREE_ALL so the value is still available for the return below.
13 |     FREE_ALL(
14 |         alloc_t params = malloc(8, 4);
15 |         i32heap[params.x/4] = p0;
16 |         i32heap[params.x/4 + 1] = p1;
17 |         alloc_t res = dlcallSync(lib, func, params, malloc(4));
18 |         r = readI32heap(res.x);
19 |     );
20 |     return r;
21 | }
22 | 
23 | #define DLFUNC_I32_I32_I32(lib, func) int32_t func (int32_t p0, int32_t p1) { return dlcallSync_i32(lib, #func, p0, p1); }
24 | #define DLFUNC_ALLOC_VOID(lib, func) void func (alloc_t buf) { FREE_ALL(dlcallSync(lib, #func, buf, string(-4, -4))); }
25 | #define DLFUNC_CSTR_VOID(lib, func) void func (alloc_t buf) {\
26 |     FREE_ALL(\
27 |         alloc_t buf2 = malloc(strLen(buf)+1);\
28 |         strCopy(buf2, buf);\
29 |         setC(buf2, strLen(buf), char(0));\
30 |         dlcallSync(lib, #func, buf2, string(-4, -4));\
31 |     )\
32 | }
33 | 
34 | DLFUNC_I32_I32_I32(lib, sub)
35 | DLFUNC_CSTR_VOID(lib, hello)
36 | 
37 | void main() {
38 |     writeSync("hello.c", "#include <stdio.h>\nvoid hello(char* s){printf(\"Hello, %s!\\n\",s);}\nvoid sub(int* v, unsigned int vlen, int* res, unsigned int reslen) { res[0] = v[0]-v[1]; }");
39 |     awaitIO(runCmd("cc --shared -o hello.so hello.c"));
40 | 
41 |     lib = dlopenSync("./hello.so");
42 | 
43 |     dlcallSync(lib, "hello", "GLSL\u0000");
44 | 
45 |     int32_t a = 7, b = 12;
46 |     int32_t subResult = dlcallSync_i32(lib, "sub", a, b);
47 |     println(concat(str(a), " - ", str(b), " = ", str(subResult)));
48 | 
49 |     hello("GLSL macro");
50 |     a = 8829;
51 |     b = 3741;
52 |     println(concat(str(a), " - ", str(b), " = ", str(sub(a, b))));
53 | }
54 | 


--------------------------------------------------------------------------------
/spirv-io/src/test_string.cpp:
--------------------------------------------------------------------------------
 1 | #include "compute_application.hpp"
 2 | 
 3 | class App : public ComputeApplication
 4 | {
 5 |   public:
 6 |     App() {
 7 |         timings = true;
 8 |         runIO = false;
 9 |     }
10 |     // Runs the shader 100 times, then reports the per-thread test results it left in the output buffer.
11 |     void runProgram() {
12 |         printf("Thread count: %d\n", threadCount);
13 | 
14 |         std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
15 |         int i;
16 |         for (i = 0; i < 100; i++) {
17 |             startCommandBuffer();
18 |             waitCommandBuffer();
19 |         }
20 |         bufferCopy(fromGPUBuffer, 0, heapBuffer, 0, heapBufferSize);
21 |         readFromGPUIO(0, fromGPUBufferSize);
22 |         std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
23 |         // Results are the last 256 ints of each thread's slice: 1 = passed, 0 = end of list, anything else = failed.
24 |         for (int j = 0; j < threadCount; j++) {
25 |             bool allOk = true;
26 |             int k = 0;
27 |             for (k = 0; k < 256; k++) {
28 |                 int ok = ((int*)mappedFromGPUMemory)[(j+1)*(fromGPUSize/4) - 256 + k];
29 |                 if (ok == 0) break;
30 |                 if (ok != 1) {
31 |                     printf("[%d] Test %d failed: %d\n", j, k, ok);
32 |                     allOk = false;
33 |                 }
34 |             }
35 |             if (allOk) {
36 |                 printf("[%d] All %d tests succeeded.\n", j, k);
37 |             }
38 |         }
39 |         // count() has an implementation-defined integer type, so cast it to match the format specifier.
40 |         printf("\nElapsed: %lld ms\n", (long long)std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count());
41 |         printf("Test runs per second: %.0f\n\n", (float)(threadCount * i) / (0.000001 * std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count()));
42 | 
43 |     }
44 | };
45 | 
46 | int main(int argc, char *argv[])
47 | {
48 |     App app;
49 | 
50 |     try
51 |     {
52 |         app.run("test_string.spv", argc, argv);
53 |     }
54 |     catch (const std::runtime_error &e)
55 |     {
56 |         printf("%s\n", e.what());
57 |         app.cleanup();
58 |         return EXIT_FAILURE;
59 |     }
60 | 
61 |     return app.exitCode;
62 | }
63 | 


--------------------------------------------------------------------------------
/http_shader/ispc_char/runner.ispc:
--------------------------------------------------------------------------------
 1 | const varying int int_cast(const varying unsigned int32 v) { return (int32)v; }
 2 | 
 3 | #include "httpd.ispc"
 4 | 
 5 | 
 6 | export void runner_dispatch(uniform int work_group_ID[3], uniform int work_groups[3], 
 7 | 	uniform struct outputBuffer& v_656,  uniform struct inputBuffer& v_613,  uniform struct heapBuffer& _901
 8 | )
 9 | {
10 |     uniform int3 gl_NumWorkGroups = int3(work_groups[0], work_groups[1], work_groups[2]);
11 |     uniform int3 gl_WorkGroupID = int3(work_group_ID[0], work_group_ID[1], work_group_ID[2]);
12 |     
13 |     // Vectorise the workgroup
14 |     foreach_tiled(lx = 0 ... gl_WorkGroupSize.x)
15 |     {
16 |         varying int3 gl_LocalInvocationID = int3(lx, 0, 0);
17 |         varying int3 gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID;
18 |         varying int gl_LocalInvocationIndex = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
19 |         
20 |         httpd_ispc_main( 
21 | 	        v_613,
22 | 	        v_656,
23 | 	        gl_NumWorkGroups,
24 | 	        gl_GlobalInvocationID,
25 | 	        _901
26 |         );
27 |     }
28 | }
29 | 
30 | task
31 | void runner_task(uniform int work_groups[3],
32 | 	uniform struct outputBuffer& v_656,  uniform struct inputBuffer& v_613,  uniform struct heapBuffer& _901
33 | )
34 | {
35 |     uniform int workGroupID[3];
36 |     workGroupID[0] = taskIndex0;
37 |     workGroupID[1] = taskIndex1;
38 |     workGroupID[2] = taskIndex2;
39 |     runner_dispatch(workGroupID, work_groups,
40 |         v_656, v_613, _901
41 |     );
42 | }
43 | 
44 | export void runner_main(uniform int work_groups[3],
45 | 	uniform struct inputBuffer& v_613,  
46 | 	uniform struct outputBuffer& v_656,  
47 | 	uniform struct heapBuffer& _901
48 | )
49 | {
50 |     launch[work_groups[0], work_groups[1], work_groups[2]]
51 |     runner_task(work_groups, 
52 |         v_656, v_613, _901
53 |     );
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/src/mandel.comp:
--------------------------------------------------------------------------------
 1 | #version 450
 2 | #extension GL_ARB_separate_shader_objects : enable
 3 | 
 4 | #define WORKER_SIZE 16
 5 | 
 6 | #define WORKGROUP_SIZE 4
 7 | layout (local_size_x = WORKGROUP_SIZE, local_size_y = WORKGROUP_SIZE, local_size_z = 1 ) in;
 8 | 
 9 | struct Pixel{
10 |   vec4 value;
11 | };
12 | 
13 | layout(std430, binding = 0) buffer inputs
14 | {
15 |   float dimensions[];
16 | };
17 | 
18 | layout(std430, binding = 1) buffer outputs
19 | {
20 |   Pixel imageData[];
21 | };
22 | 
23 | void main() {
24 |   // Each invocation renders a WORKER_SIZE x WORKER_SIZE tile of the image into imageData.
25 |   int width = int(dimensions[0]);
26 |   int height = int(dimensions[1]);
27 | 
28 |   for (uint iy = 0; iy < WORKER_SIZE; iy++)
29 |   for (uint ix = 0; ix < WORKER_SIZE; ix++)
30 |   {
31 | 
32 |   uint px = gl_GlobalInvocationID.x * WORKER_SIZE + ix;
33 |   uint py = gl_GlobalInvocationID.y * WORKER_SIZE + iy;
34 | 
35 |   /*
36 |   In order to fit the work into workgroups, some unnecessary threads are launched.
37 |   Pixels that fall outside the image are skipped here.
38 |   */
39 |   if(px >= width || py >= height)
40 |     continue;
41 | 
42 |   float x = float(px) / float(width);
43 |   float y = float(py) / float(height);
44 | 
45 |   /*
46 |   What follows is code for rendering the mandelbrot set. 
47 |   */
48 |   vec2 uv = vec2(x,y);
49 |   float n = 0.0;
50 |   vec2 c = vec2(-.445, 0.0) +  (uv - 0.5)*(2.0+ 1.7*0.2  ), 
51 |   z = vec2(0.0);
52 |   const int M = 128;
53 |   for (int i = 0; i < M; i++)
54 |   {
55 |     z = vec2(z.x*z.x - z.y*z.y, 2.*z.x*z.y) + c;
56 |     if (dot(z, z) > 4.0) break; // escape radius is 2, so compare |z|^2 against 4; a smaller bound misclassifies points inside the set (e.g. c = -1.5)
57 |     n++;
58 |   }
59 |   // we use a simple cosine palette to determine color:
60 |   // http://iquilezles.org/www/articles/palettes/palettes.htm         
61 |   float t = float(n) / float(M);
62 |   vec3 d = vec3(0.3, 0.3 ,0.5);
63 |   vec3 e = vec3(-0.2, -0.3 ,-0.5);
64 |   vec3 f = vec3(2.1, 2.0, 3.0);
65 |   vec3 g = vec3(0.0, 0.1, 0.0);
66 |   vec4 color = vec4( d + e*cos( 6.28318*(f*t+g) ) ,1.0);
67 |           
68 |   // store the rendered mandelbrot set into a storage buffer:
69 |   imageData[width * py + px].value = color;
70 | 
71 |   }
72 | }
73 | 


--------------------------------------------------------------------------------
/http_shader/vulkan/README.md:
--------------------------------------------------------------------------------
 1 | # A small in-memory key-value store
 2 | 
 3 | Send an HTTP `GET /xxxxxxx` to get the value at key `xxxxxxx`. 
 4 | 
 5 | Send a `POST /xxxxxxx` to set the value at key `xxxxxxx` to the POST body. The start of the body should be the content-type, followed by `\r\n\r\n`. 
 6 | You can add other headers after the content-type if you want. See `vulkanRunner.cpp` for an example.
 7 | 
 8 | Access to the values is protected by a per-object atomic "mutex". Only one write request can execute at a time; other simultaneous writes are rejected.
 9 | If you try to write while there are active readers, the write will fail. If you try to read while a write is in progress, the read will fail.
10 | 
11 | ## Build & run
12 | 
13 | ```bash
14 | (cd .. && sh build_vulkan.sh)
15 | sh run.sh
16 | # 200 OK HTTP/1.1
17 | # content-type: text/plain
18 | # 
19 | # BLK
20 | # 200 OK HTTP/1.1
21 | # content-type:  text/html
22 | # 
23 | # <html><body>This is 20481 spam-post 20481 number 20481.</body></html>
24 | # 200 OK HTTP/1.1
25 | # content-type: text/plain
26 | # 
27 | # BLK
28 | # 200 OK HTTP/1.1
29 | # content-type:  text/html
30 | # 
31 | # <html><body>This is 97283 spam-post 97283 number 97283.</body></html>
32 | # 200 OK HTTP/1.1
33 | # content-type: text/plain
34 | # 
35 | # BLK
36 | # 200 OK HTTP/1.1
37 | # content-type:  text/html
38 | # 
39 | # <html><body>This is 286725 spam-post 286725 number 286725.</body></html>
40 | # 200 OK HTTP/1.1
41 | # content-type: text/plain
42 | # 
43 | # BLK
44 | # 200 OK HTTP/1.1
45 | # content-type:  text/html
46 | # 
47 | # <html><body>This is 138247 spam-post 138247 number 138247.</body></html>
48 | # 200 OK HTTP/1.1
49 | # content-type: text/plain
50 | # 
51 | # BLK
52 | # 200 OK HTTP/1.1
53 | # content-type:  text/html
54 | # 
55 | # <html><body>This is 496649 spam-post 496649 number 496649.</body></html>
56 | # 
57 | # Elapsed: 6731 ms
58 | # Million requests per second: 7.789
59 | # 
60 | # 0.27user 1.03system 0:08.06elapsed 16%CPU (0avgtext+0avgdata 3217080maxresident)k
61 | # 0inputs+40outputs (2major+267993minor)pagefaults 0swaps
62 | ```
63 | 


--------------------------------------------------------------------------------
/http_shader/ispc_ivec4/README.md:
--------------------------------------------------------------------------------
 1 | # A small in-memory key-value store
 2 | 
 3 | Send an HTTP `GET /xxxxxxx` to get the value at key `xxxxxxx`. 
 4 | 
 5 | Send a `POST /xxxxxxx` to set the value at key `xxxxxxx` to the POST body. The start of the body should be the content-type, followed by `\r\n\r\n`. 
 6 | You can add other headers after the content-type if you want. See `httpd_ispc_ivec4.cpp` for an example.
 7 | 
 8 | Access to the values is protected by a per-object atomic "mutex". Only one write request can execute at a time; other simultaneous writes are rejected.
 9 | If you try to write while there are active readers, the write will fail. If you try to read while a write is in progress, the read will fail.
10 | 
11 | ## Build & run
12 | 
13 | ```bash
14 | (cd .. && sh build_ispc_ivec4.sh)
15 | sh run.sh
16 | # 200 OK HTTP/1.1
17 | # content-type: text/plain
18 | # 
19 | # BLK
20 | # 200 OK HTTP/1.1
21 | # content-type:  text/html
22 | # 
23 | # <html><body>This is 150281 spam-post 150281 number 150281.</body></html>
24 | # 200 OK HTTP/1.1
25 | # content-type: text/plain
26 | # 
27 | # BLK
28 | # 200 OK HTTP/1.1
29 | # content-type:  text/html
30 | # 
31 | # <html><body>This is 248863 spam-post 248863 number 248863.</body></html>
32 | # 200 OK HTTP/1.1
33 | # content-type: text/plain
34 | # 
35 | # OK.
36 | # 200 OK HTTP/1.1
37 | # content-type:  text/html
38 | # 
39 | # <html><body>This is 18435 spam-post 18435 number 18435.</body></html>
40 | # 200 OK HTTP/1.1
41 | # content-type: text/plain
42 | # 
43 | # BLK
44 | # 200 OK HTTP/1.1
45 | # content-type:  text/html
46 | # 
47 | # <html><body>This is 81927 spam-post 81927 number 81927.</body></html>
48 | # 200 OK HTTP/1.1
49 | # content-type: text/plain
50 | # 
51 | # BLK
52 | # 200 OK HTTP/1.1
53 | # content-type:  text/html
54 | # 
55 | # <html><body>This is 3079 spam-post 3079 number 3079.</body></html>
56 | # 
57 | # Elapsed: 19885 ms
58 | # Million requests per second: 26.366
59 | # 
60 | # 606.42user 0.93system 0:20.34elapsed 2986%CPU (0avgtext+0avgdata 1576412maxresident)k
61 | # 0inputs+0outputs (0major+393407minor)pagefaults 0swaps
62 | ```
63 | 


--------------------------------------------------------------------------------
/spirv-io/lib/statemachine.glsl:
--------------------------------------------------------------------------------
 1 | struct stateMachine {
 2 |     ptr_t statePtr;
 3 |     stringArray attrs;
 4 |     ptr_t heapPtrPtr;
 5 |     ptr_t fromIOPtrPtr;
 6 |     ptr_t toIOPtrPtr;
 7 | };
 8 | 
 9 | stateMachine loadStateMachine(int initialState) { // Binds a stateMachine to the 32 words at heapPtr, initializing them when the magic marker is absent, then restores the saved pointers.
10 |     stateMachine m = stateMachine(
11 |         heapPtr/4 + 1, // i32heap index of the state word (word 0 holds the magic marker)
12 |         stringArray(heapPtr/4 + 2, heapPtr/4 + 28), // i32heap index range used for the attributes
13 |         heapPtr/4 + 29, // i32heap index of the saved heapPtr
14 |         heapPtr/4 + 30, // i32heap index of the saved fromIOPtr
15 |         heapPtr/4 + 31 // i32heap index of the saved toIOPtr
16 |     );
17 |     if (i32heap[heapPtr/4] != 0x57A7E0FC) { // Marker missing: the header has not been written yet.
18 |         i32heap[heapPtr/4] = 0x57A7E0FC;
19 |         i32heap[m.statePtr] = initialState;
20 |         for (ptr_t i = m.attrs.x; i < m.attrs.y; i++) { // Clear every attribute word.
21 |             i32heap[i] = 0;
22 |         }
23 |         i32heap[m.heapPtrPtr] = heapPtr + 32*4; // The saved heapPtr starts just past the 32-word header.
24 |         i32heap[m.fromIOPtrPtr] = fromIOPtr;
25 |         i32heap[m.toIOPtrPtr] = toIOPtr;
26 |     }
27 |     heapPtr = i32heap[m.heapPtrPtr]; // Replace the live pointers with the values recorded in the header.
28 |     fromIOPtr = i32heap[m.fromIOPtrPtr];
29 |     toIOPtr = i32heap[m.toIOPtrPtr];
30 |     return m;
31 | }
32 | 
33 | void saveStateMachine(stateMachine m) {
34 |     i32heap[m.heapPtrPtr] = heapPtr;
35 |     i32heap[m.toIOPtrPtr] = toIOPtr;
36 |     i32heap[m.fromIOPtrPtr] = fromIOPtr;
37 | }
38 | 
39 | int getState(stateMachine m) {
40 |     return i32heap[m.statePtr];
41 | }
42 | 
43 | void setState(stateMachine m, int state) {
44 |     i32heap[m.statePtr] = state;
45 | }
46 | 
47 | void setAttr(stateMachine m, int key, string value) {
48 |     aSet(m.attrs, key, value);
49 | }
50 | 
51 | string getAttr(stateMachine m, int key) {
52 |     return aGet(m.attrs, key);
53 | }
54 | 
55 | void setAttr(stateMachine m, int key, io value) {
56 |     aSet(m.attrs, key, string(value.index, value.heapBufStart));
57 | }
58 | 
59 | void setAttr(stateMachine m, int key, int32_t value) {
60 |     aSet(m.attrs, key, string(value, 0));
61 | }
62 | 
63 | io getIOAttr(stateMachine m, int key) {
64 |     string s = aGet(m.attrs, key);
65 |     return io(s.x, s.y);
66 | }
67 | 
68 | int32_t getI32Attr(stateMachine m, int key) {
69 |     string s = aGet(m.attrs, key);
70 |     return int32_t(s.x);
71 | }
72 | 


--------------------------------------------------------------------------------
/http_shader/ispc_ivec4/runner_ivec4.ispc:
--------------------------------------------------------------------------------
 1 | const varying int int_cast(const varying unsigned int32 v) { return (int32)v; }
 2 | 
 3 | #define atomicAdd(val, data) atomic_add_global(&(val), (data))
 4 | #define atomicCompSwap(val, compare, newval) atomic_compare_exchange_global(&(val), (compare), (newval))
 5 | 
 6 | #include "httpd.ispc"
 7 | 
 8 | export void runner_dispatch(uniform int work_group_ID[3], uniform int work_groups[3], 
 9 | 	uniform struct inputBuffer& inputs,
10 |     uniform struct outputBuffer& outputs,
11 |     uniform struct heapBuffer& heap
12 | )
13 | {
14 |     uniform int3 gl_NumWorkGroups = int3(work_groups[0], work_groups[1], work_groups[2]);
15 |     uniform int3 gl_WorkGroupID = int3(work_group_ID[0], work_group_ID[1], work_group_ID[2]);
16 |     
17 |     // Vectorise the workgroup
18 |     foreach_tiled(lx = 0 ... gl_WorkGroupSize.x)
19 |     {
20 |         varying int3 gl_LocalInvocationID = int3(lx, 0, 0);
21 |         varying int3 gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID;
22 |         varying int gl_LocalInvocationIndex = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
23 |         
24 |         httpd_ispc_main( 
25 | 	        gl_GlobalInvocationID,
26 | 	        outputs,
27 | 	        heap,
28 | 	        inputs
29 |         );
30 |     }
31 | }
32 | 
33 | task
34 | void runner_task(uniform int work_groups[3],
35 | 	uniform struct inputBuffer& inputs,
36 |     uniform struct outputBuffer& outputs,
37 |     uniform struct heapBuffer& heap
38 | )
39 | {
40 |     uniform int workGroupID[3];
41 |     workGroupID[0] = taskIndex0;
42 |     workGroupID[1] = taskIndex1;
43 |     workGroupID[2] = taskIndex2;
44 |     runner_dispatch(workGroupID, work_groups, inputs, outputs, heap);
45 | }
46 | 
47 | export void runner_main(uniform int work_groups[3],
48 | 	uniform struct inputBuffer& inputs,  
49 | 	uniform struct outputBuffer& outputs,  
50 | 	uniform struct heapBuffer& heap
51 | )
52 | {
53 |     launch[work_groups[0], work_groups[1], work_groups[2]]
54 |     runner_task(work_groups, inputs, outputs, heap);
55 | }
56 | 
57 | 


--------------------------------------------------------------------------------
/include/spirv_cross/barrier.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2015-2017 ARM Limited
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #ifndef SPIRV_CROSS_BARRIER_HPP
18 | #define SPIRV_CROSS_BARRIER_HPP
19 | 
20 | #include <atomic>
21 | #include <thread>
22 | 
23 | namespace spirv_cross
24 | {
25 | class Barrier
26 | {
27 | public:
28 | 	Barrier()
29 | 	{
30 | 		count.store(0);
31 | 		iteration.store(0);
32 | 	}
33 | 
34 | 	void set_release_divisor(unsigned divisor)
35 | 	{
36 | 		this->divisor = divisor;
37 | 	}
38 | 
39 | 	static inline void memoryBarrier()
40 | 	{
41 | 		std::atomic_thread_fence(std::memory_order_seq_cst);
42 | 	}
43 | 
44 | 	void reset_counter()
45 | 	{
46 | 		count.store(0);
47 | 		iteration.store(0);
48 | 	}
49 | 
50 | 	void wait()
51 | 	{
52 | 		unsigned target_iteration = iteration.load(std::memory_order_relaxed) + 1;
53 | 		// Overflows cleanly.
54 | 		unsigned target_count = divisor * target_iteration;
55 | 
56 | 		// Barriers don't enforce memory ordering.
57 | 		// Be as relaxed about the barrier as we possibly can!
58 | 		unsigned c = count.fetch_add(1u, std::memory_order_relaxed);
59 | 
60 | 		if (c + 1 == target_count)
61 | 		{
62 | 			iteration.store(target_iteration, std::memory_order_relaxed);
63 | 		}
64 | 		else
65 | 		{
66 | 			// If we have more threads than the CPU, don't hog the CPU for very long periods of time.
67 | 			while (iteration.load(std::memory_order_relaxed) != target_iteration)
68 | 				std::this_thread::yield();
69 | 		}
70 | 	}
71 | 
72 | private:
73 | 	unsigned divisor = 1;
74 | 	std::atomic<unsigned> count;
75 | 	std::atomic<unsigned> iteration;
76 | };
77 | }
78 | 
79 | #endif
80 | 


--------------------------------------------------------------------------------
/spirv-io/bin/gls_resolve_includes.js:
--------------------------------------------------------------------------------
 1 | const fs = require('fs');
 2 | const cp = require('child_process');
 3 | const crypto = require('crypto');
 4 | 
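// Resolve every `#include <https://...>` directive: download the file into the
// local cache (~/.gls/cache/lib) and rewrite the directive to include the cached copy.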
 6 | 
 7 | function resolveIncludes(source) {
 8 |     const resolvedSrc = source.replace(/^\s*#include\s+<(https:.*)>\s*@?\s*([a-zA-Z0-9]+)?$/mg, (match, url, hash) => {
 9 |         /*
10 |         if (url.startsWith('"')) { // Local file
11 |         } else if (url.startsWith('<https://')){ // URL
12 |         } else { // System library
13 |         }
14 |         */
15 |         if (hash) hash = hash.toLowerCase();
16 |         const localPath = `${process.env.HOME}/.gls/cache/lib/${encodeURIComponent(url)}${hash ? `%${encodeURIComponent(hash)}` : '%0000'}`;
17 |         if (!fs.existsSync(localPath)) {
18 |             cp.execFileSync('mkdir', ['-p', `${process.env.HOME}/.gls/cache/lib`]);
19 |             fs.writeFileSync(localPath, "");
20 |             const urlSource = cp.execFileSync('curl', ['--silent', url]).toString();
21 |             const contentHash = crypto.createHash('sha256').update(urlSource).digest('hex');
22 |             if (hash && contentHash !== hash) {
23 |                 console.error(`Downloaded ${url}\nERROR: Library file has been tampered with!\n       Downloaded content hash ${contentHash} differs from requested ${hash}`);
24 |                 process.exit(1);
25 |             } else {
26 |                 console.error(`Downloaded ${url} - content hash ${contentHash}`);
27 |             }
28 |             const resolved = resolveIncludes(urlSource);
29 |             const hashLocalPath = `${process.env.HOME}/.gls/cache/lib/${encodeURIComponent(url)}%${encodeURIComponent(contentHash)}`;
30 |             fs.writeFileSync(hashLocalPath, resolved);
31 |             fs.writeFileSync(localPath, resolved);
32 |         }
33 |         return `#include "${localPath}"`;
34 |     });
35 |     return resolvedSrc;
36 | }
37 | 
// Driver: read the whole program from stdin, resolve its remote includes and
// write the result to stdout.
//
// readFileSync keeps reading until EOF. A single readSync() call returns at
// most one chunk of a pipe, so larger piped sources were silently truncated
// (and anything beyond a fixed 1 MiB buffer was dropped).
const source = fs.readFileSync(process.stdin.fd, 'utf8');

const resolvedSrc = resolveIncludes(source);

fs.writeSync(process.stdout.fd, resolvedSrc);
45 | 


--------------------------------------------------------------------------------
/spirv-io/src/test_file.cpp:
--------------------------------------------------------------------------------
 1 | #include "compute_application.hpp"
 2 | #include <malloc.h>
 3 | 
 4 | class App : public ComputeApplication
 5 | {
 6 |   public:
 7 |     App() {
 8 |         workSize[0] = 4;
 9 |         verbose = false;
10 |     }
11 | 
12 |     void runProgram() {
13 |         /*
14 |         int n = 20;
15 |         int len = n * 1 * 1048576;
16 |         char *c = (char*)memalign(2097152, len);
17 |         char *d = (char*)mappedToGPUMemory;
18 |         //char *d = (char*)memalign(2097152, len);
19 |         memset(c, 5, len);
20 |         std::thread threads[n];
21 |         std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
22 |         int i;
23 |         for (i = 0; i < 100; i++) {
24 |             for (int j = 0; j < n; j++) threads[j] = std::thread(memcpy, d+(j*len/n), c+(j*len/n), len/n);
25 |             for (int j = 0; j < n; j++) threads[j].join();
26 |         }
27 |         std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
28 |         printf("%.2f GB/s\n\n", ((float)len * i) / 1e9 / (0.000001 * std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count()));
29 |         */
30 |         std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
31 |         int i;
32 |         for (i = 0; i < 1; i++) {
33 |             while(ioReset);
34 |             startCommandBuffer();
35 |             waitCommandBuffer();
36 |             ioReset = true;
37 |         }
38 |         std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
39 |         printf("\nElapsed: %ld ms\n", std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count());
40 |         printf("Test runs per second: %.0f\n\n", (float)(threadCount * i) / (0.000001 * std::chrono::duration_cast<std::chrono::microseconds>(end - begin).count()));
41 |     }
42 | };
43 | 
44 | int main(int argc, char *argv[])
45 | {
46 |     App app;
47 | 
48 |     try
49 |     {
50 |         app.run("test_file.spv", argc, argv);
51 |     }
52 |     catch (const std::runtime_error &e)
53 |     {
54 |         printf("%s\n", e.what());
55 |         app.cleanup();
56 |         return EXIT_FAILURE;
57 |     }
58 | 
59 |     return app.exitCode;
60 | }
61 | 


--------------------------------------------------------------------------------
/spirv-io/examples/http_client.glsl:
--------------------------------------------------------------------------------
 1 | #include <file.glsl>
 2 | #include <hashtable.glsl>
 3 | 
// Launch configuration.
// NOTE(review): bare top-level assignments are not standard GLSL; presumably
// the project's GLSL preprocessing step consumes these — confirm.
ThreadLocalCount = 1;
ThreadGroupCount = 1;

// Heap and IO buffer sizes, 16777216 each (16 MiB if the unit is bytes — TODO confirm).
HeapSize = 16777216;
FromIOSize = 16777216;
ToIOSize = 16777216;
10 | 
// Downloads tips.csv with curl, then logs the running-mean tip percentage
// (100 * tip / total_bill) grouped by the value in the last CSV column.
// Only the invocation with ThreadId == 0 does any work.
void main() {
    if (ThreadId == 0) {
        string url = "https://github.com/plotly/datasets/raw/master/tips.csv";
        log(concat("Downloading ", url));
        awaitIO(runCmd(concat("curl -s -L -O ", url)));
        log("Downloaded");

        // Size the read buffer from the downloaded file.
        uint64_t fileSize = statSync("tips.csv").st_size;
        log(concat("File size: ", str(fileSize)));
        string csv = readSync("tips.csv", malloc(fileSize));
        stringArray rows = split(csv, '\n');

        // Per-group sample count and running mean, keyed by party size.
        i32map counts = i32hAlloc(16);
        // NOTE(review): declared as i32map but allocated with f32hAlloc and
        // accessed with f32h* functions — confirm the intended type.
        i32map means = f32hAlloc(16);

        // Row 0 is skipped (presumably the CSV header — confirm).
        for (int row = 1; row < arrLen(rows); row++) {
            int32_t count;
            float mean;
            int32_t size = -1; // stays -1 when the row does not have 7 fields
            FREE(
                stringArray cols = split(aGet(rows, row), ',');
                if (arrLen(cols) == 7) {
                    float total_bill = parsef32(aGet(cols, 0));
                    float tip = parsef32(aGet(cols, 1));
                    size = parsei32(aGet(cols, 6));
                    float tip_pct = 100.0 * tip / total_bill;
                    if (!i32hGet(counts, size, count)) {
                        // First sample for this group.
                        count = 0;
                        mean = 0.0;
                    } else {
                        f32hGet(means, size, mean);
                    }
                    // Incremental mean update.
                    count += 1;
                    mean = (mean * float(count-1) + tip_pct) / float(count);
                }
            );
            // Table writes happen outside the FREE(...) region.
            if (size != -1) {
                i32hSet(counts, size, count);
                f32hSet(means, size, mean);
            }
        }

        // Report groups in ascending key order.
        i32array groupKeys = f32hKeys(means);
        i32sort(groupKeys);
        for (int k = 0; k < i32len(groupKeys); k++) {
            int32_t size = i32get(groupKeys, k);
            float mean;
            if (f32hGet(means, size, mean)) {
                FREE_ALL( log(concat("size: ", str(size), " tip_pct: ", str(mean))) );
            }
        }
    }
}
60 | 
61 | 


--------------------------------------------------------------------------------
/http_shader/ispc_int/runner.ispc:
--------------------------------------------------------------------------------
// Converts a varying unsigned 32-bit value to a signed int with a plain cast.
// Defined before httpd.ispc is included — presumably because the generated
// code calls it (confirm against httpd.ispc).
const varying int int_cast(const varying unsigned int32 v) { return (int32)v; }
 2 | 
 3 | #include "httpd.ispc"
 4 | 
 5 | 
// Runs the httpd shader entry point for every invocation of one workgroup.
//
// work_group_ID / work_groups give the workgroup's 3-D index and the 3-D grid
// size; they are repackaged as gl_WorkGroupID / gl_NumWorkGroups. Only the x
// dimension of the workgroup is iterated (y and z of the local ID are fixed
// at 0). gl_WorkGroupSize is not defined in this file — presumably it comes
// from the included httpd.ispc (confirm).
export void runner_dispatch(uniform int work_group_ID[3], uniform int work_groups[3], 
	uniform struct outputBuffer& v_656,  uniform struct inputBuffer& v_613,  uniform struct heapBuffer& _901,
	uniform struct requestBuffer& reqBuf,
	uniform struct responseBuffer& resBuf
)
{
    uniform int3 gl_NumWorkGroups = int3(work_groups[0], work_groups[1], work_groups[2]);
    uniform int3 gl_WorkGroupID = int3(work_group_ID[0], work_group_ID[1], work_group_ID[2]);
    
    // Vectorise the workgroup
    foreach_tiled(lx = 0 ... gl_WorkGroupSize.x)
    {
        varying int3 gl_LocalInvocationID = int3(lx, 0, 0);
        varying int3 gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID;
        // Computed but not passed to httpd_ispc_main below.
        varying int gl_LocalInvocationIndex = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x;
        
        // NOTE(review): resBuf is passed before reqBuf here; the order must
        // match the generated httpd_ispc_main signature — confirm.
        httpd_ispc_main( 
	        v_656,
	        v_613,
	        resBuf,
	        reqBuf,
	        gl_NumWorkGroups,
	        gl_GlobalInvocationID,
	        _901
        );
    }
}
33 | 
// ISPC task body: one task instance handles one workgroup. The workgroup ID
// is taken from the task's 3-D launch index (taskIndex0/1/2) and forwarded,
// together with all buffers, to runner_dispatch.
task
void runner_task(uniform int work_groups[3],
	uniform struct outputBuffer& v_656,  uniform struct inputBuffer& v_613,  uniform struct heapBuffer& _901,
	uniform struct requestBuffer& reqBuf,
	uniform struct responseBuffer& resBuf
)
{
    uniform int workGroupID[3];
    workGroupID[0] = taskIndex0;
    workGroupID[1] = taskIndex1;
    workGroupID[2] = taskIndex2;
    runner_dispatch(workGroupID, work_groups,
        v_656, v_613, _901, reqBuf, resBuf
    );
}
49 | 
// Exported entry point called from the C++ host: launches one runner_task per
// workgroup over a work_groups[0] x work_groups[1] x work_groups[2] grid.
//
// Note the parameter order: this function takes the input buffer (v_613)
// before the output buffer (v_656), while runner_task takes output first;
// the call below passes them in runner_task's order.
export void runner_main(uniform int work_groups[3],
	uniform struct inputBuffer& v_613,  
	uniform struct outputBuffer& v_656,  
	uniform struct heapBuffer& _901,
	uniform struct requestBuffer& reqBuf,
	uniform struct responseBuffer& resBuf
)
{
    launch[work_groups[0], work_groups[1], work_groups[2]]
    runner_task(work_groups, 
        v_656, v_613, _901, reqBuf, resBuf
    );
}
63 | 
64 | 


--------------------------------------------------------------------------------
/docker/spirv-runner/program.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // targets/kernel/program.h
 3 | // (Header automatically generated by the ispc compiler.)
 4 | // DO NOT EDIT THIS FILE.
 5 | //
 6 | 
 7 | #ifndef ISPC_TARGETS_KERNEL_PROGRAM_H
 8 | #define ISPC_TARGETS_KERNEL_PROGRAM_H
 9 | 
10 | #include <stdint.h>
11 | 
12 | 
13 | 
14 | #ifdef __cplusplus
15 | namespace ispc { /* namespace */
16 | #endif // __cplusplus
17 | 
18 | #ifndef __ISPC_ALIGN__
19 | #if defined(__clang__) || !defined(_MSC_VER)
20 | // Clang, GCC, ICC
21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s)))
22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s)
23 | #else
24 | // Visual Studio
25 | #define __ISPC_ALIGN__(s) __declspec(align(s))
26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct
27 | #endif
28 | #endif
29 | 
// NOTE(review): this header is generated by ispc; comments added here are
// lost whenever it is regenerated.

// Kernel input buffer. Declared with a single float element;
// NOTE(review): presumably callers overlay it on a larger buffer — confirm.
#ifndef __ISPC_STRUCT_inputs__
#define __ISPC_STRUCT_inputs__
struct inputs {
    float inputData[1];
};
#endif

// Kernel output buffer; same single-element declaration as `inputs`.
#ifndef __ISPC_STRUCT_outputs__
#define __ISPC_STRUCT_outputs__
struct outputs {
    float outputData[1];
};
#endif
43 | 
44 | 
45 | ///////////////////////////////////////////////////////////////////////////
46 | // Functions exported from ispc code
47 | ///////////////////////////////////////////////////////////////////////////
// C-linkage declarations of the functions exported from the ispc code
// (extern "C" is skipped when __ISPC_NO_EXTERN_C is defined to non-zero).
// work_groups / work_group_ID are passed as int32_t pointers;
// NOTE(review): presumably 3-element arrays (x, y, z) — confirm against the
// .ispc source.
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
extern "C" {
#endif // __cplusplus
    extern void program_dispatch_all(int32_t * work_groups, struct inputs &_20, struct outputs &_164);
    extern void program_dispatch_all_tiled(int32_t * work_groups, struct inputs &_20, struct outputs &_164);
    extern void program_dispatch_single(int32_t * work_group_ID, int32_t * work_groups, struct inputs &_20, struct outputs &_164);
    extern void program_dispatch_single_tiled(int32_t * work_group_ID, int32_t * work_groups, struct inputs &_20, struct outputs &_164);
    // Writes the workgroup dimensions into the three out-parameters.
    extern void program_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z);
    extern void runner_main(int32_t * work_groups, struct inputs &input, struct outputs &output);
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
} /* end extern C */
#endif // __cplusplus
60 | 
61 | 
62 | #ifdef __cplusplus
63 | } /* namespace */
64 | #endif // __cplusplus
65 | 
66 | #endif // ISPC_TARGETS_KERNEL_PROGRAM_H
67 | 


--------------------------------------------------------------------------------
/http_shader/ispc_char/httpd.ispc.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // httpd.ispc.h
 3 | // (Header automatically generated by the ispc compiler.)
 4 | // DO NOT EDIT THIS FILE.
 5 | //
 6 | 
 7 | #ifndef ISPC_HTTPD_ISPC_H
 8 | #define ISPC_HTTPD_ISPC_H
 9 | 
10 | #include <stdint.h>
11 | 
12 | 
13 | 
14 | #ifdef __cplusplus
15 | namespace ispc { /* namespace */
16 | #endif // __cplusplus
17 | 
18 | #ifndef __ISPC_ALIGN__
19 | #if defined(__clang__) || !defined(_MSC_VER)
20 | // Clang, GCC, ICC
21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s)))
22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s)
23 | #else
24 | // Visual Studio
25 | #define __ISPC_ALIGN__(s) __declspec(align(s))
26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct
27 | #endif
28 | #endif
29 | 
// NOTE(review): this header is generated by ispc; comments added here are
// lost whenever it is regenerated.
//
// Each buffer type below is a struct whose only member is an unsized int32_t
// array, so the struct itself carries no storage; the host program in this
// directory (httpd_ispc.cpp) casts its own byte arrays to these types.

// Shader output storage.
#ifndef __ISPC_STRUCT_outputBuffer__
#define __ISPC_STRUCT_outputBuffer__
struct outputBuffer {
    int32_t outputBytes[];
};
#endif

// Shader input storage.
#ifndef __ISPC_STRUCT_inputBuffer__
#define __ISPC_STRUCT_inputBuffer__
struct inputBuffer {
    int32_t inputBytes[];
};
#endif

// Scratch heap storage.
#ifndef __ISPC_STRUCT_heapBuffer__
#define __ISPC_STRUCT_heapBuffer__
struct heapBuffer {
    int32_t heap[];
};
#endif

// Request storage; not referenced by the functions exported from this header.
#ifndef __ISPC_STRUCT_requestBuffer__
#define __ISPC_STRUCT_requestBuffer__
struct requestBuffer {
    int32_t request[];
};
#endif

// Response storage; not referenced by the functions exported from this header.
#ifndef __ISPC_STRUCT_responseBuffer__
#define __ISPC_STRUCT_responseBuffer__
struct responseBuffer {
    int32_t response[];
};
#endif
64 | 
65 | 
66 | ///////////////////////////////////////////////////////////////////////////
67 | // Functions exported from ispc code
68 | ///////////////////////////////////////////////////////////////////////////
// C-linkage declarations of the functions exported from the ispc code
// (extern "C" is skipped when __ISPC_NO_EXTERN_C is defined to non-zero).
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
extern "C" {
#endif // __cplusplus
    // Writes the workgroup dimensions into the three out-parameters.
    extern void httpd_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z);
    // Entry point used by the host: takes the workgroup counts plus the
    // input, output and heap buffers (in that order).
    extern void runner_main(int32_t * work_groups, struct inputBuffer &v_94, struct outputBuffer &v_656, struct heapBuffer &_901);
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
} /* end extern C */
#endif // __cplusplus
77 | 
78 | 
79 | #ifdef __cplusplus
80 | } /* namespace */
81 | #endif // __cplusplus
82 | 
83 | #endif // ISPC_HTTPD_ISPC_H
84 | 


--------------------------------------------------------------------------------
/http_shader/ispc_ivec4/httpd.ispc.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // httpd.ispc.h
 3 | // (Header automatically generated by the ispc compiler.)
 4 | // DO NOT EDIT THIS FILE.
 5 | //
 6 | 
 7 | #ifndef ISPC_HTTPD_ISPC_H
 8 | #define ISPC_HTTPD_ISPC_H
 9 | 
10 | #include <stdint.h>
11 | 
12 | 
13 | 
14 | #ifdef __cplusplus
15 | namespace ispc { /* namespace */
16 | #endif // __cplusplus
17 | 
18 | #ifndef __ISPC_ALIGN__
19 | #if defined(__clang__) || !defined(_MSC_VER)
20 | // Clang, GCC, ICC
21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s)))
22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s)
23 | #else
24 | // Visual Studio
25 | #define __ISPC_ALIGN__(s) __declspec(align(s))
26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct
27 | #endif
28 | #endif
29 | 
// NOTE(review): this header is generated by ispc; comments added here are
// lost whenever it is regenerated.
//
// Each buffer type below is a struct whose only member is an unsized int32_t
// array, so the struct itself carries no storage.
// NOTE(review): presumably the host casts its own allocations to these types
// — confirm in httpd_ispc_ivec4.cpp.

// Shader output storage.
#ifndef __ISPC_STRUCT_outputBuffer__
#define __ISPC_STRUCT_outputBuffer__
struct outputBuffer {
    int32_t outputBytes[];
};
#endif

// Shader input storage.
#ifndef __ISPC_STRUCT_inputBuffer__
#define __ISPC_STRUCT_inputBuffer__
struct inputBuffer {
    int32_t inputBytes[];
};
#endif

// Scratch heap storage.
#ifndef __ISPC_STRUCT_heapBuffer__
#define __ISPC_STRUCT_heapBuffer__
struct heapBuffer {
    int32_t heap[];
};
#endif

// Request storage; not referenced by the functions exported from this header.
#ifndef __ISPC_STRUCT_requestBuffer__
#define __ISPC_STRUCT_requestBuffer__
struct requestBuffer {
    int32_t request[];
};
#endif

// Response storage; not referenced by the functions exported from this header.
#ifndef __ISPC_STRUCT_responseBuffer__
#define __ISPC_STRUCT_responseBuffer__
struct responseBuffer {
    int32_t response[];
};
#endif
64 | 
65 | 
66 | ///////////////////////////////////////////////////////////////////////////
67 | // Functions exported from ispc code
68 | ///////////////////////////////////////////////////////////////////////////
// C-linkage declarations of the functions exported from the ispc code
// (extern "C" is skipped when __ISPC_NO_EXTERN_C is defined to non-zero).
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
extern "C" {
#endif // __cplusplus
    // Writes the workgroup dimensions into the three out-parameters.
    extern void httpd_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z);
    // Entry point used by the host: takes the workgroup counts plus the
    // input, output and heap buffers (in that order).
    extern void runner_main(int32_t * work_groups, struct inputBuffer &v_94, struct outputBuffer &v_656, struct heapBuffer &_901);
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
} /* end extern C */
#endif // __cplusplus
77 | 
78 | 
79 | #ifdef __cplusplus
80 | } /* namespace */
81 | #endif // __cplusplus
82 | 
83 | #endif // ISPC_HTTPD_ISPC_H
84 | 


--------------------------------------------------------------------------------
/http_shader/ispc_int/httpd.ispc.h:
--------------------------------------------------------------------------------
 1 | //
 2 | // httpd.ispc.h
 3 | // (Header automatically generated by the ispc compiler.)
 4 | // DO NOT EDIT THIS FILE.
 5 | //
 6 | 
 7 | #ifndef ISPC_HTTPD_ISPC_H
 8 | #define ISPC_HTTPD_ISPC_H
 9 | 
10 | #include <stdint.h>
11 | 
12 | 
13 | 
14 | #ifdef __cplusplus
15 | namespace ispc { /* namespace */
16 | #endif // __cplusplus
17 | 
18 | #ifndef __ISPC_ALIGN__
19 | #if defined(__clang__) || !defined(_MSC_VER)
20 | // Clang, GCC, ICC
21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s)))
22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s)
23 | #else
24 | // Visual Studio
25 | #define __ISPC_ALIGN__(s) __declspec(align(s))
26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct
27 | #endif
28 | #endif
29 | 
// NOTE(review): this header is generated by ispc; comments added here are
// lost whenever it is regenerated.
//
// Each buffer type below is a struct whose only member is an unsized int32_t
// array, so the struct itself carries no storage.
// NOTE(review): presumably the host casts its own allocations to these types
// — confirm in httpd_ispc.cpp.

// Shader output storage.
#ifndef __ISPC_STRUCT_outputBuffer__
#define __ISPC_STRUCT_outputBuffer__
struct outputBuffer {
    int32_t outputBytes[];
};
#endif

// Shader input storage.
#ifndef __ISPC_STRUCT_inputBuffer__
#define __ISPC_STRUCT_inputBuffer__
struct inputBuffer {
    int32_t inputBytes[];
};
#endif

// Scratch heap storage.
#ifndef __ISPC_STRUCT_heapBuffer__
#define __ISPC_STRUCT_heapBuffer__
struct heapBuffer {
    int32_t heap[];
};
#endif

// Request storage, passed to runner_main as reqBuf.
#ifndef __ISPC_STRUCT_requestBuffer__
#define __ISPC_STRUCT_requestBuffer__
struct requestBuffer {
    int32_t request[];
};
#endif

// Response storage, passed to runner_main as resBuf.
#ifndef __ISPC_STRUCT_responseBuffer__
#define __ISPC_STRUCT_responseBuffer__
struct responseBuffer {
    int32_t response[];
};
#endif
64 | 
65 | 
66 | ///////////////////////////////////////////////////////////////////////////
67 | // Functions exported from ispc code
68 | ///////////////////////////////////////////////////////////////////////////
// C-linkage declarations of the functions exported from the ispc code
// (extern "C" is skipped when __ISPC_NO_EXTERN_C is defined to non-zero).
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
extern "C" {
#endif // __cplusplus
    // Writes the workgroup dimensions into the three out-parameters.
    extern void httpd_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z);
    // Entry point used by the host: takes the workgroup counts plus the
    // input, output, heap, request and response buffers (in that order).
    extern void runner_main(int32_t * work_groups, struct inputBuffer &v_94, struct outputBuffer &v_656, struct heapBuffer &_901, struct requestBuffer &reqBuf, struct responseBuffer &resBuf);
#if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C )
} /* end extern C */
#endif // __cplusplus
77 | 
78 | 
79 | #ifdef __cplusplus
80 | } /* namespace */
81 | #endif // __cplusplus
82 | 
83 | #endif // ISPC_HTTPD_ISPC_H
84 | 


--------------------------------------------------------------------------------
/http_shader/ispc_char/httpd_ispc.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2015-2017 ARM Limited
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <unistd.h>
18 | #include "httpd.ispc.h"
19 | 
20 | #include <stdio.h>
21 | #include <string.h>
22 | #include <chrono>
23 | 
24 | #ifndef GLM_FORCE_SWIZZLE
25 | #define GLM_FORCE_SWIZZLE
26 | #endif
27 | 
28 | #ifndef GLM_FORCE_RADIANS
29 | #define GLM_FORCE_RADIANS
30 | #endif
31 | 
32 | #include <glm/glm.hpp>
33 | using namespace glm;
34 | using namespace ispc;
35 | 
36 | // Build some input data for our compute shader.
37 | #define NUM_WORKGROUPS_X 32
38 | #define NUM_WORKGROUPS_Y 1
39 | 
40 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 16 * 512;
41 | 
42 | static uint8_t inputs[1024 * requestCount] = {};
43 | static uint8_t outputs[1024 * requestCount] = {};
44 | static uint8_t heaps[1024 * requestCount] = {};
45 | 
46 | int main()
47 | {
48 | 	int bytes = fread(((char*)inputs)+16, 1, 1024-16, stdin);
49 | 	((uint32_t*)inputs)[0] = bytes;
50 | 	for (int i = 1; i < requestCount; i++) {
51 | 		memcpy((void*)(inputs + 1024 * i), (void*)inputs, 1024);
52 | 	}
53 | 
54 | 	std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
55 | 	for (int j = 0; j < 1000; j++) {
56 | 		int32_t workgroups[] = {NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1};
57 | 		runner_main(workgroups,
58 | 			*(struct inputBuffer*)inputs,
59 | 			*(struct outputBuffer*)outputs,
60 | 			*(struct heapBuffer*)heaps
61 | 		);
62 | 	}
63 | 	std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
64 | 
65 | 	for (int i = 0; i < 1; i++) {
66 | 		printf("%d\n", ((uint32_t*)outputs)[256*i]);
67 | 		write(1, outputs+1024*i+16, ((uint32_t*)outputs)[256*i]);
68 | 	}
69 | 
70 | 	printf("Elapsed: %ld ms\n", std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count());
71 | 	printf("Million requests per second: %.3f\n", 1e-6 * (requestCount * 1000.0) / (0.001 * std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count()));
72 | 
73 | 	return 0;
74 | }
75 | 


--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
# Build environment for compiling GLSL -> SPIR-V -> ISPC, with an ISPC build
# that has WebAssembly support enabled, plus Emscripten.
FROM ispc/ubuntu_16.04:llvm100

# NOTE(review): `apt update` runs in its own layer, and the later
# `apt install` steps reuse that (possibly cached and stale) package index.
RUN apt update
RUN apt install -y libxml2 emacs-nox

# Prebuilt WebAssembly toolchain binaries, pinned to one emscripten-releases build.
WORKDIR /usr/local/src/wasm-ld
RUN wget -qO- https://storage.googleapis.com/webassembly/emscripten-releases-builds/linux/65d33d604d3fa0ebe03548378b898fc6608e9cb8/wasm-binaries.tbz2 | tar xj

# Update the ISPC checkout in this directory and build it with WASM enabled.
# NOTE(review): `git pull` is unpinned, so the result depends on build time.
WORKDIR /usr/local/src/ispc
RUN git pull
RUN mkdir build; cd build && cmake .. -DWASM_ENABLED=ON && make -j32
ENV PATH="/usr/local/src/ispc/build/bin:/usr/local/src/wasm-ld/install/bin/:${PATH}"

WORKDIR /usr/local/src/spirv

RUN apt install -y libglm-dev

# SPIR-V tooling sources: the ISPC-backend fork of SPIRV-Cross, upstream
# SPIRV-Cross, and glslang.
RUN git clone --depth=1 https://github.com/GameTechDev/SPIRV-Cross SPIRV-Cross-ISPC
RUN git clone --depth=1 https://github.com/KhronosGroup/SPIRV-Cross
RUN git clone --depth=1 https://github.com/KhronosGroup/glslang

# Build everything below with clang.
ENV CXX="clang++"
ENV CC="clang"

WORKDIR /usr/local/src/spirv/glslang
RUN mkdir build; cd build && cmake .. -DCMAKE_BUILD_TYPE=Release && make -j32 install

WORKDIR /usr/local/src/spirv/SPIRV-Cross
RUN make -j32 && cp spirv-cross /usr/bin

# The ISPC fork is not built here; a prebuilt binary from the build context
# is installed as spirv-cross-ispc instead.
WORKDIR /usr/local/src/spirv/SPIRV-Cross-ISPC
COPY spirv-cross-linux-x86-64 /usr/bin/spirv-cross-ispc

WORKDIR /usr/local/src
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN git clone --depth=1 https://github.com/aschrein/ispc/ ispc-wasm

# Second ISPC build from the wasm_tests branch; only the `ispc` target is
# built. The PATH entry added afterwards precedes the first build's.
WORKDIR /usr/local/src/ispc-wasm
RUN git pull origin wasm_tests:wasm_tests && git checkout wasm_tests
RUN mkdir build && cd build && cmake .. -DWASM_ENABLED=ON && make -j32 ispc
ENV PATH="/usr/local/src/ispc-wasm/build/bin:/usr/local/src/wasm-ld/install/bin/:${PATH}"

# Emscripten SDK.
# NOTE(review): `emsdk install latest` is unpinned, but the PATH below
# hard-codes node/12.9.1_64bit — confirm that directory still exists for the
# installed SDK version.
WORKDIR /usr/local/src
RUN git clone https://github.com/emscripten-core/emsdk && cd emsdk && ./emsdk install latest && ./emsdk activate latest
ENV PATH="/usr/local/src/emsdk:/usr/local/src/emsdk/node/12.9.1_64bit/bin:/usr/local/src/emsdk/upstream/emscripten:${PATH}"

WORKDIR /usr/local/src/spirv-wasm
RUN apt install -y ruby
# Compile two throwaway programs with the pthread/SIMD flags
# (presumably to warm Emscripten's library cache in the image — confirm).
RUN echo "int main(int argc, char *argv[]) { return 0; }" > test.cpp && emcc test.cpp -o TEST.html -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=16 -s TOTAL_MEMORY=268435456 -msimd128 -s SIMD=1
RUN printf "#include <iostream>\nusing namespace std;\nint main() { cout << \"foo\"; return 0; }" > test.cpp && em++ -lpthread -fno-exceptions test.cpp -O3 -o TEST.html -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=16 -s TOTAL_MEMORY=268435456 -msimd128 -s SIMD=1 -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]'
# Copy the runner sources and prebuild their object files.
COPY ./spirv-runner .
RUN make tasksys.o builtins.o 
RUN make ispcRunner.o
54 | 


--------------------------------------------------------------------------------
/spirv-io/src/gls.cpp:
--------------------------------------------------------------------------------
 1 | #include "compute_application.hpp"
 2 | 
 3 | #include <stdio.h>
 4 | #include <ctype.h>
 5 | #include <sys/stat.h>
 6 | 
 7 | #define MD5_LEN 32
 8 | 
 9 | bool getFileMD5(char *filename, char *md5sum)
10 | {
11 |     char cmd[500];
12 |     snprintf(cmd, sizeof(cmd), "md5sum %s 2>/dev/null", filename);
13 | 
14 |     FILE *pipe = popen(cmd, "r");
15 |     if (pipe == NULL) return false;
16 |     size_t bytes = fread(md5sum, 1, MD5_LEN, pipe);
17 |     pclose(pipe);
18 | 
19 |     md5sum[bytes] = 0;
20 |     return bytes == MD5_LEN;
21 | }
22 | 
// Prints the command-line synopsis to stdout and returns EXIT_FAILURE so
// callers can write `return usage();`.
int usage() {
    fputs("USAGE: gls [-t] [-v] program.(spv|glsl) args...\n", stdout);
    return EXIT_FAILURE;
}
27 | 
28 | int main(int argc, char *argv[])
29 | {
30 |     ComputeApplication app;
31 |     app.workSize[0] = 20;
32 |     int argIdx = 1;
33 |     if (argIdx >= argc) return usage();
34 |     if (strcmp(argv[argIdx], "-t") == 0) {
35 |       app.timings = true;
36 |       argIdx++;
37 |     }
38 |     if (argIdx >= argc) return usage();
39 |     if (strcmp(argv[argIdx], "-v") == 0) {
40 |       app.verbose = true;
41 |       argIdx++;
42 |     }
43 |     if (argIdx >= argc) return usage();
44 |     if (strcmp(argv[argIdx], "-t") == 0) {
45 |       app.timings = true;
46 |       argIdx++;
47 |     }
48 |     if (argIdx >= argc) return usage();
49 | 
50 |     int len = strlen(argv[argIdx]);
51 |     if (len > 5 && strcmp(argv[argIdx] + (len-5), ".glsl") == 0) {
52 |         char cmd[len + 520];
53 |         system("mkdir -p ~/.gls/cache/");
54 |         char md5sum[MD5_LEN + 1];
55 |         if (!getFileMD5(argv[argIdx], md5sum)) {
56 |         	fprintf(stderr, "Failed to get MD5 sum of file\n");
57 |         	return EXIT_FAILURE;
58 |         }
59 |         char spvFilename[500];
60 |         snprintf(spvFilename, sizeof(spvFilename), "%s/.gls/cache/%s.spv", getenv("HOME"), md5sum);
61 |         struct stat st;
62 |         if (0 != stat(spvFilename, &st)) {
63 |         	fprintf(stderr, "Compiling to SPIR-V\n");
64 | 	        snprintf(cmd, sizeof(cmd), "glsl2spv \"%s\" %s", argv[argIdx], spvFilename);
65 | 	        system(cmd);
66 | 	        if (0 != stat(spvFilename, &st)) {
67 | 	        	fprintf(stderr, "Failed to compile SPIR-V: %s\n", cmd);
68 | 		        return EXIT_FAILURE;
69 | 	        }
70 |         }
71 |         argv[argIdx] = spvFilename;
72 |     }
73 | 
74 |     try
75 |     {
76 |         app.run(argv[argIdx], argc-argIdx, argv+argIdx);
77 |     }
78 |     catch (const std::runtime_error &e)
79 |     {
80 |         printf("%s\n", e.what());
81 |         app.cleanup();
82 |         return EXIT_FAILURE;
83 |     }
84 | 
85 |     return app.exitCode;
86 | }
87 | 


--------------------------------------------------------------------------------
/spirv-io/src/gls_cpu.cpp:
--------------------------------------------------------------------------------
 1 | #include "cpu_compute_application.hpp"
 2 | 
 3 | #include <stdio.h>
 4 | #include <ctype.h>
 5 | #include <sys/stat.h>
 6 | 
 7 | #define MD5_LEN 32
 8 | 
 9 | bool getFileMD5(char *filename, char *md5sum)
10 | {
11 |     char cmd[500];
12 |     snprintf(cmd, sizeof(cmd), "md5sum %s 2>/dev/null", filename);
13 | 
14 |     FILE *pipe = popen(cmd, "r");
15 |     if (pipe == NULL) return false;
16 |     size_t bytes = fread(md5sum, 1, MD5_LEN, pipe);
17 |     pclose(pipe);
18 | 
19 |     md5sum[bytes] = 0;
20 |     return bytes == MD5_LEN;
21 | }
22 | 
// Prints the command-line synopsis to stdout and returns EXIT_FAILURE so
// callers can write `return usage();`.
int usage() {
    fputs("USAGE: gls [-t] [-v] program.(spv|glsl) args...\n", stdout);
    return EXIT_FAILURE;
}
27 | 
28 | int main(int argc, char *argv[])
29 | {
30 |     ComputeApplication app;
31 |     app.workSize[0] = 20;
32 |     int argIdx = 1;
33 |     if (argIdx >= argc) return usage();
34 |     if (strcmp(argv[argIdx], "-t") == 0) {
35 |       app.timings = true;
36 |       argIdx++;
37 |     }
38 |     if (argIdx >= argc) return usage();
39 |     if (strcmp(argv[argIdx], "-v") == 0) {
40 |       app.verbose = true;
41 |       argIdx++;
42 |     }
43 |     if (argIdx >= argc) return usage();
44 |     if (strcmp(argv[argIdx], "-t") == 0) {
45 |       app.timings = true;
46 |       argIdx++;
47 |     }
48 |     if (argIdx >= argc) return usage();
49 | 
50 |     int len = strlen(argv[argIdx]);
51 |     if (len > 5 && strcmp(argv[argIdx] + (len-5), ".glsl") == 0) {
52 |         char cmd[len + 520];
53 |         system("mkdir -p ~/.gls/cache/");
54 |         char md5sum[MD5_LEN + 1];
55 |         if (!getFileMD5(argv[argIdx], md5sum)) {
56 |         	fprintf(stderr, "Failed to get MD5 sum of file\n");
57 |         	return EXIT_FAILURE;
58 |         }
59 |         char spvFilename[500];
60 |         snprintf(spvFilename, sizeof(spvFilename), "%s/.gls/cache/%s.spv", getenv("HOME"), md5sum);
61 |         struct stat st;
62 |         if (0 != stat(spvFilename, &st)) {
63 |         	fprintf(stderr, "Compiling to SPIR-V\n");
64 | 	        snprintf(cmd, sizeof(cmd), "glsl2spv \"%s\" %s", argv[argIdx], spvFilename);
65 | 	        system(cmd);
66 | 	        if (0 != stat(spvFilename, &st)) {
67 | 	        	fprintf(stderr, "Failed to compile SPIR-V: %s\n", cmd);
68 | 		        return EXIT_FAILURE;
69 | 	        }
70 |         }
71 |         argv[argIdx] = spvFilename;
72 |     }
73 | 
74 |     try
75 |     {
76 |         app.run(argv[argIdx], argc-argIdx, argv+argIdx);
77 |     }
78 |     catch (const std::runtime_error &e)
79 |     {
80 |         printf("%s\n", e.what());
81 |         app.cleanup();
82 |         return EXIT_FAILURE;
83 |     }
84 | 
85 |     return app.exitCode;
86 | }
87 | 


--------------------------------------------------------------------------------
/include/spirv_cross/thread_group.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2015-2017 ARM Limited
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #ifndef SPIRV_CROSS_THREAD_GROUP_HPP
 18 | #define SPIRV_CROSS_THREAD_GROUP_HPP
 19 | 
 20 | #include <condition_variable>
 21 | #include <mutex>
 22 | #include <thread>
 23 | 
 24 | namespace spirv_cross
 25 | {
 26 | template <typename T, unsigned Size>
 27 | class ThreadGroup
 28 | {
 29 | public:
 30 | 	ThreadGroup(T *impl)
 31 | 	{
 32 | 		for (unsigned i = 0; i < Size; i++)
 33 | 			workers[i].start(&impl[i]);
 34 | 	}
 35 | 
 36 | 	void run()
 37 | 	{
 38 | 		for (auto &worker : workers)
 39 | 			worker.run();
 40 | 	}
 41 | 
 42 | 	void wait()
 43 | 	{
 44 | 		for (auto &worker : workers)
 45 | 			worker.wait();
 46 | 	}
 47 | 
 48 | private:
 49 | 	struct Thread
 50 | 	{
 51 | 		enum State
 52 | 		{
 53 | 			Idle,
 54 | 			Running,
 55 | 			Dying
 56 | 		};
 57 | 		State state = Idle;
 58 | 
 59 | 		void start(T *impl)
 60 | 		{
 61 | 			worker = std::thread([impl, this] {
 62 | 				for (;;)
 63 | 				{
 64 | 					{
 65 | 						std::unique_lock<std::mutex> l{ lock };
 66 | 						cond.wait(l, [this] { return state != Idle; });
 67 | 						if (state == Dying)
 68 | 							break;
 69 | 					}
 70 | 
 71 | 					impl->main();
 72 | 
 73 | 					std::lock_guard<std::mutex> l{ lock };
 74 | 					state = Idle;
 75 | 					cond.notify_one();
 76 | 				}
 77 | 			});
 78 | 		}
 79 | 
 80 | 		void wait()
 81 | 		{
 82 | 			std::unique_lock<std::mutex> l{ lock };
 83 | 			cond.wait(l, [this] { return state == Idle; });
 84 | 		}
 85 | 
 86 | 		void run()
 87 | 		{
 88 | 			std::lock_guard<std::mutex> l{ lock };
 89 | 			state = Running;
 90 | 			cond.notify_one();
 91 | 		}
 92 | 
 93 | 		~Thread()
 94 | 		{
 95 | 			if (worker.joinable())
 96 | 			{
 97 | 				{
 98 | 					std::lock_guard<std::mutex> l{ lock };
 99 | 					state = Dying;
100 | 					cond.notify_one();
101 | 				}
102 | 				worker.join();
103 | 			}
104 | 		}
105 | 		std::thread worker;
106 | 		std::condition_variable cond;
107 | 		std::mutex lock;
108 | 	};
109 | 	Thread workers[Size];
110 | };
111 | }
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/http_shader/ispc_int/httpd_ispc.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2015-2017 ARM Limited
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <unistd.h>
18 | #include "httpd.ispc.h"
19 | 
20 | #include <stdio.h>
21 | #include <string.h>
22 | #include <chrono>
23 | 
24 | #ifndef GLM_FORCE_SWIZZLE
25 | #define GLM_FORCE_SWIZZLE
26 | #endif
27 | 
28 | #ifndef GLM_FORCE_RADIANS
29 | #define GLM_FORCE_RADIANS
30 | #endif
31 | 
32 | #include <glm/glm.hpp>
33 | using namespace glm;
34 | using namespace ispc;
35 | 
36 | // Build some input data for our compute shader.
37 | #define NUM_WORKGROUPS_X 32
38 | #define NUM_WORKGROUPS_Y 32
39 | 
40 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 32;
41 | 
42 | static int inputBuffe[256 * requestCount] = {};
43 | static int outputBuffe[256 * requestCount] = {};
44 | static int heapBuffe[1024 * requestCount] = {};
45 | static int requestBuffe[1024 * requestCount] = {};
46 | static int responseBuffe[1024 * requestCount] = {};
47 | 
48 | int main()
49 | {
50 | 	int bytes = fread(((char*)inputBuffe)+4, 1, 1020, stdin);
51 | 	inputBuffe[0] = bytes;
52 | 	for (int i = 1; i < requestCount; i++) {
53 | 		memcpy((void*)(inputBuffe + 256 * i), (void*)inputBuffe, 1024);
54 | 	}
55 | 
56 | 	std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
57 | 	for (int j = 0; j < 1000; j++) {
58 | 		int32_t workgroups[] = {NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1};
59 | 		runner_main(workgroups,
60 | 			*(struct inputBuffer*)inputBuffe,
61 | 			*(struct outputBuffer*)outputBuffe,
62 | 			*(struct heapBuffer*)heapBuffe,
63 | 			*(struct requestBuffer*)requestBuffe,
64 | 			*(struct responseBuffer*)responseBuffe
65 | 		);
66 | 	}
67 | 	std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
68 | 
69 | 	for (int i = 0; i < 1; i++) {
70 | 		printf("%d\n", outputBuffe[256*i]);
71 | 		write(1, ((char*)outputBuffe)+1024*i+4, outputBuffe[256*i]);
72 | 	}
73 | 
74 | 	printf("Elapsed: %ld ms\n", std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count());
75 | 	printf("Million requests per second: %.3f\n", 1e-6 * (requestCount * 1000.0) / (0.001 * std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count()));
76 | 
77 | 
78 | 	return 0;
79 | }
80 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # spirv-wasm
 2 | 
 3 | Run SPIR-V shaders in WebAssembly
 4 | 
 5 | See demo at https://fhtr.org/spirv-wasm - uses WebAssembly Threads, so you may need to turn them on in chrome://flags or such.
 6 | 
 7 | ## More demos
 8 | 
 9 | These require WebAssembly SIMD enabled in chrome://flags. All running on the CPU.
10 | 
11 | - [GLSL AOBench](https://fhtr.org/spirv-wasm/aobench), pretty much a copy-paste from the [ShaderToy version](https://www.shadertoy.com/view/llKBzd)
12 | - [Animated ISPC Mandelbrot](https://fhtr.org/spirv-wasm/ispc)
13 | 
14 | ## Goal
15 | 
16 | Write low-effort code in one language, run safely in the browser at 50% of hardware peak performance. 
17 | 
18 | For comparison, naive C++ achieves 1% of HW peak performance on a 16-core CPU. If you include the GPU, it's closer to 0.1% of peak perf. Divide by the number of compute nodes available to you. Yes, yes, adding OpenMP #pragmas and doing hot spots in SoA intrinsics / ISPC, CUDA, and MPI does get you to 90%+ after much effort.
19 | 
20 | ## Contribute
21 | 
22 | Help! There's a lot of work to do! Estimated effort for the first pass: works OK for simple pipelines 100 commits, GPU integration 300 commits, job system 300 commits, simple distributed computing 200 commits, decent standard library 500 commits. Reaching something at the level "Hey you've got to try this, it's awesome for X" is 3000 commits. I've got roughly 200 commits per year to spend here... you do the math.
23 | 
24 | Try to get it running (use the [Docker image](https://github.com/kig/spirv-wasm/tree/master/docker/)), write some shaders, see how it goes. 
25 | 
26 | Have a look at the [issues](https://github.com/kig/spirv-wasm/issues) and make some more. 
27 | 
28 | 
29 | 
30 | ## Build
31 | 
32 | Requires Emscripten, glslangValidator, glm and spirv-cross.
33 | 
34 | [Install Emscripten](https://emscripten.org/docs/getting_started/downloads.html)
35 | 
36 | The others are likely in your package manager.
37 | 
38 | ```bash
39 | brew install glslangValidator
40 | brew install spirv-cross
41 | brew install glm
42 | ```
43 | 
44 | Now you can build the shader:
45 | 
46 | ```bash
47 | source somewhere/emsdk/emsdk_env.sh
48 | emmake make
49 | serve
50 | ```
51 | 
52 | If everything went right, you can open [http://localhost:5000/src/mandel.html](http://localhost:5000/src/mandel.html)
53 | and hopefully see a Mandelbrot fractal. Check the browser console for timings.
54 | 
55 | 
56 | ## Debug information
57 | 
58 | You can make Emscripten emit source maps with the -g4 flag. You can make glslangValidator and the latest versions of spirv-cross emit line numbers like this:
59 | 
60 | ```
61 | glslangValidator -g -V -o mandel.spv mandel.comp
62 | spirv-cross --cpp --emit-line-directives --output mandel.spv.cpp mandel.spv
63 | ```
64 | 
65 | Then you just have to hack the Emscripten source maps to map from the cpp to the compute shader line numbers.
66 | 
67 | Send a PR if you do!
68 | 
69 | 


--------------------------------------------------------------------------------
/include/spirv_cross/sampler.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2015-2017 ARM Limited
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #ifndef SPIRV_CROSS_SAMPLER_HPP
 18 | #define SPIRV_CROSS_SAMPLER_HPP
 19 | 
 20 | #include <vector>
 21 | 
 22 | namespace spirv_cross
 23 | {
 24 | struct spirv_cross_sampler_2d
 25 | {
 26 | 	inline virtual ~spirv_cross_sampler_2d()
 27 | 	{
 28 | 	}
 29 | };
 30 | 
 31 | template <typename T>
 32 | struct sampler2DBase : spirv_cross_sampler_2d
 33 | {
 34 | 	sampler2DBase(const spirv_cross_sampler_info *info)
 35 | 	{
 36 | 		mips.insert(mips.end(), info->mipmaps, info->mipmaps + info->num_mipmaps);
 37 | 		format = info->format;
 38 | 		wrap_s = info->wrap_s;
 39 | 		wrap_t = info->wrap_t;
 40 | 		min_filter = info->min_filter;
 41 | 		mag_filter = info->mag_filter;
 42 | 		mip_filter = info->mip_filter;
 43 | 	}
 44 | 
 45 | 	inline virtual T sample(glm::vec2 uv, float bias)
 46 | 	{
 47 | 		return sampleLod(uv, bias);
 48 | 	}
 49 | 
 50 | 	inline virtual T sampleLod(glm::vec2 uv, float lod)
 51 | 	{
 52 | 		if (mag_filter == SPIRV_CROSS_FILTER_NEAREST)
 53 | 		{
 54 | 			uv.x = wrap(uv.x, wrap_s, mips[0].width);
 55 | 			uv.y = wrap(uv.y, wrap_t, mips[0].height);
 56 | 			glm::vec2 uv_full = uv * glm::vec2(mips[0].width, mips[0].height);
 57 | 
 58 | 			int x = int(uv_full.x);
 59 | 			int y = int(uv_full.y);
 60 | 			return sample(x, y, 0);
 61 | 		}
 62 | 		else
 63 | 		{
 64 | 			return T(0, 0, 0, 1);
 65 | 		}
 66 | 	}
 67 | 
 68 | 	inline float wrap(float v, spirv_cross_wrap wrap, unsigned size)
 69 | 	{
 70 | 		switch (wrap)
 71 | 		{
 72 | 		case SPIRV_CROSS_WRAP_REPEAT:
 73 | 			return v - glm::floor(v);
 74 | 		case SPIRV_CROSS_WRAP_CLAMP_TO_EDGE:
 75 | 		{
 76 | 			float half = 0.5f / size;
 77 | 			return glm::clamp(v, half, 1.0f - half);
 78 | 		}
 79 | 
 80 | 		default:
 81 | 			return 0.0f;
 82 | 		}
 83 | 	}
 84 | 
 85 | 	std::vector<spirv_cross_miplevel> mips;
 86 | 	spirv_cross_format format;
 87 | 	spirv_cross_wrap wrap_s;
 88 | 	spirv_cross_wrap wrap_t;
 89 | 	spirv_cross_filter min_filter;
 90 | 	spirv_cross_filter mag_filter;
 91 | 	spirv_cross_mipfilter mip_filter;
 92 | };
 93 | 
 94 | typedef sampler2DBase<glm::vec4> sampler2D;
 95 | typedef sampler2DBase<glm::ivec4> isampler2D;
 96 | typedef sampler2DBase<glm::uvec4> usampler2D;
 97 | 
 98 | template <typename T>
 99 | inline T texture(const sampler2DBase<T> &samp, const glm::vec2 &uv, float bias = 0.0f)
100 | {
101 | 	return samp.sample(uv, bias);
102 | }
103 | }
104 | 
105 | #endif
106 | 


--------------------------------------------------------------------------------
/spirv-io/lib/binary_data.glsl:
--------------------------------------------------------------------------------
 1 | #define getBit(n, idx) (0 != ((n) & (1 << (idx))))
 2 | #define setBit(n, idx) ((n) | (1 << (idx)))
 3 | #define unsetBit(n, idx) ((n) & ~(1 << (idx)))
 4 | 
 5 | int32_t readI32fromIO(ptr_t i) {
 6 |     return (
 7 |           (int32_t(u8fromIO[i])   << 0u)
 8 |         | (int32_t(u8fromIO[i+1]) << 8u)
 9 |         | (int32_t(u8fromIO[i+2]) << 16u)
10 |         | (int32_t(u8fromIO[i+3]) << 24u)
11 |     );
12 | }
13 | 
// Assembles an unsigned 32-bit value from the four bytes of u8fromIO starting
// at index i; the byte at i becomes the least significant byte.
uint32_t readU32fromIO(ptr_t i) {
    uint32_t b0 = uint32_t(u8fromIO[i]);
    uint32_t b1 = uint32_t(u8fromIO[i+1]);
    uint32_t b2 = uint32_t(u8fromIO[i+2]);
    uint32_t b3 = uint32_t(u8fromIO[i+3]);
    return b0 | (b1 << 8u) | (b2 << 16u) | (b3 << 24u);
}
22 | 
// Reads eight bytes of u8fromIO starting at index i as an unsigned 64-bit
// value: the 32-bit word at i supplies the low half, the word at i+4 the high
// half.
uint64_t readU64fromIO(ptr_t i) {
    u32vec2 halves;
    halves.x = readU32fromIO(i);
    halves.y = readU32fromIO(i+4);
    return packUint2x32(halves);
}
26 | 
// Assembles a signed 32-bit value from the four bytes of u8heap starting at
// index i; the byte at i becomes the least significant byte.
int32_t readI32heap(ptr_t i) {
    int32_t b0 = int32_t(u8heap[i]);
    int32_t b1 = int32_t(u8heap[i+1]);
    int32_t b2 = int32_t(u8heap[i+2]);
    int32_t b3 = int32_t(u8heap[i+3]);
    return b0 | (b1 << 8u) | (b2 << 16u) | (b3 << 24u);
}
35 | 
// Assembles an unsigned 32-bit value from the four bytes of u8heap starting
// at index i; the byte at i becomes the least significant byte.
uint32_t readU32heap(ptr_t i) {
    uint32_t b0 = uint32_t(u8heap[i]);
    uint32_t b1 = uint32_t(u8heap[i+1]);
    uint32_t b2 = uint32_t(u8heap[i+2]);
    uint32_t b3 = uint32_t(u8heap[i+3]);
    return b0 | (b1 << 8u) | (b2 << 16u) | (b3 << 24u);
}
44 | 
// Reads eight bytes of u8heap starting at index i as an unsigned 64-bit
// value: the 32-bit word at i supplies the low half, the word at i+4 the high
// half.
uint64_t readU64heap(ptr_t i) {
    u32vec2 halves;
    halves.x = readU32heap(i);
    halves.y = readU32heap(i+4);
    return packUint2x32(halves);
}
48 | 
49 | 
50 | /*
51 | i64vec4 rotateLeft(i64vec4 v, i64vec4 v2, int offset) {
52 |     return (v << offset) | (i64vec4(v.yzw, v2.x) >> (64-offset));
53 | }
54 | 
55 | i64vec4 rotateRight(i64vec4 v, i64vec4 v2, int offset) {
56 |     return (i64vec4(v.w, v2.xyz) << (64-offset)) | (v2 >> offset);
57 | }
58 | 
59 | i64vec4 rotateLeftBytes(i64vec4 v1, i64vec4 v2, int offset) {
60 |     if (offset >= 24) {
61 |         v1 = i64vec4(v1.w, v2.xyz);
62 |         v2 = i64vec4(v2.w, 0, 0, 0);
63 |     } else if (offset >= 16) {
64 |         v1 = i64vec4(v1.zw, v2.xy);
65 |         v2 = i64vec4(v2.zw, 0, 0);
66 |     } else if (offset >= 8) {
67 |         v1 = i64vec4(v1.yzw, v2.x);
68 |         v2 = i64vec4(v2.yzw, 0);
69 |     }
70 |     return rotateLeft(v1, v2, (offset%8)*8);
71 | }
72 | 
73 | i64vec4 rotateRightBytes(i64vec4 v1, i64vec4 v2, int offset) {
74 |     if (offset >= 24) {
75 |         v1 = i64vec4(v1.xyz, v2.x);
76 |         v2 = i64vec4(v2.yzw, 0);
77 |     } else if (offset >= 16) {
78 |         v1 = i64vec4(v1.xy, v2.xy);
79 |         v2 = i64vec4(v2.zw, 0, 0);
80 |     } else if (offset >= 8) {
81 |         v1 = i64vec4(v1.x, v2.xyz);
82 |         v2 = i64vec4(v2.w, 0, 0, 0);
83 |     }
84 |     return rotateRight(v1, v2, (offset%8)*8);
85 | }
86 | 
87 | i64vec4 unalignedLoad(ptr_t i) {
88 |     int idx = i / 32;
89 |     return rotateLeftBytes(i64v4fromIO[idx], i64v4fromIO[idx+1], i % 32);
90 | }
91 | 
92 | void unalignedStore(ptr_t i, i64vec4 v2) {
93 |     int idx = i / 32;
94 |     i64v4heap[idx] = rotateRightBytes(i64v4heap[idx], v2, i % 32);
95 |     i64v4heap[idx+1] = rotateRightBytes(v2, i64v4heap[idx+1], i % 32);
96 | }
97 | */
98 | 


--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
 1 | # Dockerfile for ISPC to WASM and GLSL to WASM compilation
 2 | 
 3 | First build the image: `docker build -t ispc-wasm:latest .`
 4 | 
 5 | ## ISPC
 6 | 
 7 | Then you can compile ISPC files to WASM:
 8 | 
 9 | ```bash
10 | $ ./ispc2wasm.sh mandelbrot.ispc
11 | $ ls
12 | mandelbrot.ispc mandelbrot.ispc.o mandelbrot.ispc.wasm
13 | ```
14 | 
15 | To use from JavaScript:
16 | 
17 | ```js
18 | // Load the ISPC module
19 | const obj = await WebAssembly.instantiateStreaming(fetch('mandelbrot.ispc.wasm'), {"env": {
20 |     "ISPCAlloc":() => console.log("ISPCAlloc"),
21 |     "ISPCLaunch":() => console.log("ISPCLaunch"), 
22 |     "ISPCSync":() => console.log("ISPCSync"), 
23 | }});
24 | const width = 1920, height = 1080, outputPtr = 0;
25 | 
26 | // Allocate space for output image
27 | obj.instance.exports.memory.grow(Math.ceil(width * height * 4 / 2**16));
28 | 
29 | // Call the ISPC function
30 | obj.instance.exports.mandelbrot_ispc(-2.5, -1, 1, 1, width, height, 255, outputPtr);
31 | 
32 | // Read the output image from the heap.
33 | const heap = new Int32Array(obj.instance.exports.memory.buffer); 
34 | 
35 | const canvas = document.createElement('canvas');
36 | canvas.width = width;
37 | canvas.height = height;
38 | const ctx = canvas.getContext('2d');
39 | const id = ctx.createImageData(width, height);
40 | 
41 | for (let i = 0; i < width*height; i++) {
42 |     id.data[i*4 + 0] = heap[i];
43 |     id.data[i*4 + 1] = heap[i];
44 |     id.data[i*4 + 2] = heap[i];
45 |     id.data[i*4 + 3] = 255;
46 | }
47 | 
48 | ctx.putImageData(id, 0, 0);
49 | document.body.append(canvas);
50 | ```
51 | 
52 | ## GLSL
53 | 
54 | Compile GLSL compute shaders to WebAssembly (this is even more experimental!):
55 | 
56 | ```bash
57 | $ ./glsl2wasm.sh ao.comp.glsl
58 | $ ls
59 | ao.comp.glsl.html ao.comp.glsl.js ao.comp.glsl.worker.js ao.comp.glsl.wasm
60 | ```
61 | 
62 | To use the GLSL version, open `ao.comp.glsl.html` and run:
63 | 
64 | ```js
65 | const width = 1920, height = 1080;
66 | const localSizeX = 192, localSizeY = 10; // Local workgroup size of the compute shader.
67 | // Spawn enough workgroups to cover the image.
68 | const numWorkGroupsX = width / localSizeX;
69 | const numWorkGroupsY = height / localSizeY;
70 | const numWorkGroupsZ =  1;
71 | 
72 | const inputPtr = Module._malloc(8*4); // The shader takes an 8-float SSBO as its input buffer.
73 | const outputPtr = Module._malloc(width*height*4); // And writes to a 8-bit RGBA image buffer.
74 | 
75 | const input = new Float32Array(Module.wasmMemory.buffer, inputPtr, 8);
76 | input.set([width, height, 0, 0, 0, 0, 0, 0]); // Write the SSBO values to the input buffer.
77 | 
78 | // Run the shader across all accessible cores and SIMD lanes.
79 | Module._run(numWorkGroupsX, numWorkGroupsY, numWorkGroupsZ, inputPtr, outputPtr);
80 | 
81 | // Show the resulting image.
82 | const canvas = document.createElement('canvas');
83 | canvas.width = width;
84 | canvas.height = height;
85 | const ctx = canvas.getContext('2d');
86 | const id = ctx.createImageData(width, height);
87 | id.data.set(new Uint8Array(Module.wasmMemory.buffer, outputPtr, id.data.byteLength));
88 | ctx.putImageData(id, 0, 0);
89 | document.body.append(canvas);
90 | ```
91 | 


--------------------------------------------------------------------------------
/http_shader/chr.glsl:
--------------------------------------------------------------------------------
  1 | #define CHR_NULL 0
  2 | #define CHR_SOH 1
  3 | #define CHR_STX 2
  4 | #define CHR_ETX 3
  5 | #define CHR_EOT 4
  6 | #define CHR_ENQ 5
  7 | #define CHR_ACK 6
  8 | #define CHR_BELL 7
  9 | #define CHR_BACKSPACE 8
 10 | #define CHR_TAB 9
 11 | #define CHR_LF 10
 12 | #define CHR_VT 11
 13 | #define CHR_FF 12
 14 | #define CHR_CR 13
 15 | #define CHR_SO 14
 16 | #define CHR_SI 15
 17 | #define CHR_DLE 16
 18 | #define CHR_DC1 17
 19 | #define CHR_DC2 18
 20 | #define CHR_DC3 19
 21 | #define CHR_DC4 20
 22 | #define CHR_NAK 21
 23 | #define CHR_SYN 22
 24 | #define CHR_ETB 23
 25 | #define CHR_CAN 24
 26 | #define CHR_EM 25
 27 | #define CHR_SUB 26
 28 | #define CHR_ESC 27
 29 | #define CHR_FS 28
 30 | #define CHR_GS 29
 31 | #define CHR_RS 30
 32 | #define CHR_US 31
 33 | #define CHR_SPACE 32
 34 | #define CHR_EXCLAMATION_MARK 33
 35 | #define CHR_DOUBLE_QUOTE 34
 36 | #define CHR_HASH 35
 37 | #define CHR_DOLLAR 36
 38 | #define CHR_PERCENT 37
 39 | #define CHR_AMPERSAND 38
 40 | #define CHR_SINGLE_QUOTE 39
 41 | #define CHR_OPEN_PAREN 40
 42 | #define CHR_CLOSE_PAREN 41
 43 | #define CHR_TIMES 42
 44 | #define CHR_PLUS 43
 45 | #define CHR_COMMA 44
 46 | #define CHR_DASH 45
 47 | #define CHR_DOT 46
 48 | #define CHR_SLASH 47
 49 | #define CHR_0 48
 50 | #define CHR_1 49
 51 | #define CHR_2 50
 52 | #define CHR_3 51
 53 | #define CHR_4 52
 54 | #define CHR_5 53
 55 | #define CHR_6 54
 56 | #define CHR_7 55
 57 | #define CHR_8 56
 58 | #define CHR_9 57
 59 | #define CHR_COLON 58
 60 | #define CHR_SEMICOLON 59
 61 | #define CHR_OPEN_ANGLE_BRACKET 60
 62 | #define CHR_EQUAL 61
 63 | #define CHR_CLOSE_ANGLE_BRACKET 62
 64 | #define CHR_QUESTION_MARK 63
 65 | #define CHR_AT 64
 66 | #define CHR_A 65
 67 | #define CHR_B 66
 68 | #define CHR_C 67
 69 | #define CHR_D 68
 70 | #define CHR_E 69
 71 | #define CHR_F 70
 72 | #define CHR_G 71
 73 | #define CHR_H 72
 74 | #define CHR_I 73
 75 | #define CHR_J 74
 76 | #define CHR_K 75
 77 | #define CHR_L 76
 78 | #define CHR_M 77
 79 | #define CHR_N 78
 80 | #define CHR_O 79
 81 | #define CHR_P 80
 82 | #define CHR_Q 81
 83 | #define CHR_R 82
 84 | #define CHR_S 83
 85 | #define CHR_T 84
 86 | #define CHR_U 85
 87 | #define CHR_V 86
 88 | #define CHR_W 87
 89 | #define CHR_X 88
 90 | #define CHR_Y 89
 91 | #define CHR_Z 90
 92 | #define CHR_OPEN_SQUARE_BRACKET 91
 93 | #define CHR_BACKSLASH 92
 94 | #define CHR_CLOSE_SQUARE_BRACKET 93
 95 | #define CHR_CARET 94
 96 | #define CHR_UNDERSCORE 95
 97 | #define CHR_BACKTICK 96
 98 | #define CHR_a 97
 99 | #define CHR_b 98
100 | #define CHR_c 99
101 | #define CHR_d 100
102 | #define CHR_e 101
103 | #define CHR_f 102
104 | #define CHR_g 103
105 | #define CHR_h 104
106 | #define CHR_i 105
107 | #define CHR_j 106
108 | #define CHR_k 107
109 | #define CHR_l 108
110 | #define CHR_m 109
111 | #define CHR_n 110
112 | #define CHR_o 111
113 | #define CHR_p 112
114 | #define CHR_q 113
115 | #define CHR_r 114
116 | #define CHR_s 115
117 | #define CHR_t 116
118 | #define CHR_u 117
119 | #define CHR_v 118
120 | #define CHR_w 119
121 | #define CHR_x 120
122 | #define CHR_y 121
123 | #define CHR_z 122
124 | #define CHR_OPEN_CURLY_BRACKET 123
125 | #define CHR_PIPE 124
126 | #define CHR_CLOSE_CURLY_BRACKET 125
127 | #define CHR_TILDE 126
128 | #define CHR_DEL 127
129 | 


--------------------------------------------------------------------------------
/docker/mandelbrot.ispc:
--------------------------------------------------------------------------------
 1 | /*
 2 |   Copyright (c) 2010-2012, Intel Corporation
 3 |   All rights reserved.
 4 | 
 5 |   Redistribution and use in source and binary forms, with or without
 6 |   modification, are permitted provided that the following conditions are
 7 |   met:
 8 | 
 9 |     * Redistributions of source code must retain the above copyright
10 |       notice, this list of conditions and the following disclaimer.
11 | 
12 |     * Redistributions in binary form must reproduce the above copyright
13 |       notice, this list of conditions and the following disclaimer in the
14 |       documentation and/or other materials provided with the distribution.
15 | 
16 |     * Neither the name of Intel Corporation nor the names of its
17 |       contributors may be used to endorse or promote products derived from
18 |       this software without specific prior written permission.
19 | 
20 | 
21 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22 |    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 |    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 |    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25 |    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 | */
33 | 
// Returns the number of iterations of z -> z*z + c (starting from z = c)
// performed before |z|^2 exceeds 4, or `count` if that never happens.
static inline int mandel(float c_re, float c_im, int count) {
    float re = c_re;
    float im = c_im;
    int iter = 0;
    while (iter < count) {
        if (re * re + im * im > 4.)
            break;

        float next_re = re*re - im*im;
        float next_im = 2.f * re * im;
        // Updated for all program instances regardless of the current mask;
        // instances that already left the loop keep their `iter`, so their
        // result is unaffected.
        unmasked {
            re = c_re + next_re;
            im = c_im + next_im;
        }
        ++iter;
    }

    return iter;
}
51 | 
// Renders the Mandelbrot set over the rectangle [x0, x1) x [y0, y1) of the
// complex plane into a width x height image.
//
// output must hold width * height ints in row-major order; each element
// receives the iteration count returned by mandel() for its pixel, which is
// at most maxIterations.
export void mandelbrot_ispc(uniform float x0, uniform float y0,
                            uniform float x1, uniform float y1,
                            uniform int width, uniform int height,
                            uniform int maxIterations,
                            uniform int output[])
{
    // The step sizes are identical for every program instance, so keep them
    // uniform instead of the default varying.
    uniform float dx = (x1 - x0) / width;
    uniform float dy = (y1 - y0) / height;

    for (uniform int j = 0; j < height; j++) {
        // The imaginary coordinate is constant along a row.
        uniform float y = y0 + j * dy;

        // foreach distributes the pixels of this row across the program
        // instances and masks off the remainder, so width does not have to
        // be a multiple of programCount.
        foreach (i = 0 ... width) {
            // The x value differs between program instances because it is
            // derived from the foreach index i.
            float x = x0 + i * dx;

            int index = j * width + i;
            output[index] = mandel(x, y, maxIterations);
        }
    }
}
79 | 


--------------------------------------------------------------------------------
/spirv-io/src/parse_spv.hpp:
--------------------------------------------------------------------------------
 1 | #define TAG(v) (((v>>24) & 0xff) | ((v>>8) & 0xff00) | (((uint32_t)v<<8) & 0xff0000) | (((uint32_t)v << 24) & 0xff000000))
 2 | 
 3 | // Read file into array of bytes, and cast to uint32_t*, then return.
 4 | // The data has been padded, so that it fits into an array uint32_t.
 5 | uint32_t *readFile(uint32_t &length, const char *filename)
 6 | {
 7 |     FILE *fp = fopen(filename, "rb");
 8 |     if (fp == NULL)
 9 |     {
10 |         printf("Could not find or open file: %s\n", filename);
11 |     }
12 | 
13 |     // get file size.
14 |     fseek(fp, 0, SEEK_END);
15 |     long filesize = ftell(fp);
16 |     fseek(fp, 0, SEEK_SET);
17 | 
18 |     long filesizepadded = ((filesize+3) / 4) * 4;
19 | 
20 |     // read file contents.
21 |     char *str = new char[filesizepadded];
22 |     fread(str, filesize, sizeof(char), fp);
23 |     fclose(fp);
24 | 
25 |     // data padding.
26 |     for (int i = filesize; i < filesizepadded; i++)
27 |     {
28 |         str[i] = 0;
29 |     }
30 | 
31 |     length = filesizepadded;
32 |     return (uint32_t *)str;
33 | }
34 | 
35 | void parseLocalSize(uint32_t *code) {
36 |     //printf("%d\n", filelength);
37 |     uint32_t len32 = filelength / 4;
38 |     if (len32 <= 5) {
39 |         fprintf(stderr, "Shader file empty: %s\n", programFileName);
40 |         assert(len32 > 5);
41 |     }
42 |     uint32_t magicNumber = 0x07230203;
43 |     assert(magicNumber == code[0]);
44 |     for (int i = 5; i < len32; i++) {
45 |         uint32_t op = code[i];
46 |         uint32_t wordCount = op >> 16;
47 |         uint32_t opCode = op & 0xffff;
48 |         #ifndef NDEBUG
49 |         fprintf(stderr, "Op: %8x OpCode: %d WordCount:%d\n", op, opCode, wordCount);
50 |         #endif
51 |         int j = i+1;
52 |         if (opCode == 16) { // OpExecutionMode
53 |             uint32_t entryPoint = code[j++];
54 |             uint32_t mode = code[j++];
55 |             if (verbose) fprintf(stderr, "EntryPoint: %d Mode: %d\n", entryPoint, mode);
56 |             if (mode == 17) { // LocalSize
57 |                 localSize[0] = code[j++];
58 |                 localSize[1] = code[j++];
59 |                 localSize[2] = code[j++];
60 |                 if (verbose) fprintf(stderr, "LocalSize: %d %d %d\n", localSize[0], localSize[1], localSize[2]);
61 |             }
62 |         }
63 |         if (opCode == 4) { // OpSourceExtension
64 |             int j = i + 1;
65 |             uint32_t tag = code[j++];
66 |             if (verbose) fprintf(stderr, "OpSourceExtension tag %.4s %d\n", (char*)(&tag), code[j]);
67 |             if (tag == TAG('glo=')) {
68 |                 const uint32_t len = 4 * (wordCount - 2);
69 |                 globalsLen += len;
70 |                 if (globals != NULL) globals = (char*)realloc(globals, globalsLen+1);
71 |                 else globals = (char *)malloc(globalsLen+1);
72 |                 memcpy(globals + (globalsLen-len), code + j, len);
73 |                 globals[globalsLen] = 0;
74 |                 if (verbose) fprintf(stderr, "globals[%d]: %s\n", globalsLen, globals);
75 |             }
76 |             else if (tag == TAG('tgc=')) workSize[0] = code[j++];
77 |             else if (tag == TAG('ths=')) heapBufferSize = code[j++];
78 |             else if (tag == TAG('tti=')) fromGPUBufferSize = code[j++];
79 |             else if (tag == TAG('tfi=')) toGPUBufferSize = code[j++];
80 |         }
81 |         i += wordCount > 0 ? wordCount-1 : 0;
82 |     }
83 | }
84 | 
85 | void readShader() {
86 |     code = readFile(filelength, programFileName);
87 |     parseLocalSize(code);
88 | }
89 | 


--------------------------------------------------------------------------------
/src/mandel.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2015-2017 ARM Limited
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #ifdef __EMSCRIPTEN__
 18 | #include <emscripten.h>
 19 | #else
 20 | #include <unistd.h>
 21 | #endif
 22 | 
 23 | #include "spirv_cross/external_interface.h"
 24 | #include <stdio.h>
 25 | 
 26 | #ifndef GLM_FORCE_SWIZZLE
 27 | #define GLM_FORCE_SWIZZLE
 28 | #endif
 29 | 
 30 | #ifndef GLM_FORCE_RADIANS
 31 | #define GLM_FORCE_RADIANS
 32 | #endif
 33 | 
 34 | #include <glm/glm.hpp>
 35 | using namespace glm;
 36 | 
 37 | static float inputs[2] = {1280, 1280};
 38 | static vec4 outputs[1280 * 1280] = {};
 39 | 
 40 | 
 41 | // Runs the statically linked SPIRV-Cross compute shader over a 20x20 work-group grid,
 42 | // then shows the result on a canvas (Emscripten) or dumps the raw floats to stdout.
 43 | int main()
 44 | {
 45 | #ifdef __EMSCRIPTEN__
 46 | 	EM_ASM({
 47 | 	    console.time('compute');
 48 | 	});
 49 | #endif
 50 | 	// The C interface is linked in statically here; it could also come from a dynamic library.
 51 | 	auto *iface = spirv_cross_get_interface();
 52 | 	// Create an instance of the shader interface.
 53 | 	auto *shader = iface->construct();
 54 | // Number of work groups dispatched along each axis.
 55 | #define NUM_WORKGROUPS_X 20
 56 | #define NUM_WORKGROUPS_Y 20
 57 | 	void *inputs_ptr = inputs;
 58 | 	void *outputs_ptr = outputs;
 59 | 
 60 | 	// Bind resources to the shader. Buffers and samplers are passed as a list of pointers,
 61 | 	// since UBOs, SSBOs and samplers can be arrays and can point to different types.
 62 | 	spirv_cross_set_resource(shader, 0, 0, &inputs_ptr, sizeof(inputs_ptr));
 63 | 	spirv_cross_set_resource(shader, 0, 1, &outputs_ptr, sizeof(outputs_ptr));
 64 | 
 65 | 	// Set the compute builtins gl_NumWorkGroups and gl_WorkGroupID; LocalInvocationID and
 66 | 	// GlobalInvocationID are inferred when executing the invocation.
 67 | 	uvec3 num_workgroups(NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1);
 68 | 	uvec3 work_group_id(0, 0, 0);
 69 | 	spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, &num_workgroups, sizeof(num_workgroups));
 70 | 	spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, &work_group_id, sizeof(work_group_id));
 71 | 
 72 | 	// Execute work groups; work_group_id was registered by address above, so it is updated in place.
 73 | 	for (unsigned x = 0; x < NUM_WORKGROUPS_X; x++)
 74 | 	for (unsigned y = 0; y < NUM_WORKGROUPS_Y; y++)
 75 | 	{
 76 | 		work_group_id.x = x;
 77 | 		work_group_id.y = y;
 78 | 		iface->invoke(shader);
 79 | 	}
 80 | 
 81 | 	// Call destructor.
 82 | 	iface->destruct(shader);
 83 | 
 84 | #ifdef __EMSCRIPTEN__
 85 | 	EM_ASM({
 86 | 	    console.timeEnd('compute');
 87 | 	    var c = Module.canvas;
 88 | 	    var ctx = c.getContext('2d');
 89 | 	    c.width = c.height = 1280;
 90 | 	    var id = ctx.createImageData(c.width, c.height);
 91 | 	    var data = id.data;
 92 | 	    var off = $0 / 4;
 93 | 	    for (var i = 0; i < data.length; i++) {
 94 | 	      data[i] = (Module.HEAPF32[off + i] * 255.0) | 0;
 95 | 	    }
 96 | 	    ctx.putImageData(id, 0, 0);
 97 | 	}, (int)outputs_ptr);
 98 | #else
 99 | 	// write(2) may accept fewer bytes than requested (e.g. on a pipe): loop until all is out.
100 | 	const char *cursor = (const char *)outputs_ptr;
101 | 	for (size_t left = sizeof(outputs); left > 0;)
102 | 	{
103 | 		ssize_t n = write(1, cursor, left);
104 | 		if (n <= 0) { perror("write"); return 1; }
105 | 		cursor += n;
106 | 		left -= (size_t)n;
107 | 	}
108 | #endif
109 | 	return 0;
110 | }
111 | 


--------------------------------------------------------------------------------
/http_shader/ispc_ivec4/httpd_ispc_ivec4.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright 2015-2017 ARM Limited
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | #include <unistd.h>
18 | #include "httpd.ispc.h"
19 | 
20 | #include <stdio.h>
21 | #include <string.h>
22 | #include <chrono>
23 | 
24 | #ifndef GLM_FORCE_SWIZZLE
25 | #define GLM_FORCE_SWIZZLE
26 | #endif
27 | 
28 | #ifndef GLM_FORCE_RADIANS
29 | #define GLM_FORCE_RADIANS
30 | #endif
31 | 
32 | #include <glm/glm.hpp>
33 | using namespace glm;
34 | using namespace ispc;
35 | 
36 | // Work-group grid handed to runner_main() in main().
37 | #define NUM_WORKGROUPS_X 32
38 | #define NUM_WORKGROUPS_Y 1
39 | // Number of request slots: 32 * 1 * 1024 * 16 = 524288.
40 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 1024 * 16;
41 | // Size of one request slot in bytes (main() steps through the buffers in requestSize-byte strides).
42 | static const int requestSize = 1024;
43 | // One int array per buffer, requestSize / 4 ints per slot; 512 MiB each at these sizes (assuming 4-byte int).
44 | static int inputBuffe[(requestSize / 4) * requestCount] = {};
45 | static int outputBuffe[(requestSize / 4) * requestCount] = {};
46 | static int heapBuffe[(requestSize / 4) * requestCount] = {};
47 | 
48 | // Fills every slot with a synthetic HTTP request and a heap document, runs the ISPC
49 | // runner 1000 times over all slots, then prints the first ten responses and the throughput.
50 | int main()
51 | {
52 | 	const int slotInts = requestSize / 4;       // ints per slot
53 | 	const size_t payloadMax = requestSize - 16; // slot bytes that follow the 16-byte header
54 | 	for (int i = 0; i < (int)requestCount; i++) {
55 | 		// Slot layout used below: int 0 holds the text length, the text starts at byte 16.
56 | 		char *req = (char*)(&inputBuffe[slotInts * i + 4]);
57 | 		char *doc = (char*)(&heapBuffe[slotInts * i + 4]);
58 | 		if (i % 11 == 10) {
59 | 			snprintf(req, payloadMax, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\n<html><body>This is %d spam-post %d number %d.</body></html>", i % 10, i, i, i);
60 | 		} else if (i % 2 == 0) {
61 | 			snprintf(req, payloadMax, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\n<html><body>This is post number %d.</body></html>", i*2/3, i);
62 | 		} else {
63 | 			snprintf(req, payloadMax, "GET /%07d HTTP/1.1\r\nhost: localhost\r\n\r\n", i);
64 | 		}
65 | 		inputBuffe[slotInts * i] = strlen(req);
66 | 		snprintf(doc, payloadMax, "text/html\r\n\r\n<html><body>This is document number %d.</body></html>", i);
67 | 		heapBuffe[slotInts * i] = strlen(doc);
68 | 	}
69 | 	std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
70 | 	for (int j = 0; j < 1000; j++) {
71 | 		int32_t workgroups[] = {NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1};
72 | 		runner_main(workgroups, *(struct inputBuffer*)inputBuffe, *(struct outputBuffer*)outputBuffe, *(struct heapBuffer*)heapBuffe);
73 | 	}
74 | 	std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
75 | 	const long long elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count();
76 | 
77 | 	for (int i = 0; i < 10; i++) {
78 | 		// Use stdio rather than write(2) so the payload stays ordered with the printf output,
79 | 		// and clamp the length reported in the output slot to the slot's payload area.
80 | 		int len = outputBuffe[slotInts * i];
81 | 		if (len < 0) len = 0;
82 | 		if ((size_t)len > payloadMax) len = (int)payloadMax;
83 | 		fwrite(((char*)outputBuffe) + requestSize * i + 16, 1, (size_t)len, stdout);
84 | 		printf("\n");
85 | 	}
86 | 
87 | 	printf("\nElapsed: %lld ms\n", elapsedMs);
88 | 	printf("Million requests per second: %.3f\n\n", 1e-6 * (requestCount * 1000.0) / (0.001 * elapsedMs));
89 | 	return 0;
90 | }
91 | 


--------------------------------------------------------------------------------
/docker/spirv-runner/ispcRunner.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include "program.h"
  4 | #include "emscripten.h"
  5 | 
  6 | #ifdef WIN32
  7 | #include <io.h>
  8 | #include <fcntl.h>
  9 | #endif
 10 | 
 11 | static uint32_t bufferSize = 0;         // read by readHeader(); sizes the outputs allocation in main()
 12 | static uint32_t inputBufferSize = 0;    // payload bytes stored after the first sizeof(ispc::inputs) - 4 bytes of input
 13 | static uint32_t vulkanDeviceIndex = 0;  // read by readHeader(); not used elsewhere in this file
 14 | static int32_t workSize[3] = {1, 1, 1}; // work-group counts passed to ispc::runner_main()
 15 | // Heap buffer of sizeof(ispc::inputs) - 4 + inputBufferSize bytes, allocated in readHeader() or main().
 16 | static char *input;
 17 | 
 18 | // Reads the fixed stdin header: bufferSize, vulkanDeviceIndex, workSize[3] and
 19 | // inputBufferSize (4 bytes each, 12 for workSize), falling back to defaults on a
 20 | // short read, then allocates `input` for the payload that follows.
 21 | void readHeader()
 22 | {
 23 |     ::size_t read_bytes = 0;
 24 | #ifdef WIN32
 25 | 	_setmode(_fileno(stdout), _O_BINARY);
 26 | 	_setmode(_fileno(stdin), _O_BINARY);
 27 | #endif
 28 |     bufferSize = 0;
 29 |     read_bytes = fread(&bufferSize, 1, 4, stdin);
 30 |     if (read_bytes < 4) {
 31 |         fprintf(stderr, "read only %zu bytes, using default bufferSize\n", read_bytes);
 32 |         bufferSize = 4;
 33 |     }
 34 | 
 35 |     vulkanDeviceIndex = 0;
 36 |     read_bytes = fread(&vulkanDeviceIndex, 1, 4, stdin);
 37 |     if (read_bytes < 4) {
 38 |         fprintf(stderr, "read only %zu bytes, using default vulkanDeviceIndex\n", read_bytes);
 39 |         vulkanDeviceIndex = 0;
 40 |     }
 41 | 
 42 |     read_bytes = fread(workSize, 1, 12, stdin);
 43 |     if (read_bytes < 12) {
 44 |         fprintf(stderr, "read only %zu bytes, using default workSize\n", read_bytes);
 45 |         workSize[0] = workSize[1] = workSize[2] = 1;
 46 |     }
 47 | 
 48 |     inputBufferSize = 0;
 49 |     read_bytes = fread(&inputBufferSize, 1, 4, stdin);
 50 |     if (read_bytes < 4) {
 51 |         fprintf(stderr, "read only %zu bytes, using default inputBufferSize\n", read_bytes);
 52 |         inputBufferSize = 4;
 53 |     }
 54 |     input = (char *)malloc(sizeof(ispc::inputs) - 4 + inputBufferSize);
 55 |     if (input == NULL) { // readInput() writes through this pointer, so fail loudly here
 56 |         fprintf(stderr, "failed to allocate the input buffer\n");
 57 |         exit(1);
 58 |     }
 59 | }
 60 | 
 61 | // Reads up to inputBufferSize payload bytes from stdin into `input`, after the first
 62 | // sizeof(ispc::inputs) - 4 bytes. Returns false when no byte could be read.
 63 | bool readInput()
 64 | {
 65 |     if (feof(stdin)) return false;
 66 |     ::size_t input_length = 0;
 67 |     const ::size_t off = sizeof(ispc::inputs) - 4;
 68 |     while (input_length < inputBufferSize) {
 69 |         // Request only the space that is left; asking for inputBufferSize again after a
 70 |         // short read could write past the end of the allocation.
 71 |         ::size_t got = fread((void *)(input + input_length + off), 1, inputBufferSize - input_length, stdin);
 72 |         if (got == 0) break; // EOF or read error: stop instead of spinning
 73 |         input_length += got;
 74 |     }
 75 |     return input_length > 0;
 76 | }
 77 | 
 78 | // C-linkage entry point kept alive for Emscripten: stores the w x h x d grid in workSize,
 79 | // runs ispc::runner_main and returns the address of outputs->outputData cast to int.
 80 | EMSCRIPTEN_KEEPALIVE extern "C" int run(int w, int h, int d, ispc::inputs *inputs, ispc::outputs *outputs)
 81 | {
 82 |     const int32_t grid[3] = {w, h, d};
 83 |     for (int axis = 0; axis < 3; axis++) workSize[axis] = grid[axis];
 84 |     ispc::runner_main(workSize, *inputs, *outputs);
 85 |     return (int)(outputs->outputData); }
 86 | 
 87 | int main(int argc, char *argv[])
 88 | {
 89 |     return 0; // NOTE(review): unconditional return; nothing below this line executes
 90 |     // Unreachable demo path: fills eight float inputs (1920, 1080, then zeros), runs the kernel and blits the output to Module.canvas.
 91 |     EM_ASM({
 92 |       console.time('compute');
 93 |     });
 94 |     inputBufferSize = 8*4; // eight 4-byte floats
 95 |     input = (char *)malloc(sizeof(ispc::inputs) - 4 + inputBufferSize);
 96 |     float *dims = (float*)(input + (sizeof(ispc::inputs) - 4)); // payload starts sizeof(ispc::inputs) - 4 bytes into the buffer
 97 |     dims[0] = 1920;
 98 |     dims[1] = 1080;
 99 |     dims[2] = 0;
100 |     dims[3] = 0;
101 |     dims[4] = 0;
102 |     dims[5] = 0;
103 |     dims[6] = 0;
104 |     dims[7] = 0;
105 |     // Output size: width * height * 4 bytes (read back below through HEAPU8 into the canvas ImageData).
106 |     bufferSize = dims[0]*dims[1]*4;
107 |     
108 |     ispc::outputs *outputs = (ispc::outputs *)malloc(sizeof(ispc::outputs) - 4 + bufferSize);
109 |     ispc::inputs *inputs = (ispc::inputs *)input;
110 |     // 1920/192 x 1080/10 = 10 x 108 work groups. NOTE(review): implies 192x10 invocations per group; confirm against the kernel.
111 |     int output_ptr = run(dims[0]/192, dims[1]/10, 1, inputs, outputs);
112 | 
113 |     EM_ASM({
114 | 	console.timeEnd('compute');
115 | 	var c = Module.canvas;
116 | 	var ctx = c.getContext('2d');
117 | 	c.width = $1; c.height = $2;
118 | 	var id = ctx.createImageData(c.width, c.height);
119 | 	var data = id.data;
120 | 	var off = $0;
121 | 	for (var i = 0; i < data.length; i++) {
122 | 	  data[i] = Module.HEAPU8[off + i];
123 | 	}
124 | 	ctx.putImageData(id, 0, 0);
125 |     }, output_ptr, dims[0], dims[1]);
126 | 
127 |     free(input);
128 |     free(outputs);
129 |     
130 |     return 0;
131 | }
132 | 


--------------------------------------------------------------------------------
/spirv-io/lib/chr.glsl:
--------------------------------------------------------------------------------
  1 | #define CHR_NULL char(0) // ASCII 0-31: control characters
  2 | #define CHR_SOH char(1)
  3 | #define CHR_STX char(2)
  4 | #define CHR_ETX char(3)
  5 | #define CHR_EOT char(4)
  6 | #define CHR_ENQ char(5)
  7 | #define CHR_ACK char(6)
  8 | #define CHR_BELL char(7)
  9 | #define CHR_BACKSPACE char(8)
 10 | #define CHR_TAB char(9)
 11 | #define CHR_LF char(10)
 12 | #define CHR_VT char(11)
 13 | #define CHR_FF char(12)
 14 | #define CHR_CR char(13)
 15 | #define CHR_SO char(14)
 16 | #define CHR_SI char(15)
 17 | #define CHR_DLE char(16)
 18 | #define CHR_DC1 char(17)
 19 | #define CHR_DC2 char(18)
 20 | #define CHR_DC3 char(19)
 21 | #define CHR_DC4 char(20)
 22 | #define CHR_NAK char(21)
 23 | #define CHR_SYN char(22)
 24 | #define CHR_ETB char(23)
 25 | #define CHR_CAN char(24)
 26 | #define CHR_EM char(25)
 27 | #define CHR_SUB char(26)
 28 | #define CHR_ESC char(27)
 29 | #define CHR_FS char(28)
 30 | #define CHR_GS char(29)
 31 | #define CHR_RS char(30)
 32 | #define CHR_US char(31)
 33 | #define CHR_SPACE char(32) // ASCII 32-47: space and punctuation
 34 | #define CHR_EXCLAMATION_MARK char(33)
 35 | #define CHR_DOUBLE_QUOTE char(34)
 36 | #define CHR_HASH char(35)
 37 | #define CHR_DOLLAR char(36)
 38 | #define CHR_PERCENT char(37)
 39 | #define CHR_AMPERSAND char(38)
 40 | #define CHR_SINGLE_QUOTE char(39)
 41 | #define CHR_OPEN_PAREN char(40)
 42 | #define CHR_CLOSE_PAREN char(41)
 43 | #define CHR_TIMES char(42) // asterisk
 44 | #define CHR_PLUS char(43)
 45 | #define CHR_COMMA char(44)
 46 | #define CHR_DASH char(45) // hyphen-minus
 47 | #define CHR_DOT char(46)
 48 | #define CHR_SLASH char(47)
 49 | #define CHR_0 char(48) // ASCII 48-57: decimal digits
 50 | #define CHR_1 char(49)
 51 | #define CHR_2 char(50)
 52 | #define CHR_3 char(51)
 53 | #define CHR_4 char(52)
 54 | #define CHR_5 char(53)
 55 | #define CHR_6 char(54)
 56 | #define CHR_7 char(55)
 57 | #define CHR_8 char(56)
 58 | #define CHR_9 char(57)
 59 | #define CHR_COLON char(58)
 60 | #define CHR_SEMICOLON char(59)
 61 | #define CHR_OPEN_ANGLE_BRACKET char(60)
 62 | #define CHR_EQUAL char(61)
 63 | #define CHR_CLOSE_ANGLE_BRACKET char(62)
 64 | #define CHR_QUESTION_MARK char(63)
 65 | #define CHR_AT char(64)
 66 | #define CHR_A char(65) // ASCII 65-90: uppercase letters
 67 | #define CHR_B char(66)
 68 | #define CHR_C char(67)
 69 | #define CHR_D char(68)
 70 | #define CHR_E char(69)
 71 | #define CHR_F char(70)
 72 | #define CHR_G char(71)
 73 | #define CHR_H char(72)
 74 | #define CHR_I char(73)
 75 | #define CHR_J char(74)
 76 | #define CHR_K char(75)
 77 | #define CHR_L char(76)
 78 | #define CHR_M char(77)
 79 | #define CHR_N char(78)
 80 | #define CHR_O char(79)
 81 | #define CHR_P char(80)
 82 | #define CHR_Q char(81)
 83 | #define CHR_R char(82)
 84 | #define CHR_S char(83)
 85 | #define CHR_T char(84)
 86 | #define CHR_U char(85)
 87 | #define CHR_V char(86)
 88 | #define CHR_W char(87)
 89 | #define CHR_X char(88)
 90 | #define CHR_Y char(89)
 91 | #define CHR_Z char(90)
 92 | #define CHR_OPEN_SQUARE_BRACKET char(91)
 93 | #define CHR_BACKSLASH char(92)
 94 | #define CHR_CLOSE_SQUARE_BRACKET char(93)
 95 | #define CHR_CARET char(94)
 96 | #define CHR_UNDERSCORE char(95)
 97 | #define CHR_BACKTICK char(96)
 98 | #define CHR_a char(97) // ASCII 97-122: lowercase letters
 99 | #define CHR_b char(98)
100 | #define CHR_c char(99)
101 | #define CHR_d char(100)
102 | #define CHR_e char(101)
103 | #define CHR_f char(102)
104 | #define CHR_g char(103)
105 | #define CHR_h char(104)
106 | #define CHR_i char(105)
107 | #define CHR_j char(106)
108 | #define CHR_k char(107)
109 | #define CHR_l char(108)
110 | #define CHR_m char(109)
111 | #define CHR_n char(110)
112 | #define CHR_o char(111)
113 | #define CHR_p char(112)
114 | #define CHR_q char(113)
115 | #define CHR_r char(114)
116 | #define CHR_s char(115)
117 | #define CHR_t char(116)
118 | #define CHR_u char(117)
119 | #define CHR_v char(118)
120 | #define CHR_w char(119)
121 | #define CHR_x char(120)
122 | #define CHR_y char(121)
123 | #define CHR_z char(122)
124 | #define CHR_OPEN_CURLY_BRACKET char(123)
125 | #define CHR_PIPE char(124)
126 | #define CHR_CLOSE_CURLY_BRACKET char(125)
127 | #define CHR_TILDE char(126)
128 | #define CHR_DEL char(127) // last 7-bit ASCII code
129 | 


--------------------------------------------------------------------------------
/spirv-io/test/test_file.glsl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env gls
  2 | 
  3 | #include "../lib/file.glsl"
  4 | 
  5 | ThreadLocalCount = 1;
  6 | ThreadGroupCount = 1;
  7 | // rg(i,n): loop i over [0,n) with n evaluated once. mapIO(i,n,f): start n IO requests f (indexed by i), then await them all.
  8 | #define rg(i,n) for (int i=0,_l_=(n); i<_l_; i++)
  9 | #define mapIO(i, n, f) { io _ios_[n]; rg(i, n) _ios_[i] = f; rg(i, n) awaitIO(_ios_[i]); }
 10 | 
 11 | // Reads hello.txt once through readSync and once through read + awaitIO; both results must match the expected greeting.
 12 | bool testRead() {
 13 |     string syncText = readSync("hello.txt", malloc(100));
 14 |     bool syncMatches = strEq(syncText, "Hello, world!\n");
 15 |     if (!syncMatches) println(concat(str(strLen(syncText)), " ", syncText));
 16 | 
 17 |     string asyncBuf = malloc(100);
 18 |     int readOk;
 19 |     io readReq = read("hello.txt", 0, 100, asyncBuf);
 20 |     string asyncText = awaitIO(readReq, readOk);
 21 |     bool asyncMatches = strEq(asyncText, "Hello, world!\n");
 22 |     if (!asyncMatches) println(concat(str(strLen(asyncText)), " ", asyncText));
 23 |     return syncMatches && asyncMatches;
 24 | }
 25 | 
 26 | // Round-trips two strings through a per-thread file, first with write + awaitIO, then with writeSync.
 27 | bool testWrite() {
 28 |     string scratch = malloc(100);
 29 |     string path = concat("write", str(ThreadId), ".txt");
 30 |     awaitIO(createFile(path));
 31 |     awaitIO(truncateFile(path, 0));
 32 | 
 33 |     awaitIO(write(path, 0, 100, "Write, write, write!"));
 34 |     string viaWrite = readSync(path, scratch);
 35 |     bool asyncOk = strEq(viaWrite, "Write, write, write!");
 36 |     if (!asyncOk) println(concat(str(strLen(viaWrite)), viaWrite));
 37 |     awaitIO(truncateFile(path, 0));
 38 | 
 39 |     writeSync(path, "Hello, world!");
 40 |     string viaWriteSync = readSync(path, scratch);
 41 |     bool syncOk = strEq(viaWriteSync, "Hello, world!");
 42 |     if (!syncOk) println(concat(str(strLen(viaWriteSync)), viaWriteSync));
 43 |     awaitIO(truncateFile(path, 0));
 44 |     awaitIO(deleteFile(path));
 45 |     return asyncOk && syncOk;
 46 | }
 47 | 
 48 | // Runs echo and a node one-liner through runCmd; node writes node-<ThreadId>.txt, which is read back and printed.
 49 | bool testRunCmd() {
 50 |     FREE(FREE_IO(
 51 |         awaitIO(runCmd(concat("echo Hello from thread ", str(ThreadId))));
 52 |         awaitIO(runCmd(concat(
 53 |             "node -e 'fs=require(`fs`); fs.writeFileSync(`node-${", str(ThreadId),
 54 |             "}.txt`, Date.now().toString())'"
 55 |         )));
 56 |     ))
 57 |     string res = readSync(concat("node-", str(ThreadId), ".txt"), malloc(1000));
 58 |     println(concat("Node says ", res));
 59 |     awaitIO(deleteFile(concat("node-", str(ThreadId), ".txt"))); // awaited like every other deleteFile in this file, so the request is finished before the test returns
 60 |     return true;
 61 | }
 62 | 
 63 | // Creates ten files in a per-thread directory, lists it, removes everything again and
 64 | // checks that the listing has exactly ten entries, one for each created path.
 65 | bool testLs() {
 66 |     string dir = concat("dir-", str(ThreadId));
 67 |     awaitIO(mkdir(dir));
 68 |     mapIO(i, 10, createFile(concat(dir, "/", str(i))));
 69 |     stringArray res = awaitIO(ls(dir, malloc(1000)));
 70 |     mapIO(i, 10, deleteFile(concat(dir, "/", str(i))));
 71 |     awaitIO(rmdir(dir));
 72 |     bool ok = arrLen(res) == 10;
 73 |     rg(i, 10) {
 74 |         bool found = false;
 75 |         string si = str(i);
 76 |         rg(j, 10) {
 77 |             // Only visit entries that ls returned: a short listing must fail the test, not index past the array.
 78 |             if (j >= arrLen(res)) break;
 79 |             FREE(found = found || strEq(concat(dir, "/", si), aGet(res, j));)
 80 |         }
 81 |         ok = ok && found;
 82 |     }
 83 |     return ok;
 84 | }
 85 | 
 86 | // Every thread reads and prints the cwd; thread 0 also creates and enters test_cwd and checks that the cwd changed.
 87 | bool testGetCwd() {
 88 |     string cwd = awaitIO(getCwd());
 89 |     println(concat(str(ThreadId), " cwd is ", cwd));
 90 |     bool ok = strLen(cwd) > 0;
 91 |     if (ThreadId != 0) return ok;
 92 | 
 93 |     awaitIO(mkdir("test_cwd"));
 94 |     awaitIO(chdir("test_cwd"));
 95 |     string movedCwd = awaitIO(getCwd());
 96 |     println(concat("New cwd is ", movedCwd));
 97 |     ok = ok && !strEq(cwd, movedCwd);
 98 |     return ok && strEq("test_cwd", last(split(movedCwd, '/')));
 99 | }
100 | 
101 | // Prints the verdict for one test: failures from any thread, successes from thread 0 only.
102 | void printTest(bool ok, string name) {
103 |     if (ok && ThreadId != 0) return;
104 |     println(concat(str(ThreadId), ": ", name, ok ? " successful" : " failed!"));
105 | }
106 | // TEST(fn): calls fn and reports the result through printTest, labelled with the function name, wrapped in FREE(FREE_IO(...)).
107 | #define TEST(testFn) FREE(FREE_IO(printTest(testFn(), #testFn)))
108 | 
109 | void main() {
110 |     awaitIO(chdir("test_data")); // the tests use relative paths such as hello.txt, which lives in test_data
111 |     TEST(testRead);
112 |     TEST(testWrite);
113 |     TEST(testRunCmd);
114 |     TEST(testLs);
115 |     TEST(testGetCwd); // on thread 0 this test chdirs into test_cwd and does not change back
116 | }
117 | 
118 | 


--------------------------------------------------------------------------------
/spirv-io/examples/listen3.glsl:
--------------------------------------------------------------------------------
  1 | #include <file.glsl>
  2 | #include <statemachine.glsl>
  3 | 
  4 | HeapSize = 8192;
  5 | FromIOSize = 8192;
  6 | ToIOSize = 8192;
  7 | 
  8 | ThreadGroupCount = 256;
  9 | ThreadLocalCount = 1;
 10 | // State ids used by the switch in main; s_Reading and s_Writing are not used in this file.
 11 | const int s_Init = 0;
 12 | const int s_Accept = 1;
 13 | const int s_WaitingConn = 2;
 14 | const int s_Reading = 3;
 15 | const int s_Writing = 4;
 16 | const int s_Closing = 5;
 17 | // Attribute slots of the state machine; a_Connection, a_ReadIO and a_CloseIO are not used in this file.
 18 | const int a_Server = 0;
 19 | const int a_ConnectionIO = 1;
 20 | const int a_Connection = 2;
 21 | const int a_ReadIO = 3;
 22 | const int a_WriteIO = 4;
 23 | const int a_CloseIO = 5;
 24 | const int a_HeapStart = 6;
 25 | // Builds the fixed plain-text 200 response naming the current ThreadId; the req argument is not inspected.
 26 | string process(string req) {
 27 |     return concat("HTTP/1.1 200 OK\r\ncontent-type: text/plain\r\n\r\nHello from ", str(ThreadId), "\n");
 28 | }
 29 | // Acquire load and release store at device scope on buffer storage; only the commented-out timing code in main refers to them.
 30 | #define LOAD(k) atomicLoad(k, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsAcquire)
 31 | #define STORE(k,v) atomicStore(k, v, gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelease)
 32 | // One step of a per-thread HTTP server state machine: load the state, run one switch pass, save the state.
 33 | void main() {
 34 |     string readBuf = malloc(4096);
 35 | 
 36 |     io r;
 37 |     int64_t startTime; // only referenced by the commented-out timing loop below
 38 | 
 39 |     if (ThreadId == 0) rerunProgram = RERUN_ON_IO; // NOTE(review): presumably asks the host to run the program again on IO activity; confirm
 40 | 
 41 |     // This should do 512 accept+reads at a time and process them as they become ready.
 42 |     // Ditto for the writes and closes.
 43 |     // r = acceptReadBatch(listen_fd, conn_fds_i32a, reads_str_array);
 44 |     // conn_count = awaitIO(r).x;
 45 |     // pfor(i, conn_count, {
 46 |     //   process_req(i, conn_fds_i32a, reads_str_array, writes_str_array);
 47 |     // });
 48 |     // writeCloseBatch(conn_fds_buf, conn_count, writes_str_array);
 49 |     //
 50 |     // NOTE(review): presumably resumes the state stored by saveStateMachine at the end of the previous run, starting from s_Init; confirm.
 51 |     stateMachine m = loadStateMachine(s_Init);
 52 | //    atomicAdd(programReturnValue, 1);
 53 | //    if (ThreadId == 0) {
 54 | //        while(programReturnValue < ThreadCount);
 55 | //        startTime = microTimeSync();
 56 | //        STORE(io_pad_8, 0);
 57 | //        STORE(programReturnValue, 0);
 58 | //    }
 59 | //    while (LOAD(programReturnValue) != 0);
 60 | //    uint32_t idx = 0;
 61 | //    while (LOAD(io_pad_8) == 0) {
 62 | //        idx++;
 63 | //        if (ThreadId == 0 && (idx % 4096 == 0)) {
 64 | //            int64_t now = microTimeSync();
 65 | //            if (now - startTime > 10000) {
 66 | //                STORE(io_pad_8, 1);
 67 | //            }
 68 | //            idx = 1;
 69 | //        }
 70 | //        barrier();
 71 |         switch (getState(m)) {
 72 |             case s_Init:
 73 |                 if (ThreadId == 0) FREE_ALL(log("Starting server on port 8080..."));
 74 |                 setAttr(m, a_Server, listenSync(8080));
 75 |                 setAttr(m, a_HeapStart, heapPtr); // remember the heap position so s_Accept can rewind to it
 76 |                 setState(m, s_Accept);
 77 |                 if (ThreadId == 0) FREE_ALL(log("Server running, accepting connections."));
 78 | //                break;  (disabled: s_Init falls through into s_Accept)
 79 | 
 80 |             case s_Accept:
 81 |                 heapPtr = getI32Attr(m, a_HeapStart); // rewind the heap and both IO pointers before starting a new connection
 82 |                 fromIOPtr = fromIOStart;
 83 |                 toIOPtr = toIOStart;
 84 |                 setAttr(m, a_ConnectionIO, acceptAndRecv(getAttr(m, a_Server), readBuf));
 85 |                 setState(m, s_WaitingConn);
 86 | //                break;  (disabled: s_Accept falls through into s_WaitingConn)
 87 | 
 88 |             case s_WaitingConn:
 89 |                 r = getIOAttr(m, a_ConnectionIO);
 90 |                 if (pollIO(r)) { // only act once the accept+receive request is ready; otherwise stay in this state
 91 |                     string req;
 92 |                     socket conn = awaitIO2(r, req);
 93 |                     string response = process(req);
 94 |                     setAttr(m, a_WriteIO, sendAndClose(conn, response));
 95 |                     setState(m, s_Closing);
 96 |                 }
 97 |                 break;
 98 | 
 99 |             case s_Closing: // waits for the sendAndClose request stored in a_WriteIO
100 |                 r = getIOAttr(m, a_WriteIO);
101 |                 if (pollIO(r)) {
102 |                     awaitIO(r);
103 |                     setState(m, s_Accept);
104 |                 }
105 |                 break;
106 |         }
107 | //    }
108 |     saveStateMachine(m);
109 | }
110 | 
111 | 


--------------------------------------------------------------------------------
/include/spirv_cross/external_interface.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2015-2017 ARM Limited
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #ifndef SPIRV_CROSS_EXTERNAL_INTERFACE_H
 18 | #define SPIRV_CROSS_EXTERNAL_INTERFACE_H
 19 | /* <stddef.h> (size_t) is included before the linkage block: standard headers belong outside extern "C". */
 20 | #include <stddef.h>
 21 | 
 22 | #ifdef __cplusplus
 23 | extern "C" {
 24 | #endif
 25 | /* Opaque handle to one shader instance. */
 26 | typedef struct spirv_cross_shader spirv_cross_shader_t;
 27 | /* Function table returned by spirv_cross_get_interface(). */
 28 | struct spirv_cross_interface
 29 | {
 30 | 	spirv_cross_shader_t *(*construct)(void); /* returns a new shader instance */
 31 | 	void (*destruct)(spirv_cross_shader_t *thiz); /* takes the instance returned by construct */
 32 | 	void (*invoke)(spirv_cross_shader_t *thiz); /* runs the shader on the instance */
 33 | };
 34 | /* Setters taking (data, size). NOTE(review): whether data is copied or referenced is not visible in this header. */
 35 | void spirv_cross_set_stage_input(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size);
 36 | 
 37 | void spirv_cross_set_stage_output(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size);
 38 | 
 39 | void spirv_cross_set_push_constant(spirv_cross_shader_t *thiz, void *data, size_t size);
 40 | 
 41 | void spirv_cross_set_uniform_constant(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size);
 42 | /* data is a pointer to pointer(s): callers pass the address of their resource pointer for (set, binding). */
 43 | void spirv_cross_set_resource(spirv_cross_shader_t *thiz, unsigned set, unsigned binding, void **data, size_t size);
 44 | 
 45 | const struct spirv_cross_interface *spirv_cross_get_interface(void);
 46 | /* Builtins that can be set through spirv_cross_set_builtin(). */
 47 | typedef enum spirv_cross_builtin {
 48 | 	SPIRV_CROSS_BUILTIN_POSITION = 0,
 49 | 	SPIRV_CROSS_BUILTIN_FRAG_COORD = 1,
 50 | 	SPIRV_CROSS_BUILTIN_WORK_GROUP_ID = 2,
 51 | 	SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS = 3,
 52 | 	SPIRV_CROSS_NUM_BUILTINS
 53 | } spirv_cross_builtin;
 54 | 
 55 | void spirv_cross_set_builtin(spirv_cross_shader_t *thiz, spirv_cross_builtin builtin, void *data, size_t size);
 56 | /* Capacity limits. NOTE(review): presumably the valid ranges for the set/binding/location arguments above; confirm. */
 57 | #define SPIRV_CROSS_NUM_DESCRIPTOR_SETS 4
 58 | #define SPIRV_CROSS_NUM_DESCRIPTOR_BINDINGS 16
 59 | #define SPIRV_CROSS_NUM_STAGE_INPUTS 16
 60 | #define SPIRV_CROSS_NUM_STAGE_OUTPUTS 16
 61 | #define SPIRV_CROSS_NUM_UNIFORM_CONSTANTS 32
 62 | /* Enumerations used by struct spirv_cross_sampler_info below; each ends with a count entry. */
 63 | enum spirv_cross_format
 64 | {
 65 | 	SPIRV_CROSS_FORMAT_R8_UNORM = 0,
 66 | 	SPIRV_CROSS_FORMAT_R8G8_UNORM = 1,
 67 | 	SPIRV_CROSS_FORMAT_R8G8B8_UNORM = 2,
 68 | 	SPIRV_CROSS_FORMAT_R8G8B8A8_UNORM = 3,
 69 | 
 70 | 	SPIRV_CROSS_NUM_FORMATS
 71 | };
 72 | 
 73 | enum spirv_cross_wrap
 74 | {
 75 | 	SPIRV_CROSS_WRAP_CLAMP_TO_EDGE = 0,
 76 | 	SPIRV_CROSS_WRAP_REPEAT = 1,
 77 | 
 78 | 	SPIRV_CROSS_NUM_WRAP
 79 | };
 80 | 
 81 | enum spirv_cross_filter
 82 | {
 83 | 	SPIRV_CROSS_FILTER_NEAREST = 0,
 84 | 	SPIRV_CROSS_FILTER_LINEAR = 1,
 85 | 
 86 | 	SPIRV_CROSS_NUM_FILTER
 87 | };
 88 | 
 89 | enum spirv_cross_mipfilter
 90 | {
 91 | 	SPIRV_CROSS_MIPFILTER_BASE = 0,
 92 | 	SPIRV_CROSS_MIPFILTER_NEAREST = 1,
 93 | 	SPIRV_CROSS_MIPFILTER_LINEAR = 2,
 94 | 
 95 | 	SPIRV_CROSS_NUM_MIPFILTER
 96 | };
 97 | /* One mip level: data pointer, dimensions and stride (the unit of stride is not stated in this header). */
 98 | struct spirv_cross_miplevel
 99 | {
100 | 	const void *data;
101 | 	unsigned width, height;
102 | 	size_t stride;
103 | };
104 | /* Sampler description passed to spirv_cross_create_sampler_2d(). */
105 | struct spirv_cross_sampler_info
106 | {
107 | 	const struct spirv_cross_miplevel *mipmaps; /* array of num_mipmaps levels */
108 | 	unsigned num_mipmaps;
109 | 
110 | 	enum spirv_cross_format format;
111 | 	enum spirv_cross_wrap wrap_s;
112 | 	enum spirv_cross_wrap wrap_t;
113 | 	enum spirv_cross_filter min_filter;
114 | 	enum spirv_cross_filter mag_filter;
115 | 	enum spirv_cross_mipfilter mip_filter;
116 | };
117 | /* Opaque 2D sampler: created from a spirv_cross_sampler_info, released with spirv_cross_destroy_sampler_2d(). */
118 | typedef struct spirv_cross_sampler_2d spirv_cross_sampler_2d_t;
119 | spirv_cross_sampler_2d_t *spirv_cross_create_sampler_2d(const struct spirv_cross_sampler_info *info);
120 | void spirv_cross_destroy_sampler_2d(spirv_cross_sampler_2d_t *samp);
121 | 
122 | #ifdef __cplusplus
123 | }
124 | #endif
125 | 
126 | #endif
127 | 


--------------------------------------------------------------------------------
/spirv-io/test/test_hashtable.glsl:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <assert.glsl>
  3 | #include "../lib/hashtable.glsl"
  4 | 
  5 | ThreadLocalCount = 1;
  6 | ThreadGroupCount = 1;
  7 | // 16777216 = 2^24 for the heap and both IO buffers (unit presumably bytes; confirm).
  8 | HeapSize = 16777216;
  9 | ToIOSize = 16777216;
 10 | FromIOSize = 16777216;
 11 | 
 12 | 
 13 | void test_i32hAlloc() {
 14 |     // Expected: capacity is rounded up to a power of two (300 -> 512, 256 -> 256, 257 -> 512), strLen(ht.table) is 3 * capacity, count starts at 0.
 15 |     i32map ht = i32hAlloc(300);
 16 |     assert(512 == ht.capacity);
 17 |     assert(512*3 == strLen(ht.table));
 18 |     assert(0 == ht.count);
 19 |     ht = i32hAlloc(256);
 20 |     assert(256 == ht.capacity);
 21 |     assert(256*3 == strLen(ht.table));
 22 |     assert(0 == ht.count);
 23 |     ht = i32hAlloc(257);
 24 |     assert(512 == ht.capacity);
 25 |     assert(512*3 == strLen(ht.table));
 26 |     assert(0 == ht.count);
 27 | }
 28 | 
 29 | void test_i32hSet() {
 30 |     // Sets, overwrites and re-reads a few keys, then stores keys 0..259 in a 256-capacity map and expects the capacity to become 512.
 31 |     i32map ht = i32hAlloc(256);
 32 |     int32_t v = 0;
 33 | 
 34 |     i32hSet(ht, 45, 1);
 35 |     i32hSet(ht, 46, 2);
 36 |     i32hSet(ht, 47, 3);
 37 |     assert(true == i32hGet(ht, 45, v));
 38 |     assert(1 == v);
 39 |     i32hSet(ht, 45, 4);
 40 |     i32hSet(ht, 248, 5);
 41 |     assert(true == i32hGet(ht, 46, v));
 42 |     assert(2 == v);
 43 |     assert(true == i32hGet(ht, 47, v));
 44 |     assert(3 == v);
 45 |     assert(true == i32hGet(ht, 45, v));
 46 |     assert(4 == v);
 47 |     assert(true == i32hGet(ht, 248, v));
 48 |     assert(5 == v);    assert(256 == ht.capacity);
 49 |     log("Adding 260 keys");
 50 |     for (int32_t i = 0; i < 260; i++) {
 51 |         i32hSet(ht, i, i);
 52 |     }
 53 | 
 54 |     // Resized table
 55 |     assert(512 == ht.capacity);
 56 |     log("Checking for keys");
 57 |     // Check if all the keys are still there
 58 |     for (int32_t i = 0; i < 260; i++) {
 59 |     assert(true == i32hGet(ht, i, v));
 60 |     assert(i == v);
 61 |     }
 62 | 
 63 | 
 64 | }
 65 | 
 66 | void test_i32hGet() {
 67 |     // Lookups of absent keys must return false; after storing key 30 only that key is found.
 68 |     i32map ht = i32hAlloc(256);
 69 |     int32_t v = 123;
 70 |     assert(false == i32hGet(ht, 30, v));
 71 |     i32hSet(ht, 30, 321);
 72 |     assert(true == i32hGet(ht, 30, v));
 73 |     assert(321 == v);    assert(false == i32hGet(ht, 31, v));
 74 |     for (int32_t i = 32; i < 512; i++) {
 75 |     assert(false == i32hGet(ht, i, v));
 76 |     }
 77 | 
 78 | 
 79 | }
 80 | 
 81 | void test_i32hDelete() {
 82 |     // Covers delete after set, repeated set+delete cycles, and interleaved sequences of sets and deletes.
 83 |     i32map ht = i32hAlloc(256);
 84 |     int32_t v = 0;
 85 | 
 86 |     i32hSet(ht, 30, 321);
 87 |     assert(true == i32hGet(ht, 30, v));
 88 |     assert(321 == v);    assert(true == i32hDelete(ht, 30));    assert(false == i32hGet(ht, 30, v));
 89 |     i32hSet(ht, 30, 321);
 90 | 
 91 |     log("i32hDelete: Adding and deleting 468 keys");
 92 |     for (int32_t i = 32; i < 500; i++) {
 93 |         i32hSet(ht, i, i);
 94 |         // Read into v, not into the loop counter: i32hGet stores the value in its third argument.
 95 |         assert(true == i32hGet(ht, i, v));
 96 |         assert(i == v);
 97 |         assert(true == i32hDelete(ht, i));
 98 |     }
 99 |     log("i32hDelete: Checking that none of the keys exist");
100 | 
101 |     for (int32_t i = 32; i < 500; i++) {
102 |         assert(false == i32hGet(ht, i, v));
103 |         assert(false == i32hDelete(ht, i));
104 |     }
105 |     assert(true == i32hGet(ht, 30, v));
106 |     assert(321 == v);
107 |     log("i32hDelete: Check sequences of gets, sets and deletes");
108 | 
109 |     for (int32_t i = 0; i < 500; i+=3) {
110 |         i32hSet(ht, i, i);
111 |     }
112 |     for (int32_t i = 0; i < 500; i+=7) {
113 |         i32hDelete(ht, i);
114 |     }
115 |     for (int32_t i = 0; i < 500; i+=3) {
116 |         if (i % 7 != 0) {
117 |             assert(true == i32hGet(ht, i, v));
118 |             assert(i == v);
119 |             if (!i32hGet(ht, i, v)) {
120 |                 log(concat("err 1.1: ", str(i)));
121 |             }
122 |         } else {
123 |             assert(false == i32hGet(ht, i, v));
124 |             if (i32hGet(ht, i, v)) {
125 |                 log(concat("err 1.2: ", str(i)));
126 |             }
127 |         }
128 |     }
129 | 
130 |     for (int32_t i = 0; i < 500; i+=11) {
131 |         i32hSet(ht, i, i);
132 |     }
133 |     for (int32_t i = 0; i < 500; i+=3) {
134 |         i32hDelete(ht, i);
135 |     }
136 |     for (int32_t i = 0; i < 500; i+=11) {
137 |         if (i % 3 != 0) {
138 |             assert(true == i32hGet(ht, i, v));
139 |             assert(i == v);
140 |             if (!i32hGet(ht, i, v)) {
141 |                 log(concat("err 2.1: ", str(i)));
142 |             }
143 |         } else {
144 |             assert(false == i32hGet(ht, i, v));
145 |             if (i32hGet(ht, i, v)) {
146 |                 log(concat("err 2.2: ", str(i)));
147 |             }
148 |         }
149 |     }
150 | 
151 | 
152 | }
153 | 
154 | void main() { // Entry point: runs the four hashtable tests in order, each wrapped in FREE_ALL.
155 |     FREE_ALL(test_i32hAlloc()); // NOTE(review): FREE_ALL is defined elsewhere; presumably it releases what the wrapped call allocated - confirm
156 |     FREE_ALL(test_i32hSet());
157 |     FREE_ALL(test_i32hGet());
158 |     FREE_ALL(test_i32hDelete());
159 | }
160 | 


--------------------------------------------------------------------------------
/spirv-io/test/test_array.glsl:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <assert.glsl>
  3 | #include "../lib/array.glsl"
  4 | 
  5 | ThreadLocalCount = 1;
  6 | ThreadGroupCount = 1;
  7 | 
  8 | HeapSize = 16777216;
  9 | ToIOSize = 16777216;
 10 | FromIOSize = 16777216;
 11 | 
 12 | 
 13 | void test_i32alloc() {
 14 |     // A freshly allocated array reports the length it was requested with.
 15 |     i32array fresh = i32alloc(3);
 16 |     assert(3 == i32len(fresh));
 17 | }
 18 | 
 19 | void test_i32len() { // i32len on array literals with three, one and zero elements.
 20 |     assert(3 == i32len(i32{1,2,3}));
 21 |     assert(1 == i32len(i32{1}));
 22 |     assert(0 == i32len(i32{}));
 23 | }
 24 | 
 25 | void test_i32get() {
 26 |     // Reads back every index of a three-element literal.
 27 |     i32array src = i32{1,2,3};
 28 |     assert(1 == i32get(src, 0));
 29 |     assert(2 == i32get(src, 1));
 30 |     assert(3 == i32get(src, 2));
 31 | }
 32 | 
 33 | void test_i32set() {
 34 |     // Overwrites all three slots, then checks each one holds the new value.
 35 |     i32array target = i32{1,2,3};
 36 |     i32set(target, 0, 4);
 37 |     i32set(target, 1, 5);
 38 |     i32set(target, 2, 6);
 39 |     assert(4 == i32get(target, 0));
 40 |     assert(5 == i32get(target, 1));
 41 |     assert(6 == i32get(target, 2));
 42 | }
 43 | 
 44 | void test_i32last() {
 45 |     // i32last yields the final element of the literal.
 46 |     i32array items = i32{1,2,3};
 47 |     assert(3 == i32last(items));
 48 | }
 49 | 
 50 | void test_i32first() {
 51 |     // i32first yields the leading element of the literal.
 52 |     i32array items = i32{1,2,3};
 53 |     assert(1 == i32first(items));
 54 | }
 55 | 
 56 | void test_i32clone() {
 57 |     // A clone compares equal to its source but has different start and end values.
 58 |     i32array longer = i32{1,2,3};
 59 |     i32array shorter = i32{4,5};
 60 |     assert(true == i32eq(i32clone(longer), longer));
 61 |     assert(true == i32eq(i32clone(shorter), shorter));
 62 |     assert(longer.start != i32clone(longer).start);
 63 |     assert(longer.end != i32clone(longer).end);
 64 | }
 65 | 
 66 | void test_i32concat() {
 67 |     // Concatenation in both orders, plus every combination involving an empty array.
 68 |     i32array head = i32{1,2,3};
 69 |     i32array tail = i32{4,5};
 70 |     i32array none = i32{};
 71 |     assert(true == i32eq(i32concat(head,tail), i32{1,2,3,4,5}));
 72 |     assert(true == i32eq(i32concat(tail,head), i32{4,5,1,2,3}));
 73 |     assert(true == i32eq(i32concat(head,none), i32{1,2,3}));
 74 |     assert(true == i32eq(i32concat(none,head), i32{1,2,3}));
 75 |     assert(true == i32eq(i32concat(none,none), i32{}));
 76 | }
 77 | 
 78 | void test_i32fill() {
 79 |     // After i32fill every element equals the fill value.
 80 |     i32array buf = i32{1,2,3,4,5};
 81 |     i32fill(buf, 9);
 82 |     assert(true == i32eq(buf, i32{9,9,9,9,9}));
 83 | }
 84 | 
 85 | void test_i32indexOf() {
 86 |     // The value 2 occurs twice: i32indexOf reports the first occurrence, and -1 for a missing value.
 87 |     i32array haystack = i32{1,2,3,2,5};
 88 |     assert(0 == i32indexOf(haystack, 1));
 89 |     assert(4 == i32indexOf(haystack, 5));
 90 |     assert(1 == i32indexOf(haystack, 2));
 91 |     assert(2 == i32indexOf(haystack, 3));
 92 |     assert(-1 == i32indexOf(haystack, 4));
 93 | }
 94 | 
 95 | void test_i32lastIndexOf() {
 96 |     // The value 2 occurs twice: i32lastIndexOf reports the later occurrence, and -1 for a missing value.
 97 |     i32array haystack = i32{1,2,3,2,5};
 98 |     assert(0 == i32lastIndexOf(haystack, 1));
 99 |     assert(4 == i32lastIndexOf(haystack, 5));
100 |     assert(3 == i32lastIndexOf(haystack, 2));
101 |     assert(2 == i32lastIndexOf(haystack, 3));
102 |     assert(-1 == i32lastIndexOf(haystack, 4));
103 | }
104 | 
105 | void test_i32includes() {
106 |     // Every stored value is reported as present; a value outside the array is not.
107 |     i32array members = i32{1,2,3,4,5};
108 |     assert(true == i32includes(members, 1));
109 |     assert(true == i32includes(members, 2));
110 |     assert(true == i32includes(members, 3));
111 |     assert(true == i32includes(members, 4));
112 |     assert(true == i32includes(members, 5));
113 |     assert(false == i32includes(members, 6));
114 | }
115 | 
116 | void test_i32reverseInPlace() {
117 |     // The array passed in is itself reversed.
118 |     i32array seq = i32{1,2,3,4,5};
119 |     i32reverseInPlace(seq);
120 |     assert(true == i32eq(seq, i32{5,4,3,2,1}));
121 | }
122 | 
123 | void test_i32reverse() {
124 |     // The returned array holds the elements in reverse order.
125 |     i32array seq = i32{1,2,3,4,5};
126 |     assert(true == i32eq(i32reverse(seq), i32{5,4,3,2,1}));
127 | }
128 | 
129 | void test_i32slice() {
130 |     // One-argument slice: positive, negative, zero and end-of-array start indices.
131 |     i32array base = i32{1,2,3,4,5};
132 |     assert(3 == i32len(i32slice(base, 2)));
133 |     assert(2 == i32len(i32slice(base, 3)));
134 |     assert(2 == i32len(i32slice(base, -2)));
135 |     assert(5 == i32len(i32slice(base, -5)));
136 |     assert(0 == i32len(i32slice(base, 5)));
137 |     assert(5 == i32len(i32slice(base, 0)));
138 |     assert(3 == i32get(i32slice(base, 2), 0));
139 |     // (i32array a, size_t i) => i32len(a) >= i32len(i32slice(a, i));
140 | 
141 | }
142 | 
143 | void test_i32slice_() {
144 |     // Two-argument slice over the array 0..4: negative end, in-range end, and end past the length.
145 |     i32array base = i32alloc(5);
146 |     for (ptr_t n = 0; n < 5; n++) i32set(base, n, n);
147 |     assert(3 == i32len(i32slice(base, 1, -1)));
148 |     assert(3 == i32len(i32slice(base, 1, 4)));
149 |     assert(3 == i32len(i32slice(base, 2, 7)));
150 |     assert(3 == i32get(i32slice(base, 1, -1), 2));
151 |     // (i32array a, size_t i, size_t j) => i32len(a) >= i32len(i32slice(a, i, j));
152 | 
153 | }
154 | 
155 | void main() { // Entry point: runs every array test in order, each wrapped in FREE_ALL.
156 |     FREE_ALL(test_i32alloc()); // NOTE(review): FREE_ALL is defined elsewhere; presumably it releases what the wrapped call allocated - confirm
157 |     FREE_ALL(test_i32len());
158 |     FREE_ALL(test_i32get());
159 |     FREE_ALL(test_i32set());
160 |     FREE_ALL(test_i32last());
161 |     FREE_ALL(test_i32first());
162 |     FREE_ALL(test_i32clone());
163 |     FREE_ALL(test_i32concat());
164 |     FREE_ALL(test_i32fill());
165 |     FREE_ALL(test_i32indexOf());
166 |     FREE_ALL(test_i32lastIndexOf());
167 |     FREE_ALL(test_i32includes());
168 |     FREE_ALL(test_i32reverseInPlace());
169 |     FREE_ALL(test_i32reverse());
170 |     FREE_ALL(test_i32slice());
171 |     FREE_ALL(test_i32slice_());
172 | }
173 | 


--------------------------------------------------------------------------------
/spirv-io/examples/grep_cpu.glsl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env gls
  2 | 
  3 | ThreadLocalCount = 256;
  4 | ThreadGroupCount = 64;
  5 | 
  6 | #include <file.glsl>
  7 | // #include <lz4.glsl>
  8 | 
  9 | shared int done;
 10 | shared int64_t wgOff;
 11 | shared string wgBuf;
 12 | shared int32_t decompressedSize;
 13 | shared bool isCompressed;
 14 | 
 15 | void addHit(int32_t k, int32_t off, inout bool found) { // Record the hit offset k + off and flag the match.
 16 |     i32fromIO[atomicAdd(groupHeapPtr, 4) / 4] = off + k; // atomicAdd hands out the next 4-byte slot
 17 |     found = true;
 18 | }
 19 | 
 20 | bool grepBuffer(int32_t blockSize, string buf, string pattern, char p, int32_t off) { // Scans buf in 32-byte steps for pattern; p is the byte used as a pre-filter (main passes the first byte of pattern), off is added to each recorded hit. Returns true if any hit was recorded.
 21 |     bool found = false;
 22 |     for (size_t i = 0, l = strLen(buf); i < blockSize; i+=32) { // l bounds which candidate positions are accepted below
 23 |         ptr_t idx = buf.x + i;
 24 |         i64vec4 v = i64v4heap[idx / 32]; // NOTE(review): idx / 32 addresses buf.x + i exactly only when that offset is a multiple of 32 - confirm alignment of buf.x
 25 |         for (size_t j = 0, k = i, jdx = idx; j < 64; j += 8, idx++, k++, jdx++) { // j is a bit shift: each pass looks at the next byte of every 64-bit lane
 26 |             i8vec4 u = i8vec4((v >> int64_t(j)) & 0xff); // one byte per lane: candidate positions k, k+8, k+16 and k+24
 27 |             if (any(equal(u, i8vec4(p)))) { // cheap byte filter before the full startsWith checks
 28 |                 if (k < l && p == u.x && startsWith(string(jdx, buf.y), pattern)) addHit(k, off, found);
 29 |                 if (k+8 < l && p == u.y && startsWith(string(jdx+8, buf.y), pattern)) addHit(k + 8, off, found);
 30 |                 if (k+16 < l && p == u.z && startsWith(string(jdx+16, buf.y), pattern)) addHit(k + 16, off, found);
 31 |                 if (k+24 < l && p == u.w && startsWith(string(jdx+24, buf.y), pattern)) addHit(k + 24, off, found);
 32 |             }
 33 |         }
 34 |     }
 35 |     return found;
 36 | }
 37 | 
 38 | 
 39 | void main() {
 40 |     // Per workgroup: thread 0 reads a slice of the file, all threads scan their block of it, thread 0 prints the hit offsets.
 41 |     if (arrLen(argv) < 3) {
 42 |         if (ThreadId == 0) eprintln("USAGE: grep_cpu.glsl pattern file");
 43 |         return;
 44 |     }
 45 | 
 46 |     string pattern = aGet(argv, 1);
 47 |     string filename = aGet(argv, 2);
 48 | 
 49 |     if (ThreadId == 0) {
 50 |         Stat st = statSync(filename);
 51 |         programReturnValue = (st.error == 0) ? 1 : 2; // 1: file exists, no match yet; 2: stat failed
 52 |         // readaheadSync(filename, 0, st.st_size);
 53 |     }
 54 |     while (programReturnValue == 0); // Wait for first thread.
 55 | 
 56 |     if (programReturnValue == 2) {
 57 |         if (ThreadId == 0) eprintln(concat("File not found: ", filename));
 58 |         return;
 59 |     }
 60 | 
 61 |     int32_t patternLength = strLen(pattern);
 62 |     int32_t blockSize = HeapSize - (((patternLength+31) / 32) * 32); // per-thread block: HeapSize minus the pattern length rounded up to 32
 63 |     int32_t wgBufSize = ThreadLocalCount * blockSize + patternLength;
 64 | 
 65 |     if (ThreadLocalId == 0) {
 66 |         done = 0;
 67 |         wgOff = int64_t(ThreadGroupId * ThreadLocalCount) * int64_t(blockSize);
 68 |         isCompressed = true;
 69 |     }
 70 | 
 71 |     bool found = false;
 72 |     char p = heap[pattern.x]; // first byte of the pattern, used by grepBuffer as a pre-filter
 73 | 
 74 |     ptr_t hitStart = 0;
 75 | 
 76 |     while (done == 0) {
 77 |         FREE(FREE_IO(
 78 |             barrier(); memoryBarrier();
 79 | 
 80 |             if (ThreadLocalId == 0) {
 81 |                 fromIOPtr = groupHeapStart;
 82 |                 toIOPtr = groupHeapStart;
 83 | 
 84 |                 io r = read(filename, wgOff, wgBufSize, string(groupHeapStart, groupHeapStart + (HeapSize * ThreadLocalCount)));
 85 |                 wgBuf = awaitIO(r, true, decompressedSize, isCompressed);
 86 | 
 87 |                 if (decompressedSize != wgBufSize) {
 88 |                     done = (decompressedSize == 0) ? 2 : 1; // 2: nothing read, stop now; 1: short read, scan it and then stop
 89 |                 }
 90 |                 groupHeapPtr = groupHeapStart;
 91 |                 hitStart = groupHeapPtr;
 92 |             }
 93 | 
 94 |             barrier(); memoryBarrier();
 95 | 
 96 |             if (done == 2) break;
 97 | 
 98 | //            if (isCompressed) {
 99 | //                for (int32_t i = 0; i < 128; i += ThreadLocalCount/LZ4_GROUP_SIZE) {
100 | //                    lz4DecompressBlockStreamFromIOToHeap(i + ThreadLocalId/LZ4_GROUP_SIZE, LZ4_STREAM_BLOCK_SIZE, wgBuf, string(groupHeapStart, groupHeapStart + decompressedSize));
101 | //                }
102 | //            } else {
103 |                 copyFromIOToHeap(
104 |                     string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize),
105 |                     string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize)
106 |                 );
107 | //            }
108 | 
109 |             if (ThreadLocalId == 0) {
110 |                 wgBuf = string(groupHeapStart, groupHeapStart + decompressedSize);
111 |             }
112 | 
113 |             barrier(); memoryBarrier();
114 | 
115 |             string buf = string(
116 |                 min(wgBuf.y, wgBuf.x + ThreadLocalId * blockSize),
117 |                 min(wgBuf.y, wgBuf.x + (ThreadLocalId+1) * blockSize + patternLength)
118 |             );
119 | 
120 |             bool blockFound = grepBuffer(blockSize, buf, pattern, p, ThreadLocalId * blockSize);
121 |             found = found || blockFound;
122 | 
123 |             barrier(); memoryBarrier();
124 | 
125 |             if (ThreadLocalId == 0) {
126 |                 fromIOPtr = groupHeapStart;
127 |                 toIOPtr = groupHeapStart;
128 |                 ptr_t start = hitStart / 4;
129 |                 ptr_t end = groupHeapPtr / 4;
130 | 
131 |                 if (start != end) {
132 |                     heapPtr = groupHeapStart;
133 |                     for (int j = start; j < end; j++) {
134 |                         str(int64_t(i32fromIO[j]) + wgOff);
135 |                         _w('\n');
136 |                     }
137 |                     print(string(groupHeapStart, heapPtr));
138 |                 }
139 | 
140 |                 wgOff += int64_t(ThreadCount) * int64_t(blockSize); // widen before multiplying so the stride cannot overflow 32 bits
141 |             }
142 | 
143 |             barrier(); memoryBarrier();
144 |         ))
145 |     }
146 |     atomicMin(programReturnValue, found ? 0 : 1); // publish the result: 0 once any thread has matched
147 | }
148 | 
149 | 
150 | 


--------------------------------------------------------------------------------
/spirv-io/examples/grep.glsl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env gls
  2 | 
  3 | ThreadLocalCount = 256;
  4 | ThreadGroupCount = 64;
  5 | 
  6 | #define LZ4_GROUP_SIZE 8
  7 | #define LZ4_STREAM_BLOCK_SIZE 8192
  8 | 
  9 | #include <file.glsl>
 10 | #include <lz4.glsl>
 11 | 
 12 | shared int done;
 13 | shared int64_t wgOff;
 14 | shared string wgBuf;
 15 | shared int32_t decompressedSize;
 16 | shared bool isCompressed;
 17 | 
 18 | void addHit(int32_t k, int32_t off, inout bool found) { // Record the hit offset k + off and flag the match.
 19 |     i32fromIO[atomicAdd(groupHeapPtr, 4) / 4] = off + k; // atomicAdd hands out the next 4-byte slot
 20 |     found = true;
 21 | }
 22 | 
 23 | bool grepBuffer(int32_t blockSize, string buf, string pattern, char p, int32_t off) { // Scans buf in 32-byte steps for pattern; p is the byte used as a pre-filter (main passes the first byte of pattern), off is added to each recorded hit. Returns true if any hit was recorded.
 24 |     bool found = false;
 25 |     for (size_t i = 0, l = strLen(buf); i < blockSize; i+=32) { // l bounds which candidate positions are accepted below
 26 |         ptr_t idx = buf.x + i;
 27 |         i64vec4 v = i64v4heap[idx / 32]; // NOTE(review): idx / 32 addresses buf.x + i exactly only when that offset is a multiple of 32 - confirm alignment of buf.x
 28 |         for (size_t j = 0, k = i, jdx = idx; j < 64; j += 8, idx++, k++, jdx++) { // j is a bit shift: each pass looks at the next byte of every 64-bit lane
 29 |             i8vec4 u = i8vec4((v >> j) & 0xff); // one byte per lane: candidate positions k, k+8, k+16 and k+24
 30 |             if (any(equal(u, i8vec4(p)))) { // cheap byte filter before the full startsWith checks
 31 |                 if (k < l && p == u.x && startsWith(string(jdx, buf.y), pattern)) addHit(k, off, found);
 32 |                 if (k+8 < l && p == u.y && startsWith(string(jdx+8, buf.y), pattern)) addHit(k + 8, off, found);
 33 |                 if (k+16 < l && p == u.z && startsWith(string(jdx+16, buf.y), pattern)) addHit(k + 16, off, found);
 34 |                 if (k+24 < l && p == u.w && startsWith(string(jdx+24, buf.y), pattern)) addHit(k + 24, off, found);
 35 |             }
 36 |         }
 37 |     }
 38 |     return found;
 39 | }
 40 | 
 41 | 
 42 | void main() {
 43 |     // Per workgroup: thread 0 reads a slice of the file, all threads decompress or copy and scan it, thread 0 prints the hit offsets.
 44 |     if (arrLen(argv) < 3) {
 45 |         if (ThreadId == 0) eprintln("USAGE: grep.glsl pattern file");
 46 |         return;
 47 |     }
 48 | 
 49 |     string pattern = aGet(argv, 1);
 50 |     string filename = aGet(argv, 2);
 51 | 
 52 |     if (ThreadId == 0) {
 53 |         Stat st = statSync(filename);
 54 |         programReturnValue = (st.error == 0) ? 1 : 2; // 1: file exists, no match yet; 2: stat failed
 55 |         // readaheadSync(filename, 0, st.st_size);
 56 |     }
 57 |     while (programReturnValue == 0); // Wait for first thread.
 58 | 
 59 |     if (programReturnValue == 2) {
 60 |         if (ThreadId == 0) eprintln(concat("File not found: ", filename));
 61 |         return;
 62 |     }
 63 | 
 64 |     int32_t patternLength = strLen(pattern);
 65 |     int32_t blockSize = HeapSize - (((patternLength+31) / 32) * 32); // per-thread block: HeapSize minus the pattern length rounded up to 32
 66 |     int32_t wgBufSize = ThreadLocalCount * blockSize + patternLength;
 67 | 
 68 |     if (ThreadLocalId == 0) {
 69 |         done = 0;
 70 |         wgOff = int64_t(ThreadGroupId * ThreadLocalCount) * int64_t(blockSize);
 71 |         isCompressed = true;
 72 |     }
 73 | 
 74 |     bool found = false;
 75 |     char p = heap[pattern.x]; // first byte of the pattern, used by grepBuffer as a pre-filter
 76 | 
 77 |     ptr_t hitStart = 0;
 78 | 
 79 |     while (done == 0) {
 80 |         FREE(FREE_IO(
 81 |             barrier(); memoryBarrier();
 82 | 
 83 |             if (ThreadLocalId == 0) {
 84 |                 fromIOPtr = groupHeapStart;
 85 |                 toIOPtr = groupHeapStart;
 86 | 
 87 |                 io r = read(filename, wgOff, wgBufSize, string(groupHeapStart, groupHeapStart + (HeapSize * ThreadLocalCount)), IO_COMPRESS_LZ4_BLOCK_STREAM | LZ4_STREAM_BLOCK_SIZE);
 88 |                 wgBuf = awaitIO(r, true, decompressedSize, isCompressed);
 89 | 
 90 |                 if (decompressedSize != wgBufSize) {
 91 |                     done = (decompressedSize == 0) ? 2 : 1; // 2: nothing read, stop now; 1: short read, scan it and then stop
 92 |                 }
 93 |                 groupHeapPtr = groupHeapStart;
 94 |                 hitStart = groupHeapPtr;
 95 |             }
 96 | 
 97 |             barrier(); memoryBarrier();
 98 | 
 99 |             if (done == 2) break;
100 | 
101 |             if (isCompressed) {
102 |                 for (int32_t i = 0; i < 128; i += ThreadLocalCount/LZ4_GROUP_SIZE) {
103 |                     lz4DecompressBlockStreamFromIOToHeap(i + ThreadLocalId/LZ4_GROUP_SIZE, LZ4_STREAM_BLOCK_SIZE, wgBuf, string(groupHeapStart, groupHeapStart + decompressedSize));
104 |                 }
105 |             } else {
106 |                 copyFromIOToHeap(
107 |                     string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize),
108 |                     string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize)
109 |                 );
110 |             }
111 | 
112 |             if (ThreadLocalId == 0) {
113 |                 wgBuf = string(groupHeapStart, groupHeapStart + decompressedSize);
114 |             }
115 | 
116 |             barrier(); memoryBarrier();
117 | 
118 |             string buf = string(
119 |                 min(wgBuf.y, wgBuf.x + ThreadLocalId * blockSize),
120 |                 min(wgBuf.y, wgBuf.x + (ThreadLocalId+1) * blockSize + patternLength)
121 |             );
122 | 
123 |             bool blockFound = grepBuffer(blockSize, buf, pattern, p, ThreadLocalId * blockSize);
124 |             found = found || blockFound;
125 | 
126 |             barrier(); memoryBarrier();
127 | 
128 |             if (ThreadLocalId == 0) {
129 |                 fromIOPtr = groupHeapStart;
130 |                 toIOPtr = groupHeapStart;
131 |                 ptr_t start = hitStart / 4;
132 |                 ptr_t end = groupHeapPtr / 4;
133 | 
134 |                 if (start != end) {
135 |                     heapPtr = groupHeapStart;
136 |                     for (int j = start; j < end; j++) {
137 |                         str(int64_t(i32fromIO[j]) + wgOff);
138 |                         _w('\n');
139 |                     }
140 |                     print(string(groupHeapStart, heapPtr));
141 |                 }
142 | 
143 |                 wgOff += int64_t(ThreadCount) * int64_t(blockSize); // widen before multiplying so the stride cannot overflow 32 bits
144 |             }
145 | 
146 |             barrier(); memoryBarrier();
147 |         ))
148 |     }
149 | 
150 |     atomicMin(programReturnValue, found ? 0 : 1); // publish the result: 0 once any thread has matched
151 | }
152 | 
153 | 


--------------------------------------------------------------------------------
/http_shader/cpp/cppRunner.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2015-2017 ARM Limited
  3 |  *
  4 |  * Licensed under the Apache License, Version 2.0 (the "License");
  5 |  * you may not use this file except in compliance with the License.
  6 |  * You may obtain a copy of the License at
  7 |  *
  8 |  *     http://www.apache.org/licenses/LICENSE-2.0
  9 |  *
 10 |  * Unless required by applicable law or agreed to in writing, software
 11 |  * distributed under the License is distributed on an "AS IS" BASIS,
 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |  * See the License for the specific language governing permissions and
 14 |  * limitations under the License.
 15 |  */
 16 | 
 17 | #include <unistd.h>
 18 | 
 19 | #include "spirv_cross/external_interface.h"
 20 | #include <stdio.h>
 21 | #include <string.h>
 22 | #include <chrono>
 23 | 
 24 | inline int _atomicCompSwap(int *ptr, int val, int newval) { // Compare-and-swap: stores newval if *ptr == val; returns the value *ptr held.
 25 | 	__atomic_compare_exchange_n(ptr, &val, newval, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); // on failure val is overwritten with the current *ptr
 26 | 	return val;
 27 | }
 28 | 
 29 | #define atomicCompSwap(ptr, val, newval) _atomicCompSwap(&(ptr), (val), (newval))
 30 | 
 31 | #include "httpd.cpp"
 32 | 
 33 | #ifndef GLM_FORCE_SWIZZLE
 34 | #define GLM_FORCE_SWIZZLE
 35 | #endif
 36 | 
 37 | #ifndef GLM_FORCE_RADIANS
 38 | #define GLM_FORCE_RADIANS
 39 | #endif
 40 | 
 41 | #include <glm/glm.hpp>
 42 | using namespace glm;
 43 | 
 44 | // Build some input data for our compute shader.
 45 | #define NUM_WORKGROUPS_X 32
 46 | #define NUM_WORKGROUPS_Y 1
 47 | 
 48 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 1024 * 16; // 32 * 1 * 1024 * 16 = 524288 request slots
 49 | 
 50 | static const int requestSize = 1024; // bytes per request slot
 51 | // One slot of (requestSize / 4) ints per request. main() stores a length in int 0 of a slot and text from int 4 (byte 16) on.
 52 | static int inputBuffer[(requestSize / 4) * requestCount] = {};
 53 | static int outputBuffer[(requestSize / 4) * requestCount] = {};
 54 | static int heapBuffer[(requestSize / 4) * requestCount] = {};
 55 | 
 56 | int main()
 57 | {
 58 | 	// First, we get the C interface to the shader.
 59 | 	// This can be loaded from a dynamic library, or as here,
 60 | 	// linked in as a static library.
 61 | 	auto *iface = spirv_cross_get_interface();
 62 | 
 63 | 	// Create an instance of the shader interface.
 64 | 	auto *shader = iface->construct();
 65 | 
 66 | 	void *inputs_ptr = inputBuffer;
 67 | 	void *outputs_ptr = outputBuffer;
 68 | 	void *heap_ptr = heapBuffer;
 69 | 
 70 | 	// Fill every slot: int 0 holds the text length, the text itself starts at int 4 (byte 16).
 71 | 	for (int i = 0; i < (int)requestCount; i++) {
 72 | 		if (i % 2 == 0) {
 73 | 			snprintf((char*)(&inputBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\n<html><body>This is post number %d.</body></html>", i*2/3, i);
 74 | 		} else {
 75 | 			snprintf((char*)(&inputBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "GET /%07d HTTP/1.1\r\nhost: localhost\r\n\r\n", i);
 76 | 		}
 77 | 		if (i % 11 == 10) {
 78 | 			int j = i % 10;
 79 | 			snprintf((char*)(&inputBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\n<html><body>This is %d spam-post %d number %d.</body></html>", j, i, i, i);
 80 | 		}
 81 | 		inputBuffer[(requestSize / 4) * i] = strlen((char*)(&inputBuffer[(requestSize / 4) * i + 4]));
 82 | 		// if (i < 10) printf("%d\n%s\n", inputBuffer[(requestSize / 4) * i], (char*)(&inputBuffer[(requestSize / 4) * i + 4]));
 83 | 
 84 | 		snprintf((char*)(&heapBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "text/html\r\n\r\n<html><body>This is document number %d.</body></html>", i);
 85 | 		heapBuffer[(requestSize / 4) * i] = strlen((char*)(&heapBuffer[(requestSize / 4) * i + 4]));
 86 | 		// if (i < 10) printf("%d\n%s\n", heapBuffer[(requestSize / 4) * i], (char*)(&heapBuffer[(requestSize / 4) * i + 4]));
 87 | 	}
 88 | 
 89 | 	std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
 90 | 	for (int i = 0; i < 100; i++) {
 91 | 
 92 | 		// Bind resources to the shader.
 93 | 		// For resources like samplers and buffers, we provide a list of pointers,
 94 | 		// since UBOs, SSBOs and samplers can be arrays, and can point to different types,
 95 | 		// which is especially true for samplers.
 96 | 		spirv_cross_set_resource(shader, 0, 0, &inputs_ptr, sizeof(inputs_ptr));
 97 | 		spirv_cross_set_resource(shader, 0, 1, &outputs_ptr, sizeof(outputs_ptr));
 98 | 		spirv_cross_set_resource(shader, 0, 2, &heap_ptr, sizeof(heap_ptr));
 99 | 
100 | 		// We also have to set builtins.
101 | 		// The relevant builtins will depend on the shader,
102 | 		// but for compute, there are few builtins, which are gl_NumWorkGroups and gl_WorkGroupID.
103 | 		// LocalInvocationID and GlobalInvocationID are inferred when executing the invocation.
104 | 		uvec3 num_workgroups(NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1);
105 | 		uvec3 work_group_id(0, 0, 0);
106 | 		spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, &num_workgroups, sizeof(num_workgroups));
107 | 		spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, &work_group_id, sizeof(work_group_id));
108 | 
109 | 		// Execute work groups.
110 | 		for (unsigned x = 0; x < NUM_WORKGROUPS_X; x++)
111 | 		for (unsigned y = 0; y < NUM_WORKGROUPS_Y; y++)
112 | 		{
113 | 			work_group_id.x = x;
114 | 			work_group_id.y = y;
115 | 			iface->invoke(shader);
116 | 		}
117 | 
118 | 	}
119 | 	std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
120 | 
121 | 	// Call destructor.
122 | 	iface->destruct(shader);
123 | 
124 | 	for (int i = 0; i < 10; i++) {
125 | 		write(1, ((char*)outputBuffer)+requestSize*i+16, outputBuffer[(requestSize / 4)*i]);
126 | 		printf("\n");
127 | 	}
128 | 	const long long elapsedMs = std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count(); // fixed-width type so the format specifier below is portable
129 | 	printf("\nElapsed: %lld ms\n", elapsedMs);
130 | 	printf("Million requests per second: %.3f\n\n", 1e-6 * (requestCount * 100.0) / (0.001 * elapsedMs));
131 | 
132 | 	return 0;
133 | }
134 | 


--------------------------------------------------------------------------------
/http_shader/ispc_char/httpd.glsl:
--------------------------------------------------------------------------------
  1 | #define version #version
  2 | 
  3 | version 450
  4 | 
  5 | #include "../chr.glsl"
  6 | 
  7 | #define BLK_SZ 1024
  8 | // Number of consecutive BLK_SZ-sized blocks each invocation loops over in main().
  9 | #define REQUESTS_PER_INVOCATION 512
 10 | // strCopy copies SRC[start, end) to DST starting at DST[i] and advances i; strCopyAll copies a whole array that way.
 11 | #define strCopy(SRC, DST, i, start, end) uint _s = start; uint _e = end; while (_s < _e) (DST)[i++] = (SRC)[_s++];
 12 | #define strCopyAll(SRC, DST, i) uint _str[] = SRC; strCopy(_str, DST, i, 0, _str.length())
 13 | // A_OK: once the cursor i has passed BLK_SZ, write an error response and return from the enclosing function.
 14 | #define A_OK if (i > BLK_SZ) { error(index); return; }
 15 | 
 16 | layout (local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in;
 17 | 
 18 | layout(std430, binding = 0) readonly buffer inputBuffer { lowp uint inputBytes[]; }; // request data, read only
 19 | layout(std430, binding = 1) buffer outputBuffer { lowp uint outputBytes[]; }; // response data written by the write* functions
 20 | layout(std430, binding = 2) buffer heapBuffer { lowp uint heap[]; }; // declared but not referenced by any function in this file
 21 | 
 22 | const uint METHOD_UNKNOWN = 0; // method codes produced by readMethod
 23 | const uint METHOD_GET = 1;
 24 | const uint METHOD_POST = 2;
 25 | const uint METHOD_OPTION = 3;
 26 | 
 27 | const uint PROTOCOL_UNKNOWN = 0; // protocol codes produced by readProtocol
 28 | const uint PROTOCOL_HTTP10 = 1;
 29 | const uint PROTOCOL_HTTP11 = 2;
 30 | 
 31 | const uint MIME_TEXT_PLAIN = 0; // content type codes accepted by writeContentType
 32 | const uint MIME_TEXT_HTML = 1;
 33 | 
 34 | struct header { // One parsed header line; both fields are filled by readRequestUntilChar in readHeader.
 35 | 	uvec2 name; // x = index of the first element, y = index of the terminating character in inputBytes
 36 | 	uvec2 value; // same convention as name
 37 | };
 38 | 
 39 | void readRequestUntilChar(inout uint i, uint index, uint endChar, out uvec2 str) {
 40 | 	// Scan from cursor i to endChar or the block end; str.x is the start and str.y the stop position (index included).
 41 | 	uint pos = i;
 42 | 	str.x = index + pos;
 43 | 	for (; pos < BLK_SZ && inputBytes[index + pos] != endChar; pos++) {}
 44 | 	str.y = index + pos;
 45 | 	i = pos + 1; // leave the cursor one past the stop position
 46 | }
 47 | 
 48 | void readMethod(inout uint i, uint index, out uint method) {
 49 | 	// Recognize the method token at the cursor and advance past it; unknown tokens push i past BLK_SZ.
 50 | 	uint at = index + i;
 51 | 	method = METHOD_UNKNOWN;
 52 | 	uint advance = 0;
 53 | 	if (
 54 | 		inputBytes[at] == CHR_G &&
 55 | 		inputBytes[at+1] == CHR_E &&
 56 | 		inputBytes[at+2] == CHR_T &&
 57 | 		inputBytes[at+3] == CHR_SPACE
 58 | 	) {
 59 | 		method = METHOD_GET;
 60 | 		advance = 4;
 61 | 	} else if (
 62 | 		inputBytes[at] == CHR_P &&
 63 | 		inputBytes[at+1] == CHR_O &&
 64 | 		inputBytes[at+2] == CHR_S &&
 65 | 		inputBytes[at+3] == CHR_T &&
 66 | 		inputBytes[at+4] == CHR_SPACE
 67 | 	) {
 68 | 		method = METHOD_POST;
 69 | 		advance = 5;
 70 | 	} else if (inputBytes[at] == CHR_O && inputBytes[at+6] == CHR_SPACE) {
 71 | 		method = METHOD_OPTION;
 72 | 		advance = 7;
 73 | 	}
 74 | 	// A cursor beyond BLK_SZ is what the A_OK check in the caller treats as a parse failure.
 75 | 	if (method == METHOD_UNKNOWN) i = BLK_SZ+1;
 76 | 	else i += advance;
 77 | }
 78 | 
 79 | void readPath(inout uint i, uint index, out uvec2 path) { // The path is everything from the cursor up to the next space.
 80 | 	readRequestUntilChar(i, index, CHR_SPACE, path);
 81 | }
 82 | 
 83 | void readProtocol(inout uint i, uint index, out uint protocol) {
 84 | 	uvec2 token; // span of the protocol text, ending at the CR
 85 | 	readRequestUntilChar(i, index, CHR_CR, token);
 86 | 	bool lineEnded = i < BLK_SZ && inputBytes[index+i] == CHR_LF;
 87 | 	if (!lineEnded) {
 88 | 		// No LF right after the CR: report failure by pushing the cursor past BLK_SZ.
 89 | 		protocol = PROTOCOL_UNKNOWN;
 90 | 		i = BLK_SZ+1;
 91 | 		return;
 92 | 	}
 93 | 	i++;
 94 | 	// The element just before the CR selects the version: CHR_1 gives HTTP11, anything else HTTP10.
 95 | 	if (inputBytes[token.y-1] == CHR_1) protocol = PROTOCOL_HTTP11;
 96 | 	else protocol = PROTOCOL_HTTP10;
 97 | }
 98 | 
 99 | bool readHeader(inout uint i, uint index, out header hdr) { // Parses one header line into hdr and returns false; returns true, skipping two elements, when the cursor sits on a CR.
100 | 	if (inputBytes[index+i] != CHR_CR) {
101 | 		readRequestUntilChar(i, index, CHR_COLON, hdr.name);
102 | 		for (; i < BLK_SZ && inputBytes[index+i] == CHR_SPACE; i++) {}
103 | 		readRequestUntilChar(i, index, CHR_CR, hdr.value);
104 | 		i++; // presumably steps over the LF that follows the CR
105 | 		return false;
106 | 	}
107 | 	i += 2;
108 | 	return true;
109 | }
110 | 
111 | void writeStatus(inout uint i, uint index, uint statusCode) { // Emits the status line; any code other than 200 is written as a 500 error.
112 | 	uint cursor = index + i;
113 | 	strCopyAll("HTTP/1.1 ", outputBytes, cursor);
114 | 	if (statusCode != 200) {
115 | 		strCopyAll("500 Error", outputBytes, cursor);
116 | 	} else {
117 | 		strCopyAll("200 OK", outputBytes, cursor);
118 | 	}
119 | 	outputBytes[cursor++] = CHR_CR;
120 | 	outputBytes[cursor++] = CHR_LF;
121 | 	i = cursor - index; // hand the advanced block-relative cursor back to the caller
122 | }
123 | 
124 | void writeContentType(inout uint i, uint index, uint contentType) {
125 | 	// Emits the Content-Type header line; any code other than MIME_TEXT_PLAIN is written as text/html.
126 | 	uint cursor = index + i;
127 | 	uint headerName[] = "Content-Type: ";
128 | 	strCopyAll(headerName, outputBytes, cursor);
129 | 	if (contentType != MIME_TEXT_PLAIN) {
130 | 		strCopyAll("text/html", outputBytes, cursor);
131 | 	} else {
132 | 		strCopyAll("text/plain", outputBytes, cursor);
133 | 	}
134 | 	outputBytes[cursor++] = CHR_CR;
135 | 	outputBytes[cursor++] = CHR_LF;
136 | 
137 | 	i = cursor - index;
138 | }
139 | 
140 | void writeEndHeaders(inout uint i, uint index) { // Emits the CR LF pair that ends the header section.
141 | 	uint cursor = index + i;
142 | 	outputBytes[cursor] = CHR_CR;
143 | 	outputBytes[cursor + 1] = CHR_LF;
144 | 	i = (cursor + 2) - index;
145 | }
146 | 
147 | void writeBody(inout uint i, uint index, uvec2 path) { // Emits a fixed greeting followed by LF; path is accepted but not used.
148 | 	uint cursor = index + i;
149 | 	strCopyAll("Hello, World!", outputBytes, cursor);
150 | 	outputBytes[cursor++] = CHR_LF;
151 | 	i = cursor - index;
152 | }
153 | 
154 | void error(uint index) { // Writes a 500 response with no body into the output block that starts at index.
155 | 	uint i = 16; // response text starts at element 16; elements 0 to 3 receive its length below
156 | 	writeStatus(i, index, 500);
157 | 	writeContentType(i, index, MIME_TEXT_PLAIN);
158 | 	writeEndHeaders(i, index);
159 | 	outputBytes[index+0] = ((i-16) >> 0) & 0xFF; // length, least significant byte first
160 | 	outputBytes[index+1] = ((i-16) >> 8) & 0xFF; // shift right: a left shift masked with 0xFF is always zero
161 | 	outputBytes[index+2] = ((i-16) >> 16) & 0xFF;
162 | 	outputBytes[index+3] = ((i-16) >> 24) & 0xFF;
163 | }
164 | 
165 | void handleRequest(uint index) { // Parses the request in the input block at index and writes a 200 response into the matching output block.
166 | 	uint i = 16; // request text starts at element 16 of the block
167 | 	uint method;
168 | 	uvec2 path;
169 | 	uint protocol;
170 | 	header headers[32];
171 | 	uint headerCount = 0;
172 | 	// Parse the request line; A_OK answers with error(index) and returns once i has passed BLK_SZ.
173 | 	readMethod(i, index, method); A_OK;
174 | 	readPath(i, index, path); A_OK;
175 | 	readProtocol(i, index, protocol); A_OK;
176 | 	// Collect up to 32 headers, stopping when readHeader reports the end of the header section.
177 | 	for (uint j = 0; j < 32; j++) {
178 | 		bool done = readHeader(i, index, headers[j]); A_OK;
179 | 		if (done) break;
180 | 		headerCount++;
181 | 	}
182 | 	// Rewind the cursor and build the response in the output block.
183 | 	i = 16;
184 | 	writeStatus(i, index, 200);
185 | 	writeContentType(i, index, MIME_TEXT_PLAIN);
186 | 	writeEndHeaders(i, index);
187 | 	writeBody(i, index, path);
188 | 	outputBytes[index+0] = ((i-16) >> 0) & 0xFF; // response length, least significant byte first
189 | 	outputBytes[index+1] = ((i-16) >> 8) & 0xFF; // shift right: a left shift masked with 0xFF is always zero
190 | 	outputBytes[index+2] = ((i-16) >> 16) & 0xFF;
191 | 	outputBytes[index+3] = ((i-16) >> 24) & 0xFF;
192 | }
193 | 
194 | void main() { // Each invocation handles REQUESTS_PER_INVOCATION consecutive BLK_SZ-sized request blocks.
195 | 	uint index = BLK_SZ * REQUESTS_PER_INVOCATION * (gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * (gl_NumWorkGroups.x * gl_WorkGroupSize.x));
196 | 	for (uint i = 0; i < REQUESTS_PER_INVOCATION; i++) {
197 | 		handleRequest(index + BLK_SZ * i); // step through the blocks; a fixed offset would handle one block over and over
198 | 	}
199 | }
200 | 


--------------------------------------------------------------------------------
/spirv-io/lib/malloc.glsl:
--------------------------------------------------------------------------------
  1 | #include <thread_id.glsl>
  2 | 
  3 | #define ptr_t int32_t
  4 | #define size_t int32_t
  5 | // alloc_t holds two ptr_t values (x, y); pair_t holds two alloc_t values.
  6 | struct alloc_t { ptr_t x; ptr_t y; };
  7 | struct pair_t { alloc_t x; alloc_t y; };
  8 | 
  9 | #define INDEX_SIZE 4
 10 | // FREE(f): remember heapPtr, run f, then restore heapPtr to the remembered value.
 11 | #define FREE(f) { int32_t _hp_ = heapPtr; f; heapPtr = _hp_; }
 12 | // Every block below is declared with binding = 0 and exposes one runtime-sized array of a different element type (presumably all aliasing the same storage - TODO confirm).
 13 | layout(std430, binding = 0) buffer indexBuf { ptr_t indexHeap[]; };
 14 | // Scalar floating-point arrays.
 15 | layout(std430, binding = 0) buffer f32Buffer { float32_t f32heap[]; };
 16 | layout(std430, binding = 0) buffer f64Buffer { float64_t f64heap[]; };
 17 | // Unsigned integer scalar arrays.
 18 | layout(std430, binding = 0) buffer u8Buffer { uint8_t u8heap[]; };
 19 | layout(std430, binding = 0) buffer u16Buffer { uint16_t u16heap[]; };
 20 | layout(std430, binding = 0) buffer u32Buffer { uint32_t u32heap[]; };
 21 | layout(std430, binding = 0) buffer u64Buffer { uint64_t u64heap[]; };
 22 | // Signed integer scalar arrays.
 23 | layout(std430, binding = 0) buffer i8Buffer { int8_t i8heap[]; };
 24 | layout(std430, binding = 0) buffer i16Buffer { int16_t i16heap[]; };
 25 | layout(std430, binding = 0) buffer i32Buffer { int32_t i32heap[]; };
 26 | layout(std430, binding = 0) buffer i64Buffer { int64_t i64heap[]; };
 27 | // Half-precision scalar, vector and matrix arrays; only declared when FLOAT16 is defined.
 28 | #ifdef FLOAT16
 29 | layout(std430, binding = 0) buffer f16Buffer { float16_t f16heap[]; };
 30 | layout(std430, binding = 0) buffer f16v2Buffer { f16vec2 f16v2heap[]; };
 31 | layout(std430, binding = 0) buffer f16v3Buffer { f16vec3 f16v3heap[]; };
 32 | layout(std430, binding = 0) buffer f16v4Buffer { f16vec4 f16v4heap[]; };
 33 | layout(std430, binding = 0) buffer f16m2Buffer { f16mat2 f16m2heap[]; };
 34 | layout(std430, binding = 0) buffer f16m3Buffer { f16mat3 f16m3heap[]; };
 35 | layout(std430, binding = 0) buffer f16m4Buffer { f16mat4 f16m4heap[]; };
 36 | #endif
 37 | // 32- and 64-bit floating-point vector arrays with 2, 3 and 4 components.
 38 | layout(std430, binding = 0) buffer f32v2Buffer { f32vec2 f32v2heap[]; };
 39 | layout(std430, binding = 0) buffer f64v2Buffer { f64vec2 f64v2heap[]; };
 40 | 
 41 | layout(std430, binding = 0) buffer f32v3Buffer { f32vec3 f32v3heap[]; };
 42 | layout(std430, binding = 0) buffer f64v3Buffer { f64vec3 f64v3heap[]; };
 43 | 
 44 | layout(std430, binding = 0) buffer f32v4Buffer { f32vec4 f32v4heap[]; };
 45 | layout(std430, binding = 0) buffer f64v4Buffer { f64vec4 f64v4heap[]; };
 46 | // Unsigned integer vector arrays with 2, 3 and 4 components.
 47 | layout(std430, binding = 0) buffer u8v2Buffer { u8vec2 u8v2heap[]; };
 48 | layout(std430, binding = 0) buffer u16v2Buffer { u16vec2 u16v2heap[]; };
 49 | layout(std430, binding = 0) buffer u32v2Buffer { u32vec2 u32v2heap[]; };
 50 | layout(std430, binding = 0) buffer u64v2Buffer { u64vec2 u64v2heap[]; };
 51 | 
 52 | layout(std430, binding = 0) buffer u8v3Buffer { u8vec3 u8v3heap[]; };
 53 | layout(std430, binding = 0) buffer u16v3Buffer { u16vec3 u16v3heap[]; };
 54 | layout(std430, binding = 0) buffer u32v3Buffer { u32vec3 u32v3heap[]; };
 55 | layout(std430, binding = 0) buffer u64v3Buffer { u64vec3 u64v3heap[]; };
 56 | 
 57 | layout(std430, binding = 0) buffer u8v4Buffer { u8vec4 u8v4heap[]; };
 58 | layout(std430, binding = 0) buffer u16v4Buffer { u16vec4 u16v4heap[]; };
 59 | layout(std430, binding = 0) buffer u32v4Buffer { u32vec4 u32v4heap[]; };
 60 | layout(std430, binding = 0) buffer u64v4Buffer { u64vec4 u64v4heap[]; };
 61 | // Signed integer vector arrays with 2, 3 and 4 components.
 62 | layout(std430, binding = 0) buffer i8v2Buffer { i8vec2 i8v2heap[]; };
 63 | layout(std430, binding = 0) buffer i16v2Buffer { i16vec2 i16v2heap[]; };
 64 | layout(std430, binding = 0) buffer i32v2Buffer { i32vec2 i32v2heap[]; };
 65 | layout(std430, binding = 0) buffer i64v2Buffer { i64vec2 i64v2heap[]; };
 66 | 
 67 | layout(std430, binding = 0) buffer i8v3Buffer { i8vec3 i8v3heap[]; };
 68 | layout(std430, binding = 0) buffer i16v3Buffer { i16vec3 i16v3heap[]; };
 69 | layout(std430, binding = 0) buffer i32v3Buffer { i32vec3 i32v3heap[]; };
 70 | layout(std430, binding = 0) buffer i64v3Buffer { i64vec3 i64v3heap[]; };
 71 | 
 72 | layout(std430, binding = 0) buffer i8v4Buffer { i8vec4 i8v4heap[]; };
 73 | layout(std430, binding = 0) buffer i16v4Buffer { i16vec4 i16v4heap[]; };
 74 | layout(std430, binding = 0) buffer i32v4Buffer { i32vec4 i32v4heap[]; };
 75 | layout(std430, binding = 0) buffer i64v4Buffer { i64vec4 i64v4heap[]; };
 76 | // 32- and 64-bit floating-point matrix arrays (2x2, 3x3, 4x4).
 77 | layout(std430, binding = 0) buffer f32m2Buffer { f32mat2 f32m2heap[]; };
 78 | layout(std430, binding = 0) buffer f64m2Buffer { f64mat2 f64m2heap[]; };
 79 | 
 80 | layout(std430, binding = 0) buffer f32m3Buffer { f32mat3 f32m3heap[]; };
 81 | layout(std430, binding = 0) buffer f64m3Buffer { f64mat3 f64m3heap[]; };
 82 | 
 83 | layout(std430, binding = 0) buffer f32m4Buffer { f32mat4 f32m4heap[]; };
 84 | layout(std430, binding = 0) buffer f64m4Buffer { f64mat4 f64m4heap[]; };
 85 | 
 86 | // Per-thread heap range [heapStart, heapEnd): HeapSize units starting at ThreadId * HeapSize.
 87 | ptr_t heapStart = ThreadId * HeapSize;
 88 | ptr_t heapEnd = heapStart + HeapSize;
 89 | // Allocation pointer of the thread heap; the malloc overloads advance it and none of them compares it against heapEnd.
 90 | ptr_t heapPtr = heapStart;
 91 | // Per-group heap range [groupHeapStart, groupHeapEnd): GroupHeapSize units starting at ThreadGroupId * GroupHeapSize.
 92 | ptr_t groupHeapStart = ThreadGroupId * GroupHeapSize;
 93 | ptr_t groupHeapEnd = groupHeapStart + GroupHeapSize;
 94 | // Allocation pointer for the group heap, declared shared; it is neither initialized nor used in this file.
 95 | shared ptr_t groupHeapPtr;
 96 | // Length of allocation a: the distance from its start pointer (x) to its end pointer (y).
 97 | size_t allocSize(alloc_t a) {
 98 | 	ptr_t begin = a.x;
 99 | 	return a.y - begin;
100 | }
101 | // Takes len units from the thread heap: returns [old heapPtr, old heapPtr + len) and advances heapPtr. No check against heapEnd.
102 | alloc_t malloc(size_t len) {
103 | 	alloc_t block = alloc_t(heapPtr, heapPtr + len);
104 | 	heapPtr = block.y;
105 | 	return block;
106 | }
107 | // Like malloc(len), but the allocation starts at heapPtr rounded up to a multiple of align (integer division).
108 | alloc_t malloc(size_t len, size_t align) {
109 | 	ptr_t aligned = ((heapPtr + (align - 1)) / align) * align;
110 | 	alloc_t block = alloc_t(aligned, aligned + len);
111 | 	heapPtr = block.y; return block;
112 | }
113 | // 64-bit length overload: len is converted to ptr_t before heapPtr is advanced.
114 | alloc_t malloc(uint64_t len) {
115 | 	alloc_t block = alloc_t(heapPtr, heapPtr + ptr_t(len));
116 | 	heapPtr = block.y;
117 | 	return block;
118 | }
119 | // 64-bit length overload with alignment: starts at heapPtr rounded up to a multiple of align; len is converted to ptr_t.
120 | alloc_t malloc(uint64_t len, size_t align) {
121 | 	ptr_t aligned = ((heapPtr + (align - 1)) / align) * align;
122 | 	alloc_t block = alloc_t(aligned, aligned + ptr_t(len));
123 | 	heapPtr = block.y; return block;
124 | }
125 | // Unsigned 32-bit length overload: len is converted to ptr_t before heapPtr is advanced.
126 | alloc_t malloc(uint32_t len) {
127 | 	alloc_t block = alloc_t(heapPtr, heapPtr + ptr_t(len));
128 | 	heapPtr = block.y;
129 | 	return block;
130 | }
131 | // Unsigned 32-bit length overload with alignment: starts at heapPtr rounded up to a multiple of align.
132 | alloc_t malloc(uint32_t len, size_t align) {
133 | 	ptr_t aligned = ((heapPtr + (align - 1)) / align) * align;
134 | 	alloc_t block = alloc_t(aligned, aligned + ptr_t(len));
135 | 	heapPtr = block.y; return block;
136 | }
137 | // Heap pointer -> index pointer: ptr divided by INDEX_SIZE after adding INDEX_SIZE - 1 (rounds up for non-negative ptr).
138 | ptr_t toIndexPtr(ptr_t ptr) {
139 |     return (ptr + (INDEX_SIZE - 1)) / INDEX_SIZE;
140 | }
141 | // Index pointer -> heap pointer: multiplies by INDEX_SIZE.
142 | ptr_t fromIndexPtr(ptr_t ptr) {
143 |     return INDEX_SIZE * ptr;
144 | }


--------------------------------------------------------------------------------
/http_shader/cpp/httpd_ivec4.glsl:
--------------------------------------------------------------------------------
  1 | #define version #version
  2 | 
  3 | version 450
  4 | 
  5 | #include "../chr.glsl"
  6 | // Byte sizes of one request slot, one response slot and one heap entry; main() indexes all three in 16-byte ivec4 units.
  7 | #define REQUEST_SIZE 1024
  8 | #define RESPONSE_SIZE 1024
  9 | #define HEAP_SIZE 1024
 10 | // Number of consecutive request slots each invocation processes.
 11 | #define REQUESTS_PER_INVOCATION 1024
 12 | // Exclusive upper bound for heap keys in main(), in ivec4 units: 32 * 16 * 1024 entries of HEAP_SIZE / 16 elements each.
 13 | #define HEAP_TOTAL_SZ (32 * 16 * 1024 * (HEAP_SIZE / 16))
 14 | // Workgroup size: 16 invocations along x.
 15 | layout ( local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in;
 16 | // inputBytes holds the packed requests (read-only), outputBytes the packed responses, heap the stored entries; four characters per int.
 17 | layout(std430, binding = 0) readonly buffer inputBuffer { highp ivec4 inputBytes[]; };
 18 | layout(std430, binding = 1) buffer outputBuffer { highp ivec4 outputBytes[]; };
 19 | layout(std430, binding = 2) buffer heapBuffer { highp ivec4 heap[]; };
 20 | // Values of the first four request bytes for each method. METHOD_OPTION and METHOD_UNKNOWN are not referenced in this file.
 21 | const highp int METHOD_GET = 'GET ';
 22 | const highp int METHOD_POST = 'POST';
 23 | const highp int METHOD_OPTION = 'OPTI';
 24 | const highp int METHOD_UNKNOWN = 0;
 25 | // Writes value into component i of v (0, 1, 2 -> x, y, z); any other index writes w.
 26 | void setE(inout ivec4 v, int i, int value) {
 27 | 	if (i == 0) { v.x = value; return; }
 28 | 	if (i == 1) { v.y = value; return; }
 29 | 	if (i == 2) { v.z = value; return; }
 30 | 	v.w = value;
 31 | }
 32 | // Reads component i of v (1, 2, 3 -> y, z, w).
 33 | int getE(ivec4 v, int i) {
 34 | 	// Any other index, including 0 and out-of-range values, selects x.
 35 | 	return (i == 1) ? v.y
 36 | 	     : (i == 2) ? v.z
 37 | 	     : (i == 3) ? v.w
 38 | 	     : v.x;
 39 | }
 40 | // Entry point: each invocation serves REQUESTS_PER_INVOCATION consecutive request slots (GET reads a heap entry, POST replaces one).
 41 | void main() {
 42 | 	int wgId = int(gl_GlobalInvocationID.x) * REQUESTS_PER_INVOCATION;
 43 | 	// Offsets are in ivec4 (16-byte) units. Element 0 of a slot is its header: .x == 0 marks an empty request, responses store their byte length there.
 44 | 	for (int j = 0; j < REQUESTS_PER_INVOCATION; j++) {
 45 | 		int reqOff = (wgId+j) * (REQUEST_SIZE / 16);
 46 | 		int resOff = (wgId+j) * (RESPONSE_SIZE / 16);
 47 | 
 48 | 		// Parse request in format
 49 | 		// [GET |/xxx|xxxx| HTT|P/1.|1\r\n.|...]
 50 | 		// [POST| /xx|xxxx|x HT|TP/1|.1\r\n|....\r\nmimetype\r\n\r\npost body]
 51 | 		ivec4 requestInfo = inputBytes[reqOff];
 52 | 		if (requestInfo.x == 0) { // skip empty requests
 53 | 			continue;
 54 | 		}
 55 | 		ivec4 req = inputBytes[reqOff+1];
 56 | 		ivec4 req2 = inputBytes[reqOff+2];
 57 | 		int method = req.x;
 58 | 		int i = resOff;
 59 | 		if (method == METHOD_GET) {
 60 | 			// Parse key from path /xxxxxxx
 61 | 			int key = (
 62 | 				(((req.y >> 8) & 0xFF) - 48) * 1000000 +
 63 | 				(((req.y >> 16) & 0xFF) - 48) * 100000 +
 64 | 				(((req.y >> 24) & 0xFF) - 48) * 10000 +
 65 | 				(((req.z >> 0) & 0xFF) - 48) * 1000 +
 66 | 				(((req.z >> 8) & 0xFF) - 48) * 100 +
 67 | 				(((req.z >> 16) & 0xFF) - 48) * 10 +
 68 | 				(((req.z >> 24) & 0xFF) - 48) * 1
 69 | 			) * (HEAP_SIZE / 16);
 70 | 			// Check that the key is valid and fetch the content from the heap buffer if so.
 71 | 			if (key >= 0 && key < HEAP_TOTAL_SZ && heap[key].x > 0 && heap[key].x <= RESPONSE_SIZE - 3 * 16) {
 72 | 				// heap[key].w is the entry lock: -1 while a POST writes, 0 when free, n > 0 while n GETs read.
 73 | 				// Join as a reader with a bounded CAS retry so the count rises by exactly one (the old CAS + atomicAdd pair left it above 0 and blocked every later POST).
 74 | 				int readers = 0;
 75 | 				bool acquired = false;
 76 | 				for (int attempt = 0; attempt < 8 && readers >= 0 && !acquired; attempt++) {
 77 | 					int seen = atomicCompSwap(heap[key].w, readers, readers + 1);
 78 | 					acquired = (seen == readers);
 79 | 					readers = seen;
 80 | 				}
 81 | 				if (acquired) {
 82 | 					outputBytes[i+0] = ivec4(2*16 + heap[key].x, 0, 0, 0);
 83 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
 84 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e:  ');
 85 | 					int len = heap[key].x / 16 + (heap[key].x % 16 > 0 ? 1 : 0); // entry length rounded up to whole ivec4s
 86 | 					for (int k = 0; k < len; k++) {
 87 | 						outputBytes[i+3+k] = heap[key+1+k];
 88 | 					}
 89 | 					atomicAdd(heap[key].w, -1); // leave the reader group
 90 | 				} else {
 91 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
 92 | 					outputBytes[i+1] = ivec4('400 ', 'NO H', 'TTP/', '1.1\r');
 93 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
 94 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK');
 95 | 				}
 96 | 				continue;
 97 | 			}
 98 | 		} else if (method == METHOD_POST) {
 99 | 			// Parse key from path /xxxxxxx
100 | 			int key = (
101 | 				(((req.y >> 16) & 0xFF) - 48) * 1000000 +
102 | 				(((req.y >> 24) & 0xFF) - 48) * 100000 +
103 | 				(((req.z >> 0) & 0xFF) - 48) * 10000 +
104 | 				(((req.z >> 8) & 0xFF) - 48) * 1000 +
105 | 				(((req.z >> 16) & 0xFF) - 48) * 100 +
106 | 				(((req.z >> 24) & 0xFF) - 48) * 10 +
107 | 				(((req.w >> 0) & 0xFF) - 48) * 1
108 | 			) * (HEAP_SIZE / 16);
109 | 			// If the key is valid, replace the content in the heap buffer with the post body.
110 | 			if (key >= 0 && key < HEAP_TOTAL_SZ) {
111 | 				int locked = atomicCompSwap(heap[key].w, 0, -1);
112 | 				if (locked == 0) {
113 | 					int rnrn = 0; // progress through the "\r\n\r\n" separator
114 | 					int readStart = 0; // position of the LF that ends the separator; 0 = body not reached yet
115 | 					ivec4 w = ivec4(0);
116 | 					int l = 0;
117 | 					int hi = 0;
118 | 					for (int k = 13; k < REQUEST_SIZE - 16 && k < HEAP_SIZE - 16; k++) { // payloads span 63 ivec4s after the header element; the old bound read into the next slot
119 | 						int v4i = k / 16;
120 | 						int vi = k - (v4i * 16);
121 | 						int c = vi / 4;
122 | 						int b = vi - (c * 4);
123 | 						int chr = (getE(inputBytes[reqOff + 1 + v4i], c) >> (b * 8)) & 0xFF;
124 | 						if (readStart > 0) {
125 | 							if (chr == 0) break; // a zero byte ends the body
126 | 							int wc = l / 4;
127 | 							int wb = l - (wc * 4);
128 | 							setE(w, wc, getE(w, wc) | (chr << (wb * 8)));
129 | 							l++;
130 | 							if (l == 16) {
131 | 								heap[key+1+hi] = w;
132 | 								hi++;
133 | 								w *= 0;
134 | 								l = 0;
135 | 							}
136 | 						} else if (chr == CHR_CR && (rnrn & 1) == 0) {
137 | 							rnrn++;
138 | 						} else if (chr == CHR_LF && (rnrn & 1) == 1) {
139 | 							rnrn++;
140 | 							if (rnrn == 4) {
141 | 								readStart = k;
142 | 							}
143 | 						} else {
144 | 							rnrn = 0;
145 | 						}
146 | 					}
147 | 					if (l > 0 && (1 + hi) < (HEAP_SIZE/16)) { // flush the last, partially filled ivec4
148 | 						heap[key+1+hi] = w;
149 | 					}
150 | 					// Record the number of body bytes actually copied (readEnd - readStart was one too large and fell back to 512 when no zero byte was found).
151 | 					heap[key].x = hi * 16 + l;
152 | 					heap[key].w = 0; // release the write lock
153 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
154 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
155 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
156 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nOK.');
157 | 				} else {
158 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
159 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
160 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
161 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK');
162 | 				}
163 | 				continue;
164 | 			}
165 | 		}
166 | 		outputBytes[i+0] = ivec4(3*16 - 3*4 - 2, 0, 0, 0); // every other case: 500 followed by an echo of the first request bytes
167 | 		outputBytes[i+1] = ivec4('500 ', 'BAD ', 'HTTP', '/1.1');
168 | 		outputBytes[i+2] = ivec4('\r\n\r\n', req.x, req.y, req.z);
169 | 		outputBytes[i+3] = ivec4(req.w, req2.x, req2.y, req2.z);
170 | 	}
171 | }
172 | 


--------------------------------------------------------------------------------
/http_shader/vulkan/httpd_ivec4.glsl:
--------------------------------------------------------------------------------
  1 | #define version #version
  2 | 
  3 | version 450
  4 | 
  5 | #include "../chr.glsl"
  6 | // Byte sizes of one request slot, one response slot and one heap entry; main() indexes all three in 16-byte ivec4 units.
  7 | #define REQUEST_SIZE 1024
  8 | #define RESPONSE_SIZE 1024
  9 | #define HEAP_SIZE 1024
 10 | // Number of consecutive request slots each invocation processes.
 11 | #define REQUESTS_PER_INVOCATION 1024
 12 | // Exclusive upper bound for heap keys in main(), in ivec4 units: 32 * 16 * 1024 entries of HEAP_SIZE / 16 elements each.
 13 | #define HEAP_TOTAL_SZ (32 * 16 * 1024 * (HEAP_SIZE / 16))
 14 | // Workgroup size: 16 invocations along x.
 15 | layout ( local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in;
 16 | // inputBytes holds the packed requests (read-only), outputBytes the packed responses, heap the stored entries; four characters per int.
 17 | layout(std430, binding = 0) readonly buffer inputBuffer { highp ivec4 inputBytes[]; };
 18 | layout(std430, binding = 1) buffer outputBuffer { highp ivec4 outputBytes[]; };
 19 | layout(std430, binding = 2) buffer heapBuffer { highp ivec4 heap[]; };
 20 | // Values of the first four request bytes for each method. METHOD_OPTION and METHOD_UNKNOWN are not referenced in this file.
 21 | const highp int METHOD_GET = 'GET ';
 22 | const highp int METHOD_POST = 'POST';
 23 | const highp int METHOD_OPTION = 'OPTI';
 24 | const highp int METHOD_UNKNOWN = 0;
 25 | // Writes value into component i of v (0, 1, 2 -> x, y, z); any other index writes w.
 26 | void setE(inout ivec4 v, int i, int value) {
 27 | 	if (i == 0) { v.x = value; return; }
 28 | 	if (i == 1) { v.y = value; return; }
 29 | 	if (i == 2) { v.z = value; return; }
 30 | 	v.w = value;
 31 | }
 32 | // Reads component i of v (1, 2, 3 -> y, z, w).
 33 | int getE(ivec4 v, int i) {
 34 | 	// Any other index, including 0 and out-of-range values, selects x.
 35 | 	return (i == 1) ? v.y
 36 | 	     : (i == 2) ? v.z
 37 | 	     : (i == 3) ? v.w
 38 | 	     : v.x;
 39 | }
 40 | // Entry point: each invocation serves REQUESTS_PER_INVOCATION consecutive request slots (GET reads a heap entry, POST replaces one).
 41 | void main() {
 42 | 	int wgId = int(gl_GlobalInvocationID.x) * REQUESTS_PER_INVOCATION;
 43 | 	// Offsets are in ivec4 (16-byte) units. Element 0 of a slot is its header: .x == 0 marks an empty request, responses store their byte length there.
 44 | 	for (int j = 0; j < REQUESTS_PER_INVOCATION; j++) {
 45 | 		int reqOff = (wgId+j) * (REQUEST_SIZE / 16);
 46 | 		int resOff = (wgId+j) * (RESPONSE_SIZE / 16);
 47 | 
 48 | 		// Parse request in format
 49 | 		// [GET |/xxx|xxxx| HTT|P/1.|1\r\n.|...]
 50 | 		// [POST| /xx|xxxx|x HT|TP/1|.1\r\n|....\r\nmimetype\r\n\r\npost body]
 51 | 		ivec4 requestInfo = inputBytes[reqOff];
 52 | 		if (requestInfo.x == 0) { // skip empty requests
 53 | 			continue;
 54 | 		}
 55 | 		ivec4 req = inputBytes[reqOff+1];
 56 | 		ivec4 req2 = inputBytes[reqOff+2];
 57 | 		int method = req.x;
 58 | 		int i = resOff;
 59 | 		if (method == METHOD_GET) {
 60 | 			// Parse key from path /xxxxxxx
 61 | 			int key = (
 62 | 				(((req.y >> 8) & 0xFF) - 48) * 1000000 +
 63 | 				(((req.y >> 16) & 0xFF) - 48) * 100000 +
 64 | 				(((req.y >> 24) & 0xFF) - 48) * 10000 +
 65 | 				(((req.z >> 0) & 0xFF) - 48) * 1000 +
 66 | 				(((req.z >> 8) & 0xFF) - 48) * 100 +
 67 | 				(((req.z >> 16) & 0xFF) - 48) * 10 +
 68 | 				(((req.z >> 24) & 0xFF) - 48) * 1
 69 | 			) * (HEAP_SIZE / 16);
 70 | 			// Check that the key is valid and fetch the content from the heap buffer if so.
 71 | 			if (key >= 0 && key < HEAP_TOTAL_SZ && heap[key].x > 0 && heap[key].x <= RESPONSE_SIZE - 3 * 16) {
 72 | 				// heap[key].w is the entry lock: -1 while a POST writes, 0 when free, n > 0 while n GETs read.
 73 | 				// Join as a reader with a bounded CAS retry so the count rises by exactly one (the old CAS + atomicAdd pair left it above 0 and blocked every later POST).
 74 | 				int readers = 0;
 75 | 				bool acquired = false;
 76 | 				for (int attempt = 0; attempt < 8 && readers >= 0 && !acquired; attempt++) {
 77 | 					int seen = atomicCompSwap(heap[key].w, readers, readers + 1);
 78 | 					acquired = (seen == readers);
 79 | 					readers = seen;
 80 | 				}
 81 | 				if (acquired) {
 82 | 					outputBytes[i+0] = ivec4(2*16 + heap[key].x, 0, 0, 0);
 83 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
 84 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e:  ');
 85 | 					int len = heap[key].x / 16 + (heap[key].x % 16 > 0 ? 1 : 0); // entry length rounded up to whole ivec4s
 86 | 					for (int k = 0; k < len; k++) {
 87 | 						outputBytes[i+3+k] = heap[key+1+k];
 88 | 					}
 89 | 					atomicAdd(heap[key].w, -1); // leave the reader group
 90 | 				} else {
 91 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
 92 | 					outputBytes[i+1] = ivec4('400 ', 'NO H', 'TTP/', '1.1\r');
 93 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
 94 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK');
 95 | 				}
 96 | 				continue;
 97 | 			}
 98 | 		} else if (method == METHOD_POST) {
 99 | 			// Parse key from path /xxxxxxx
100 | 			int key = (
101 | 				(((req.y >> 16) & 0xFF) - 48) * 1000000 +
102 | 				(((req.y >> 24) & 0xFF) - 48) * 100000 +
103 | 				(((req.z >> 0) & 0xFF) - 48) * 10000 +
104 | 				(((req.z >> 8) & 0xFF) - 48) * 1000 +
105 | 				(((req.z >> 16) & 0xFF) - 48) * 100 +
106 | 				(((req.z >> 24) & 0xFF) - 48) * 10 +
107 | 				(((req.w >> 0) & 0xFF) - 48) * 1
108 | 			) * (HEAP_SIZE / 16);
109 | 			// If the key is valid, replace the content in the heap buffer with the post body.
110 | 			if (key >= 0 && key < HEAP_TOTAL_SZ) {
111 | 				int locked = atomicCompSwap(heap[key].w, 0, -1);
112 | 				if (locked == 0) {
113 | 					int rnrn = 0; // progress through the "\r\n\r\n" separator
114 | 					int readStart = 0; // position of the LF that ends the separator; 0 = body not reached yet
115 | 					ivec4 w = ivec4(0);
116 | 					int l = 0;
117 | 					int hi = 0;
118 | 					for (int k = 13; k < REQUEST_SIZE - 16 && k < HEAP_SIZE - 16; k++) { // payloads span 63 ivec4s after the header element; the old bound read into the next slot
119 | 						int v4i = k / 16;
120 | 						int vi = k - (v4i * 16);
121 | 						int c = vi / 4;
122 | 						int b = vi - (c * 4);
123 | 						int chr = (getE(inputBytes[reqOff + 1 + v4i], c) >> (b * 8)) & 0xFF;
124 | 						if (readStart > 0) {
125 | 							if (chr == 0) break; // a zero byte ends the body
126 | 							int wc = l / 4;
127 | 							int wb = l - (wc * 4);
128 | 							setE(w, wc, getE(w, wc) | (chr << (wb * 8)));
129 | 							l++;
130 | 							if (l == 16) {
131 | 								heap[key+1+hi] = w;
132 | 								hi++;
133 | 								w *= 0;
134 | 								l = 0;
135 | 							}
136 | 						} else if (chr == CHR_CR && (rnrn & 1) == 0) {
137 | 							rnrn++;
138 | 						} else if (chr == CHR_LF && (rnrn & 1) == 1) {
139 | 							rnrn++;
140 | 							if (rnrn == 4) {
141 | 								readStart = k;
142 | 							}
143 | 						} else {
144 | 							rnrn = 0;
145 | 						}
146 | 					}
147 | 					if (l > 0 && (1 + hi) < (HEAP_SIZE/16)) { // flush the last, partially filled ivec4
148 | 						heap[key+1+hi] = w;
149 | 					}
150 | 					// Record the number of body bytes actually copied (readEnd - readStart was one too large and fell back to 512 when no zero byte was found).
151 | 					heap[key].x = hi * 16 + l;
152 | 					heap[key].w = 0; // release the write lock
153 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
154 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
155 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
156 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nOK.');
157 | 				} else {
158 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
159 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
160 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
161 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK');
162 | 				}
163 | 				continue;
164 | 			}
165 | 		}
166 | 		outputBytes[i+0] = ivec4(3*16 - 3*4 - 2, 0, 0, 0); // every other case: 500 followed by an echo of the first request bytes
167 | 		outputBytes[i+1] = ivec4('500 ', 'BAD ', 'HTTP', '/1.1');
168 | 		outputBytes[i+2] = ivec4('\r\n\r\n', req.x, req.y, req.z);
169 | 		outputBytes[i+3] = ivec4(req.w, req2.x, req2.y, req2.z);
170 | 	}
171 | }
172 | 


--------------------------------------------------------------------------------
/http_shader/ispc_ivec4/httpd_ivec4.glsl:
--------------------------------------------------------------------------------
  1 | #define version #version
  2 | 
  3 | version 450
  4 | 
  5 | #include "../chr.glsl"
  6 | // Byte sizes of one request slot, one response slot and one heap entry; main() indexes all three in 16-byte ivec4 units.
  7 | #define REQUEST_SIZE 1024
  8 | #define RESPONSE_SIZE 1024
  9 | #define HEAP_SIZE 1024
 10 | // Number of consecutive request slots each invocation processes.
 11 | #define REQUESTS_PER_INVOCATION 1024
 12 | // Exclusive upper bound for heap keys in main(), in ivec4 units: 32 * 16 * 1024 entries of HEAP_SIZE / 16 elements each.
 13 | #define HEAP_TOTAL_SZ (32 * 16 * 1024 * (HEAP_SIZE / 16))
 14 | // Workgroup size: 16 invocations along x.
 15 | layout ( local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in;
 16 | // inputBytes holds the packed requests (read-only), outputBytes the packed responses, heap the stored entries; four characters per int.
 17 | layout(std430, binding = 0) readonly buffer inputBuffer { highp ivec4 inputBytes[]; };
 18 | layout(std430, binding = 1) buffer outputBuffer { highp ivec4 outputBytes[]; };
 19 | layout(std430, binding = 2) buffer heapBuffer { highp ivec4 heap[]; };
 20 | // Values of the first four request bytes for each method. METHOD_OPTION and METHOD_UNKNOWN are not referenced in this file.
 21 | const highp int METHOD_GET = 'GET ';
 22 | const highp int METHOD_POST = 'POST';
 23 | const highp int METHOD_OPTION = 'OPTI';
 24 | const highp int METHOD_UNKNOWN = 0;
 25 | // Writes value into component i of v (0, 1, 2 -> x, y, z); any other index writes w.
 26 | void setE(inout ivec4 v, int i, int value) {
 27 | 	if (i == 0) { v.x = value; return; }
 28 | 	if (i == 1) { v.y = value; return; }
 29 | 	if (i == 2) { v.z = value; return; }
 30 | 	v.w = value;
 31 | }
 32 | // Reads component i of v (1, 2, 3 -> y, z, w).
 33 | int getE(ivec4 v, int i) {
 34 | 	// Any other index, including 0 and out-of-range values, selects x.
 35 | 	return (i == 1) ? v.y
 36 | 	     : (i == 2) ? v.z
 37 | 	     : (i == 3) ? v.w
 38 | 	     : v.x;
 39 | }
 40 | // Entry point: each invocation serves REQUESTS_PER_INVOCATION consecutive request slots (GET reads a heap entry, POST replaces one).
 41 | void main() {
 42 | 	int wgId = int(gl_GlobalInvocationID.x) * REQUESTS_PER_INVOCATION;
 43 | 	// Offsets are in ivec4 (16-byte) units. Element 0 of a slot is its header: .x == 0 marks an empty request, responses store their byte length there.
 44 | 	for (int j = 0; j < REQUESTS_PER_INVOCATION; j++) {
 45 | 		int reqOff = (wgId+j) * (REQUEST_SIZE / 16);
 46 | 		int resOff = (wgId+j) * (RESPONSE_SIZE / 16);
 47 | 
 48 | 		// Parse request in format
 49 | 		// [GET |/xxx|xxxx| HTT|P/1.|1\r\n.|...]
 50 | 		// [POST| /xx|xxxx|x HT|TP/1|.1\r\n|....\r\nmimetype\r\n\r\npost body]
 51 | 		ivec4 requestInfo = inputBytes[reqOff];
 52 | 		if (requestInfo.x == 0) { // skip empty requests
 53 | 			continue;
 54 | 		}
 55 | 		ivec4 req = inputBytes[reqOff+1];
 56 | 		ivec4 req2 = inputBytes[reqOff+2];
 57 | 		int method = req.x;
 58 | 		int i = resOff;
 59 | 		if (method == METHOD_GET) {
 60 | 			// Parse key from path /xxxxxxx
 61 | 			int key = (
 62 | 				(((req.y >> 8) & 0xFF) - 48) * 1000000 +
 63 | 				(((req.y >> 16) & 0xFF) - 48) * 100000 +
 64 | 				(((req.y >> 24) & 0xFF) - 48) * 10000 +
 65 | 				(((req.z >> 0) & 0xFF) - 48) * 1000 +
 66 | 				(((req.z >> 8) & 0xFF) - 48) * 100 +
 67 | 				(((req.z >> 16) & 0xFF) - 48) * 10 +
 68 | 				(((req.z >> 24) & 0xFF) - 48) * 1
 69 | 			) * (HEAP_SIZE / 16);
 70 | 			// Check that the key is valid and fetch the content from the heap buffer if so.
 71 | 			if (key >= 0 && key < HEAP_TOTAL_SZ && heap[key].x > 0 && heap[key].x <= RESPONSE_SIZE - 3 * 16) {
 72 | 				// heap[key].w is the entry lock: -1 while a POST writes, 0 when free, n > 0 while n GETs read.
 73 | 				// Join as a reader with a bounded CAS retry so the count rises by exactly one (the old CAS + atomicAdd pair left it above 0 and blocked every later POST).
 74 | 				int readers = 0;
 75 | 				bool acquired = false;
 76 | 				for (int attempt = 0; attempt < 8 && readers >= 0 && !acquired; attempt++) {
 77 | 					int seen = atomicCompSwap(heap[key].w, readers, readers + 1);
 78 | 					acquired = (seen == readers);
 79 | 					readers = seen;
 80 | 				}
 81 | 				if (acquired) {
 82 | 					outputBytes[i+0] = ivec4(2*16 + heap[key].x, 0, 0, 0);
 83 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
 84 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e:  ');
 85 | 					int len = heap[key].x / 16 + (heap[key].x % 16 > 0 ? 1 : 0); // entry length rounded up to whole ivec4s
 86 | 					for (int k = 0; k < len; k++) {
 87 | 						outputBytes[i+3+k] = heap[key+1+k];
 88 | 					}
 89 | 					atomicAdd(heap[key].w, -1); // leave the reader group
 90 | 				} else {
 91 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
 92 | 					outputBytes[i+1] = ivec4('400 ', 'NO H', 'TTP/', '1.1\r');
 93 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
 94 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK');
 95 | 				}
 96 | 				continue;
 97 | 			}
 98 | 		} else if (method == METHOD_POST) {
 99 | 			// Parse key from path /xxxxxxx
100 | 			int key = (
101 | 				(((req.y >> 16) & 0xFF) - 48) * 1000000 +
102 | 				(((req.y >> 24) & 0xFF) - 48) * 100000 +
103 | 				(((req.z >> 0) & 0xFF) - 48) * 10000 +
104 | 				(((req.z >> 8) & 0xFF) - 48) * 1000 +
105 | 				(((req.z >> 16) & 0xFF) - 48) * 100 +
106 | 				(((req.z >> 24) & 0xFF) - 48) * 10 +
107 | 				(((req.w >> 0) & 0xFF) - 48) * 1
108 | 			) * (HEAP_SIZE / 16);
109 | 			// If the key is valid, replace the content in the heap buffer with the post body.
110 | 			if (key >= 0 && key < HEAP_TOTAL_SZ) {
111 | 				int locked = atomicCompSwap(heap[key].w, 0, -1);
112 | 				if (locked == 0) {
113 | 					int rnrn = 0; // progress through the "\r\n\r\n" separator
114 | 					int readStart = 0; // position of the LF that ends the separator; 0 = body not reached yet
115 | 					ivec4 w = ivec4(0);
116 | 					int l = 0;
117 | 					int hi = 0;
118 | 					for (int k = 13; k < REQUEST_SIZE - 16 && k < HEAP_SIZE - 16; k++) { // payloads span 63 ivec4s after the header element; the old bound read into the next slot
119 | 						int v4i = k / 16;
120 | 						int vi = k - (v4i * 16);
121 | 						int c = vi / 4;
122 | 						int b = vi - (c * 4);
123 | 						int chr = (getE(inputBytes[reqOff + 1 + v4i], c) >> (b * 8)) & 0xFF;
124 | 						if (readStart > 0) {
125 | 							if (chr == 0) break; // a zero byte ends the body
126 | 							int wc = l / 4;
127 | 							int wb = l - (wc * 4);
128 | 							setE(w, wc, getE(w, wc) | (chr << (wb * 8)));
129 | 							l++;
130 | 							if (l == 16) {
131 | 								heap[key+1+hi] = w;
132 | 								hi++;
133 | 								w *= 0;
134 | 								l = 0;
135 | 							}
136 | 						} else if (chr == CHR_CR && (rnrn & 1) == 0) {
137 | 							rnrn++;
138 | 						} else if (chr == CHR_LF && (rnrn & 1) == 1) {
139 | 							rnrn++;
140 | 							if (rnrn == 4) {
141 | 								readStart = k;
142 | 							}
143 | 						} else {
144 | 							rnrn = 0;
145 | 						}
146 | 					}
147 | 					if (l > 0 && (1 + hi) < (HEAP_SIZE/16)) { // flush the last, partially filled ivec4
148 | 						heap[key+1+hi] = w;
149 | 					}
150 | 					// Record the number of body bytes actually copied (readEnd - readStart was one too large and fell back to 512 when no zero byte was found).
151 | 					heap[key].x = hi * 16 + l;
152 | 					heap[key].w = 0; // release the write lock
153 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
154 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
155 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
156 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nOK.');
157 | 				} else {
158 | 					outputBytes[i+0] = ivec4(3*16, 0, 0, 0);
159 | 					outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r');
160 | 					outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t');
161 | 					outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK');
162 | 				}
163 | 				continue;
164 | 			}
165 | 		}
166 | 		outputBytes[i+0] = ivec4(3*16 - 3*4 - 2, 0, 0, 0); // every other case: 500 followed by an echo of the first request bytes
167 | 		outputBytes[i+1] = ivec4('500 ', 'BAD ', 'HTTP', '/1.1');
168 | 		outputBytes[i+2] = ivec4('\r\n\r\n', req.x, req.y, req.z);
169 | 		outputBytes[i+3] = ivec4(req.w, req2.x, req2.y, req2.z);
170 | 	}
171 | }
172 | 


--------------------------------------------------------------------------------
/http_shader/ispc_int/httpd_int.glsl:
--------------------------------------------------------------------------------
  1 | #define version #version
  2 | 
  3 | version 450
  4 | 
  5 | #include "../chr.glsl"
  6 | // STRIDE: distance in ints between consecutive characters of one request/response in the request/response buffers; also the workgroup size.
  7 | #define STRIDE 32
  8 | // BSZ: number of character positions per request/response; the readers move i past BSZ to signal a parse error (see A_OK).
  9 | #define BSZ 1024
 10 | // Copy helpers, each advancing i per character written to DST[index+i*STRIDE]: heapStrCopy copies the range str from strided SRC, strCopySlice copies [start, end) of a dense array, strCopy copies a whole array, W writes one character to response.
 11 | #define heapStrCopy(str, SRC, DST, i, index) {int _s = (str).x; int _e = (str).y; while (_s < _e) {(DST)[index+i*STRIDE] = (SRC)[index+_s*STRIDE]; i++; _s++;}}
 12 | #define strCopySlice(SRC, DST, i, index, start, end) {int _s = start; int _e = end; while (_s < _e) {(DST)[index+i*STRIDE] = (SRC)[_s++]; i++;}}
 13 | #define strCopy(SRC, DST, i, index) {int _str[] = SRC; strCopySlice(_str, DST, i, index, 0, _str.length())}
 14 | #define W(chr) response[index + i*STRIDE] = (chr); i++;
 15 | // A_OK: leaves the enclosing function with the result of error(index) once i has moved past BSZ.
 16 | #define A_OK if (i > BSZ) { return error(index); }
 17 | // STRIDE invocations per workgroup; main() handles one request per invocation.
 18 | layout ( local_size_x = STRIDE, local_size_y = 1, local_size_z = 1 ) in;
 19 | // inputBytes/outputBytes: packed requests/responses (a length word, then four characters per int). request/response: unpacked, one character per int. heap is not referenced in this file.
 20 | layout(std430, binding = 0) readonly buffer inputBuffer { highp int inputBytes[]; };
 21 | layout(std430, binding = 1) buffer outputBuffer { highp int outputBytes[]; };
 22 | layout(std430, binding = 2) buffer heapBuffer { lowp int heap[]; };
 23 | layout(std430, binding = 3) buffer requestBuffer { lowp int request[]; };
 24 | layout(std430, binding = 4) buffer responseBuffer { lowp int response[]; };
 25 | // Method codes produced by readMethod.
 26 | const highp int METHOD_GET = 'GET ';
 27 | const highp int METHOD_POST = 'POST';
 28 | const highp int METHOD_OPTION = 'OPTI';
 29 | const highp int METHOD_UNKNOWN = 0;
 30 | // Protocol codes produced by readProtocol.
 31 | const lowp int PROTOCOL_UNKNOWN = 0;
 32 | const lowp int PROTOCOL_HTTP10 = '/1.0';
 33 | const lowp int PROTOCOL_HTTP11 = '/1.1';
 34 | // Content types understood by writeContentType.
 35 | const lowp int MIME_TEXT_PLAIN = 0;
 36 | const lowp int MIME_TEXT_HTML = 1;
 37 | 
 38 | // Length of the range str, where str.x is its start and str.y its end.
 39 | int strLen(ivec2 str) {
 40 | 	int begin = str.x;
 41 | 	return str.y - begin;
 42 | }
 43 | // One parsed header: name and value are (start, end) character positions in the request buffer, as filled in by readHeader.
 44 | struct header {
 45 | 	ivec2 name;
 46 | 	ivec2 value;
 47 | };
 48 | // Scans the request from position i up to endChar (or BSZ); str receives the scanned range and i ends one past the stop position.
 49 | void readRequestUntilChar(inout int i, int index, int endChar, out ivec2 str) {
 50 | 	int start = i;
 51 | 	for (; i < BSZ; i++) {
 52 | 		if (request[index + i*STRIDE] == endChar) break;
 53 | 	}
 54 | 	str = ivec2(start, i);
 55 | 	i++; // step over the delimiter (or past BSZ when none was found)
 56 | }
 57 | // Identifies the HTTP method at request position i, stores its code in method and moves i past the method and the space after it.
 58 | void readMethod(inout int i, int index, out int method) {
 59 | 	int j = index + i * STRIDE;
 60 | 	if (
 61 | 		request[j+0*STRIDE] == CHR_G &&
 62 | 		request[j+1*STRIDE] == CHR_E &&
 63 | 		request[j+2*STRIDE] == CHR_T &&
 64 | 		request[j+3*STRIDE] == CHR_SPACE
 65 | 	) {
 66 | 		method = METHOD_GET;
 67 | 		i += 4;
 68 | 		return;
 69 | 	} else if (
 70 | 		request[j+0*STRIDE] == CHR_P &&
 71 | 		request[j+1*STRIDE] == CHR_O &&
 72 | 		request[j+2*STRIDE] == CHR_S &&
 73 | 		request[j+3*STRIDE] == CHR_T &&
 74 | 		request[j+4*STRIDE] == CHR_SPACE
 75 | 	) {
 76 | 		method = METHOD_POST;
 77 | 		i += 5;
 78 | 		return;
 79 | 	} else if (request[j+0*STRIDE] == CHR_O && request[j+7*STRIDE] == CHR_SPACE) { // "OPTIONS" has seven letters, so the space is at offset 7 (offset 6 only matched "OPTION ")
 80 | 		method = METHOD_OPTION;
 81 | 		i += 8;
 82 | 		return;
 83 | 	}
 84 | 	method = METHOD_UNKNOWN;
 85 | 	i = BSZ+1; // unknown method: push i past BSZ so A_OK reports the error
 86 | }
 87 | // Reads the request path: the characters from position i up to the next space; i ends just past that space.
 88 | void readPath(inout int i, int index, out ivec2 path) {
 89 | 	readRequestUntilChar(i, index, CHR_SPACE, path);
 90 | }
 91 | // Reads the rest of the request line up to CR LF and classifies the protocol by the character just before the CR.
 92 | void readProtocol(inout int i, int index, out int protocol) {
 93 | 	ivec2 token;
 94 | 	readRequestUntilChar(i, index, CHR_CR, token);
 95 | 	bool hasLineFeed = i < BSZ && request[index+i*STRIDE] == CHR_LF;
 96 | 	if (!hasLineFeed) {
 97 | 		// No LF after the CR (or end of buffer): report the error by moving i past BSZ.
 98 | 		protocol = PROTOCOL_UNKNOWN;
 99 | 		i = BSZ+1;
100 | 		return;
101 | 	}
102 | 	// Consume the LF; a trailing '1' means HTTP/1.1, anything else is treated as HTTP/1.0.
103 | 	i++;
104 | 	int lastChar = request[index+(token.y-1)*STRIDE];
105 | 	protocol = (lastChar == CHR_1) ? PROTOCOL_HTTP11 : PROTOCOL_HTTP10;
106 | }
107 | // Reads one "name: value" header line into hdr; returns true without filling hdr when the line at i is the empty line that ends the headers.
108 | bool readHeader(inout int i, int index, out header hdr) {
109 | 	bool blankLine = (request[index+i*STRIDE] == CHR_CR);
110 | 	if (blankLine) {
111 | 		i += 2; // skip the CR LF of the empty line
112 | 	} else {
113 | 		readRequestUntilChar(i, index, CHR_COLON, hdr.name);
114 | 		for (; i < BSZ && request[index+i*STRIDE] == CHR_SPACE; i++) {} // skip spaces after the colon
115 | 		readRequestUntilChar(i, index, CHR_CR, hdr.value);
116 | 		i++; // skip the character after the CR
117 | 	}
118 | 	return blankLine;
119 | }
120 | // Appends CR LF to the response at position i and advances i by two.
121 | void writeCRLF(inout int i, int index) {
122 | 	response[index + i*STRIDE] = (CHR_CR); i++;
123 | 	response[index + i*STRIDE] = (CHR_LF); i++;
124 | }
125 | // Writes the status line: "HTTP/1.1 200 OK" when statusCode is 200, "HTTP/1.1 500 Error" for any other code, followed by CR LF.
126 | void writeStatus(inout int i, int index, int statusCode) {
127 | 	strCopy("HTTP/1.1 ", response, i, index);
128 | 	if (statusCode != 200) {
129 | 		strCopy("500 Error", response, i, index);
130 | 	} else {
131 | 		strCopy("200 OK", response, i, index);
132 | 	}
133 | 	writeCRLF(i, index);
134 | }
135 | // Writes the Content-Type header line: text/plain for MIME_TEXT_PLAIN, text/html for every other value.
136 | void writeContentType(inout int i, int index, int contentType) {
137 | 	// Header name first, then the media type, then the line terminator.
138 | 	strCopy("Content-Type: ", response, i, index);
139 | 	if (contentType != MIME_TEXT_PLAIN) {
140 | 		strCopy("text/html", response, i, index);
141 | 	} else {
142 | 		strCopy("text/plain", response, i, index);
143 | 	}
144 | 	writeCRLF(i, index);
145 | }
146 | // Writes the body: "Hello, World!" and a line feed, then the parsed request headers as "name: value" lines. path is not used.
147 | void writeBody(inout int i, int index, ivec2 path, header headers[32], int headerCount) {
148 | 	strCopy("Hello, World!", response, i, index);
149 | 	W(CHR_LF);
150 | 	// Echo the parsed headers back, one per line.
151 | 	for (int h = 0; h < 32 && h < headerCount; h++) {
152 | 		header hdr = headers[h];
153 | 		// Stop as soon as the next line would push i beyond position 1023.
154 | 		if (strLen(hdr.name) + 3 + strLen(hdr.value) + i > 1023) break;
155 | 		heapStrCopy(hdr.name, request, response, i, index);
156 | 		strCopy(": ", response, i, index);
157 | 		heapStrCopy(hdr.value, request, response, i, index);
158 | 		W(CHR_LF);
159 | 	}
160 | }
161 | // Writes a "500 Error" text/plain response with an empty body at the start of the response slot and returns its length in characters.
162 | int error(int index) {
163 | 	int written = 0;
164 | 	writeStatus(written, index, 500);
165 | 	writeContentType(written, index, MIME_TEXT_PLAIN);
166 | 	writeCRLF(written, index);
167 | 	return written;
168 | }
169 | // Expands the packed request at inputBytes[byteIndex] (a length word, then four characters per int, low byte first) into one character per int in request[].
170 | void unpackRequest(int byteIndex, int index) {
171 | 	int len = inputBytes[byteIndex];
172 | 	for (int j = 0; j < min(BSZ/4 - 1, len/4+1); j++) { // a slot is BSZ/4 ints and the first is the length; the old bound of 256 read the next slot's length word
173 | 		int v = inputBytes[byteIndex + j + 1];
174 | 		int off = index + (j * 4) * STRIDE;
175 | 		request[off + 0*STRIDE] = (v >> 0) & 0xFF;
176 | 		request[off + 1*STRIDE] = (v >> 8) & 0xFF;
177 | 		request[off + 2*STRIDE] = (v >> 16) & 0xFF;
178 | 		request[off + 3*STRIDE] = (v >> 24) & 0xFF;
179 | 	}
180 | }
181 | // Packs len response characters from response[] into outputBytes[byteIndex]: a length word followed by four characters per int, low byte first.
182 | void packResponse(int byteIndex, int index, int len) {
183 | 	len = min(len, BSZ - 4); // a slot only has room for BSZ/4 - 1 data words after the length word
184 | 	outputBytes[byteIndex] = len;
185 | 	int words = (len + 3) / 4; // round up so a trailing partial word is written too (len/4 dropped up to three characters)
186 | 	for (int j = 1; j <= words; j++) {
187 | 		int off = index + (j * 4 - 4) * STRIDE;
188 | 		outputBytes[byteIndex + j] =
189 | 			((response[off + 0*STRIDE] & 0xFF) << 0) |
190 | 			((response[off + 1*STRIDE] & 0xFF) << 8) |
191 | 			((response[off + 2*STRIDE] & 0xFF) << 16) |
192 | 			((response[off + 3*STRIDE] & 0xFF) << 24);
193 | 	}
194 | }
195 | // Parses the unpacked request at index (method, path, protocol, up to 32 headers) and writes the response; returns the response length in characters.
196 | int handleRequest(int index) {
197 | 	int method;
198 | 	ivec2 path;
199 | 	int protocol;
200 | 	header headers[32];
201 | 	int headerCount = 0;
202 | 
203 | 	// The readers move i past BSZ on malformed input; check after every step so later readers never index beyond the request slot.
204 | 	int i = 0;
205 | 	readMethod(i, index, method); A_OK;
206 | 	readPath(i, index, path); A_OK;
207 | 	readProtocol(i, index, protocol); A_OK;
208 | 	for (int j = 0; j < 32; j++) {
209 | 		bool done = readHeader(i, index, headers[j]); A_OK;
210 | 		if (done) break;
211 | 		headerCount++;
212 | 	}
213 | 
214 | 	// Request parsed: build the 200 response from the start of the response slot.
215 | 	i = 0;
216 | 	writeStatus(i, index, 200);
217 | 	writeContentType(i, index, MIME_TEXT_PLAIN);
218 | 	writeCRLF(i, index);
219 | 	writeBody(i, index, path, headers, headerCount);
220 | 	return i;
221 | }
222 | // Entry point: each invocation unpacks one request, handles it and packs the response.
223 | void main() {
224 | 	// Flatten the 2D global invocation id into a single request number.
225 | 	int requestId = int(gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * (gl_NumWorkGroups.x * gl_WorkGroupSize.x));
226 | 	// Requests are grouped STRIDE at a time, BSZ elements each; the low bits select the
227 | 	// lane inside a group (the mask assumes STRIDE is a power of two - TODO confirm).
228 | 	int index = STRIDE * BSZ * (requestId / STRIDE) + (requestId & (STRIDE-1));
229 | 	// Each request owns BSZ/4 ints of inputBytes and of outputBytes.
230 | 	int slot = requestId*(BSZ/4);
231 | 	unpackRequest(slot, index);
232 | 	int responseLen = handleRequest(index);
233 | 	packResponse(slot, index, responseLen);
234 | }
230 | 


--------------------------------------------------------------------------------
/spirv-io/lib/errno.glsl:
--------------------------------------------------------------------------------
  1 | // errno-style error codes as preprocessor constants. Names, values and comments appear to mirror the Linux errno headers - TODO confirm.
  2 | #define	EPERM		 1	/* Operation not permitted */
  3 | #define	ENOENT		 2	/* No such file or directory */
  4 | #define	ESRCH		 3	/* No such process */
  5 | #define	EINTR		 4	/* Interrupted system call */
  6 | #define	EIO		 5	/* I/O error */
  7 | #define	ENXIO		 6	/* No such device or address */
  8 | #define	E2BIG		 7	/* Argument list too long */
  9 | #define	ENOEXEC		 8	/* Exec format error */
 10 | #define	EBADF		 9	/* Bad file number */
 11 | #define	ECHILD		10	/* No child processes */
 12 | #define	EAGAIN		11	/* Try again */
 13 | #define	ENOMEM		12	/* Out of memory */
 14 | #define	EACCES		13	/* Permission denied */
 15 | #define	EFAULT		14	/* Bad address */
 16 | #define	ENOTBLK		15	/* Block device required */
 17 | #define	EBUSY		16	/* Device or resource busy */
 18 | #define	EEXIST		17	/* File exists */
 19 | #define	EXDEV		18	/* Cross-device link */
 20 | #define	ENODEV		19	/* No such device */
 21 | #define	ENOTDIR		20	/* Not a directory */
 22 | #define	EISDIR		21	/* Is a directory */
 23 | #define	EINVAL		22	/* Invalid argument */
 24 | #define	ENFILE		23	/* File table overflow */
 25 | #define	EMFILE		24	/* Too many open files */
 26 | #define	ENOTTY		25	/* Not a typewriter */
 27 | #define	ETXTBSY		26	/* Text file busy */
 28 | #define	EFBIG		27	/* File too large */
 29 | #define	ENOSPC		28	/* No space left on device */
 30 | #define	ESPIPE		29	/* Illegal seek */
 31 | #define	EROFS		30	/* Read-only file system */
 32 | #define	EMLINK		31	/* Too many links */
 33 | #define	EPIPE		32	/* Broken pipe */
 34 | #define	EDOM		33	/* Math argument out of domain of func */
 35 | #define	ERANGE		34	/* Math result not representable */
 36 | 
 37 | 
 38 | #define	EDEADLK		35	/* Resource deadlock would occur */
 39 | #define	ENAMETOOLONG	36	/* File name too long */
 40 | #define	ENOLCK		37	/* No record locks available */
 41 | 
 42 | /*
 43 |  * This error code is special: arch syscall entry code will return
 44 |  * -ENOSYS if users try to call a syscall that doesn't exist.  To keep
 45 |  * failures of syscalls that really do exist distinguishable from
 46 |  * failures due to attempts to use a nonexistent syscall, syscall
 47 |  * implementations should refrain from returning -ENOSYS.
 48 |  */
 49 | #define	ENOSYS		38	/* Invalid system call number */
 50 | 
 51 | #define	ENOTEMPTY	39	/* Directory not empty */
 52 | #define	ELOOP		40	/* Too many symbolic links encountered */
 53 | #define	EWOULDBLOCK	EAGAIN	/* Operation would block; same value as EAGAIN, 41 is not used in this file */
 54 | #define	ENOMSG		42	/* No message of desired type */
 55 | #define	EIDRM		43	/* Identifier removed */
 56 | #define	ECHRNG		44	/* Channel number out of range */
 57 | #define	EL2NSYNC	45	/* Level 2 not synchronized */
 58 | #define	EL3HLT		46	/* Level 3 halted */
 59 | #define	EL3RST		47	/* Level 3 reset */
 60 | #define	ELNRNG		48	/* Link number out of range */
 61 | #define	EUNATCH		49	/* Protocol driver not attached */
 62 | #define	ENOCSI		50	/* No CSI structure available */
 63 | #define	EL2HLT		51	/* Level 2 halted */
 64 | #define	EBADE		52	/* Invalid exchange */
 65 | #define	EBADR		53	/* Invalid request descriptor */
 66 | #define	EXFULL		54	/* Exchange full */
 67 | #define	ENOANO		55	/* No anode */
 68 | #define	EBADRQC		56	/* Invalid request code */
 69 | #define	EBADSLT		57	/* Invalid slot */
 70 | 
 71 | #define	EDEADLOCK	EDEADLK	/* Same value as EDEADLK; 58 is not used in this file */
 72 | 
 73 | #define	EBFONT		59	/* Bad font file format */
 74 | #define	ENOSTR		60	/* Device not a stream */
 75 | #define	ENODATA		61	/* No data available */
 76 | #define	ETIME		62	/* Timer expired */
 77 | #define	ENOSR		63	/* Out of streams resources */
 78 | #define	ENONET		64	/* Machine is not on the network */
 79 | #define	ENOPKG		65	/* Package not installed */
 80 | #define	EREMOTE		66	/* Object is remote */
 81 | #define	ENOLINK		67	/* Link has been severed */
 82 | #define	EADV		68	/* Advertise error */
 83 | #define	ESRMNT		69	/* Srmount error */
 84 | #define	ECOMM		70	/* Communication error on send */
 85 | #define	EPROTO		71	/* Protocol error */
 86 | #define	EMULTIHOP	72	/* Multihop attempted */
 87 | #define	EDOTDOT		73	/* RFS specific error */
 88 | #define	EBADMSG		74	/* Not a data message */
 89 | #define	EOVERFLOW	75	/* Value too large for defined data type */
 90 | #define	ENOTUNIQ	76	/* Name not unique on network */
 91 | #define	EBADFD		77	/* File descriptor in bad state */
 92 | #define	EREMCHG		78	/* Remote address changed */
 93 | #define	ELIBACC		79	/* Can not access a needed shared library */
 94 | #define	ELIBBAD		80	/* Accessing a corrupted shared library */
 95 | #define	ELIBSCN		81	/* .lib section in a.out corrupted */
 96 | #define	ELIBMAX		82	/* Attempting to link in too many shared libraries */
 97 | #define	ELIBEXEC	83	/* Cannot exec a shared library directly */
 98 | #define	EILSEQ		84	/* Illegal byte sequence */
 99 | #define	ERESTART	85	/* Interrupted system call should be restarted */
100 | #define	ESTRPIPE	86	/* Streams pipe error */
101 | #define	EUSERS		87	/* Too many users */
102 | #define	ENOTSOCK	88	/* Socket operation on non-socket */
103 | #define	EDESTADDRREQ	89	/* Destination address required */
104 | #define	EMSGSIZE	90	/* Message too long */
105 | #define	EPROTOTYPE	91	/* Protocol wrong type for socket */
106 | #define	ENOPROTOOPT	92	/* Protocol not available */
107 | #define	EPROTONOSUPPORT	93	/* Protocol not supported */
108 | #define	ESOCKTNOSUPPORT	94	/* Socket type not supported */
109 | #define	EOPNOTSUPP	95	/* Operation not supported on transport endpoint */
110 | #define	EPFNOSUPPORT	96	/* Protocol family not supported */
111 | #define	EAFNOSUPPORT	97	/* Address family not supported by protocol */
112 | #define	EADDRINUSE	98	/* Address already in use */
113 | #define	EADDRNOTAVAIL	99	/* Cannot assign requested address */
114 | #define	ENETDOWN	100	/* Network is down */
115 | #define	ENETUNREACH	101	/* Network is unreachable */
116 | #define	ENETRESET	102	/* Network dropped connection because of reset */
117 | #define	ECONNABORTED	103	/* Software caused connection abort */
118 | #define	ECONNRESET	104	/* Connection reset by peer */
119 | #define	ENOBUFS		105	/* No buffer space available */
120 | #define	EISCONN		106	/* Transport endpoint is already connected */
121 | #define	ENOTCONN	107	/* Transport endpoint is not connected */
122 | #define	ESHUTDOWN	108	/* Cannot send after transport endpoint shutdown */
123 | #define	ETOOMANYREFS	109	/* Too many references: cannot splice */
124 | #define	ETIMEDOUT	110	/* Connection timed out */
125 | #define	ECONNREFUSED	111	/* Connection refused */
126 | #define	EHOSTDOWN	112	/* Host is down */
127 | #define	EHOSTUNREACH	113	/* No route to host */
128 | #define	EALREADY	114	/* Operation already in progress */
129 | #define	EINPROGRESS	115	/* Operation now in progress */
130 | #define	ESTALE		116	/* Stale file handle */
131 | #define	EUCLEAN		117	/* Structure needs cleaning */
132 | #define	ENOTNAM		118	/* Not a XENIX named type file */
133 | #define	ENAVAIL		119	/* No XENIX semaphores available */
134 | #define	EISNAM		120	/* Is a named type file */
135 | #define	EREMOTEIO	121	/* Remote I/O error */
136 | #define	EDQUOT		122	/* Quota exceeded */
137 | 
138 | #define	ENOMEDIUM	123	/* No medium found */
139 | #define	EMEDIUMTYPE	124	/* Wrong medium type */
140 | #define	ECANCELED	125	/* Operation Canceled */
141 | #define	ENOKEY		126	/* Required key not available */
142 | #define	EKEYEXPIRED	127	/* Key has expired */
143 | #define	EKEYREVOKED	128	/* Key has been revoked */
144 | #define	EKEYREJECTED	129	/* Key was rejected by service */
145 | 
146 | /* for robust mutexes */
147 | #define	EOWNERDEAD	130	/* Owner died */
148 | #define	ENOTRECOVERABLE	131	/* State not recoverable */
149 | 
150 | #define ERFKILL		132	/* Operation not possible due to RF-kill */
151 | 
152 | #define EHWPOISON	133	/* Memory page has hardware error */
153 | 


--------------------------------------------------------------------------------
/spirv-io/lib/hashtable.glsl:
--------------------------------------------------------------------------------
  1 | // Based on https://github.com/nosferalatu/SimpleGPUHashTable/
  2 | 
  3 | #include <array.glsl>
  4 | 
  5 | struct i32map { // Hash table with int32 keys and 32-bit values, stored in i32heap and probed linearly.
  6 |     alloc_t table; // slot storage; after i32hAlloc, x and y are the begin/end indices into i32heap (3 ints per slot: key, hash, value)
  7 |     int32_t capacity; // number of slots; i32hAlloc rounds it up to a power of two
  8 |     int32_t count; // slots claimed so far; i32hSet increments it, i32hDelete does not decrement it
  9 | };
 10 | 
 11 | // 32 bit Murmur3 finalizer (fmix32), used as the hash function of the table.
 12 | // The mixing runs on an unsigned copy of the key: >> on a signed integer is an
 13 | // arithmetic shift in GLSL, which sign-extends negative intermediates, deviates from
 14 | // fmix32 and makes k and ~k hash to the same value.
 15 | // Never returns -1, because the table stores -1 in the hash field of slots that hold
 16 | // no live entry; that one result is mapped to 0.
 17 | int32_t murmur3hash(int32_t k)
 18 | {
 19 |     uint32_t h = uint32_t(k);
 20 |     h ^= h >> 16;
 21 |     h *= 0x85ebca6bu;
 22 |     h ^= h >> 13;
 23 |     h *= 0xc2b2ae35u;
 24 |     h ^= h >> 16;
 25 |     int32_t mixed = int32_t(h);
 26 |     if (mixed == -1) mixed = 0;
 27 |     return mixed;
 28 | }
 22 | 
 23 | /*T
 24 |     i32map ht = i32hAlloc(300);
 25 |     512 == ht.capacity;
 26 |     512*3 == strLen(ht.table);
 27 |     0 == ht.count;
 28 | 
 29 |     ht = i32hAlloc(256);
 30 |     256 == ht.capacity;
 31 |     256*3 == strLen(ht.table);
 32 |     0 == ht.count;
 33 | 
 34 |     ht = i32hAlloc(257);
 35 |     512 == ht.capacity;
 36 |     512*3 == strLen(ht.table);
 37 |     0 == ht.count;
 38 | */
 39 | // Allocates an empty table with room for at least `size` slots.
 40 | // The capacity is `size` rounded up to a power of two (minimum 1), which is what lets
 41 | // the probing code wrap with `& (capacity-1)`. Each slot is three ints in i32heap:
 42 | // key, hash, value.
 43 | i32map i32hAlloc(int32_t size) {
 44 |     // Integer round-up instead of ceil(log2(float(size))): float log2 is not guaranteed
 45 |     // to be exact at powers of two and is undefined for size <= 0. The bit limit keeps
 46 |     // the shift from overflowing for sizes above 2^30.
 47 |     int32_t capacity = 1;
 48 |     for (int32_t bit = 0; bit < 30 && capacity < size; bit++) {
 49 |         capacity <<= 1;
 50 |     }
 51 |     i32map ht = i32map(malloc(4 * (capacity * 3), 4), capacity, 0);
 52 |     // Divide the allocation bounds by 4 so they index i32heap (presumably malloc
 53 |     // returns byte offsets - confirm against malloc.glsl).
 54 |     ht.table.x /= 4;
 55 |     ht.table.y /= 4;
 56 |     for (uint32_t i = ht.table.x; i < ht.table.y; i += 3) {
 57 |         i32heap[i] = -1;   // key: -1 marks a never-used slot
 58 |         i32heap[i+1] = -1; // hash: -1 marks a slot without a live entry
 59 |         i32heap[i+2] = 0;  // value
 60 |     }
 61 |     return ht;
 62 | }
 51 | 
 52 | // Allocates an empty table using the default size request of 16.
 53 | i32map i32hAlloc() {
 54 |     const int32_t defaultSize = 16;
 55 |     return i32hAlloc(defaultSize);
 56 | }
 55 | 
 56 | #define i32hIter(ht, key, value, body) { /* Runs body once for every live entry of ht, after assigning key and value. */ \
 57 |     for (uint32_t _i_ = ht.table.x; _i_ < ht.table.y; _i_ += 3) { /* 3 ints per slot */ \
 58 |         if (i32heap[_i_+1] != -1) { /* hash field -1: no live entry in this slot */ \
 59 |             key = i32heap[_i_]; /* key and value may be declarations, e.g. int32_t k */ \
 60 |             value = i32heap[_i_+2];\
 61 |             body;\
 62 |         }\
 63 |     }\
 64 | }
 65 | 
 66 | #define f32hIter(ht, key, value, body) { /* Like i32hIter, but value receives the stored bits reinterpreted as a float. */ \
 67 |     for (uint32_t _i_ = ht.table.x; _i_ < ht.table.y; _i_ += 3) { /* 3 ints per slot */ \
 68 |         if (i32heap[_i_+1] != -1) { /* hash field -1: no live entry in this slot */ \
 69 |             key = i32heap[_i_];\
 70 |             value = intBitsToFloat(i32heap[_i_+2]);\
 71 |             body;\
 72 |         }\
 73 |     }\
 74 | }
 75 | 
 76 | // Writes the keys of all live entries of ht to the heap, starting at heapPtr (which
 77 | // is advanced past them), and returns the written range as i32heap indices.
 78 | i32array i32hKeys(i32map ht) {
 79 |     // Round heapPtr up to a multiple of 4 so that heapPtr/4 addresses a whole int.
 80 |     // The previous expression reduced to `heapPtr += heapPtr & 3`, which left heapPtr
 81 |     // misaligned whenever the remainder was 1 or 3.
 82 |     heapPtr += (4 - (heapPtr & 3)) & 3;
 83 |     ptr_t start = heapPtr;
 84 |     // v is unused; the macro requires a value target.
 85 |     i32hIter(ht, int32_t k, int32_t v, {
 86 |         i32heap[heapPtr/4] = k;
 87 |         heapPtr += 4;
 88 |     })
 89 |     return i32array(start/4, heapPtr/4);
 90 | }
 85 | 
 86 | // Key listing for float-valued tables; delegates to i32hKeys.
 87 | i32array f32hKeys(i32map ht) {
 88 |     i32array keys = i32hKeys(ht);
 89 |     return keys;
 90 | }
 89 | 
 90 | /*T
 91 |     i32map ht = i32hAlloc(256);
 92 |     int32_t v = 0;
 93 | 
 94 |     i32hSet(ht, 45, 1);
 95 |     i32hSet(ht, 46, 2);
 96 |     i32hSet(ht, 47, 3);
 97 | 
 98 |     true == i32hGet(ht, 45, v);
 99 |     1 == v;
100 | 
101 |     i32hSet(ht, 45, 4);
102 |     i32hSet(ht, 248, 5);
103 | 
104 |     true == i32hGet(ht, 46, v);
105 |     2 == v;
106 |     true == i32hGet(ht, 47, v);
107 |     3 == v;
108 |     true == i32hGet(ht, 45, v);
109 |     4 == v;
110 |     true == i32hGet(ht, 248, v);
111 |     5 == v;
112 | 
113 |     256 == ht.capacity;
114 | 
115 |     log("Adding 260 keys");
116 |     for (int32_t i = 0; i < 260; i++) {
117 |         i32hSet(ht, i, i);
118 |     }
119 | 
120 |     // Resized table
121 |     512 == ht.capacity;
122 | 
123 |     log("Checking for keys");
124 |     // Check if all the keys are still there
125 |     for (int32_t i = 0; i < 260; i++) {
126 |         true == i32hGet(ht, i, v);
127 |         i == v;
128 |     }
129 | 
130 | */
131 | // Stores key -> value in ht, overwriting the value if the key is already present.
132 | // When one more entry would raise the load above 70%, the table is first reallocated
133 | // at twice the capacity and its live entries are re-inserted. Slots are probed
134 | // linearly. Key -1 is also the never-used-slot marker, so it is not a usable key.
135 | void i32hSet(inout i32map ht, int32_t key, int32_t value) {
136 |     bool needsGrow = (ht.count + 1) * 100 > ht.capacity * 70;
137 |     if (needsGrow) {
138 |         i32map grown = i32hAlloc(ht.capacity*2);
139 |         int32_t grownMask = grown.capacity-1;
140 |         for (uint32_t src = ht.table.x; src < ht.table.y; src += 3) {
141 |             // Slots whose hash field is -1 hold no live entry and are not carried over.
142 |             if (i32heap[src+1] == -1) continue;
143 |             // Re-use the stored hash to find the first free slot in the new table.
144 |             int32_t dst = i32heap[src+1] & grownMask;
145 |             while (i32heap[grown.table.x + dst*3] != -1) {
146 |                 dst = (dst + 1) & grownMask;
147 |             }
148 |             i32heap[grown.table.x + dst*3    ] = i32heap[src];
149 |             i32heap[grown.table.x + dst*3 + 1] = i32heap[src+1];
150 |             i32heap[grown.table.x + dst*3 + 2] = i32heap[src+2];
151 |             grown.count++;
152 |         }
153 |         ht = grown;
154 |     }
155 |     int32_t h = murmur3hash(key);
156 |     int32_t mask = ht.capacity-1;
157 |     int32_t slot = h & mask;
158 |     // Walk forward until the key itself or a never-used slot is found.
159 |     int32_t stored = i32heap[ht.table.x + slot*3];
160 |     while (stored != -1 && stored != key) {
161 |         slot = (slot + 1) & mask;
162 |         stored = i32heap[ht.table.x + slot*3];
163 |     }
164 |     // Only a never-used slot counts as newly claimed.
165 |     if (stored == -1) ht.count++;
166 |     i32heap[ht.table.x + slot*3] = key;
167 |     i32heap[ht.table.x + slot*3 + 1] = h;
168 |     i32heap[ht.table.x + slot*3 + 2] = value;
169 | }
159 | 
160 | /*T
161 |     i32map ht = i32hAlloc(256);
162 |     int32_t v = 123;
163 | 
164 |     false == i32hGet(ht, 30, v);
165 | 
166 |     i32hSet(ht, 30, 321);
167 | 
168 |     true == i32hGet(ht, 30, v);
169 |     321 == v;
170 | 
171 |     false == i32hGet(ht, 31, v);
172 | 
173 |     for (int32_t i = 32; i < 512; i++) {
174 |         false == i32hGet(ht, i, v);
175 |     }
176 | 
177 | */
178 | // Looks up key in ht.
179 | // Returns true and stores the entry's value in `value` when the key has a live entry.
180 | // Returns false otherwise; `value` is then 0, because an out parameter that is never
181 | // written is undefined in the caller after the call.
182 | bool i32hGet(i32map ht, int32_t key, out int32_t value) {
183 |     value = 0;
184 |     int32_t mask = ht.capacity-1;
185 |     int32_t idx = murmur3hash(key) & mask;
186 |     while (true) {
187 |         int32_t k = i32heap[ht.table.x + idx * 3];
188 |         if (k == key) {
189 |             // Key found; a hash field of -1 means the entry was deleted.
190 |             if (i32heap[ht.table.x + idx * 3 + 1] == -1) return false;
191 |             value = i32heap[ht.table.x + idx * 3 + 2];
192 |             return true;
193 |         }
194 |         // A never-used slot ends the probe sequence: the key is not stored.
195 |         if (k == -1) return false;
196 |         idx = (idx + 1) & mask;
197 |     }
198 |     return false;
199 | }
194 | 
195 | /*T
196 |     i32map ht = i32hAlloc(256);
197 |     int32_t v = 0;
198 | 
199 |     i32hSet(ht, 30, 321);
200 | 
201 |     true == i32hGet(ht, 30, v);
202 |     321 == v;
203 | 
204 |     true == i32hDelete(ht, 30);
205 | 
206 |     false == i32hGet(ht, 30, v);
207 | 
208 |     i32hSet(ht, 30, 321);
209 | 
210 |     log("i32hDelete: Adding and deleting 468 keys");
211 | 
212 |     for (int32_t i = 32; i < 500; i++) {
213 |         i32hSet(ht, i, i);
214 |         true == i32hGet(ht, i, i);
215 |         true == i32hDelete(ht, i);
216 |     }
217 | 
218 |     log("i32hDelete: Checking that none of the keys exist");
219 | 
220 |     for (int32_t i = 32; i < 500; i++) {
221 |         false == i32hGet(ht, i, v);
222 |         false == i32hDelete(ht, i);
223 |     }
224 | 
225 |     true == i32hGet(ht, 30, v);
226 |     321 == v;
227 | 
228 |     log("i32hDelete: Check sequences of gets, sets and deletes");
229 | 
230 |     for (int32_t i = 0; i < 500; i+=3) {
231 |         i32hSet(ht, i, i);
232 |     }
233 |     for (int32_t i = 0; i < 500; i+=7) {
234 |         i32hDelete(ht, i);
235 |     }
236 |     for (int32_t i = 0; i < 500; i+=3) {
237 |         if (i % 7 != 0) {
238 |             true == i32hGet(ht, i, v);
239 |             i == v;
240 |             if (!i32hGet(ht, i, v)) {
241 |                 log(concat("err 1.1: ", str(i)));
242 |             }
243 |         } else {
244 |             false == i32hGet(ht, i, v);
245 |             if (i32hGet(ht, i, v)) {
246 |                 log(concat("err 1.2: ", str(i)));
247 |             }
248 |         }
249 |     }
250 | 
251 |     for (int32_t i = 0; i < 500; i+=11) {
252 |         i32hSet(ht, i, i);
253 |     }
254 |     for (int32_t i = 0; i < 500; i+=3) {
255 |         i32hDelete(ht, i);
256 |     }
257 |     for (int32_t i = 0; i < 500; i+=11) {
258 |         if (i % 3 != 0) {
259 |             true == i32hGet(ht, i, v);
260 |             i == v;
261 |             if (!i32hGet(ht, i, v)) {
262 |                 log(concat("err 2.1: ", str(i)));
263 |             }
264 |         } else {
265 |             false == i32hGet(ht, i, v);
266 |             if (i32hGet(ht, i, v)) {
267 |                 log(concat("err 2.2: ", str(i)));
268 |             }
269 |         }
270 |     }
271 | 
272 | */
273 | // Removes key from ht. Returns true if a live entry was removed, false if the key is
274 | // not stored or was already deleted. The slot keeps its key and only gets its hash
275 | // field set to -1, so probe sequences running through it stay intact; ht.count is
276 | // left unchanged.
277 | bool i32hDelete(inout i32map ht, int32_t key) {
278 |     int32_t mask = ht.capacity-1;
279 |     int32_t slot = murmur3hash(key) & mask;
280 |     while (true) {
281 |         int32_t stored = i32heap[ht.table.x + slot * 3];
282 |         if (stored == key) {
283 |             bool live = i32heap[ht.table.x + slot * 3 + 1] != -1;
284 |             if (live) i32heap[ht.table.x + slot * 3 + 1] = -1;
285 |             return live;
286 |         }
287 |         // A never-used slot ends the probe sequence: nothing to delete.
288 |         if (stored == -1) return false;
289 |         slot = (slot + 1) & mask;
290 |     }
291 |     return false;
292 | }
288 | 
289 | // Allocates a float-valued table; it is the same structure as an int-valued one.
290 | i32map f32hAlloc(int32_t size) {
291 |     i32map ht = i32hAlloc(size);
292 |     return ht;
293 | }
292 | 
293 | // Allocates a float-valued table using the default size request of 16.
294 | i32map f32hAlloc() {
295 |     const int32_t defaultSize = 16;
296 |     return f32hAlloc(defaultSize);
297 | }
296 | 
297 | // Removes key from a float-valued table. Returns what i32hDelete returns (true if a
298 | // live entry was removed) instead of discarding it, matching i32hDelete's signature;
299 | // callers that use this as a plain statement are unaffected.
300 | bool f32hDelete(inout i32map ht, int32_t key) {
301 |     return i32hDelete(ht, key);
302 | }
300 | 
301 | // Float-valued lookup: like i32hGet, with the stored 32 bits reinterpreted as a float.
302 | // Returns true and sets `value` when the key has a live entry. On a miss it returns
303 | // false and sets `value` to 0.0, rather than decoding an int that i32hGet may have
304 | // left unwritten.
305 | bool f32hGet(i32map ht, int32_t key, out float value) {
306 |     int32_t bits = 0;
307 |     bool found = i32hGet(ht, key, bits);
308 |     value = found ? intBitsToFloat(bits) : 0.0;
309 |     return found;
310 | }
307 | 
308 | // Stores a float under key by saving its raw bit pattern as the int value.
309 | void f32hSet(inout i32map ht, int32_t key, float value) {
310 |     int32_t bits = floatBitsToInt(value);
311 |     i32hSet(ht, key, bits);
312 | }
311 | 
312 | 


--------------------------------------------------------------------------------