├── http_shader ├── cpp │ ├── run.sh │ ├── README.md │ ├── cppRunner.cpp │ └── httpd_ivec4.glsl ├── ispc_ivec4 │ ├── run.sh │ ├── README.md │ ├── runner_ivec4.ispc │ ├── httpd.ispc.h │ ├── httpd_ispc_ivec4.cpp │ └── httpd_ivec4.glsl ├── vulkan │ ├── run.sh │ ├── README.md │ └── httpd_ivec4.glsl ├── ispc_int │ ├── run.sh │ ├── request.txt │ ├── README.md │ ├── runner.ispc │ ├── httpd.ispc.h │ ├── httpd_ispc.cpp │ └── httpd_int.glsl ├── ispc_char │ ├── run.sh │ ├── request.txt │ ├── README.md │ ├── runner.ispc │ ├── httpd.ispc.h │ ├── httpd_ispc.cpp │ └── httpd.glsl ├── build.sh ├── .gitignore ├── build_vulkan.sh ├── build_cpp.sh ├── build_ispc_char.sh ├── build_ispc_int.sh ├── build_ispc_ivec4.sh ├── README.md ├── preprocess.js └── chr.glsl ├── spirv-io ├── test │ ├── .gitignore │ ├── test_data │ │ └── hello.txt │ ├── test_file.glsl │ ├── test_hashtable.glsl │ └── test_array.glsl ├── .gitignore ├── examples │ ├── hello.glsl │ ├── hello_1.glsl │ ├── rerun.glsl │ ├── template_literal.glsl │ ├── discard.glsl │ ├── bm.glsl │ ├── clock.glsl │ ├── hello_dlopen_gh.glsl │ ├── memalloc.glsl │ ├── wait_for_stdin.glsl │ ├── cat.glsl │ ├── hello_dlopen.glsl │ ├── http_client.glsl │ ├── listen3.glsl │ ├── grep_cpu.glsl │ └── grep.glsl ├── lib │ ├── assert.glsl │ ├── thread_id.glsl │ ├── dlopen.glsl │ ├── stat.glsl │ ├── statemachine.glsl │ ├── binary_data.glsl │ ├── chr.glsl │ ├── malloc.glsl │ ├── errno.glsl │ └── hashtable.glsl ├── Makefile ├── bin │ ├── glsl2spv │ ├── gls_generate_tests.js │ └── gls_resolve_includes.js └── src │ ├── test_string.cpp │ ├── test_file.cpp │ ├── gls.cpp │ ├── gls_cpu.cpp │ └── parse_spv.hpp ├── docker ├── .gitignore ├── spirv-cross-linux-x86-64 ├── glsl2wasm.sh ├── ispc2wasm.sh ├── spirv-runner │ ├── fix_ispc_input_output_order │ ├── Makefile │ ├── runner.ispc │ ├── program.h │ └── ispcRunner.cpp ├── Dockerfile ├── README.md └── mandelbrot.ispc ├── .gitignore ├── Makefile ├── include └── spirv_cross │ ├── image.hpp │ ├── barrier.hpp │ ├── 
thread_group.hpp │ ├── sampler.hpp │ └── external_interface.h ├── src ├── mandel.comp └── mandel.cpp └── README.md /http_shader/cpp/run.sh: -------------------------------------------------------------------------------- 1 | time ./cppRunner 2 | -------------------------------------------------------------------------------- /spirv-io/test/.gitignore: -------------------------------------------------------------------------------- 1 | test_data/ 2 | -------------------------------------------------------------------------------- /http_shader/ispc_ivec4/run.sh: -------------------------------------------------------------------------------- 1 | time ./httpd_ivec4 2 | -------------------------------------------------------------------------------- /http_shader/vulkan/run.sh: -------------------------------------------------------------------------------- 1 | time ./vulkanRunner 2 | -------------------------------------------------------------------------------- /spirv-io/test/test_data/hello.txt: -------------------------------------------------------------------------------- 1 | Hello, world! 
2 | -------------------------------------------------------------------------------- /docker/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.wasm 3 | *.js 4 | *.html 5 | -------------------------------------------------------------------------------- /http_shader/ispc_int/run.sh: -------------------------------------------------------------------------------- 1 | time ./httpd < request.txt 2 | -------------------------------------------------------------------------------- /http_shader/ispc_char/run.sh: -------------------------------------------------------------------------------- 1 | time ./httpd < request.txt 2 | -------------------------------------------------------------------------------- /http_shader/build.sh: -------------------------------------------------------------------------------- 1 | ./build_ispc_char.sh 2 | ./build_ispc_int.sh 3 | ./build_ispc_ivec4.sh 4 | ./build_vulkan.sh 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | src/*.js 2 | src/*.html 3 | src/*.wasm 4 | src/*.spv 5 | src/*.spv.cpp 6 | src/*.o 7 | -------------------------------------------------------------------------------- /spirv-io/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | test_file 3 | test_string 4 | grep 5 | gls 6 | *.comp 7 | *.full* 8 | -------------------------------------------------------------------------------- /docker/spirv-cross-linux-x86-64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kig/spirv-wasm/HEAD/docker/spirv-cross-linux-x86-64 -------------------------------------------------------------------------------- /spirv-io/examples/hello.glsl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gls 2 | 3 | #include 4 | 5 | 
println("Hello, world!"); 6 | -------------------------------------------------------------------------------- /http_shader/.gitignore: -------------------------------------------------------------------------------- 1 | *.comp 2 | *.spv 3 | *.swp 4 | *.o 5 | vulkanRunner 6 | httpd_i 7 | httpd 8 | httpd_ivec4 9 | cppRunner 10 | -------------------------------------------------------------------------------- /spirv-io/examples/hello_1.glsl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gls 2 | #include "file.glsl" 3 | ThreadLocalCount = 1; 4 | ThreadGroupCount = 1; 5 | 6 | println("Hello, world!"); 7 | -------------------------------------------------------------------------------- /spirv-io/lib/assert.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define assert(f) { if (!(f)) { FREE_ALL(eprintln(concat(__FILE__, ":", str(__LINE__), " Assertion failed: ", #f ))); } } 4 | -------------------------------------------------------------------------------- /http_shader/build_vulkan.sh: -------------------------------------------------------------------------------- 1 | cd vulkan && 2 | cpp httpd_ivec4.glsl | node ../preprocess.js > out.comp && 3 | glslangValidator -V -o httpd.spv out.comp && 4 | clang++ -lvulkan -lpthread -m64 -O2 -o vulkanRunner vulkanRunner.cpp -std=c++11 5 | -------------------------------------------------------------------------------- /http_shader/build_cpp.sh: -------------------------------------------------------------------------------- 1 | cd cpp && 2 | cpp httpd_ivec4.glsl | node ../preprocess.js > out.comp && 3 | glslangValidator -V -o httpd.spv out.comp && 4 | spirv-cross --cpp --output httpd.cpp httpd.spv && 5 | clang++ -lpthread -I../../include -lm -O3 -o cppRunner cppRunner.cpp -std=c++11 6 | -------------------------------------------------------------------------------- /spirv-io/examples/rerun.glsl: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | void main() { 4 | if (ThreadId == 0) { 5 | if (runCount < 10) { 6 | println(concat("Hello from run ", str(runCount))); 7 | rerunProgram = RERUN_NOW; 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /spirv-io/examples/template_literal.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | ThreadLocalCount = 1; 4 | ThreadGroupCount = 1; 5 | 6 | void main() { 7 | float x = 1.2345; 8 | string s = `x = ${x} 9 | x * x = ${ 10 | x * x 11 | } 12 | Hello template literal!`; 13 | 14 | println(s); 15 | } 16 | -------------------------------------------------------------------------------- /spirv-io/examples/discard.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | ThreadLocalCount = 8; 3 | ThreadGroupCount = 1; 4 | 5 | void main() { 6 | for (int i = 0; i < 10; i++) { 7 | if (i > 3 && ThreadId > 2) stopIO = 1; 8 | if (ThreadId == 0 && i > 6) exitSync(0); 9 | println(str(ThreadId), ": ", str(i)); 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /spirv-io/lib/thread_id.glsl: -------------------------------------------------------------------------------- 1 | int32_t ThreadGroupCount = int(gl_NumWorkGroups.x); 2 | int32_t ThreadLocalCount = int(gl_WorkGroupSize.x); 3 | int32_t ThreadCount = ThreadGroupCount * ThreadLocalCount; 4 | int32_t ThreadId = int(gl_GlobalInvocationID.x); 5 | int32_t ThreadGroupId = int(gl_WorkGroupID.x); 6 | int32_t ThreadLocalId = int(gl_LocalInvocationID.x); 7 | -------------------------------------------------------------------------------- /http_shader/build_ispc_char.sh: -------------------------------------------------------------------------------- 1 | cd ispc_char && 2 | cpp httpd.glsl | node ../preprocess.js > out.comp && 3 | 
glslangValidator -V -o httpd.spv out.comp && 4 | spirv-cross-ispc --ispc --output httpd.ispc httpd.spv && 5 | 6 | ispc -O3 -o httpd.ispc.o runner.ispc && 7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp 8 | -------------------------------------------------------------------------------- /http_shader/build_ispc_int.sh: -------------------------------------------------------------------------------- 1 | cd ispc_int && 2 | cpp httpd_int.glsl | node ../preprocess.js > out.comp && 3 | glslangValidator -V -o httpd.spv out.comp && 4 | spirv-cross-linux-x86-64 --ispc --output httpd.ispc httpd.spv && 5 | 6 | ispc -O3 -o httpd.ispc.o runner.ispc && 7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp 8 | -------------------------------------------------------------------------------- /http_shader/ispc_int/request.txt: -------------------------------------------------------------------------------- 1 | GET /gl HTTP/1.1 2 | Host: localhost:9000 3 | User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0 4 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 5 | Accept-Language: en-US,en;q=0.5 6 | Accept-Encoding: gzip, deflate 7 | Connection: keep-alive 8 | Upgrade-Insecure-Requests: 1 9 | 10 | -------------------------------------------------------------------------------- /http_shader/build_ispc_ivec4.sh: -------------------------------------------------------------------------------- 1 | cd ispc_ivec4 && 2 | cpp httpd_ivec4.glsl | node ../preprocess.js > out_int.comp && 3 | glslangValidator -V -o httpd.spv out_int.comp && 4 | spirv-cross-ispc --ispc --output httpd.ispc httpd.spv && 5 | ispc -O3 --target=avx2-i64x4 -o httpd.ispc.o runner_ivec4.ispc && 6 | clang++ -pthread -std=c++11 -lm -pthread -O3 -o httpd_ivec4 httpd.ispc.o ../tasksys.cpp httpd_ispc_ivec4.cpp 7 | 
-------------------------------------------------------------------------------- /http_shader/ispc_char/request.txt: -------------------------------------------------------------------------------- 1 | GET /gl HTTP/1.1 2 | Host: localhost:9000 3 | User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0 4 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 5 | Accept-Language: en-US,en;q=0.5 6 | Accept-Encoding: gzip, deflate 7 | Connection: keep-alive 8 | Upgrade-Insecure-Requests: 1 9 | 10 | -------------------------------------------------------------------------------- /docker/glsl2wasm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -z $1 ]] 4 | then 5 | echo 6 | echo USAGE $0 source.glsl 7 | echo Generates source.glsl.html, source.glsl.worker.js, source.glsl.js, and source.glsl.wasm 8 | echo 9 | exit 1 10 | fi 11 | 12 | docker run -i -v `pwd`:/tmp --rm ispc-wasm:latest bash -c "cd /usr/local/src/spirv-wasm && cp /tmp/$1 program.comp.glsl && make TARGET="$1" build && cp $1.{html,wasm,worker.js,js} /tmp/" 13 | 14 | -------------------------------------------------------------------------------- /docker/ispc2wasm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -z $1 ]] 4 | then 5 | echo 6 | echo USAGE $0 source.ispc 7 | echo Generates source.ispc.o and source.ispc.wasm 8 | echo 9 | exit 1 10 | fi 11 | 12 | docker run -i -v `pwd`:/tmp --rm ispc-wasm:latest bash -c "ispc --target=wasm-i32x4 --nostdlib --emit-llvm-text -o - /tmp/$1 | llc -O3 -filetype=obj - -o /tmp/$1.o && wasm-ld --no-entry --export-all --allow-undefined -o /tmp/$1.wasm /tmp/$1.o" 13 | 14 | -------------------------------------------------------------------------------- /spirv-io/examples/bm.glsl: -------------------------------------------------------------------------------- 1 | 2 | #include 
"file.glsl" 3 | 4 | layout ( local_size_x = 32, local_size_y = 1, local_size_z = 1 ) in; 5 | 6 | #define TEST(testFn) FREE(FREE_IO(printTest(testFn(), #testFn))) 7 | 8 | void main() { 9 | initGlobals(); 10 | 11 | alloc_t s = malloc(2048); 12 | awaitIO(_ioPingPong(s)); 13 | 14 | if (ThreadID == 0) { 15 | println(concat("IO pingpong on ", str(ThreadCount), " threads, total bytes ", str(ThreadCount * 2048))); 16 | } 17 | } 18 | 19 | -------------------------------------------------------------------------------- /spirv-io/Makefile: -------------------------------------------------------------------------------- 1 | CPP := clang++ 2 | CFLAGS := -m64 -march=native -mtune=native -std=c++17 -I../include -O2 3 | LDFLAGS := -ldl -llz4 -lzstd -lvulkan -lpthread 4 | 5 | gls: 6 | $(CPP) $(CFLAGS) $(LDFLAGS) -o bin/gls src/gls.cpp 7 | 8 | gls_cpu: 9 | $(CPP) $(CFLAGS) $(LDFLAGS) -o bin/gls_cpu src/gls_cpu.cpp 10 | 11 | install: gls 12 | install -d $(DESTDIR)$(PREFIX)/lib/ 13 | install -m 644 lib/* $(DESTDIR)$(PREFIX)/lib/ 14 | install -d $(DESTDIR)$(PREFIX)/bin/ 15 | install -m 755 bin/* $(DESTDIR)$(PREFIX)/bin/ 16 | 17 | all: gls 18 | -------------------------------------------------------------------------------- /docker/spirv-runner/fix_ispc_input_output_order: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | STDIN.read.sub( 4 | /program_ispc_main\(.*uniform struct outputs&\s+([^,]+),\s*uniform struct inputs&\s+([^\)]+)/ 5 | ) {|m| 6 | "program_ispc_main(uniform struct inputs& #{$2}, uniform struct outputs\& #{$1}" 7 | }.sub( 8 | /program_ispc_main\(.*uniform struct inputs/ 9 | ) {|m| 10 | "program_ispc_main(uniform int3 gl_NumWorkGroups, uniform int3 gl_WorkGroupID, varying int3 gl_LocalInvocationID, varying int3 gl_GlobalInvocationID, varying int gl_LocalInvocationIndex, uniform struct inputs" 11 | }.split(/^export void /)[0].display 12 | 
-------------------------------------------------------------------------------- /spirv-io/bin/glsl2spv: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 4 | 5 | if [[ -z $1 ]] 6 | then 7 | echo "USAGE: glsl2spv program.glsl [program.spv]" 8 | exit 1 9 | fi 10 | 11 | target=$2 12 | 13 | if [[ -z $target ]] 14 | then 15 | target=`echo "$1" | sed -e 's/glsl$/spv/i'` 16 | fi 17 | 18 | tmp_dir=$(mktemp -d -t gls-XXXXXXXXXX) 19 | tmp=${tmp_dir}/$(basename "$1") 20 | 21 | grep -v '^#!' "$1" | node "$DIR"/gls_resolve_includes.js | 22 | cpp -nostdinc -x c -I/usr/local/lib/glsl -o "${tmp}.full" && 23 | node "$DIR"/gls_preprocess.js "${tmp}.full" && 24 | glslangValidator -V -o "${target}" "${tmp}.full.comp" | grep -v "^${tmp}.full.comp$" 25 | if [[ -f "${target}" ]] 26 | then 27 | cat "${tmp}.full.defs.spv" >> "${target}" #&& 28 | #rm -r "${tmp_dir}" 29 | fi 30 | -------------------------------------------------------------------------------- /http_shader/ispc_char/README.md: -------------------------------------------------------------------------------- 1 | Change the buffer type from int to int8 in the generated ISPC files. 2 | 3 | Easiest way to run this is without rebuilding the ISPC files. 4 | 5 | ```bash 6 | ispc -O3 -o httpd.ispc.o runner.ispc && 7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp && 8 | sh run.sh 9 | # 59 10 | # HTTP/1.1 200 OK 11 | # Content-Type: text/plain 12 | # 13 | # Hello, World! 14 | # Elapsed: 25451 ms 15 | # Million requests per second: 10.300 16 | # 423.24user 2.08system 0:25.56elapsed 1663%CPU (0avgtext+0avgdata 790060maxresident)k 17 | # 0inputs+0outputs (0major+262349minor)pagefaults 0swaps 18 | ``` 19 | 20 | If you want to do a full build: 21 | 22 | ```bash 23 | (cd .. 
&& sh build_ispc_char.sh) 24 | # Do the type change in httpd.ispc, changing int to int8 in the buffers. 25 | sh run.sh 26 | ``` 27 | -------------------------------------------------------------------------------- /spirv-io/examples/clock.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | ThreadGroupCount = 1; 4 | ThreadLocalCount = 1; 5 | 6 | void main() { 7 | uint64_t ct = clockARB(); 8 | uint64_t rt = clockRealtimeEXT(); 9 | int64_t t0 = microTimeSync(); 10 | int64_t t1 = microTimeSync(); 11 | int64_t t2 = microTimeSync(); 12 | int64_t t3 = microTimeSync(); 13 | int64_t t4 = microTimeSync(); 14 | uint64_t ct2 = clockARB(); 15 | uint64_t rt2 = clockRealtimeEXT(); 16 | 17 | println(concat("Wallclock time: ", str(t0))); 18 | println(concat("Wallclock time: ", str(t1))); 19 | println(concat("Wallclock time: ", str(t2))); 20 | println(concat("Wallclock time: ", str(t3))); 21 | println(concat("Wallclock time: ", str(t4))); 22 | println(concat("clock: ", str(ct), " elapsed ", str(ct2-ct))); 23 | println(concat("clockRealtime: ", str(rt), " elapsed ", str(rt2-rt))); 24 | } 25 | -------------------------------------------------------------------------------- /spirv-io/lib/dlopen.glsl: -------------------------------------------------------------------------------- 1 | io dlcall(uint64_t lib, string symbol, alloc_t args, alloc_t result) { 2 | return requestIO(ioRequest(IO_DLCALL, IO_START, int64_t(lib), strLen(args), symbol, args, 0,0,result,0,0)); 3 | } 4 | 5 | io dlopen(string path, alloc_t dstBuffer) { 6 | return requestIO(ioRequest(IO_DLOPEN, IO_START, 0, 0, path, dstBuffer, 0,0,string(0,0),0,0)); 7 | } 8 | 9 | uint64_t dlopenSync(string path) { 10 | uint64_t lib; 11 | FREE(FREE_IO( 12 | string res = awaitIO(dlopen(path, malloc(8)), true); 13 | lib = readU64fromIO(res.x); 14 | )) 15 | return lib; 16 | } 17 | 18 | string dlcallSync(uint64_t lib, string symbol, alloc_t args, alloc_t result) { 19 | string res; 20 | 
FREE_IO( res = awaitIO(dlcall(lib, symbol, args, result)); ) 21 | return res; 22 | } 23 | 24 | void dlcallSync(uint64_t lib, string symbol, alloc_t args) { 25 | dlcallSync(lib, symbol, args, string(-4,-4)); 26 | } 27 | -------------------------------------------------------------------------------- /spirv-io/examples/hello_dlopen_gh.glsl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gls 2 | 3 | #include 4 | #include @ dbc62e1bd6df8765f90b9f54e72bb644e20489ad17fefae51450cdf5321ca769 5 | 6 | ThreadLocalCount = 1; 7 | ThreadGroupCount = 1; 8 | 9 | writeSync("hello.c", "#include <stdio.h>\nvoid hello(char* s){printf(\"Hello, %s!\\n\",s);}\nvoid sub(int* v, unsigned int vlen, int* res, unsigned int reslen) { res[0] = v[0]-v[1]; }"); 10 | awaitIO(runCmd("cc --shared -o hello.so hello.c")); 11 | uint64_t lib = dlopenSync("./hello.so"); 12 | dlcallSync(lib, "hello", "GLSL\u0000", string(-4,-4)); 13 | alloc_t params = malloc(8); 14 | i32heap[params.x/4] = 7; 15 | i32heap[params.x/4+1] = 12; 16 | alloc_t res = dlcallSync(lib, "sub", params, malloc(4)); 17 | int32_t subResult = readI32heap(res.x); 18 | println(concat(str(i32heap[params.x/4]), " - ", str(i32heap[params.x/4+1]), " = ", str(subResult))); 19 | -------------------------------------------------------------------------------- /http_shader/README.md: -------------------------------------------------------------------------------- 1 | # http_shader 2 | 3 | GLSL shaders that parse HTTP requests and write out HTTP responses. 4 | 5 | There are three different approaches here: 6 | 7 | * `httpd.glsl` - Turn an inputBuffer of 8-bit ASCII characters into a buffer of ints, parse requests in the int buffer, create responses in another buffer of ints, convert the response buffer into 8-bit char outputBuffer. 8 | * `httpd_int.glsl` - Same thing but when converting the 8-bit chars to ints, turn them into SOA format (`[req_0_0, req_1_0, req_2_0, req_3_0, ... 
req_31_0, req_0_1, ...]`) for faster processing. 9 | * `httpd_ivec4.glsl` - Just deal with the inputBuffer & outputBuffer directly. Using ivec4s because doesn't that sound painful? The ISPC version does >100 million "Hello, world!" requests per second on a TR2950X 16-core. 10 | 11 | I ... found out that SPIR-V in Vulkan 1.2 supports 8-bit ints as a native type. I want to use those instead of writing helper functions to get/set individual bytes in ivec4s. 12 | 13 | In a shocking turn of events, running these on the CPU via SPIR-V to ISPC performs better than the GPU. Even after removing the buffer uploads and downloads. 14 | -------------------------------------------------------------------------------- /http_shader/preprocess.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | 3 | const source = fs.readFileSync(0); 4 | 5 | const segments = source.toString().replace(/^# .*/mg, '').split(/("|')/g); 6 | 7 | let inString = false; 8 | let inChar = false; 9 | let lastSegment = ''; 10 | let stringSegments = []; 11 | 12 | const output = []; 13 | 14 | for (const segment of segments) { 15 | if (segment === '"' && lastSegment[lastSegment.length-1] !== '\\') { 16 | inString = !inString; 17 | if (!inString) { 18 | const str = stringSegments.join(''); 19 | output.push(`{${Buffer.from(JSON.parse('"'+str+'"')).join(",")}}`); 20 | } 21 | stringSegments = []; 22 | } else if (inString) { 23 | stringSegments.push(segment); 24 | } else if (segment === "'" && lastSegment[lastSegment.length-1] !== '\\') { 25 | inChar = !inChar; 26 | if (!inChar) { 27 | const str = stringSegments.join(''); 28 | output.push(`${Buffer.from(eval("'"+str+"'")).readInt32LE(0)}`); 29 | } 30 | stringSegments = []; 31 | } else if (inChar) { 32 | stringSegments.push(segment); 33 | } else { 34 | output.push(segment); 35 | } 36 | lastSegment = segment; 37 | } 38 | 39 | console.log(output.join('')); 40 | 
-------------------------------------------------------------------------------- /http_shader/ispc_int/README.md: -------------------------------------------------------------------------------- 1 | Change the request and response buffer types from int to int8 in the generated ISPC files for ~2x perf. 2 | 3 | Easiest way to run this is without rebuilding the ISPC files. 4 | 5 | ```bash 6 | ispc -O3 -o httpd.ispc.o runner.ispc && 7 | clang++ -I/usr/local/bin -pthread -std=c++11 -lm -pthread -O3 -o httpd httpd.ispc.o ../tasksys.cpp httpd_ispc.cpp && 8 | sh run.sh 9 | # 367 10 | # HTTP/1.1 200 OK 11 | # Content-Type: text/plain 12 | # 13 | # Hello, World! 14 | # Host: localhost:9000 15 | # User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0 16 | # Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 17 | # Accept-Language: en-US,en;q=0.5 18 | # Accept-Encoding: gzip, deflate 19 | # Connection: keep-alive 20 | # Upgrade-Insecure-Requests: 21 | # 22 | # Elapsed: 2848 ms 23 | # Million requests per second: 11.506 24 | # 25 | # 84.56user 0.44system 0:02.86elapsed 2965%CPU (0avgtext+0avgdata 98164maxresident)k 26 | # 0inputs+0outputs (0major+23821minor)pagefaults 0swaps 27 | ``` 28 | 29 | If you want to do a full build: 30 | 31 | ```bash 32 | (cd .. && sh build_ispc_int.sh) 33 | # Do the type change in httpd.ispc, changing int to int8 in the request and response buffers. 
34 | sh run.sh 35 | ``` 36 | -------------------------------------------------------------------------------- /docker/spirv-runner/Makefile: -------------------------------------------------------------------------------- 1 | TARGET := TEST 2 | 3 | builtins.o: /usr/local/src/ispc-wasm/builtins/builtins.c 4 | emcc -DWASM_IMPLEMENTATION /usr/local/src/ispc-wasm/builtins/builtins.c -O3 -s EXPORTED_FUNCTIONS='["___wasm_do_print", "___wasm_clock"]' -c -o builtins.o 5 | 6 | tasksys.o: /usr/local/src/ispc-wasm/examples/tasksys.cpp 7 | emcc -DWASM -DISPC_USE_PTHREADS -s USE_PTHREADS=1 /usr/local/src/ispc-wasm/examples/tasksys.cpp -I./ -O2 -c -o tasksys.o 8 | 9 | ispcRunner.o: ispcRunner.cpp 10 | emcc -DWASM ispcRunner.cpp -I./ -O3 -s EXPORTED_FUNCTIONS='["_main", "_run"]' -c -o ispcRunner.o -msimd128 -s SIMD=1 11 | 12 | runner.ispc.o: program.comp.glsl 13 | glslangValidator -V -o program.spv program.comp.glsl 14 | spirv-cross-ispc --ispc --output program.ispc.raw program.spv 15 | ruby fix_ispc_input_output_order < program.ispc.raw > program.ispc 16 | ispc runner.ispc -O2 --target=wasm-i32x4 -o runner.ispc.o 17 | 18 | build: builtins.o tasksys.o ispcRunner.o runner.ispc.o 19 | emcc -O3 ispcRunner.o builtins.o tasksys.o runner.ispc.o -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=64 -s TOTAL_MEMORY=268435456 -o $(TARGET).html -msimd128 -s SIMD=1 -s EXPORTED_FUNCTIONS='["_main", "_run"]' -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]' 20 | 21 | all: builtins.o tasksys.o ispcRunner.o runner.ispc.o build 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SOURCES := $(wildcard src/*.comp) 2 | SPIRV := $(SOURCES:.comp=.spv) 3 | CPP_INTERFACE := $(SOURCES:.comp=.spv.cpp) 4 | CPP_DRIVER := $(SOURCES:.comp=.cpp) 5 | EXECUTABLES := $(SOURCES:.comp=.html) 6 | OBJECTS := $(CPP_DRIVER:.cpp=.o) $(CPP_INTERFACE:.cpp=.o) 7 | 8 | TOTAL_MEMORY := 67108864 9 | TOTAL_THREADS 
:= 16 10 | USE_THREADS := 1 11 | USE_SIMD := 0 12 | 13 | ifeq ($(USE_THREADS), 1) 14 | THREAD_FLAGS := -pthread -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=$(TOTAL_THREADS) 15 | endif 16 | ifeq ($(USE_SIMD), 1) 17 | SIMD_FLAGS := -msimd128 -s SIMD=1 18 | endif 19 | 20 | CXXFLAGS += -std=c++11 -Iinclude -Isrc -I/usr/local/include -O3 -s WASM=1 $(THREAD_FLAGS) -s TOTAL_MEMORY=$(TOTAL_MEMORY) -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]' $(SIMD_FLAGS) 21 | LDFLAGS += -lm -O3 -s WASM=1 $(THREAD_FLAGS) -s TOTAL_MEMORY=$(TOTAL_MEMORY) -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]' $(SIMD_FLAGS) 22 | 23 | all: $(EXECUTABLES) 24 | 25 | %.spv: %.comp 26 | glslangValidator -V -o $@ $< 27 | 28 | %.spv.cpp: %.spv 29 | spirv-cross --cpp --output $@ $< 30 | 31 | %.o: %.cpp 32 | $(CXX) -c -o $@ $< $(CXXFLAGS) 33 | 34 | %.html: %.o %.spv.o 35 | $(CXX) -o $@ $^ $(LDFLAGS) 36 | 37 | clean: 38 | $(RM) -f $(EXECUTABLES) $(SPIRV) $(CPP_INTERFACE) $(OBJECTS) 39 | 40 | .PHONY: clean 41 | .SECONDARY: 42 | 43 | -------------------------------------------------------------------------------- /spirv-io/examples/memalloc.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | ThreadLocalCount = 4; 4 | ThreadGroupCount = 4; 5 | 6 | void main() { 7 | int64_t ptr; 8 | alloc_t res = malloc(8,8); 9 | 10 | awaitIO(memAlloc(100, res)); 11 | ptr = i64heap[res.x/8]; 12 | string hello = `Thread ${ThreadId} says: Hello, CPU!`; 13 | awaitIO(memWrite(ptr, hello)); 14 | alloc_t buf = malloc(strLen(hello)); 15 | string s = awaitIO(memRead(ptr, buf)); 16 | println(s); 17 | awaitIO(memFree(ptr)); 18 | 19 | if (ThreadId == 0) { 20 | // Allocate a 30 GB buffer 21 | awaitIO(memAlloc(30000000000L, res)); 22 | ptr = i64heap[res.x/8]; 23 | // Write something every 1 MB 24 | for (int64_t i = 0; i < 30000000000L; i+=1000000L) { 25 | FREE_ALL( awaitIO(memWrite(ptr + i, str(i))) ); 26 | } 27 | // Test that the writes succeeded 28 | for (int64_t i = 0; i < 30000000000L; 
i+=1000000L) { 29 | FREE_ALL( 30 | string num = str(i); 31 | string rd = awaitIO(memRead( ptr + i, malloc(strLen(num)) )); 32 | if (!strEq(num, rd)) println(`Roundtrip failed at ${i}: ${num} != ${rd}`); 33 | ) 34 | } 35 | println("Read-write roundtrips successful to a 30 GB buffer"); 36 | awaitIO(memFree(ptr)); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /http_shader/cpp/README.md: -------------------------------------------------------------------------------- 1 | C++ version of the key-value store shader. Compiled to C++ with spirv-cross. 2 | 3 | The atomics implementation seems to not be working too well. 4 | 5 | Build and run: 6 | 7 | ```bash 8 | (cd .. && sh build_cpp.sh) 9 | sh run.sh 10 | # 200 OK HTTP/1.1 11 | # content-type: text/plain 12 | # 13 | # OK. 14 | # 200 OK HTTP/1.1 15 | # content-type: text/html 16 | # 17 | # This is document number 1. 18 | # 200 OK HTTP/1.1 19 | # content-type: text/plain 20 | # 21 | # BLK 22 | # 200 OK HTTP/1.1 23 | # content-type: text/html 24 | # 25 | # This is document number 3. 26 | # 200 OK HTTP/1.1 27 | # content-type: text/plain 28 | # 29 | # OK. 30 | # 200 OK HTTP/1.1 31 | # content-type: text/html 32 | # 33 | # This is document number 5. 34 | # 200 OK HTTP/1.1 35 | # content-type: text/plain 36 | # 37 | # OK. 38 | # 200 OK HTTP/1.1 39 | # content-type: text/html 40 | # 41 | # This is 2067 spam-post 2067 number 2067. 42 | # 200 OK HTTP/1.1 43 | # content-type: text/plain 44 | # 45 | # BLK 46 | # 200 OK HTTP/1.1 47 | # content-type: text/html 48 | # 49 | # This is document number 9. 
50 | # 51 | # Elapsed: 3180 ms 52 | # Million requests per second: 16.487 53 | # 54 | # 40.38user 0.91system 0:03.65elapsed 1129%CPU (0avgtext+0avgdata 1576176maxresident)k 55 | # 0inputs+0outputs (0major+393371minor)pagefaults 0swaps 56 | ``` 57 | -------------------------------------------------------------------------------- /spirv-io/examples/wait_for_stdin.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | ThreadGroupCount = 1; 5 | ThreadLocalCount = 1; 6 | 7 | const int s_Init = 0; 8 | const int s_Reading = 1; 9 | 10 | const int a_Read = 0; 11 | 12 | void main() { 13 | /* 14 | Why not the easy way[1]? 15 | 16 | Because GPUs hang while waiting for IO and the driver kills the program after a few seconds. 17 | That said, making the below Just Work would be great. 18 | Compile awaitIO into "exit program with RERUN_ON_IO". 19 | Store and load IOs automatically. 20 | 21 | [1] The easy way 22 | println("What's your name?"); 23 | string name = awaitIO(readLine(stdin, malloc(256))); 24 | println(concat("Hello, ", name, "!")); 25 | */ 26 | 27 | stateMachine m = loadStateMachine(s_Init); 28 | rerunProgram = RERUN_ON_IO; 29 | switch (getState(m)) { 30 | case s_Init: 31 | println("What's your name?"); 32 | setAttr(m, a_Read, readLine(stdin, malloc(256))); 33 | setState(m, s_Reading); 34 | break; 35 | 36 | case s_Reading: 37 | io r = getIOAttr(m, a_Read); 38 | if (pollIO(r)) { 39 | rerunProgram = NO_RERUN; 40 | string name = awaitIO(r); 41 | println(concat("Hello, ", name, "!")); 42 | return; // Done, exit program. 
43 | } 44 | break; 45 | } 46 | saveStateMachine(m); 47 | } 48 | -------------------------------------------------------------------------------- /spirv-io/examples/cat.glsl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gls 2 | 3 | ThreadLocalCount = 32; 4 | ThreadGroupCount = 1; 5 | 6 | HeapSize = 65536; 7 | FromIOSize = 65536; 8 | ToIOSize = 65536; 9 | 10 | #include 11 | 12 | shared int eof; 13 | 14 | void main() { 15 | 16 | int64_t blockSize = int64_t(HeapSize); 17 | 18 | int argc = arrLen(argv); 19 | for (int i = 1; i < argc; i++) { 20 | eof = 0; 21 | string filename = aGet(argv, i); 22 | int64_t block = 0; 23 | barrier(); 24 | while (eof == 0) { 25 | FREE( 26 | string res; 27 | FREE_IO( 28 | int64_t off = (block * int64_t(ThreadCount) + int64_t(ThreadId)) * blockSize; 29 | 30 | io r = read(filename, off, size_t(blockSize), malloc(size_t(blockSize))); 31 | barrier(); 32 | 33 | res = awaitIO(r); 34 | barrier(); 35 | ) 36 | FREE_IO( 37 | for (int i = 0; i < ThreadCount; i++) { 38 | barrier(); 39 | if (i == ThreadId && strLen(res) > 0) { 40 | print(res); 41 | } 42 | } 43 | 44 | if (strLen(res) < size_t(blockSize)) atomicAdd(eof, 1); 45 | 46 | block++; 47 | barrier(); 48 | ) 49 | ) 50 | } 51 | } 52 | 53 | } 54 | 55 | -------------------------------------------------------------------------------- /spirv-io/lib/stat.glsl: -------------------------------------------------------------------------------- 1 | struct Nanotime { 2 | uint64_t tv_sec; 3 | uint64_t tv_nsec; 4 | }; 5 | const int32_t NanotimeSize = 16; 6 | 7 | struct Stat { 8 | Nanotime st_atim; 9 | Nanotime st_mtim; 10 | Nanotime st_ctim; 11 | 12 | uint64_t st_ino; 13 | uint64_t st_size; 14 | uint64_t st_blocks; 15 | 16 | uint32_t st_dev; 17 | uint32_t st_mode; 18 | uint32_t st_nlink; 19 | uint32_t st_uid; 20 | uint32_t st_gid; 21 | uint32_t st_rdev; 22 | uint32_t st_blksize; 23 | 24 | int32_t error; 25 | }; 26 | const int32_t StatSize = 3 * NanotimeSize + 
// Decode a serialized Stat record from the heap bytes referenced by `s`.
//
// The record is StatSize (104) bytes laid out in field order: three Nanotime
// pairs (atim, mtim, ctim), three 64-bit counters, seven 32-bit fields and a
// trailing 32-bit error code. If `s` is shorter than StatSize, only `error`
// is set (to -1) and the remaining fields are left unset.
Stat initStat(string s) {
    Stat result;

    if (strLen(s) < StatSize) {
        result.error = -1;
        return result;
    }

    // Every field lives at a fixed byte offset from the start of the string.
    ptr_t base = s.x;

    // Timestamps: 3 x (sec, nsec), 8 bytes each.
    result.st_atim.tv_sec  = readU64heap(base);
    result.st_atim.tv_nsec = readU64heap(base + 8);
    result.st_mtim.tv_sec  = readU64heap(base + 16);
    result.st_mtim.tv_nsec = readU64heap(base + 24);
    result.st_ctim.tv_sec  = readU64heap(base + 32);
    result.st_ctim.tv_nsec = readU64heap(base + 40);

    // 64-bit counters.
    result.st_ino    = readU64heap(base + 48);
    result.st_size   = readU64heap(base + 56);
    result.st_blocks = readU64heap(base + 64);

    // 32-bit fields.
    result.st_dev     = readU32heap(base + 72);
    result.st_mode    = readU32heap(base + 76);
    result.st_nlink   = readU32heap(base + 80);
    result.st_uid     = readU32heap(base + 84);
    result.st_gid     = readU32heap(base + 88);
    result.st_rdev    = readU32heap(base + 92);
    result.st_blksize = readU32heap(base + 96);

    // Error code reported by the producer of the record.
    result.error = readI32heap(base + 100);

    return result;
}
15 | */ 16 | 17 | #ifndef SPIRV_CROSS_IMAGE_HPP 18 | #define SPIRV_CROSS_IMAGE_HPP 19 | 20 | #ifndef GLM_SWIZZLE 21 | #define GLM_SWIZZLE 22 | #endif 23 | 24 | #ifndef GLM_FORCE_RADIANS 25 | #define GLM_FORCE_RADIANS 26 | #endif 27 | 28 | #include 29 | 30 | namespace spirv_cross 31 | { 32 | template 33 | struct image2DBase 34 | { 35 | virtual ~image2DBase() = default; 36 | inline virtual T load(glm::ivec2 coord) const 37 | { 38 | return T(0, 0, 0, 1); 39 | } 40 | inline virtual void store(glm::ivec2 coord, const T &v) 41 | { 42 | } 43 | }; 44 | 45 | typedef image2DBase image2D; 46 | typedef image2DBase iimage2D; 47 | typedef image2DBase uimage2D; 48 | 49 | template 50 | inline T imageLoad(const image2DBase &image, glm::ivec2 coord) 51 | { 52 | return image.load(coord); 53 | } 54 | 55 | template 56 | void imageStore(image2DBase &image, glm::ivec2 coord, const T &value) 57 | { 58 | image.store(coord, value); 59 | } 60 | } 61 | 62 | #endif 63 | -------------------------------------------------------------------------------- /docker/spirv-runner/runner.ispc: -------------------------------------------------------------------------------- 1 | #include "program.ispc" 2 | 3 | export void runner_workgroup(uniform int numWorkGroups[3], uniform int workGroupID[3], uniform struct inputs& input, uniform struct outputs& output) 4 | { 5 | uniform int3 gl_NumWorkGroups = int3(numWorkGroups[0], numWorkGroups[1], numWorkGroups[2]); 6 | uniform int3 gl_WorkGroupID = int3(workGroupID[0], workGroupID[1], workGroupID[2]); 7 | 8 | // Vectorise the workgroup 9 | foreach(lz = 0 ... gl_WorkGroupSize.z, ly = 0 ... gl_WorkGroupSize.y, lx = 0 ... 
// Generate a GLSL test program from the `/*T ... */` comments of a source file.
//
// Usage: node gls_generate_tests.js <source.glsl>
//
// Every `/*T ... */` comment becomes a `void test_<name>()` function, where
// <name> is taken from the code following the comment: the text between the
// first whitespace-delimited token and the next `(`, i.e. normally the name of
// the function the comment documents. Inside the comment, lines of the form
// `lhs <op> rhs;` (op: < <= > >= == !=) are rewritten to `assert(lhs <op> rhs);`.
// The generated program, including a main() that runs every test inside
// FREE_ALL(), is printed to stdout.
const fs = require('fs');

const sourcePath = process.argv[2];
if (!sourcePath) {
    // Fail with a usage message rather than the TypeError readFileSync(undefined) throws.
    console.error('Usage: gls_generate_tests.js <source.glsl>');
    process.exit(1);
}

const source = fs.readFileSync(sourcePath).toString();

// Test function name -> generated GLSL source, in discovery order.
const testFuncs = {};

// Splitting on a regex with capture groups keeps the delimiters in the result
// and yields `undefined` for the group that did not match. (No sticky flag is
// needed here: split() applies its own.)
const segs = source.split(/(\/\*T)|(\*\/)/);

const comparisonLine = /^\s*(\S+)\s*(<=?|>=?|==|!=)\s*([^;]+);\s*$/mg;
const declaredName = /^\s*\S+\s+([^(]+)/m;

let inTest = false;
let testBody = '';
for (let i = 0; i < segs.length; i++) {
    const seg = segs[i];
    if (seg === '/*T') {
        inTest = true;
        testBody = '';
    } else if (seg === '*/' && inTest) {
        inTest = false;
        // The segment right after the closing delimiter is the code that follows the comment.
        const nextSeg = segs[i + 1];
        if (nextSeg) {
            const decl = nextSeg.match(declaredName);
            if (decl) {
                let name = 'test_' + decl[1].trim();
                // Several test comments may precede functions with the same name.
                while (testFuncs[name]) {
                    name += '_';
                }
                testBody = testBody.replace(
                    comparisonLine,
                    (line, lv, cmp, rv) => `    assert(${lv} ${cmp} ${rv});`
                );
                testFuncs[name] = `void ${name}() {\n${testBody}\n}`;
            }
        }
    } else if (inTest && seg) {
        testBody = seg;
    }
}

// NOTE(review): the first #include below has no header name in this copy of the
// file; it looks like an angle-bracket include was lost - confirm against the repository.
const testSource = [`
#include
#include "${sourcePath}"

ThreadLocalCount = 1;
ThreadGroupCount = 1;

HeapSize = 16777216;
ToIOSize = 16777216;
FromIOSize = 16777216;
`];

let testMain = `void main() {
`;
for (const funcName in testFuncs) {
    testMain += `    FREE_ALL(${funcName}());\n`;
    testSource.push(testFuncs[funcName]);
}
testMain += '}';
testSource.push(testMain);

console.log(testSource.join("\n\n"));
// Demo: write a small C library to disk, compile it with the system C compiler,
// load it with dlopenSync and call into it, both directly through dlcallSync
// and through the DLFUNC_* generated wrappers `hello` and `sub`.
void main() {
    // The generated C source must include <stdio.h>: it calls printf, and a bare
    // `#include` line is not a valid directive, so the compile step would fail.
    writeSync("hello.c", "#include <stdio.h>\nvoid hello(char* s){printf(\"Hello, %s!\\n\",s);}\nvoid sub(int* v, unsigned int vlen, int* res, unsigned int reslen) { res[0] = v[0]-v[1]; }");
    awaitIO(runCmd("cc --shared -o hello.so hello.c"));

    // `lib` is the file-level handle that the DLFUNC_* wrappers read.
    lib = dlopenSync("./hello.so");

    // Direct call; the string is explicitly NUL-terminated for the C side.
    dlcallSync(lib, "hello", "GLSL\u0000");

    int32_t a = 7, b = 12;
    int32_t subResult = dlcallSync_i32(lib, "sub", a, b);
    println(concat(str(a), " - ", str(b), " = ", str(subResult)));

    // Same calls through the macro-generated wrappers.
    hello("GLSL macro");
    a = 8829;
    b = 3741;
    println(concat(str(a), " - ", str(b), " = ", str(sub(a, b))));
}
succeeded.\n", j, k); 37 | } 38 | } 39 | 40 | printf("\nElapsed: %ld ms\n", std::chrono::duration_cast(end - begin).count()); 41 | printf("Test runs per second: %.0f\n\n", (float)(threadCount * i) / (0.000001 * std::chrono::duration_cast(end - begin).count())); 42 | 43 | } 44 | }; 45 | 46 | int main(int argc, char *argv[]) 47 | { 48 | App app; 49 | 50 | try 51 | { 52 | app.run("test_string.spv", argc, argv); 53 | } 54 | catch (const std::runtime_error &e) 55 | { 56 | printf("%s\n", e.what()); 57 | app.cleanup(); 58 | return EXIT_FAILURE; 59 | } 60 | 61 | return app.exitCode; 62 | } 63 | -------------------------------------------------------------------------------- /http_shader/ispc_char/runner.ispc: -------------------------------------------------------------------------------- 1 | const varying int int_cast(const varying unsigned int32 v) { return (int32)v; } 2 | 3 | #include "httpd.ispc" 4 | 5 | 6 | export void runner_dispatch(uniform int work_group_ID[3], uniform int work_groups[3], 7 | uniform struct outputBuffer& v_656, uniform struct inputBuffer& v_613, uniform struct heapBuffer& _901 8 | ) 9 | { 10 | uniform int3 gl_NumWorkGroups = int3(work_groups[0], work_groups[1], work_groups[2]); 11 | uniform int3 gl_WorkGroupID = int3(work_group_ID[0], work_group_ID[1], work_group_ID[2]); 12 | 13 | // Vectorise the workgroup 14 | foreach_tiled(lx = 0 ... 
// Render the Mandelbrot set into `imageData`.
//
// dimensions[0] / dimensions[1] hold the image width / height. Each invocation
// renders a WORKER_SIZE x WORKER_SIZE tile of pixels whose origin is
// gl_GlobalInvocationID.xy * WORKER_SIZE; pixels outside the image are skipped.
void main() {

    int width = int(dimensions[0]);
    int height = int(dimensions[1]);

    for (uint iy = 0; iy < WORKER_SIZE; iy++)
    for (uint ix = 0; ix < WORKER_SIZE; ix++)
    {

        uint px = gl_GlobalInvocationID.x * WORKER_SIZE + ix;
        uint py = gl_GlobalInvocationID.y * WORKER_SIZE + iy;

        /*
        In order to fit the work into workgroups, some unnecessary threads are launched.
        Their out-of-range pixels are skipped here.
        */
        if(px >= width || py >= height)
            continue;

        float x = float(px) / float(width);
        float y = float(py) / float(height);

        /*
        What follows is code for rendering the mandelbrot set.
        */
        vec2 uv = vec2(x,y);
        float n = 0.0;
        vec2 c = vec2(-.445, 0.0) + (uv - 0.5)*(2.0+ 1.7*0.2 ),
             z = vec2(0.0);
        const int M = 128;
        for (int i = 0; i < M; i++)
        {
            z = vec2(z.x*z.x - z.y*z.y, 2.*z.x*z.y) + c;
            // An orbit is only guaranteed to diverge once |z| > 2, i.e. |z|^2 > 4.
            // A smaller bailout ejects points that belong to the set (c = -1.5
            // would leave on the very first iteration).
            if (dot(z, z) > 4.0) break;
            n++;
        }

        // we use a simple cosine palette to determine color:
        // http://iquilezles.org/www/articles/palettes/palettes.htm
        float t = n / float(M);
        vec3 d = vec3(0.3, 0.3 ,0.5);
        vec3 e = vec3(-0.2, -0.3 ,-0.5);
        vec3 f = vec3(2.1, 2.0, 3.0);
        vec3 g = vec3(0.0, 0.1, 0.0);
        vec4 color = vec4( d + e*cos( 6.28318*(f*t+g) ) ,1.0);

        // store the rendered mandelbrot set into a storage buffer:
        imageData[width * py + px].value = color;

    }
}
7 | 8 | Access to the values is protected by a per-object atomic "mutex". Only one write request can execute at a time; other simultaneous writes are rejected. 9 | If you try to write while you have readers, the write will fail. If you try to read when a write is in progress, the read will fail. 10 | 11 | ## Build & run 12 | 13 | ```bash 14 | (cd .. && sh build_vulkan.sh) 15 | sh run.sh 16 | # 200 OK HTTP/1.1 17 | # content-type: text/plain 18 | # 19 | # BLK 20 | # 200 OK HTTP/1.1 21 | # content-type: text/html 22 | # 23 | # This is 20481 spam-post 20481 number 20481. 24 | # 200 OK HTTP/1.1 25 | # content-type: text/plain 26 | # 27 | # BLK 28 | # 200 OK HTTP/1.1 29 | # content-type: text/html 30 | # 31 | # This is 97283 spam-post 97283 number 97283. 32 | # 200 OK HTTP/1.1 33 | # content-type: text/plain 34 | # 35 | # BLK 36 | # 200 OK HTTP/1.1 37 | # content-type: text/html 38 | # 39 | # This is 286725 spam-post 286725 number 286725. 40 | # 200 OK HTTP/1.1 41 | # content-type: text/plain 42 | # 43 | # BLK 44 | # 200 OK HTTP/1.1 45 | # content-type: text/html 46 | # 47 | # This is 138247 spam-post 138247 number 138247. 48 | # 200 OK HTTP/1.1 49 | # content-type: text/plain 50 | # 51 | # BLK 52 | # 200 OK HTTP/1.1 53 | # content-type: text/html 54 | # 55 | # This is 496649 spam-post 496649 number 496649. 56 | # 57 | # Elapsed: 6731 ms 58 | # Million requests per second: 7.789 59 | # 60 | # 0.27user 1.03system 0:08.06elapsed 16%CPU (0avgtext+0avgdata 3217080maxresident)k 61 | # 0inputs+40outputs (2major+267993minor)pagefaults 0swaps 62 | ``` 63 | -------------------------------------------------------------------------------- /http_shader/ispc_ivec4/README.md: -------------------------------------------------------------------------------- 1 | # A small in-memory key-value store 2 | 3 | Send an HTTP `GET /xxxxxxx` to get the value at key `xxxxxxx`. 4 | 5 | Send a `POST /xxxxxxx` to set the value at key `xxxxxxx` to the POST body.
The start of the body should be the content-type, followed by `\r\n\r\n`. 6 | You can add other headers after the content-type if you want. See `httpd_ispc_ivec4.cpp` for an example. 7 | 8 | Access to the values is protected by a per-object atomic "mutex". Only one write request can execute at a time; other simultaneous writes are rejected. 9 | If you try to write while you have readers, the write will fail. If you try to read when a write is in progress, the read will fail. 10 | 11 | ## Build & run 12 | 13 | ```bash 14 | (cd .. && sh build_ispc_ivec4.sh) 15 | sh run.sh 16 | # 200 OK HTTP/1.1 17 | # content-type: text/plain 18 | # 19 | # BLK 20 | # 200 OK HTTP/1.1 21 | # content-type: text/html 22 | # 23 | # This is 150281 spam-post 150281 number 150281. 24 | # 200 OK HTTP/1.1 25 | # content-type: text/plain 26 | # 27 | # BLK 28 | # 200 OK HTTP/1.1 29 | # content-type: text/html 30 | # 31 | # This is 248863 spam-post 248863 number 248863. 32 | # 200 OK HTTP/1.1 33 | # content-type: text/plain 34 | # 35 | # OK. 36 | # 200 OK HTTP/1.1 37 | # content-type: text/html 38 | # 39 | # This is 18435 spam-post 18435 number 18435. 40 | # 200 OK HTTP/1.1 41 | # content-type: text/plain 42 | # 43 | # BLK 44 | # 200 OK HTTP/1.1 45 | # content-type: text/html 46 | # 47 | # This is 81927 spam-post 81927 number 81927. 48 | # 200 OK HTTP/1.1 49 | # content-type: text/plain 50 | # 51 | # BLK 52 | # 200 OK HTTP/1.1 53 | # content-type: text/html 54 | # 55 | # This is 3079 spam-post 3079 number 3079.
// Attach a stateMachine to the 32-word header that starts at the current heapPtr.
//
// Header layout, in 32-bit words relative to heapPtr/4:
//   0       magic 0x57A7E0FC (marks the header as initialised)
//   1       current state
//   2..27   attribute slots
//   29..31  saved heapPtr / fromIOPtr / toIOPtr
//
// On the first call (magic absent) the header is initialised: the state is set
// to `initialState`, the attribute slots are zeroed, and the saved pointers are
// set to just past the header (heap) and to the current IO pointers. On every
// call the three global pointers are then loaded from the header.
stateMachine loadStateMachine(int initialState) {
    ptr_t base = heapPtr/4;

    stateMachine sm = stateMachine(
        base + 1,
        stringArray(base + 2, base + 28),
        base + 29,
        base + 30,
        base + 31
    );

    bool alreadyInitialised = (i32heap[base] == 0x57A7E0FC);
    if (!alreadyInitialised) {
        i32heap[base] = 0x57A7E0FC;
        i32heap[sm.statePtr] = initialState;

        for (ptr_t slot = sm.attrs.x; slot < sm.attrs.y; slot++) {
            i32heap[slot] = 0;
        }

        // The usable heap starts right after the 32-word header.
        i32heap[sm.heapPtrPtr] = heapPtr + 32*4;
        i32heap[sm.fromIOPtrPtr] = fromIOPtr;
        i32heap[sm.toIOPtrPtr] = toIOPtr;
    }

    // Load the allocation pointers stored in the header.
    heapPtr = i32heap[sm.heapPtrPtr];
    fromIOPtr = i32heap[sm.fromIOPtrPtr];
    toIOPtr = i32heap[sm.toIOPtrPtr];

    return sm;
}
// Run one workgroup of the translated shader.
//
// work_group_ID - x/y/z index of the workgroup to execute
// work_groups   - x/y/z number of workgroups in the dispatch
// inputs, outputs, heap - buffers forwarded to httpd_ispc_main
//
// Only the x dimension of the workgroup is iterated: each lx in
// [0, gl_WorkGroupSize.x) becomes one local invocation with y = z = 0.
export void runner_dispatch(uniform int work_group_ID[3], uniform int work_groups[3],
    uniform struct inputBuffer& inputs,
    uniform struct outputBuffer& outputs,
    uniform struct heapBuffer& heap
    )
{
    // Repack the plain arrays as int3 values named after the GLSL built-ins.
    uniform int3 gl_NumWorkGroups = int3(work_groups[0], work_groups[1], work_groups[2]);
    uniform int3 gl_WorkGroupID = int3(work_group_ID[0], work_group_ID[1], work_group_ID[2]);

    // Vectorise the workgroup
    foreach_tiled(lx = 0 ... gl_WorkGroupSize.x)
    {
        varying int3 gl_LocalInvocationID = int3(lx, 0, 0);
        varying int3 gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID;
        // Flattened local index (z-major, then y, then x). Neither this value nor
        // gl_NumWorkGroups is passed to httpd_ispc_main below.
        varying int gl_LocalInvocationIndex = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x;

        // NOTE(review): the argument order follows the httpd_ispc_main signature in the
        // generated httpd.ispc (not visible here) - re-check it whenever that file is regenerated.
        httpd_ispc_main(
            gl_GlobalInvocationID,
            outputs,
            heap,
            inputs
        );
    }
}
namespace spirv_cross
{
// Reusable spinning barrier for a fixed number of threads.
//
// Call set_release_divisor(n) with the number of participating threads before
// the first wait(). Each wait() blocks (yielding the CPU) until n threads have
// arrived for the current iteration, after which the barrier can be used
// again. The barrier itself performs no memory ordering; use memoryBarrier()
// when the surrounding code needs a fence.
class Barrier
{
public:
	Barrier()
	{
		reset_counter();
	}

	// Set the number of wait() calls that releases one barrier iteration.
	void set_release_divisor(unsigned divisor)
	{
		this->divisor = divisor;
	}

	// Full sequentially-consistent fence.
	static inline void memoryBarrier()
	{
		std::atomic_thread_fence(std::memory_order_seq_cst);
	}

	// Return the barrier to its initial state. Must not race with wait().
	void reset_counter()
	{
		count.store(0);
		iteration.store(0);
	}

	// Block until `divisor` threads have called wait() for this iteration.
	void wait()
	{
		unsigned target_iteration = iteration.load(std::memory_order_relaxed) + 1;
		// Overflows cleanly.
		unsigned target_count = divisor * target_iteration;

		// Barriers don't enforce memory ordering.
		// Be as relaxed about the barrier as we possibly can!
		unsigned c = count.fetch_add(1u, std::memory_order_relaxed);

		if (c + 1 == target_count)
		{
			// Last thread to arrive: publish the new iteration and release the others.
			iteration.store(target_iteration, std::memory_order_relaxed);
		}
		else
		{
			// If we have more threads than the CPU, don't hog the CPU for very long periods of time.
			while (iteration.load(std::memory_order_relaxed) != target_iteration)
				std::this_thread::yield();
		}
	}

private:
	unsigned divisor = 1;
	// Total number of wait() calls since the last reset.
	std::atomic<unsigned> count;
	// Number of completed barrier iterations since the last reset.
	std::atomic<unsigned> iteration;
};
}
// Example: download tips.csv with curl, then report the mean tip percentage
// per party size.
//
// Only thread 0 does any work. For every data row (row 0, the header, is
// skipped) with exactly 7 fields, column 0 is read as total_bill, column 1 as
// tip and column 6 as size. Two hash tables keyed by size hold the row count
// and the running mean of tip_pct = 100 * tip / total_bill. The means are
// finally logged in ascending size order.
void main() {
    if (ThreadId == 0) {
        string url = "https://github.com/plotly/datasets/raw/master/tips.csv";
        log(concat("Downloading ", url));
        // curl -O writes the file under its remote name into the working directory.
        awaitIO(runCmd(concat("curl -s -L -O ", url)));
        log("Downloaded");
        uint64_t sz = statSync("tips.csv").st_size;
        log(concat("File size: ", str(sz)));
        string csv = readSync("tips.csv", malloc(sz));
        stringArray lines = split(csv, '\n');
        i32map counts = i32hAlloc(16);
        // NOTE(review): `means` is declared as i32map but created with f32hAlloc and
        // accessed through the f32h* functions - confirm the two map types are interchangeable.
        i32map means = f32hAlloc(16);
        for (int i = 1; i < arrLen(lines); i++) {
            int32_t count;
            float mean;
            // size stays -1 when the row does not have 7 fields; such rows are not recorded.
            int32_t size = -1;
            // Per-row parsing happens inside FREE(...); the results needed afterwards
            // (size, count, mean) are declared outside of it. The update
            //   mean = (mean * (count-1) + tip_pct) / count
            // is the incremental form of the arithmetic mean.
            FREE(
                stringArray fields = split(aGet(lines, i), ',');
                if (arrLen(fields) == 7) {
                    float total_bill = parsef32(aGet(fields, 0));
                    float tip = parsef32(aGet(fields, 1));
                    size = parsei32(aGet(fields, 6));
                    float tip_pct = 100.0 * tip / total_bill;
                    if (!i32hGet(counts, size, count)) {
                        count = 0;
                        mean = 0.0;
                    } else {
                        f32hGet(means, size, mean);
                    }
                    count += 1;
                    mean = (mean * float(count-1) + tip_pct) / float(count);
                }
            );
            if (size != -1) {
                i32hSet(counts, size, count);
                f32hSet(means, size, mean);
            }
        }
        // Report in ascending size order.
        i32array sizes = f32hKeys(means);
        i32sort(sizes);
        for (int i = 0; i < i32len(sizes); i++) {
            int32_t size = i32get(sizes, i);
            float mean;
            if (f32hGet(means, size, mean)) {
                FREE_ALL( log(concat("size: ", str(size), " tip_pct: ", str(mean))) );
            }
        }
    }
}
// Exported entry point: execute the whole dispatch.
//
// work_groups - x/y/z number of workgroups; one runner_task is launched per
//               workgroup, so the launch grid is work_groups[0] x [1] x [2].
// v_613, v_656, _901, reqBuf, resBuf - input, output, heap, request and
//               response buffers shared by all tasks.
//
// Note the argument order: this function takes the input buffer before the
// output buffer, while runner_task expects output first, hence the swapped
// v_656 / v_613 in the call below.
export void runner_main(uniform int work_groups[3],
    uniform struct inputBuffer& v_613,
    uniform struct outputBuffer& v_656,
    uniform struct heapBuffer& _901,
    uniform struct requestBuffer& reqBuf,
    uniform struct responseBuffer& resBuf
    )
{
    launch[work_groups[0], work_groups[1], work_groups[2]]
    runner_task(work_groups,
        v_656, v_613, _901, reqBuf, resBuf
    );
}
5 | // 6 | 7 | #ifndef ISPC_TARGETS_KERNEL_PROGRAM_H 8 | #define ISPC_TARGETS_KERNEL_PROGRAM_H 9 | 10 | #include 11 | 12 | 13 | 14 | #ifdef __cplusplus 15 | namespace ispc { /* namespace */ 16 | #endif // __cplusplus 17 | 18 | #ifndef __ISPC_ALIGN__ 19 | #if defined(__clang__) || !defined(_MSC_VER) 20 | // Clang, GCC, ICC 21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s))) 22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s) 23 | #else 24 | // Visual Studio 25 | #define __ISPC_ALIGN__(s) __declspec(align(s)) 26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct 27 | #endif 28 | #endif 29 | 30 | #ifndef __ISPC_STRUCT_inputs__ 31 | #define __ISPC_STRUCT_inputs__ 32 | struct inputs { 33 | float inputData[1]; 34 | }; 35 | #endif 36 | 37 | #ifndef __ISPC_STRUCT_outputs__ 38 | #define __ISPC_STRUCT_outputs__ 39 | struct outputs { 40 | float outputData[1]; 41 | }; 42 | #endif 43 | 44 | 45 | /////////////////////////////////////////////////////////////////////////// 46 | // Functions exported from ispc code 47 | /////////////////////////////////////////////////////////////////////////// 48 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 49 | extern "C" { 50 | #endif // __cplusplus 51 | extern void program_dispatch_all(int32_t * work_groups, struct inputs &_20, struct outputs &_164); 52 | extern void program_dispatch_all_tiled(int32_t * work_groups, struct inputs &_20, struct outputs &_164); 53 | extern void program_dispatch_single(int32_t * work_group_ID, int32_t * work_groups, struct inputs &_20, struct outputs &_164); 54 | extern void program_dispatch_single_tiled(int32_t * work_group_ID, int32_t * work_groups, struct inputs &_20, struct outputs &_164); 55 | extern void program_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z); 56 | extern void runner_main(int32_t * work_groups, struct inputs &input, struct outputs &output); 57 | #if defined(__cplusplus) && (! 
defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 58 | } /* end extern C */ 59 | #endif // __cplusplus 60 | 61 | 62 | #ifdef __cplusplus 63 | } /* namespace */ 64 | #endif // __cplusplus 65 | 66 | #endif // ISPC_TARGETS_KERNEL_PROGRAM_H 67 | -------------------------------------------------------------------------------- /http_shader/ispc_char/httpd.ispc.h: -------------------------------------------------------------------------------- 1 | // 2 | // httpd.ispc.h 3 | // (Header automatically generated by the ispc compiler.) 4 | // DO NOT EDIT THIS FILE. 5 | // 6 | 7 | #ifndef ISPC_HTTPD_ISPC_H 8 | #define ISPC_HTTPD_ISPC_H 9 | 10 | #include 11 | 12 | 13 | 14 | #ifdef __cplusplus 15 | namespace ispc { /* namespace */ 16 | #endif // __cplusplus 17 | 18 | #ifndef __ISPC_ALIGN__ 19 | #if defined(__clang__) || !defined(_MSC_VER) 20 | // Clang, GCC, ICC 21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s))) 22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s) 23 | #else 24 | // Visual Studio 25 | #define __ISPC_ALIGN__(s) __declspec(align(s)) 26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct 27 | #endif 28 | #endif 29 | 30 | #ifndef __ISPC_STRUCT_outputBuffer__ 31 | #define __ISPC_STRUCT_outputBuffer__ 32 | struct outputBuffer { 33 | int32_t outputBytes[]; 34 | }; 35 | #endif 36 | 37 | #ifndef __ISPC_STRUCT_inputBuffer__ 38 | #define __ISPC_STRUCT_inputBuffer__ 39 | struct inputBuffer { 40 | int32_t inputBytes[]; 41 | }; 42 | #endif 43 | 44 | #ifndef __ISPC_STRUCT_heapBuffer__ 45 | #define __ISPC_STRUCT_heapBuffer__ 46 | struct heapBuffer { 47 | int32_t heap[]; 48 | }; 49 | #endif 50 | 51 | #ifndef __ISPC_STRUCT_requestBuffer__ 52 | #define __ISPC_STRUCT_requestBuffer__ 53 | struct requestBuffer { 54 | int32_t request[]; 55 | }; 56 | #endif 57 | 58 | #ifndef __ISPC_STRUCT_responseBuffer__ 59 | #define __ISPC_STRUCT_responseBuffer__ 60 | struct responseBuffer { 61 | int32_t response[]; 62 | }; 63 | #endif 64 | 65 | 66 | 
/////////////////////////////////////////////////////////////////////////// 67 | // Functions exported from ispc code 68 | /////////////////////////////////////////////////////////////////////////// 69 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 70 | extern "C" { 71 | #endif // __cplusplus 72 | extern void httpd_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z); 73 | extern void runner_main(int32_t * work_groups, struct inputBuffer &v_94, struct outputBuffer &v_656, struct heapBuffer &_901); 74 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 75 | } /* end extern C */ 76 | #endif // __cplusplus 77 | 78 | 79 | #ifdef __cplusplus 80 | } /* namespace */ 81 | #endif // __cplusplus 82 | 83 | #endif // ISPC_HTTPD_ISPC_H 84 | -------------------------------------------------------------------------------- /http_shader/ispc_ivec4/httpd.ispc.h: -------------------------------------------------------------------------------- 1 | // 2 | // httpd.ispc.h 3 | // (Header automatically generated by the ispc compiler.) 4 | // DO NOT EDIT THIS FILE. 
5 | // 6 | 7 | #ifndef ISPC_HTTPD_ISPC_H 8 | #define ISPC_HTTPD_ISPC_H 9 | 10 | #include 11 | 12 | 13 | 14 | #ifdef __cplusplus 15 | namespace ispc { /* namespace */ 16 | #endif // __cplusplus 17 | 18 | #ifndef __ISPC_ALIGN__ 19 | #if defined(__clang__) || !defined(_MSC_VER) 20 | // Clang, GCC, ICC 21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s))) 22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s) 23 | #else 24 | // Visual Studio 25 | #define __ISPC_ALIGN__(s) __declspec(align(s)) 26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct 27 | #endif 28 | #endif 29 | 30 | #ifndef __ISPC_STRUCT_outputBuffer__ 31 | #define __ISPC_STRUCT_outputBuffer__ 32 | struct outputBuffer { 33 | int32_t outputBytes[]; 34 | }; 35 | #endif 36 | 37 | #ifndef __ISPC_STRUCT_inputBuffer__ 38 | #define __ISPC_STRUCT_inputBuffer__ 39 | struct inputBuffer { 40 | int32_t inputBytes[]; 41 | }; 42 | #endif 43 | 44 | #ifndef __ISPC_STRUCT_heapBuffer__ 45 | #define __ISPC_STRUCT_heapBuffer__ 46 | struct heapBuffer { 47 | int32_t heap[]; 48 | }; 49 | #endif 50 | 51 | #ifndef __ISPC_STRUCT_requestBuffer__ 52 | #define __ISPC_STRUCT_requestBuffer__ 53 | struct requestBuffer { 54 | int32_t request[]; 55 | }; 56 | #endif 57 | 58 | #ifndef __ISPC_STRUCT_responseBuffer__ 59 | #define __ISPC_STRUCT_responseBuffer__ 60 | struct responseBuffer { 61 | int32_t response[]; 62 | }; 63 | #endif 64 | 65 | 66 | /////////////////////////////////////////////////////////////////////////// 67 | // Functions exported from ispc code 68 | /////////////////////////////////////////////////////////////////////////// 69 | #if defined(__cplusplus) && (! 
defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 70 | extern "C" { 71 | #endif // __cplusplus 72 | extern void httpd_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z); 73 | extern void runner_main(int32_t * work_groups, struct inputBuffer &v_94, struct outputBuffer &v_656, struct heapBuffer &_901); 74 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 75 | } /* end extern C */ 76 | #endif // __cplusplus 77 | 78 | 79 | #ifdef __cplusplus 80 | } /* namespace */ 81 | #endif // __cplusplus 82 | 83 | #endif // ISPC_HTTPD_ISPC_H 84 | -------------------------------------------------------------------------------- /http_shader/ispc_int/httpd.ispc.h: -------------------------------------------------------------------------------- 1 | // 2 | // httpd.ispc.h 3 | // (Header automatically generated by the ispc compiler.) 4 | // DO NOT EDIT THIS FILE. 5 | // 6 | 7 | #ifndef ISPC_HTTPD_ISPC_H 8 | #define ISPC_HTTPD_ISPC_H 9 | 10 | #include 11 | 12 | 13 | 14 | #ifdef __cplusplus 15 | namespace ispc { /* namespace */ 16 | #endif // __cplusplus 17 | 18 | #ifndef __ISPC_ALIGN__ 19 | #if defined(__clang__) || !defined(_MSC_VER) 20 | // Clang, GCC, ICC 21 | #define __ISPC_ALIGN__(s) __attribute__((aligned(s))) 22 | #define __ISPC_ALIGNED_STRUCT__(s) struct __ISPC_ALIGN__(s) 23 | #else 24 | // Visual Studio 25 | #define __ISPC_ALIGN__(s) __declspec(align(s)) 26 | #define __ISPC_ALIGNED_STRUCT__(s) __ISPC_ALIGN__(s) struct 27 | #endif 28 | #endif 29 | 30 | #ifndef __ISPC_STRUCT_outputBuffer__ 31 | #define __ISPC_STRUCT_outputBuffer__ 32 | struct outputBuffer { 33 | int32_t outputBytes[]; 34 | }; 35 | #endif 36 | 37 | #ifndef __ISPC_STRUCT_inputBuffer__ 38 | #define __ISPC_STRUCT_inputBuffer__ 39 | struct inputBuffer { 40 | int32_t inputBytes[]; 41 | }; 42 | #endif 43 | 44 | #ifndef __ISPC_STRUCT_heapBuffer__ 45 | #define __ISPC_STRUCT_heapBuffer__ 46 | struct heapBuffer { 47 | int32_t heap[]; 48 | }; 49 | #endif 50 | 51 | #ifndef 
__ISPC_STRUCT_requestBuffer__ 52 | #define __ISPC_STRUCT_requestBuffer__ 53 | struct requestBuffer { 54 | int32_t request[]; 55 | }; 56 | #endif 57 | 58 | #ifndef __ISPC_STRUCT_responseBuffer__ 59 | #define __ISPC_STRUCT_responseBuffer__ 60 | struct responseBuffer { 61 | int32_t response[]; 62 | }; 63 | #endif 64 | 65 | 66 | /////////////////////////////////////////////////////////////////////////// 67 | // Functions exported from ispc code 68 | /////////////////////////////////////////////////////////////////////////// 69 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 70 | extern "C" { 71 | #endif // __cplusplus 72 | extern void httpd_get_workgroup_size(int32_t &wg_x, int32_t &wg_y, int32_t &wg_z); 73 | extern void runner_main(int32_t * work_groups, struct inputBuffer &v_94, struct outputBuffer &v_656, struct heapBuffer &_901, struct requestBuffer &reqBuf, struct responseBuffer &resBuf); 74 | #if defined(__cplusplus) && (! defined(__ISPC_NO_EXTERN_C) || !__ISPC_NO_EXTERN_C ) 75 | } /* end extern C */ 76 | #endif // __cplusplus 77 | 78 | 79 | #ifdef __cplusplus 80 | } /* namespace */ 81 | #endif // __cplusplus 82 | 83 | #endif // ISPC_HTTPD_ISPC_H 84 | -------------------------------------------------------------------------------- /http_shader/ispc_char/httpd_ispc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015-2017 ARM Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include "httpd.ispc.h" 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #ifndef GLM_FORCE_SWIZZLE 25 | #define GLM_FORCE_SWIZZLE 26 | #endif 27 | 28 | #ifndef GLM_FORCE_RADIANS 29 | #define GLM_FORCE_RADIANS 30 | #endif 31 | 32 | #include 33 | using namespace glm; 34 | using namespace ispc; 35 | 36 | // Build some input data for our compute shader. 37 | #define NUM_WORKGROUPS_X 32 38 | #define NUM_WORKGROUPS_Y 1 39 | 40 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 16 * 512; 41 | 42 | static uint8_t inputs[1024 * requestCount] = {}; 43 | static uint8_t outputs[1024 * requestCount] = {}; 44 | static uint8_t heaps[1024 * requestCount] = {}; 45 | /* Benchmark driver: reads one request (at most 1024-16 bytes) from stdin into the first 1024-byte record (byte count in its first uint32, payload at offset 16), copies that record into every other record, calls runner_main 1000 times, then prints the first record's output and the timing. */ 46 | int main() 47 | { 48 | int bytes = fread(((char*)inputs)+16, 1, 1024-16, stdin); 49 | ((uint32_t*)inputs)[0] = bytes; 50 | for (int i = 1; i < requestCount; i++) { 51 | memcpy((void*)(inputs + 1024 * i), (void*)inputs, 1024); 52 | } 53 | 54 | std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); 55 | for (int j = 0; j < 1000; j++) { 56 | int32_t workgroups[] = {NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1}; 57 | runner_main(workgroups, 58 | *(struct inputBuffer*)inputs, 59 | *(struct outputBuffer*)outputs, 60 | *(struct heapBuffer*)heaps 61 | ); 62 | } 63 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 64 | 65 | for (int i = 0; i < 1; i++) { 66 | printf("%u\n", ((uint32_t*)outputs)[256*i]); /* the length word is unsigned */ 67 | fflush(stdout); /* drain the buffered printf output before the unbuffered write(2) so the two stay in order when stdout is a pipe or file */ write(1, outputs+1024*i+16, ((uint32_t*)outputs)[256*i]); 68 | } 69 | 70 | printf("Elapsed: %ld ms\n", std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count()); 71 | printf("Million requests per second: %.3f\n", 1e-6 * (requestCount * 1000.0) / (0.001 * std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count())); 72 | 73 | return 0; 74 | } 75 |
-------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ispc/ubuntu_16.04:llvm100 2 | 3 | RUN apt update 4 | RUN apt install -y libxml2 emacs-nox 5 | 6 | WORKDIR /usr/local/src/wasm-ld 7 | RUN wget -qO- https://storage.googleapis.com/webassembly/emscripten-releases-builds/linux/65d33d604d3fa0ebe03548378b898fc6608e9cb8/wasm-binaries.tbz2 | tar xj 8 | 9 | WORKDIR /usr/local/src/ispc 10 | RUN git pull 11 | RUN mkdir build; cd build && cmake .. -DWASM_ENABLED=ON && make -j32 12 | ENV PATH="/usr/local/src/ispc/build/bin:/usr/local/src/wasm-ld/install/bin/:${PATH}" 13 | 14 | WORKDIR /usr/local/src/spirv 15 | 16 | RUN apt install -y libglm-dev 17 | 18 | RUN git clone --depth=1 https://github.com/GameTechDev/SPIRV-Cross SPIRV-Cross-ISPC 19 | RUN git clone --depth=1 https://github.com/KhronosGroup/SPIRV-Cross 20 | RUN git clone --depth=1 https://github.com/KhronosGroup/glslang 21 | 22 | ENV CXX="clang++" 23 | ENV CC="clang" 24 | 25 | WORKDIR /usr/local/src/spirv/glslang 26 | RUN mkdir build; cd build && cmake .. -DCMAKE_BUILD_TYPE=Release && make -j32 install 27 | 28 | WORKDIR /usr/local/src/spirv/SPIRV-Cross 29 | RUN make -j32 && cp spirv-cross /usr/bin 30 | 31 | WORKDIR /usr/local/src/spirv/SPIRV-Cross-ISPC 32 | COPY spirv-cross-linux-x86-64 /usr/bin/spirv-cross-ispc 33 | 34 | WORKDIR /usr/local/src 35 | RUN ln -s /usr/bin/python3 /usr/bin/python 36 | RUN git clone --depth=1 https://github.com/aschrein/ispc/ ispc-wasm 37 | 38 | WORKDIR /usr/local/src/ispc-wasm 39 | RUN git pull origin wasm_tests:wasm_tests && git checkout wasm_tests 40 | RUN mkdir build && cd build && cmake .. 
-DWASM_ENABLED=ON && make -j32 ispc 41 | ENV PATH="/usr/local/src/ispc-wasm/build/bin:/usr/local/src/wasm-ld/install/bin/:${PATH}" 42 | 43 | WORKDIR /usr/local/src 44 | RUN git clone https://github.com/emscripten-core/emsdk && cd emsdk && ./emsdk install latest && ./emsdk activate latest 45 | ENV PATH="/usr/local/src/emsdk:/usr/local/src/emsdk/node/12.9.1_64bit/bin:/usr/local/src/emsdk/upstream/emscripten:${PATH}" 46 | 47 | WORKDIR /usr/local/src/spirv-wasm 48 | RUN apt install -y ruby 49 | RUN echo "int main(int argc, char *argv[]) { return 0; }" > test.cpp && emcc test.cpp -o TEST.html -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=16 -s TOTAL_MEMORY=268435456 -msimd128 -s SIMD=1 50 | RUN printf "#include \nusing namespace std;\nint main() { cout << \"foo\"; return 0; }" > test.cpp && em++ -lpthread -fno-exceptions test.cpp -O3 -o TEST.html -s USE_PTHREADS=1 -s PTHREAD_POOL_SIZE=16 -s TOTAL_MEMORY=268435456 -msimd128 -s SIMD=1 -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall"]' 51 | COPY ./spirv-runner . 
52 | RUN make tasksys.o builtins.o 53 | RUN make ispcRunner.o 54 | -------------------------------------------------------------------------------- /spirv-io/src/gls.cpp: -------------------------------------------------------------------------------- 1 | #include "compute_application.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #define MD5_LEN 32 8 | /* Runs `md5sum` on filename via popen and stores the first MD5_LEN bytes of its output, NUL-terminated, in md5sum (the caller must provide MD5_LEN + 1 bytes). Returns true only when exactly MD5_LEN bytes were read. */ 9 | bool getFileMD5(char *filename, char *md5sum) 10 | { 11 | char cmd[500]; 12 | snprintf(cmd, sizeof(cmd), "md5sum \"%s\" 2>/dev/null", filename); /* quoted like the glsl2spv command so paths containing spaces work */ 13 | 14 | FILE *pipe = popen(cmd, "r"); 15 | if (pipe == NULL) return false; 16 | size_t bytes = fread(md5sum, 1, MD5_LEN, pipe); 17 | pclose(pipe); 18 | 19 | md5sum[bytes] = 0; 20 | return bytes == MD5_LEN; 21 | } 22 | /* Prints the command-line synopsis and returns EXIT_FAILURE so callers can `return usage();`. */ 23 | int usage() { 24 | printf("USAGE: gls [-t] [-v] program.(spv|glsl) args...\n"); 25 | return EXIT_FAILURE; 26 | } 27 | /* Entry point: accepts -t (timings) before or after -v (verbose); a program argument ending in .glsl is compiled with glsl2spv into $HOME/.gls/cache/<md5>.spv unless that file already exists; the program is then run through ComputeApplication::run and its exitCode is returned. */ 28 | int main(int argc, char *argv[]) 29 | { 30 | ComputeApplication app; 31 | app.workSize[0] = 20; 32 | int argIdx = 1; 33 | if (argIdx >= argc) return usage(); 34 | if (strcmp(argv[argIdx], "-t") == 0) { 35 | app.timings = true; 36 | argIdx++; 37 | } 38 | if (argIdx >= argc) return usage(); 39 | if (strcmp(argv[argIdx], "-v") == 0) { 40 | app.verbose = true; 41 | argIdx++; 42 | } 43 | if (argIdx >= argc) return usage(); 44 | if (strcmp(argv[argIdx], "-t") == 0) { 45 | app.timings = true; 46 | argIdx++; 47 | } 48 | if (argIdx >= argc) return usage(); 49 | 50 | int len = strlen(argv[argIdx]); 51 | if (len > 5 && strcmp(argv[argIdx] + (len-5), ".glsl") == 0) { 52 | char cmd[len + 520]; 53 | system("mkdir -p ~/.gls/cache/"); 54 | char md5sum[MD5_LEN + 1]; 55 | if (!getFileMD5(argv[argIdx], md5sum)) { 56 | fprintf(stderr, "Failed to get MD5 sum of file\n"); 57 | return EXIT_FAILURE; 58 | } 59 | static char spvFilename[500]; /* static: argv[argIdx] is pointed at this buffer and is read by app.run() after this block has ended */ 60 | snprintf(spvFilename, sizeof(spvFilename), "%s/.gls/cache/%s.spv", getenv("HOME"), md5sum); 61 | struct stat st; 62 | if (0 != stat(spvFilename, &st)) { 63 | fprintf(stderr, "Compiling to SPIR-V\n"); 64 |
snprintf(cmd, sizeof(cmd), "glsl2spv \"%s\" \"%s\"", argv[argIdx], spvFilename); 65 | system(cmd); 66 | if (0 != stat(spvFilename, &st)) { 67 | fprintf(stderr, "Failed to compile SPIR-V: %s\n", cmd); 68 | return EXIT_FAILURE; 69 | } 70 | } 71 | argv[argIdx] = spvFilename; 72 | } 73 | 74 | try 75 | { 76 | app.run(argv[argIdx], argc-argIdx, argv+argIdx); 77 | } 78 | catch (const std::runtime_error &e) 79 | { 80 | printf("%s\n", e.what()); 81 | app.cleanup(); 82 | return EXIT_FAILURE; 83 | } 84 | 85 | return app.exitCode; 86 | } 87 | -------------------------------------------------------------------------------- /spirv-io/src/gls_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include "cpu_compute_application.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #define MD5_LEN 32 8 | /* Runs `md5sum` on filename via popen and stores the first MD5_LEN bytes of its output, NUL-terminated, in md5sum (the caller must provide MD5_LEN + 1 bytes). Returns true only when exactly MD5_LEN bytes were read. */ 9 | bool getFileMD5(char *filename, char *md5sum) 10 | { 11 | char cmd[500]; 12 | snprintf(cmd, sizeof(cmd), "md5sum \"%s\" 2>/dev/null", filename); /* quoted like the glsl2spv command so paths containing spaces work */ 13 | 14 | FILE *pipe = popen(cmd, "r"); 15 | if (pipe == NULL) return false; 16 | size_t bytes = fread(md5sum, 1, MD5_LEN, pipe); 17 | pclose(pipe); 18 | 19 | md5sum[bytes] = 0; 20 | return bytes == MD5_LEN; 21 | } 22 | /* Prints the command-line synopsis and returns EXIT_FAILURE so callers can `return usage();`. */ 23 | int usage() { 24 | printf("USAGE: gls [-t] [-v] program.(spv|glsl) args...\n"); 25 | return EXIT_FAILURE; 26 | } 27 | /* Entry point: accepts -t (timings) before or after -v (verbose); a program argument ending in .glsl is compiled with glsl2spv into $HOME/.gls/cache/<md5>.spv unless that file already exists; the program is then run through ComputeApplication::run and its exitCode is returned. */ 28 | int main(int argc, char *argv[]) 29 | { 30 | ComputeApplication app; 31 | app.workSize[0] = 20; 32 | int argIdx = 1; 33 | if (argIdx >= argc) return usage(); 34 | if (strcmp(argv[argIdx], "-t") == 0) { 35 | app.timings = true; 36 | argIdx++; 37 | } 38 | if (argIdx >= argc) return usage(); 39 | if (strcmp(argv[argIdx], "-v") == 0) { 40 | app.verbose = true; 41 | argIdx++; 42 | } 43 | if (argIdx >= argc) return usage(); 44 | if (strcmp(argv[argIdx], "-t") == 0) { 45 | app.timings = true; 46 | argIdx++; 47 | } 48 | if (argIdx >= argc) return usage(); 49 | 50 | int len = strlen(argv[argIdx]); 51 | if (len > 5 && strcmp(argv[argIdx] + (len-5), ".glsl") == 0) { 52
| char cmd[len + 520]; 53 | system("mkdir -p ~/.gls/cache/"); 54 | char md5sum[MD5_LEN + 1]; 55 | if (!getFileMD5(argv[argIdx], md5sum)) { 56 | fprintf(stderr, "Failed to get MD5 sum of file\n"); 57 | return EXIT_FAILURE; 58 | } 59 | static char spvFilename[500]; /* static: argv[argIdx] is pointed at this buffer and is read by app.run() after this block has ended */ 60 | snprintf(spvFilename, sizeof(spvFilename), "%s/.gls/cache/%s.spv", getenv("HOME"), md5sum); 61 | struct stat st; 62 | if (0 != stat(spvFilename, &st)) { 63 | fprintf(stderr, "Compiling to SPIR-V\n"); 64 | snprintf(cmd, sizeof(cmd), "glsl2spv \"%s\" \"%s\"", argv[argIdx], spvFilename); 65 | system(cmd); 66 | if (0 != stat(spvFilename, &st)) { 67 | fprintf(stderr, "Failed to compile SPIR-V: %s\n", cmd); 68 | return EXIT_FAILURE; 69 | } 70 | } 71 | argv[argIdx] = spvFilename; 72 | } 73 | 74 | try 75 | { 76 | app.run(argv[argIdx], argc-argIdx, argv+argIdx); 77 | } 78 | catch (const std::runtime_error &e) 79 | { 80 | printf("%s\n", e.what()); 81 | app.cleanup(); 82 | return EXIT_FAILURE; 83 | } 84 | 85 | return app.exitCode; 86 | } 87 | -------------------------------------------------------------------------------- /include/spirv_cross/thread_group.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015-2017 ARM Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | #ifndef SPIRV_CROSS_THREAD_GROUP_HPP 18 | #define SPIRV_CROSS_THREAD_GROUP_HPP 19 | 20 | #include <condition_variable> 21 | #include <mutex> 22 | #include <thread> 23 | /* ThreadGroup owns Size worker threads; worker i is bound to impl[i]. run() asks every worker to call impl->main() once, wait() blocks until every worker is Idle again. */ 24 | namespace spirv_cross 25 | { 26 | template <typename T, unsigned Size> 27 | class ThreadGroup 28 | { 29 | public: 30 | ThreadGroup(T *impl) 31 | { 32 | for (unsigned i = 0; i < Size; i++) 33 | workers[i].start(&impl[i]); 34 | } 35 | 36 | void run() 37 | { 38 | for (auto &worker : workers) 39 | worker.run(); 40 | } 41 | 42 | void wait() 43 | { 44 | for (auto &worker : workers) 45 | worker.wait(); 46 | } 47 | 48 | private: 49 | struct Thread 50 | { 51 | enum State 52 | { 53 | Idle, 54 | Running, 55 | Dying 56 | }; 57 | State state = Idle; 58 | /* Spawns the worker loop: sleep until state leaves Idle, exit on Dying, otherwise call impl->main() and go back to Idle. */ 59 | void start(T *impl) 60 | { 61 | worker = std::thread([impl, this] { 62 | for (;;) 63 | { 64 | { 65 | std::unique_lock<std::mutex> l{ lock }; 66 | cond.wait(l, [this] { return state != Idle; }); 67 | if (state == Dying) 68 | break; 69 | } 70 | 71 | impl->main(); 72 | 73 | std::lock_guard<std::mutex> l{ lock }; 74 | if (state != Dying) state = Idle; /* keep a Dying request made by ~Thread while main() was running; overwriting it would leave join() waiting forever */ 75 | cond.notify_one(); 76 | } 77 | }); 78 | } 79 | /* Blocks the caller until the worker is Idle. */ 80 | void wait() 81 | { 82 | std::unique_lock<std::mutex> l{ lock }; 83 | cond.wait(l, [this] { return state == Idle; }); 84 | } 85 | /* Marks the worker Running and wakes it. */ 86 | void run() 87 | { 88 | std::lock_guard<std::mutex> l{ lock }; 89 | state = Running; 90 | cond.notify_one(); 91 | } 92 | /* Requests shutdown and joins the worker thread. */ 93 | ~Thread() 94 | { 95 | if (worker.joinable()) 96 | { 97 | { 98 | std::lock_guard<std::mutex> l{ lock }; 99 | state = Dying; 100 | cond.notify_one(); 101 | } 102 | worker.join(); 103 | } 104 | } 105 | std::thread worker; 106 | std::condition_variable cond; 107 | std::mutex lock; 108 | }; 109 | Thread workers[Size]; 110 | }; 111 | } 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /http_shader/ispc_int/httpd_ispc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015-2017 ARM Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance
with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include "httpd.ispc.h" 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #ifndef GLM_FORCE_SWIZZLE 25 | #define GLM_FORCE_SWIZZLE 26 | #endif 27 | 28 | #ifndef GLM_FORCE_RADIANS 29 | #define GLM_FORCE_RADIANS 30 | #endif 31 | 32 | #include 33 | using namespace glm; 34 | using namespace ispc; 35 | 36 | // Build some input data for our compute shader. 37 | #define NUM_WORKGROUPS_X 32 38 | #define NUM_WORKGROUPS_Y 32 39 | 40 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 32; 41 | 42 | static int inputBuffe[256 * requestCount] = {}; 43 | static int outputBuffe[256 * requestCount] = {}; 44 | static int heapBuffe[1024 * requestCount] = {}; 45 | static int requestBuffe[1024 * requestCount] = {}; 46 | static int responseBuffe[1024 * requestCount] = {}; 47 | /* Benchmark driver: reads one request (at most 1020 bytes) from stdin into the first 256-int record (byte count in element 0, payload from byte offset 4), copies that record into every other record, calls runner_main 1000 times, then prints the first record's output and the timing. */ 48 | int main() 49 | { 50 | int bytes = fread(((char*)inputBuffe)+4, 1, 1020, stdin); 51 | inputBuffe[0] = bytes; 52 | for (int i = 1; i < requestCount; i++) { 53 | memcpy((void*)(inputBuffe + 256 * i), (void*)inputBuffe, 1024); 54 | } 55 | 56 | std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); 57 | for (int j = 0; j < 1000; j++) { 58 | int32_t workgroups[] = {NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1}; 59 | runner_main(workgroups, 60 | *(struct inputBuffer*)inputBuffe, 61 | *(struct outputBuffer*)outputBuffe, 62 | *(struct heapBuffer*)heapBuffe, 63 | *(struct requestBuffer*)requestBuffe, 64 | *(struct responseBuffer*)responseBuffe 65 | ); 66 | } 67 |
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 68 | 69 | for (int i = 0; i < 1; i++) { 70 | printf("%d\n", outputBuffe[256*i]); 71 | fflush(stdout); /* drain the buffered printf output before the unbuffered write(2) so the two stay in order when stdout is a pipe or file */ write(1, ((char*)outputBuffe)+1024*i+4, outputBuffe[256*i]); 72 | } 73 | 74 | printf("Elapsed: %ld ms\n", std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count()); 75 | printf("Million requests per second: %.3f\n", 1e-6 * (requestCount * 1000.0) / (0.001 * std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count())); 76 | 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # spirv-wasm 2 | 3 | Run SPIR-V shaders in WebAssembly 4 | 5 | See demo at https://fhtr.org/spirv-wasm - uses WebAssembly Threads, so you may need to turn them on in chrome://flags or such. 6 | 7 | ## More demos 8 | 9 | These require WebAssembly SIMD enabled in chrome://flags. All running on the CPU. 10 | 11 | - [GLSL AOBench](https://fhtr.org/spirv-wasm/aobench), pretty much a copy-paste from the [ShaderToy version](https://www.shadertoy.com/view/llKBzd) 12 | - [Animated ISPC Mandelbrot](https://fhtr.org/spirv-wasm/ispc) 13 | 14 | ## Goal 15 | 16 | Write low-effort code in one language, run safely in the browser at 50% of hardware peak performance. 17 | 18 | For comparison, naive C++ achieves 1% of HW peak performance on a 16-core. If you include the GPU, it's closer to 0.1% of peak perf. Divide by amount of compute nodes available to you. Yes, yes, adding OpenMP #pragmas and doing hot spots in SoA intrinsics / ISPC, CUDA, and MPI does get you to 90%+ after much effort. 19 | 20 | ## Contribute 21 | 22 | Help! There's a lot of work to do! Estimated effort for the first pass: works OK for simple pipelines 100 commits, GPU integration 300 commits, job system 300 commits, simple distributed computing 200 commits, decent standard library 500 commits.
Reaching something at the level "Hey you've got to try this, it's awesome for X" is 3000 commits. I've got roughly 200 commits per year to spend here... you do the math. 23 | 24 | Try to get it running (use the [Docker image](https://github.com/kig/spirv-wasm/tree/master/docker/)), write some shaders, see how it goes. 25 | 26 | Have a look at the [issues](https://github.com/kig/spirv-wasm/issues) and make some more. 27 | 28 | 29 | 30 | ## Build 31 | 32 | Requires Emscripten, glslangValidator, glm and spirv-cross. 33 | 34 | [Install Emscripten](https://emscripten.org/docs/getting_started/downloads.html) 35 | 36 | The others are likely in your package manager. 37 | 38 | ```bash 39 | brew install glslangValidator 40 | brew install spirv-cross 41 | brew install glm 42 | ``` 43 | 44 | Now you can build the shader: 45 | 46 | ```bash 47 | source somewhere/emsdk/emsdk_env.sh 48 | emmake make 49 | serve 50 | ``` 51 | 52 | If everything went right, you can open [http://localhost:5000/src/mandel.html](http://localhost:5000/src/mandel.html) 53 | and hopefully see a Mandelbrot fractal. Check the browser console for timings. 54 | 55 | 56 | ## Debug information 57 | 58 | You can make Emscripten emit source maps with the -g4 flag. You can make glslangValidator and the latest versions of spirv-cross emit line numbers like this: 59 | 60 | ``` 61 | glslangValidator -g -V -o mandel.spv mandel.comp 62 | spirv-cross --cpp --emit-line-directives --output mandel.spv.cpp mandel.spv 63 | ``` 64 | 65 | Then you just have to hack the Emscripten source maps to map from the cpp to the compute shader line numbers. 66 | 67 | Send a PR if you do! 
68 | 69 | -------------------------------------------------------------------------------- /include/spirv_cross/sampler.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015-2017 ARM Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef SPIRV_CROSS_SAMPLER_HPP 18 | #define SPIRV_CROSS_SAMPLER_HPP 19 | 20 | #include 21 | 22 | namespace spirv_cross 23 | { 24 | struct spirv_cross_sampler_2d 25 | { 26 | inline virtual ~spirv_cross_sampler_2d() 27 | { 28 | } 29 | }; 30 | 31 | template 32 | struct sampler2DBase : spirv_cross_sampler_2d 33 | { 34 | sampler2DBase(const spirv_cross_sampler_info *info) 35 | { 36 | mips.insert(mips.end(), info->mipmaps, info->mipmaps + info->num_mipmaps); 37 | format = info->format; 38 | wrap_s = info->wrap_s; 39 | wrap_t = info->wrap_t; 40 | min_filter = info->min_filter; 41 | mag_filter = info->mag_filter; 42 | mip_filter = info->mip_filter; 43 | } 44 | 45 | inline virtual T sample(glm::vec2 uv, float bias) 46 | { 47 | return sampleLod(uv, bias); 48 | } 49 | 50 | inline virtual T sampleLod(glm::vec2 uv, float lod) 51 | { 52 | if (mag_filter == SPIRV_CROSS_FILTER_NEAREST) 53 | { 54 | uv.x = wrap(uv.x, wrap_s, mips[0].width); 55 | uv.y = wrap(uv.y, wrap_t, mips[0].height); 56 | glm::vec2 uv_full = uv * glm::vec2(mips[0].width, mips[0].height); 57 | 58 | int x = int(uv_full.x); 59 | int y = int(uv_full.y); 60 | return 
sample(x, y, 0); 61 | } 62 | else 63 | { 64 | return T(0, 0, 0, 1); 65 | } 66 | } 67 | 68 | inline float wrap(float v, spirv_cross_wrap wrap, unsigned size) 69 | { 70 | switch (wrap) 71 | { 72 | case SPIRV_CROSS_WRAP_REPEAT: 73 | return v - glm::floor(v); 74 | case SPIRV_CROSS_WRAP_CLAMP_TO_EDGE: 75 | { 76 | float half = 0.5f / size; 77 | return glm::clamp(v, half, 1.0f - half); 78 | } 79 | 80 | default: 81 | return 0.0f; 82 | } 83 | } 84 | 85 | std::vector mips; 86 | spirv_cross_format format; 87 | spirv_cross_wrap wrap_s; 88 | spirv_cross_wrap wrap_t; 89 | spirv_cross_filter min_filter; 90 | spirv_cross_filter mag_filter; 91 | spirv_cross_mipfilter mip_filter; 92 | }; 93 | 94 | typedef sampler2DBase sampler2D; 95 | typedef sampler2DBase isampler2D; 96 | typedef sampler2DBase usampler2D; 97 | 98 | template 99 | inline T texture(const sampler2DBase &samp, const glm::vec2 &uv, float bias = 0.0f) 100 | { 101 | return samp.sample(uv, bias); 102 | } 103 | } 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /spirv-io/lib/binary_data.glsl: -------------------------------------------------------------------------------- 1 | #define getBit(n, idx) (0 != ((n) & (1 << (idx)))) 2 | #define setBit(n, idx) ((n) | (1 << (idx))) 3 | #define unsetBit(n, idx) ((n) & ~(1 << (idx))) 4 | 5 | int32_t readI32fromIO(ptr_t i) { 6 | return ( 7 | (int32_t(u8fromIO[i]) << 0u) 8 | | (int32_t(u8fromIO[i+1]) << 8u) 9 | | (int32_t(u8fromIO[i+2]) << 16u) 10 | | (int32_t(u8fromIO[i+3]) << 24u) 11 | ); 12 | } 13 | 14 | uint32_t readU32fromIO(ptr_t i) { 15 | return ( 16 | (uint32_t(u8fromIO[i]) << 0u) 17 | | (uint32_t(u8fromIO[i+1]) << 8u) 18 | | (uint32_t(u8fromIO[i+2]) << 16u) 19 | | (uint32_t(u8fromIO[i+3]) << 24u) 20 | ); 21 | } 22 | 23 | uint64_t readU64fromIO(ptr_t i) { 24 | return packUint2x32(u32vec2(readU32fromIO(i), readU32fromIO(i+4))); 25 | } 26 | 27 | int32_t readI32heap(ptr_t i) { 28 | return ( 29 | (int32_t(u8heap[i]) << 0u) 30 | 
| (int32_t(u8heap[i+1]) << 8u) 31 | | (int32_t(u8heap[i+2]) << 16u) 32 | | (int32_t(u8heap[i+3]) << 24u) 33 | ); 34 | } 35 | 36 | uint32_t readU32heap(ptr_t i) { 37 | return ( 38 | (uint32_t(u8heap[i]) << 0u) 39 | | (uint32_t(u8heap[i+1]) << 8u) 40 | | (uint32_t(u8heap[i+2]) << 16u) 41 | | (uint32_t(u8heap[i+3]) << 24u) 42 | ); 43 | } 44 | 45 | uint64_t readU64heap(ptr_t i) { 46 | return packUint2x32(u32vec2(readU32heap(i), readU32heap(i+4))); 47 | } 48 | 49 | 50 | /* 51 | i64vec4 rotateLeft(i64vec4 v, i64vec4 v2, int offset) { 52 | return (v << offset) | (i64vec4(v.yzw, v2.x) >> (64-offset)); 53 | } 54 | 55 | i64vec4 rotateRight(i64vec4 v, i64vec4 v2, int offset) { 56 | return (i64vec4(v.w, v2.xyz) << (64-offset)) | (v2 >> offset); 57 | } 58 | 59 | i64vec4 rotateLeftBytes(i64vec4 v1, i64vec4 v2, int offset) { 60 | if (offset >= 24) { 61 | v1 = i64vec4(v1.w, v2.xyz); 62 | v2 = i64vec4(v2.w, 0, 0, 0); 63 | } else if (offset >= 16) { 64 | v1 = i64vec4(v1.zw, v2.xy); 65 | v2 = i64vec4(v2.zw, 0, 0); 66 | } else if (offset >= 8) { 67 | v1 = i64vec4(v1.yzw, v2.x); 68 | v2 = i64vec4(v2.yzw, 0); 69 | } 70 | return rotateLeft(v1, v2, (offset%8)*8); 71 | } 72 | 73 | i64vec4 rotateRightBytes(i64vec4 v1, i64vec4 v2, int offset) { 74 | if (offset >= 24) { 75 | v1 = i64vec4(v1.xyz, v2.x); 76 | v2 = i64vec4(v2.yzw, 0); 77 | } else if (offset >= 16) { 78 | v1 = i64vec4(v1.xy, v2.xy); 79 | v2 = i64vec4(v2.zw, 0, 0); 80 | } else if (offset >= 8) { 81 | v1 = i64vec4(v1.x, v2.xyz); 82 | v2 = i64vec4(v2.w, 0, 0, 0); 83 | } 84 | return rotateRight(v1, v2, (offset%8)*8); 85 | } 86 | 87 | i64vec4 unalignedLoad(ptr_t i) { 88 | int idx = i / 32; 89 | return rotateLeftBytes(i64v4fromIO[idx], i64v4fromIO[idx+1], i % 32); 90 | } 91 | 92 | void unalignedStore(ptr_t i, i64vec4 v2) { 93 | int idx = i / 32; 94 | i64v4heap[idx] = rotateRightBytes(i64v4heap[idx], v2, i % 32); 95 | i64v4heap[idx+1] = rotateRightBytes(v2, i64v4heap[idx+1], i % 32); 96 | } 97 | */ 98 | 
-------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Dockerfile for ISPC to WASM and GLSL to WASM compilation 2 | 3 | First build the image: `docker build -t ispc-wasm:latest .` 4 | 5 | ## ISPC 6 | 7 | Then you can compile ISPC files to WASM: 8 | 9 | ```bash 10 | $ ./ispc2wasm.sh mandelbrot.ispc 11 | $ ls 12 | mandelbrot.ispc mandelbrot.ispc.o mandelbrot.ispc.wasm 13 | ``` 14 | 15 | To use from JavaScript: 16 | 17 | ```js 18 | // Load the ISPC module 19 | const obj = await WebAssembly.instantiateStreaming(fetch('mandelbrot.ispc.wasm'), {"env": { 20 | "ISPCAlloc":() => console.log("ISPCAlloc"), 21 | "ISPCLaunch":() => console.log("ISPCLaunch"), 22 | "ISPCSync":() => console.log("ISPCSync"), 23 | }}); 24 | const width = 1920, height = 1080, outputPtr = 0; 25 | 26 | // Allocate space for output image 27 | obj.instance.exports.memory.grow(Math.ceil(width * height * 4 / 2**16)); 28 | 29 | // Call the ISPC function 30 | obj.instance.exports.mandelbrot_ispc(-2.5, -1, 1, 1, width, height, 255, outputPtr); 31 | 32 | // Read the output image from the heap.
33 | const heap = new Int32Array(obj.instance.exports.memory.buffer); 34 | 35 | const canvas = document.createElement('canvas'); 36 | canvas.width = width; 37 | canvas.height = height; 38 | const ctx = canvas.getContext('2d'); 39 | const id = ctx.createImageData(width, height); 40 | 41 | for (let i = 0; i < width*height; i++) { 42 | id.data[i*4 + 0] = heap[i]; 43 | id.data[i*4 + 1] = heap[i]; 44 | id.data[i*4 + 2] = heap[i]; 45 | id.data[i*4 + 3] = 255; 46 | } 47 | 48 | ctx.putImageData(id, 0, 0); 49 | document.body.append(canvas); 50 | ``` 51 | 52 | ## GLSL 53 | 54 | Compile GLSL compute shaders to WebAssembly (this is even more experimental!): 55 | 56 | ```bash 57 | $ ./glsl2wasm.sh ao.comp.glsl 58 | $ ls 59 | ao.comp.glsl.html ao.comp.glsl.js ao.comp.glsl.worker.js ao.comp.glsl.wasm 60 | ``` 61 | 62 | To use the GLSL version, open `ao.comp.glsl.html` and run: 63 | 64 | ```js 65 | const width = 1920, height = 1080; 66 | const localSizeX = 192, localSizeY = 10; // Local workgroup size of the compute shader. 67 | // Spawn enough workgroups to cover the image. 68 | const numWorkGroupsX = width / localSizeX; 69 | const numWorkGroupsY = height / localSizeY; 70 | const numWorkGroupsZ = 1; 71 | 72 | const inputPtr = Module._malloc(8*4); // The shader takes an 8-float SSBO as its input buffer. 73 | const outputPtr = Module._malloc(width*height*4); // And writes to a 8-bit RGBA image buffer. 74 | 75 | const input = new Float32Array(Module.wasmMemory.buffer, inputPtr, 8); 76 | input.set([width, height, 0, 0, 0, 0, 0, 0]); // Write the SSBO values to the input buffer. 77 | 78 | // Run the shader across all accessible cores and SIMD lanes. 79 | Module._run(numWorkGroupsX, numWorkGroupsY, numWorkGroupsZ, inputPtr, outputPtr); 80 | 81 | // Show the resulting image. 
82 | const canvas = document.createElement('canvas'); 83 | canvas.width = width; 84 | canvas.height = height; 85 | const ctx = canvas.getContext('2d'); 86 | const id = ctx.createImageData(width, height); 87 | id.data.set(new Uint8Array(Module.wasmMemory.buffer, outputPtr, id.data.byteLength)); 88 | ctx.putImageData(id, 0, 0); 89 | document.body.append(canvas); 90 | ``` 91 | -------------------------------------------------------------------------------- /http_shader/chr.glsl: -------------------------------------------------------------------------------- 1 | #define CHR_NULL 0 2 | #define CHR_SOH 1 3 | #define CHR_STX 2 4 | #define CHR_ETX 3 5 | #define CHR_EOT 4 6 | #define CHR_ENQ 5 7 | #define CHR_ACK 6 8 | #define CHR_BELL 7 9 | #define CHR_BACKSPACE 8 10 | #define CHR_TAB 9 11 | #define CHR_LF 10 12 | #define CHR_VT 11 13 | #define CHR_FF 12 14 | #define CHR_CR 13 15 | #define CHR_SO 14 16 | #define CHR_SI 15 17 | #define CHR_DLE 16 18 | #define CHR_DC1 17 19 | #define CHR_DC2 18 20 | #define CHR_DC3 19 21 | #define CHR_DC4 20 22 | #define CHR_NAK 21 23 | #define CHR_SYN 22 24 | #define CHR_ETB 23 25 | #define CHR_CAN 24 26 | #define CHR_EM 25 27 | #define CHR_SUB 26 28 | #define CHR_ESC 27 29 | #define CHR_FS 28 30 | #define CHR_GS 29 31 | #define CHR_RS 30 32 | #define CHR_US 31 33 | #define CHR_SPACE 32 34 | #define CHR_EXCLAMATION_MARK 33 35 | #define CHR_DOUBLE_QUOTE 34 36 | #define CHR_HASH 35 37 | #define CHR_DOLLAR 36 38 | #define CHR_PERCENT 37 39 | #define CHR_AMPERSAND 38 40 | #define CHR_SINGLE_QUOTE 39 41 | #define CHR_OPEN_PAREN 40 42 | #define CHR_CLOSE_PAREN 41 43 | #define CHR_TIMES 42 44 | #define CHR_PLUS 43 45 | #define CHR_COMMA 44 46 | #define CHR_DASH 45 47 | #define CHR_DOT 46 48 | #define CHR_SLASH 47 49 | #define CHR_0 48 50 | #define CHR_1 49 51 | #define CHR_2 50 52 | #define CHR_3 51 53 | #define CHR_4 52 54 | #define CHR_5 53 55 | #define CHR_6 54 56 | #define CHR_7 55 57 | #define CHR_8 56 58 | #define CHR_9 57 59 | #define 
CHR_COLON 58 60 | #define CHR_SEMICOLON 59 61 | #define CHR_OPEN_ANGLE_BRACKET 60 62 | #define CHR_EQUAL 61 63 | #define CHR_CLOSE_ANGLE_BRACKET 62 64 | #define CHR_QUESTION_MARK 63 65 | #define CHR_AT 64 66 | #define CHR_A 65 67 | #define CHR_B 66 68 | #define CHR_C 67 69 | #define CHR_D 68 70 | #define CHR_E 69 71 | #define CHR_F 70 72 | #define CHR_G 71 73 | #define CHR_H 72 74 | #define CHR_I 73 75 | #define CHR_J 74 76 | #define CHR_K 75 77 | #define CHR_L 76 78 | #define CHR_M 77 79 | #define CHR_N 78 80 | #define CHR_O 79 81 | #define CHR_P 80 82 | #define CHR_Q 81 83 | #define CHR_R 82 84 | #define CHR_S 83 85 | #define CHR_T 84 86 | #define CHR_U 85 87 | #define CHR_V 86 88 | #define CHR_W 87 89 | #define CHR_X 88 90 | #define CHR_Y 89 91 | #define CHR_Z 90 92 | #define CHR_OPEN_SQUARE_BRACKET 91 93 | #define CHR_BACKSLASH 92 94 | #define CHR_CLOSE_SQUARE_BRACKET 93 95 | #define CHR_CARET 94 96 | #define CHR_UNDERSCORE 95 97 | #define CHR_BACKTICK 96 98 | #define CHR_a 97 99 | #define CHR_b 98 100 | #define CHR_c 99 101 | #define CHR_d 100 102 | #define CHR_e 101 103 | #define CHR_f 102 104 | #define CHR_g 103 105 | #define CHR_h 104 106 | #define CHR_i 105 107 | #define CHR_j 106 108 | #define CHR_k 107 109 | #define CHR_l 108 110 | #define CHR_m 109 111 | #define CHR_n 110 112 | #define CHR_o 111 113 | #define CHR_p 112 114 | #define CHR_q 113 115 | #define CHR_r 114 116 | #define CHR_s 115 117 | #define CHR_t 116 118 | #define CHR_u 117 119 | #define CHR_v 118 120 | #define CHR_w 119 121 | #define CHR_x 120 122 | #define CHR_y 121 123 | #define CHR_z 122 124 | #define CHR_OPEN_CURLY_BRACKET 123 125 | #define CHR_PIPE 124 126 | #define CHR_CLOSE_CURLY_BRACKET 125 127 | #define CHR_TILDE 126 128 | #define CHR_DEL 127 129 | -------------------------------------------------------------------------------- /docker/mandelbrot.ispc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2010-2012, Intel 
Corporation 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of Intel Corporation nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 22 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 24 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | static inline int mandel(float c_re, float c_im, int count) { 35 | float z_re = c_re, z_im = c_im; 36 | int i; 37 | for (i = 0; i < count; ++i) { 38 | if (z_re * z_re + z_im * z_im > 4.) 
// Read a whole file into a heap buffer and return it viewed as 32-bit words.
//
// The buffer is allocated with `new char[]` (the caller owns it) and its size
// is the file size rounded up to the next multiple of 4; the padding bytes are
// zeroed so the data fits an array of uint32_t.
//
//   length   - out: padded size in BYTES (not words); set to 0 on failure.
//   filename - path of the file to read.
//
// Returns NULL (after printing a message) if the file cannot be opened, sized
// or read completely.
uint32_t *readFile(uint32_t &length, const char *filename)
{
    length = 0;

    FILE *fp = fopen(filename, "rb");
    if (fp == NULL)
    {
        printf("Could not find or open file: %s\n", filename);
        return NULL; // never touch a NULL stream below
    }

    // get file size.
    long filesize = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        filesize = ftell(fp);
    }
    if (filesize < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        printf("Could not determine size of file: %s\n", filename);
        fclose(fp);
        return NULL;
    }

    long filesizepadded = ((filesize + 3) / 4) * 4;

    // read file contents.
    char *str = new char[filesizepadded];
    size_t got = 0;
    if (filesize > 0)
    {
        got = fread(str, sizeof(char), (size_t)filesize, fp);
    }
    fclose(fp);

    if (got != (size_t)filesize)
    {
        // A short read would leave uninitialised bytes in the returned data.
        printf("Could not read file: %s\n", filename);
        delete[] str;
        return NULL;
    }

    // data padding.
    for (long i = filesize; i < filesizepadded; i++)
    {
        str[i] = 0;
    }

    length = (uint32_t)filesizepadded;
    return (uint32_t *)str;
}
// Host driver for the statically linked SPIRV-Cross C++ compute shader:
// binds the two file-level buffers (`inputs`, `outputs`), sets the compute
// builtins, invokes the shader once per work group on a 20x20 grid, then
// either draws the result to a canvas (Emscripten build) or dumps the raw
// output buffer to stdout (native build).
int main()
{
#ifdef __EMSCRIPTEN__
	EM_ASM({
		console.time('compute');
	});
#endif
	// First, we get the C interface to the shader.
	// This can be loaded from a dynamic library, or as here,
	// linked in as a static library.
	auto *iface = spirv_cross_get_interface();

	// Create an instance of the shader interface.
	auto *shader = iface->construct();

// Build some input data for our compute shader.
#define NUM_WORKGROUPS_X 20
#define NUM_WORKGROUPS_Y 20

	void *inputs_ptr = inputs;
	void *outputs_ptr = outputs;

	// Bind resources to the shader.
	// For resources like samplers and buffers, we provide a list of pointers,
	// since UBOs, SSBOs and samplers can be arrays, and can point to different types,
	// which is especially true for samplers.
	// Set 0 / binding 0 is the input buffer, set 0 / binding 1 the output buffer.
	spirv_cross_set_resource(shader, 0, 0, &inputs_ptr, sizeof(inputs_ptr));
	spirv_cross_set_resource(shader, 0, 1, &outputs_ptr, sizeof(outputs_ptr));

	// We also have to set builtins.
	// The relevant builtins will depend on the shader,
	// but for compute, there are few builtins, which are gl_NumWorkGroups and gl_WorkGroupID.
	// LocalInvocationID and GlobalInvocationID are inferred when executing the invocation.
	uvec3 num_workgroups(NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1);
	uvec3 work_group_id(0, 0, 0);
	spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, &num_workgroups, sizeof(num_workgroups));
	// NOTE(review): work_group_id is bound once here and only mutated in the loop
	// below, so the shader presumably reads it through this address on every
	// invoke() - confirm against the SPIRV-Cross C++ backend.
	spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, &work_group_id, sizeof(work_group_id));

	// Execute work groups.
	// One invoke() per (x, y) work group; the z coordinate stays 0.
	for (unsigned x = 0; x < NUM_WORKGROUPS_X; x++)
		for (unsigned y = 0; y < NUM_WORKGROUPS_Y; y++)
		{
			work_group_id.x = x;
			work_group_id.y = y;
			iface->invoke(shader);
		}

	// Call destructor.
	iface->destruct(shader);

#ifdef __EMSCRIPTEN__
	// $0 is the address of the output buffer; the script divides it by 4 to
	// index HEAPF32 and scales each float by 255 into the canvas image data.
	EM_ASM({
		console.timeEnd('compute');
		var c = Module.canvas;
		var ctx = c.getContext('2d');
		c.width = c.height = 1280;
		var id = ctx.createImageData(c.width, c.height);
		var data = id.data;
		var off = $0 / 4;
		for (var i = 0; i < data.length; i++) {
			data[i] = (Module.HEAPF32[off + i] * 255.0) | 0;
		}
		ctx.putImageData(id, 0, 0);
	}, (int)outputs_ptr);
#else
	// Raw dump of the output buffer to stdout (fd 1): 1280*1280 elements times
	// 4*4 bytes - assumes vec4 is four 32-bit floats; TODO confirm.
	write(1, (char*)outputs_ptr, 1280*1280*4*4);
#endif

	return 0;
}
37 | #define NUM_WORKGROUPS_X 32 38 | #define NUM_WORKGROUPS_Y 1 39 | 40 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 1024 * 16; 41 | 42 | static const int requestSize = 1024; 43 | 44 | static int inputBuffe[(requestSize / 4) * requestCount] = {}; 45 | static int outputBuffe[(requestSize / 4) * requestCount] = {}; 46 | static int heapBuffe[(requestSize / 4) * requestCount] = {}; 47 | 48 | int main() 49 | { 50 | int requestTemplate[(requestSize / 4)]; 51 | for (int i = 0; i < requestCount; i++) { 52 | if (i % 2 == 0) { 53 | snprintf((char*)(&inputBuffe[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\nThis is post number %d.", i*2/3, i); 54 | } else { 55 | snprintf((char*)(&inputBuffe[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "GET /%07d HTTP/1.1\r\nhost: localhost\r\n\r\n", i); 56 | } 57 | if (i % 11 == 10) { 58 | int j = i % 10; 59 | snprintf((char*)(&inputBuffe[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\nThis is %d spam-post %d number %d.", j, i, i, i); 60 | } 61 | inputBuffe[(requestSize / 4) * i] = strlen((char*)(&inputBuffe[(requestSize / 4) * i + 4])); 62 | // if (i < 10) printf("%d\n%s\n", inputBuffe[(requestSize / 4) * i], (char*)(&inputBuffe[(requestSize / 4) * i + 4])); 63 | 64 | snprintf((char*)(&heapBuffe[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "text/html\r\n\r\nThis is document number %d.", i); 65 | heapBuffe[(requestSize / 4) * i] = strlen((char*)(&heapBuffe[(requestSize / 4) * i + 4])); 66 | // if (i < 10) printf("%d\n%s\n", heapBuffe[(requestSize / 4) * i], (char*)(&heapBuffe[(requestSize / 4) * i + 4])); 67 | } 68 | 69 | std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); 70 | for (int j = 0; j < 1000; j++) { 71 | int32_t workgroups[] = {NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1}; 72 | runner_main(workgroups, 
73 | *(struct inputBuffer*)inputBuffe, 74 | *(struct outputBuffer*)outputBuffe, 75 | *(struct heapBuffer*)heapBuffe 76 | ); 77 | } 78 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 79 | 80 | for (int i = 0; i < 10; i++) { 81 | write(1, ((char*)outputBuffe)+requestSize*i+16, outputBuffe[(requestSize / 4)*i]); 82 | printf("\n"); 83 | } 84 | 85 | printf("\nElapsed: %ld ms\n", std::chrono::duration_cast(end - begin).count()); 86 | printf("Million requests per second: %.3f\n\n", 1e-6 * (requestCount * 1000.0) / (0.001 * std::chrono::duration_cast(end - begin).count())); 87 | 88 | 89 | return 0; 90 | } 91 | -------------------------------------------------------------------------------- /docker/spirv-runner/ispcRunner.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "program.h" 4 | #include "emscripten.h" 5 | 6 | #ifdef WIN32 7 | #include 8 | #include 9 | #endif 10 | 11 | static uint32_t bufferSize = 0; 12 | static uint32_t inputBufferSize = 0; 13 | static uint32_t vulkanDeviceIndex = 0; 14 | static int32_t workSize[3] = {1, 1, 1}; 15 | 16 | static char *input; 17 | 18 | void readHeader() 19 | { 20 | ::size_t input_length = 0, read_bytes = 0, input_buffer_size = 4096; 21 | 22 | #ifdef WIN32 23 | _setmode(_fileno(stdout), _O_BINARY); 24 | _setmode(_fileno(stdin), _O_BINARY); 25 | #endif 26 | 27 | bufferSize = 0; 28 | read_bytes = fread(&bufferSize, 1, 4, stdin); 29 | if (read_bytes < 4) 30 | { 31 | fprintf(stderr, "read only %zd bytes, using default bufferSize\n", read_bytes); 32 | bufferSize = 4; 33 | } 34 | 35 | vulkanDeviceIndex = 0; 36 | read_bytes = fread(&vulkanDeviceIndex, 1, 4, stdin); 37 | if (read_bytes < 4) 38 | { 39 | fprintf(stderr, "read only %zd bytes, using default vulkanDeviceIndex\n", read_bytes); 40 | vulkanDeviceIndex = 0; 41 | } 42 | 43 | read_bytes = fread(workSize, 1, 12, stdin); 44 | if (read_bytes < 12) 45 | { 46 | fprintf(stderr, "read 
only %zd bytes, using default workSize\n", read_bytes); 47 | workSize[0] = workSize[1] = workSize[2] = 1; 48 | } 49 | 50 | inputBufferSize = 0; 51 | read_bytes = fread(&inputBufferSize, 1, 4, stdin); 52 | if (read_bytes < 4) 53 | { 54 | fprintf(stderr, "read only %zd bytes, using default inputBufferSize\n", read_bytes); 55 | inputBufferSize = 4; 56 | } 57 | 58 | input = (char *)malloc(sizeof(ispc::inputs) - 4 + inputBufferSize); 59 | } 60 | 61 | bool readInput() 62 | { 63 | if (feof(stdin)) { 64 | return false; 65 | } 66 | 67 | ::size_t input_length = 0, read_bytes = 0; 68 | ::size_t off = sizeof(ispc::inputs) - 4; 69 | 70 | while (input_length < inputBufferSize && !feof(stdin)) 71 | { 72 | read_bytes = fread((void *)(input + input_length + off), 1, inputBufferSize, stdin); 73 | input_length += read_bytes; 74 | } 75 | return input_length > 0; 76 | } 77 | 78 | EMSCRIPTEN_KEEPALIVE extern "C" 79 | int run(int w, int h, int d, ispc::inputs *inputs, ispc::outputs *outputs) { 80 | workSize[0] = w; 81 | workSize[1] = h; 82 | workSize[2] = d; 83 | ispc::runner_main(workSize, *inputs, *outputs); 84 | return (int)(outputs->outputData); 85 | } 86 | 87 | int main(int argc, char *argv[]) 88 | { 89 | return 0; 90 | 91 | EM_ASM({ 92 | console.time('compute'); 93 | }); 94 | inputBufferSize = 8*4; 95 | input = (char *)malloc(sizeof(ispc::inputs) - 4 + inputBufferSize); 96 | float *dims = (float*)(input + (sizeof(ispc::inputs) - 4)); 97 | dims[0] = 1920; 98 | dims[1] = 1080; 99 | dims[2] = 0; 100 | dims[3] = 0; 101 | dims[4] = 0; 102 | dims[5] = 0; 103 | dims[6] = 0; 104 | dims[7] = 0; 105 | 106 | bufferSize = dims[0]*dims[1]*4; 107 | 108 | ispc::outputs *outputs = (ispc::outputs *)malloc(sizeof(ispc::outputs) - 4 + bufferSize); 109 | ispc::inputs *inputs = (ispc::inputs *)input; 110 | 111 | int output_ptr = run(dims[0]/192, dims[1]/10, 1, inputs, outputs); 112 | 113 | EM_ASM({ 114 | console.timeEnd('compute'); 115 | var c = Module.canvas; 116 | var ctx = c.getContext('2d'); 117 | 
c.width = $1; c.height = $2; 118 | var id = ctx.createImageData(c.width, c.height); 119 | var data = id.data; 120 | var off = $0; 121 | for (var i = 0; i < data.length; i++) { 122 | data[i] = Module.HEAPU8[off + i]; 123 | } 124 | ctx.putImageData(id, 0, 0); 125 | }, output_ptr, dims[0], dims[1]); 126 | 127 | free(input); 128 | free(outputs); 129 | 130 | return 0; 131 | } 132 | -------------------------------------------------------------------------------- /spirv-io/lib/chr.glsl: -------------------------------------------------------------------------------- 1 | #define CHR_NULL char(0) 2 | #define CHR_SOH char(1) 3 | #define CHR_STX char(2) 4 | #define CHR_ETX char(3) 5 | #define CHR_EOT char(4) 6 | #define CHR_ENQ char(5) 7 | #define CHR_ACK char(6) 8 | #define CHR_BELL char(7) 9 | #define CHR_BACKSPACE char(8) 10 | #define CHR_TAB char(9) 11 | #define CHR_LF char(10) 12 | #define CHR_VT char(11) 13 | #define CHR_FF char(12) 14 | #define CHR_CR char(13) 15 | #define CHR_SO char(14) 16 | #define CHR_SI char(15) 17 | #define CHR_DLE char(16) 18 | #define CHR_DC1 char(17) 19 | #define CHR_DC2 char(18) 20 | #define CHR_DC3 char(19) 21 | #define CHR_DC4 char(20) 22 | #define CHR_NAK char(21) 23 | #define CHR_SYN char(22) 24 | #define CHR_ETB char(23) 25 | #define CHR_CAN char(24) 26 | #define CHR_EM char(25) 27 | #define CHR_SUB char(26) 28 | #define CHR_ESC char(27) 29 | #define CHR_FS char(28) 30 | #define CHR_GS char(29) 31 | #define CHR_RS char(30) 32 | #define CHR_US char(31) 33 | #define CHR_SPACE char(32) 34 | #define CHR_EXCLAMATION_MARK char(33) 35 | #define CHR_DOUBLE_QUOTE char(34) 36 | #define CHR_HASH char(35) 37 | #define CHR_DOLLAR char(36) 38 | #define CHR_PERCENT char(37) 39 | #define CHR_AMPERSAND char(38) 40 | #define CHR_SINGLE_QUOTE char(39) 41 | #define CHR_OPEN_PAREN char(40) 42 | #define CHR_CLOSE_PAREN char(41) 43 | #define CHR_TIMES char(42) 44 | #define CHR_PLUS char(43) 45 | #define CHR_COMMA char(44) 46 | #define CHR_DASH char(45) 47 | 
#define CHR_DOT char(46) 48 | #define CHR_SLASH char(47) 49 | #define CHR_0 char(48) 50 | #define CHR_1 char(49) 51 | #define CHR_2 char(50) 52 | #define CHR_3 char(51) 53 | #define CHR_4 char(52) 54 | #define CHR_5 char(53) 55 | #define CHR_6 char(54) 56 | #define CHR_7 char(55) 57 | #define CHR_8 char(56) 58 | #define CHR_9 char(57) 59 | #define CHR_COLON char(58) 60 | #define CHR_SEMICOLON char(59) 61 | #define CHR_OPEN_ANGLE_BRACKET char(60) 62 | #define CHR_EQUAL char(61) 63 | #define CHR_CLOSE_ANGLE_BRACKET char(62) 64 | #define CHR_QUESTION_MARK char(63) 65 | #define CHR_AT char(64) 66 | #define CHR_A char(65) 67 | #define CHR_B char(66) 68 | #define CHR_C char(67) 69 | #define CHR_D char(68) 70 | #define CHR_E char(69) 71 | #define CHR_F char(70) 72 | #define CHR_G char(71) 73 | #define CHR_H char(72) 74 | #define CHR_I char(73) 75 | #define CHR_J char(74) 76 | #define CHR_K char(75) 77 | #define CHR_L char(76) 78 | #define CHR_M char(77) 79 | #define CHR_N char(78) 80 | #define CHR_O char(79) 81 | #define CHR_P char(80) 82 | #define CHR_Q char(81) 83 | #define CHR_R char(82) 84 | #define CHR_S char(83) 85 | #define CHR_T char(84) 86 | #define CHR_U char(85) 87 | #define CHR_V char(86) 88 | #define CHR_W char(87) 89 | #define CHR_X char(88) 90 | #define CHR_Y char(89) 91 | #define CHR_Z char(90) 92 | #define CHR_OPEN_SQUARE_BRACKET char(91) 93 | #define CHR_BACKSLASH char(92) 94 | #define CHR_CLOSE_SQUARE_BRACKET char(93) 95 | #define CHR_CARET char(94) 96 | #define CHR_UNDERSCORE char(95) 97 | #define CHR_BACKTICK char(96) 98 | #define CHR_a char(97) 99 | #define CHR_b char(98) 100 | #define CHR_c char(99) 101 | #define CHR_d char(100) 102 | #define CHR_e char(101) 103 | #define CHR_f char(102) 104 | #define CHR_g char(103) 105 | #define CHR_h char(104) 106 | #define CHR_i char(105) 107 | #define CHR_j char(106) 108 | #define CHR_k char(107) 109 | #define CHR_l char(108) 110 | #define CHR_m char(109) 111 | #define CHR_n char(110) 112 | #define CHR_o 
char(111) 113 | #define CHR_p char(112) 114 | #define CHR_q char(113) 115 | #define CHR_r char(114) 116 | #define CHR_s char(115) 117 | #define CHR_t char(116) 118 | #define CHR_u char(117) 119 | #define CHR_v char(118) 120 | #define CHR_w char(119) 121 | #define CHR_x char(120) 122 | #define CHR_y char(121) 123 | #define CHR_z char(122) 124 | #define CHR_OPEN_CURLY_BRACKET char(123) 125 | #define CHR_PIPE char(124) 126 | #define CHR_CLOSE_CURLY_BRACKET char(125) 127 | #define CHR_TILDE char(126) 128 | #define CHR_DEL char(127) 129 | -------------------------------------------------------------------------------- /spirv-io/test/test_file.glsl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env gls 2 | 3 | #include "../lib/file.glsl" 4 | 5 | ThreadLocalCount = 1; 6 | ThreadGroupCount = 1; 7 | 8 | #define rg(i,n) for (int i=0,_l_=(n); i<_l_; i++) 9 | #define mapIO(i, n, f) { io _ios_[n]; rg(i, n) _ios_[i] = f; rg(i, n) awaitIO(_ios_[i]); } 10 | 11 | bool testRead() { 12 | string r1 = readSync("hello.txt", malloc(100)); 13 | bool okShort = strEq(r1, "Hello, world!\n"); 14 | if (!okShort) println(concat(str(strLen(r1)), " ", r1)); 15 | 16 | string buf = malloc(100); 17 | int ok; 18 | io reqNum = read("hello.txt", 0, 100, buf); 19 | string res = awaitIO(reqNum, ok); 20 | bool okLong = strEq(res, "Hello, world!\n"); 21 | if (!okLong) println(concat(str(strLen(res)), " ", res)); 22 | 23 | return okShort && okLong; 24 | } 25 | 26 | bool testWrite() { 27 | string buf = malloc(100); 28 | string filename = concat("write", str(ThreadId), ".txt"); 29 | 30 | awaitIO(createFile(filename)); 31 | awaitIO(truncateFile(filename, 0)); 32 | awaitIO(write(filename, 0, 100, "Write, write, write!")); 33 | string r1 = readSync(filename, buf); 34 | bool firstOk = strEq(r1, "Write, write, write!"); 35 | if (!firstOk) println(concat(str(strLen(r1)), r1)); 36 | awaitIO(truncateFile(filename, 0)); 37 | 38 | writeSync(filename, "Hello, world!"); 
// Runs two shell commands through runCmd and waits for each: an `echo`, then a
// `node` one-liner that writes a timestamp to node-<ThreadId>.txt. The file is
// read back and printed, then deleted.
// Always returns true: the file contents are printed but not checked.
bool testRunCmd() {
    FREE(FREE_IO(
        awaitIO(runCmd(concat("echo Hello from thread ", str(ThreadId))));
        awaitIO(runCmd(concat(
            "node -e 'fs=require(`fs`); fs.writeFileSync(`node-${",
            str(ThreadId),
            "}.txt`, Date.now().toString())'"
        )));
    ))
    string res = readSync(concat("node-", str(ThreadId), ".txt"), malloc(1000));
    println(concat("Node says ", res));
    // Wait for the cleanup to finish, as testWrite and testLs do for their
    // deletes, so the request is not left pending when this test returns.
    awaitIO(deleteFile(concat("node-", str(ThreadId), ".txt")));
    return true;
}
// One step of a per-thread HTTP server state machine.
//
// Each invocation loads the thread's state machine, runs a single pass through
// the switch below and saves the machine again.
// NOTE(review): thread 0 sets rerunProgram = RERUN_ON_IO, which presumably makes
// the host re-invoke the program when IO completes - confirm in the runtime.
//
// States handled here: s_Init -> s_Accept -> s_WaitingConn -> s_Closing -> s_Accept.
// s_Reading and s_Writing are declared in this file but not used by the switch.
void main() {
    string readBuf = malloc(4096);

    io r;
    int64_t startTime; // only referenced by the commented-out timing loop below

    if (ThreadId == 0) rerunProgram = RERUN_ON_IO;

    // This should do 512 accept+reads at a time and process them as they become ready.
    // Ditto for the writes and closes.
    // r = acceptReadBatch(listen_fd, conn_fds_i32a, reads_str_array);
    // conn_count = awaitIO(r).x;
    // pfor(i, conn_count, {
    //     process_req(i, conn_fds_i32a, reads_str_array, writes_str_array);
    // });
    // writeCloseBatch(conn_fds_buf, conn_count, writes_str_array);
    //

    // Starts in s_Init when there is no saved state (presumably; see loadStateMachine).
    stateMachine m = loadStateMachine(s_Init);
    // Disabled variant: spin inside a single invocation for a fixed time budget
    // instead of returning after one step.
    // atomicAdd(programReturnValue, 1);
    // if (ThreadId == 0) {
    //     while(programReturnValue < ThreadCount);
    //     startTime = microTimeSync();
    //     STORE(io_pad_8, 0);
    //     STORE(programReturnValue, 0);
    // }
    // while (LOAD(programReturnValue) != 0);
    // uint32_t idx = 0;
    // while (LOAD(io_pad_8) == 0) {
    //     idx++;
    //     if (ThreadId == 0 && (idx % 4096 == 0)) {
    //         int64_t now = microTimeSync();
    //         if (now - startTime > 10000) {
    //             STORE(io_pad_8, 1);
    //         }
    //         idx = 1;
    //     }
    //     barrier();
    switch (getState(m)) {
        case s_Init:
            // Open the listening socket and remember the heap pointer so that
            // s_Accept can rewind to it for every new connection.
            if (ThreadId == 0) FREE_ALL(log("Starting server on port 8080..."));
            setAttr(m, a_Server, listenSync(8080));
            setAttr(m, a_HeapStart, heapPtr);
            setState(m, s_Accept);
            if (ThreadId == 0) FREE_ALL(log("Server running, accepting connections."));
            // break;
            // The break is commented out: falls through into s_Accept.

        case s_Accept:
            // Reset the heap and IO pointers, then start an accept+receive.
            heapPtr = getI32Attr(m, a_HeapStart);
            fromIOPtr = fromIOStart;
            toIOPtr = toIOStart;
            setAttr(m, a_ConnectionIO, acceptAndRecv(getAttr(m, a_Server), readBuf));
            setState(m, s_WaitingConn);
            // break;
            // The break is commented out: falls through to poll right away.

        case s_WaitingConn:
            // Once the accept+receive is ready, build the response and start
            // send+close; otherwise stay in this state for the next run.
            r = getIOAttr(m, a_ConnectionIO);
            if (pollIO(r)) {
                string req;
                socket conn = awaitIO2(r, req);
                string response = process(req);
                setAttr(m, a_WriteIO, sendAndClose(conn, response));
                setState(m, s_Closing);
            }
            break;

        case s_Closing:
            // Once send+close has completed, go back to accepting.
            r = getIOAttr(m, a_WriteIO);
            if (pollIO(r)) {
                awaitIO(r);
                setState(m, s_Accept);
            }
            break;
    }
    // }
    saveStateMachine(m);
}
110 | 111 | -------------------------------------------------------------------------------- /include/spirv_cross/external_interface.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015-2017 ARM Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef SPIRV_CROSS_EXTERNAL_INTERFACE_H 18 | #define SPIRV_CROSS_EXTERNAL_INTERFACE_H 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | #include 25 | 26 | typedef struct spirv_cross_shader spirv_cross_shader_t; 27 | 28 | struct spirv_cross_interface 29 | { 30 | spirv_cross_shader_t *(*construct)(void); 31 | void (*destruct)(spirv_cross_shader_t *thiz); 32 | void (*invoke)(spirv_cross_shader_t *thiz); 33 | }; 34 | 35 | void spirv_cross_set_stage_input(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size); 36 | 37 | void spirv_cross_set_stage_output(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size); 38 | 39 | void spirv_cross_set_push_constant(spirv_cross_shader_t *thiz, void *data, size_t size); 40 | 41 | void spirv_cross_set_uniform_constant(spirv_cross_shader_t *thiz, unsigned location, void *data, size_t size); 42 | 43 | void spirv_cross_set_resource(spirv_cross_shader_t *thiz, unsigned set, unsigned binding, void **data, size_t size); 44 | 45 | const struct spirv_cross_interface *spirv_cross_get_interface(void); 46 | 47 | typedef enum 
spirv_cross_builtin { 48 | SPIRV_CROSS_BUILTIN_POSITION = 0, 49 | SPIRV_CROSS_BUILTIN_FRAG_COORD = 1, 50 | SPIRV_CROSS_BUILTIN_WORK_GROUP_ID = 2, 51 | SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS = 3, 52 | SPIRV_CROSS_NUM_BUILTINS 53 | } spirv_cross_builtin; 54 | 55 | void spirv_cross_set_builtin(spirv_cross_shader_t *thiz, spirv_cross_builtin builtin, void *data, size_t size); 56 | 57 | #define SPIRV_CROSS_NUM_DESCRIPTOR_SETS 4 58 | #define SPIRV_CROSS_NUM_DESCRIPTOR_BINDINGS 16 59 | #define SPIRV_CROSS_NUM_STAGE_INPUTS 16 60 | #define SPIRV_CROSS_NUM_STAGE_OUTPUTS 16 61 | #define SPIRV_CROSS_NUM_UNIFORM_CONSTANTS 32 62 | 63 | enum spirv_cross_format 64 | { 65 | SPIRV_CROSS_FORMAT_R8_UNORM = 0, 66 | SPIRV_CROSS_FORMAT_R8G8_UNORM = 1, 67 | SPIRV_CROSS_FORMAT_R8G8B8_UNORM = 2, 68 | SPIRV_CROSS_FORMAT_R8G8B8A8_UNORM = 3, 69 | 70 | SPIRV_CROSS_NUM_FORMATS 71 | }; 72 | 73 | enum spirv_cross_wrap 74 | { 75 | SPIRV_CROSS_WRAP_CLAMP_TO_EDGE = 0, 76 | SPIRV_CROSS_WRAP_REPEAT = 1, 77 | 78 | SPIRV_CROSS_NUM_WRAP 79 | }; 80 | 81 | enum spirv_cross_filter 82 | { 83 | SPIRV_CROSS_FILTER_NEAREST = 0, 84 | SPIRV_CROSS_FILTER_LINEAR = 1, 85 | 86 | SPIRV_CROSS_NUM_FILTER 87 | }; 88 | 89 | enum spirv_cross_mipfilter 90 | { 91 | SPIRV_CROSS_MIPFILTER_BASE = 0, 92 | SPIRV_CROSS_MIPFILTER_NEAREST = 1, 93 | SPIRV_CROSS_MIPFILTER_LINEAR = 2, 94 | 95 | SPIRV_CROSS_NUM_MIPFILTER 96 | }; 97 | 98 | struct spirv_cross_miplevel 99 | { 100 | const void *data; 101 | unsigned width, height; 102 | size_t stride; 103 | }; 104 | 105 | struct spirv_cross_sampler_info 106 | { 107 | const struct spirv_cross_miplevel *mipmaps; 108 | unsigned num_mipmaps; 109 | 110 | enum spirv_cross_format format; 111 | enum spirv_cross_wrap wrap_s; 112 | enum spirv_cross_wrap wrap_t; 113 | enum spirv_cross_filter min_filter; 114 | enum spirv_cross_filter mag_filter; 115 | enum spirv_cross_mipfilter mip_filter; 116 | }; 117 | 118 | typedef struct spirv_cross_sampler_2d spirv_cross_sampler_2d_t; 119 | spirv_cross_sampler_2d_t 
*spirv_cross_create_sampler_2d(const struct spirv_cross_sampler_info *info); 120 | void spirv_cross_destroy_sampler_2d(spirv_cross_sampler_2d_t *samp); 121 | 122 | #ifdef __cplusplus 123 | } 124 | #endif 125 | 126 | #endif 127 | -------------------------------------------------------------------------------- /spirv-io/test/test_hashtable.glsl: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include "../lib/hashtable.glsl" 4 | 5 | ThreadLocalCount = 1; 6 | ThreadGroupCount = 1; 7 | 8 | HeapSize = 16777216; 9 | ToIOSize = 16777216; 10 | FromIOSize = 16777216; 11 | 12 | 13 | void test_i32hAlloc() { 14 | 15 | i32map ht = i32hAlloc(300); 16 | assert(512 == ht.capacity); 17 | assert(512*3 == strLen(ht.table)); 18 | assert(0 == ht.count); 19 | ht = i32hAlloc(256); 20 | assert(256 == ht.capacity); 21 | assert(256*3 == strLen(ht.table)); 22 | assert(0 == ht.count); 23 | ht = i32hAlloc(257); 24 | assert(512 == ht.capacity); 25 | assert(512*3 == strLen(ht.table)); 26 | assert(0 == ht.count); 27 | } 28 | 29 | void test_i32hSet() { 30 | 31 | i32map ht = i32hAlloc(256); 32 | int32_t v = 0; 33 | 34 | i32hSet(ht, 45, 1); 35 | i32hSet(ht, 46, 2); 36 | i32hSet(ht, 47, 3); 37 | assert(true == i32hGet(ht, 45, v)); 38 | assert(1 == v); 39 | i32hSet(ht, 45, 4); 40 | i32hSet(ht, 248, 5); 41 | assert(true == i32hGet(ht, 46, v)); 42 | assert(2 == v); 43 | assert(true == i32hGet(ht, 47, v)); 44 | assert(3 == v); 45 | assert(true == i32hGet(ht, 45, v)); 46 | assert(4 == v); 47 | assert(true == i32hGet(ht, 248, v)); 48 | assert(5 == v); assert(256 == ht.capacity); 49 | log("Adding 260 keys"); 50 | for (int32_t i = 0; i < 260; i++) { 51 | i32hSet(ht, i, i); 52 | } 53 | 54 | // Resized table 55 | assert(512 == ht.capacity); 56 | log("Checking for keys"); 57 | // Check if all the keys are still there 58 | for (int32_t i = 0; i < 260; i++) { 59 | assert(true == i32hGet(ht, i, v)); 60 | assert(i == v); 61 | } 62 | 63 | 64 | } 65 | 66 | void 
test_i32hGet() {
    // i32hGet: a missing key returns false, a stored key returns true and
    // writes the value, and keys 32..511 are absent from a fresh table.

    i32map ht = i32hAlloc(256);
    int32_t v = 123;
    assert(false == i32hGet(ht, 30, v));
    i32hSet(ht, 30, 321);
    assert(true == i32hGet(ht, 30, v));
    assert(321 == v); assert(false == i32hGet(ht, 31, v));
    for (int32_t i = 32; i < 512; i++) {
        assert(false == i32hGet(ht, i, v));
    }


}

// i32hDelete: single delete, then set/get/delete churn over 468 keys, then two
// interleaved set/delete passes with different strides.
void test_i32hDelete() {

    i32map ht = i32hAlloc(256);
    int32_t v = 0;

    i32hSet(ht, 30, 321);
    assert(true == i32hGet(ht, 30, v));
    assert(321 == v); assert(true == i32hDelete(ht, 30)); assert(false == i32hGet(ht, 30, v));
    i32hSet(ht, 30, 321);

    log("i32hDelete: Adding and deleting 468 keys");

    for (int32_t i = 32; i < 500; i++) {
        i32hSet(ht, i, i);
        // NOTE(review): the loop counter i is passed as the output argument;
        // this only works because the stored value equals the key - confirm
        // that v was not intended here.
        assert(true == i32hGet(ht, i, i));
        assert(true == i32hDelete(ht, i));
    }

    log("i32hDelete: Checking that none of the keys exist");

    for (int32_t i = 32; i < 500; i++) {
        assert(false == i32hGet(ht, i, v));
        assert(false == i32hDelete(ht, i));
    }
    // Key 30 was re-inserted before the churn and must have survived it.
    assert(true == i32hGet(ht, 30, v));
    assert(321 == v);
    log("i32hDelete: Check sequences of gets, sets and deletes");

    // Set every 3rd key, delete every 7th: multiples of 3 that are not
    // multiples of 7 must remain.
    for (int32_t i = 0; i < 500; i+=3) {
        i32hSet(ht, i, i);
    }
    for (int32_t i = 0; i < 500; i+=7) {
        i32hDelete(ht, i);
    }
    for (int32_t i = 0; i < 500; i+=3) {
        if (i % 7 != 0) {
            assert(true == i32hGet(ht, i, v));
            assert(i == v);
            if (!i32hGet(ht, i, v)) {
                log(concat("err 1.1: ", str(i)));
            }
        } else {
            assert(false == i32hGet(ht, i, v));
            if (i32hGet(ht, i, v)) {
                log(concat("err 1.2: ", str(i)));
            }
        }
    }

    // Set every 11th key, delete every 3rd: multiples of 11 that are not
    // multiples of 3 must remain.
    for (int32_t i = 0; i < 500; i+=11) {
        i32hSet(ht, i, i);
    }
    for (int32_t i = 0; i < 500; i+=3) {
        i32hDelete(ht, i);
    }
    for (int32_t i = 0; i < 500; i+=11) {
        if (i % 3 != 0) {
            assert(true == i32hGet(ht, i, v));
            assert(i == v);
            if (!i32hGet(ht, i, v)) {
                log(concat("err 2.1: ", str(i)));
            }
        } else {
            assert(false == i32hGet(ht, i, v));
            if (i32hGet(ht, i, v)) {
                log(concat("err 2.2: ", str(i)));
            }
        }
    }


}

// Runs every hashtable test, each wrapped in FREE_ALL.
void main() {
    FREE_ALL(test_i32hAlloc());
    FREE_ALL(test_i32hSet());
    FREE_ALL(test_i32hGet());
    FREE_ALL(test_i32hDelete());
}

--------------------------------------------------------------------------------
/spirv-io/test/test_array.glsl:
--------------------------------------------------------------------------------

// NOTE(review): the target of this include is missing in this copy of the file
// (apparently stripped during extraction) - restore it from the repository.
#include
#include "../lib/array.glsl"

ThreadLocalCount = 1;
ThreadGroupCount = 1;

HeapSize = 16777216;
ToIOSize = 16777216;
FromIOSize = 16777216;


// i32alloc(n) yields an array of length n.
void test_i32alloc() {

    i32array a = i32alloc(3);
    assert(3 == i32len(a));
}

// i32len on literals of length 3, 1 and 0.
void test_i32len() {
    assert(3 == i32len(i32{1,2,3}));
    assert(1 == i32len(i32{1}));
    assert(0 == i32len(i32{}));
}

// i32get reads elements by zero-based index.
void test_i32get() {

    i32array a = i32{1,2,3};
    assert(1 == i32get(a, 0));
    assert(2 == i32get(a, 1));
    assert(3 == i32get(a, 2));
}

// i32set overwrites elements in place.
void test_i32set() {

    i32array a = i32{1,2,3};
    i32set(a, 0, 4);
    i32set(a, 1, 5);
    i32set(a, 2, 6);
    assert(4 == i32get(a, 0));
    assert(5 == i32get(a, 1));
    assert(6 == i32get(a, 2));
}

// i32last returns the final element.
void test_i32last() {

    i32array a = i32{1,2,3};
    assert(3 == i32last(a));
}

// i32first returns the initial element.
void test_i32first() {

    i32array a = i32{1,2,3};
    assert(1 == i32first(a));
}

// i32clone yields an equal array whose start and end differ from the source's.
void test_i32clone() {

    i32array a = i32{1,2,3};
    i32array b = i32{4,5};
    assert(true == i32eq(i32clone(a), a));
    assert(true == i32eq(i32clone(b), b));
    assert(a.start != i32clone(a).start);
    assert(a.end != i32clone(a).end);
}

// i32concat in both orders and with empty operands.
void test_i32concat() {

    i32array a = i32{1,2,3};
    i32array b = i32{4,5};
    i32array c = i32{};
    assert(true == i32eq(i32concat(a,b), i32{1,2,3,4,5}));
    assert(true == i32eq(i32concat(b,a), i32{4,5,1,2,3}));
    assert(true == i32eq(i32concat(a,c), i32{1,2,3}));
    assert(true == i32eq(i32concat(c,a), i32{1,2,3}));
    assert(true == i32eq(i32concat(c,c), i32{}));
}

// i32fill overwrites every element with the given value.
void test_i32fill() {

    i32array a = i32{1,2,3,4,5};
    i32fill(a, 9);
    assert(true == i32eq(a, i32{9,9,9,9,9}));
}

// i32indexOf returns the first matching index, or -1 when absent.
void test_i32indexOf() {

    i32array a = i32{1,2,3,2,5};
    assert(0 == i32indexOf(a, 1));
    assert(4 == i32indexOf(a, 5));
    assert(1 == i32indexOf(a, 2));
    assert(2 == i32indexOf(a, 3));
    assert(-1 == i32indexOf(a, 4));
}

// i32lastIndexOf returns the last matching index, or -1 when absent.
void test_i32lastIndexOf() {

    i32array a = i32{1,2,3,2,5};
    assert(0 == i32lastIndexOf(a, 1));
    assert(4 == i32lastIndexOf(a, 5));
    assert(3 == i32lastIndexOf(a, 2));
    assert(2 == i32lastIndexOf(a, 3));
    assert(-1 == i32lastIndexOf(a, 4));
}

// i32includes reports membership.
void test_i32includes() {

    i32array a = i32{1,2,3,4,5};
    assert(true == i32includes(a, 1));
    assert(true == i32includes(a, 2));
    assert(true == i32includes(a, 3));
    assert(true == i32includes(a, 4));
    assert(true == i32includes(a, 5));
    assert(false == i32includes(a, 6));
}

// i32reverseInPlace reverses the array it is given.
void test_i32reverseInPlace() {

    i32array a = i32{1,2,3,4,5};
    i32reverseInPlace(a);
    assert(true == i32eq(a, i32{5,4,3,2,1}));
}

// i32reverse returns the reversed sequence.
void test_i32reverse() {

    i32array a = i32{1,2,3,4,5};
    assert(true == i32eq(i32reverse(a), i32{5,4,3,2,1}));
}

// One-argument i32slice: positive and negative start indices, including the
// boundaries 0, 5 and -5.
void test_i32slice() {

    i32array a = i32{1,2,3,4,5};
    assert(3 == i32len(i32slice(a, 2)));
    assert(2 == i32len(i32slice(a, 3)));
    assert(2 == i32len(i32slice(a, -2)));
    assert(5 == i32len(i32slice(a, -5)));
    assert(0 == i32len(i32slice(a, 5)));
    assert(5 == i32len(i32slice(a, 0)));
    assert(3 == i32get(i32slice(a, 2), 0));
    // (i32array a, size_t i) => i32len(a) >= i32len(i32slice(a, i));

}

// Two-argument i32slice: negative end index and an end index past the array.
void test_i32slice_() {

    i32array arr = i32alloc(5);
    for (ptr_t i = 0; i < 5; i++) i32set(arr, i, i);
    assert(3 == i32len(i32slice(arr, 1, -1)));
    assert(3 == i32len(i32slice(arr, 1, 4)));
    assert(3 == i32len(i32slice(arr, 2, 7)));
    assert(3 == i32get(i32slice(arr, 1, -1), 2));
    // (i32array a, size_t i, size_t j) => i32len(a) >= i32len(i32slice(a, i, j));

}

// Runs every array test, each wrapped in FREE_ALL.
void main() {
    FREE_ALL(test_i32alloc());
    FREE_ALL(test_i32len());
    FREE_ALL(test_i32get());
    FREE_ALL(test_i32set());
    FREE_ALL(test_i32last());
    FREE_ALL(test_i32first());
    FREE_ALL(test_i32clone());
    FREE_ALL(test_i32concat());
    FREE_ALL(test_i32fill());
    FREE_ALL(test_i32indexOf());
    FREE_ALL(test_i32lastIndexOf());
    FREE_ALL(test_i32includes());
    FREE_ALL(test_i32reverseInPlace());
    FREE_ALL(test_i32reverse());
    FREE_ALL(test_i32slice());
    FREE_ALL(test_i32slice_());
}

--------------------------------------------------------------------------------
/spirv-io/examples/grep_cpu.glsl:
--------------------------------------------------------------------------------
#!/usr/bin/env gls

ThreadLocalCount = 256;
ThreadGroupCount = 64;

// NOTE(review): the include targets below are missing in this copy of the file
// (apparently stripped during extraction) - restore them from the repository.
#include
// #include

// Workgroup-shared state, written by local thread 0 in main().
shared int done;                 // 0: keep reading, 1: short read, 2: nothing read
shared int64_t wgOff;            // file offset of this workgroup's next read
shared string wgBuf;             // the bytes the workgroup is currently searching
shared int32_t decompressedSize; // set by awaitIO in main()
shared bool isCompressed;        // set by awaitIO in main()

// Records one hit: atomically reserves a 4-byte slot at groupHeapPtr, stores
// k + off in i32fromIO at that slot, and sets found.
void addHit(int32_t k, int32_t off, inout bool found) {
    i32fromIO[atomicAdd(groupHeapPtr, 4)/4] = int32_t(k) + off;
    found = true;
}

// Searches buf for pattern and records each hit through addHit with off added.
// The buffer is read 32 bytes at a time as an i64vec4. For each of the 8 byte
// positions of a 64-bit lane, that byte of all four lanes is compared with p,
// the first pattern character. Lanes x/y/z/w sit at byte offsets +0/+8/+16/+24,
// and a full startsWith check runs only where a lane matched inside the buffer
// length. Returns true if any hit was recorded.
bool grepBuffer(int32_t blockSize, string buf, string pattern, char p, int32_t off) {
    bool found = false;
    for (size_t i = 0, l = strLen(buf); i < blockSize; i+=32) {
        ptr_t idx = buf.x + i;
        i64vec4 v = i64v4heap[idx / 32];
        for (size_t j = 0, k = i, jdx = idx; j < 64; j += 8, idx++, k++, jdx++) {
            i8vec4 u = i8vec4((v >> int64_t(j)) & 0xff);
            if (any(equal(u, i8vec4(p)))) {
                if (k < l && p == u.x && startsWith(string(jdx, buf.y), pattern)) addHit(k, off, found);
                if (k+8 < l && p == u.y && startsWith(string(jdx+8, buf.y), pattern)) addHit(k + 8, off, found);
                if (k+16 < l && p == u.z && startsWith(string(jdx+16, buf.y), pattern)) addHit(k + 16, off, found);
                if (k+24 < l && p == u.w && startsWith(string(jdx+24, buf.y), pattern)) addHit(k + 24, off, found);
            }
        }
    }
    return found;
}


// Prints the offset of every occurrence of argv[1] in the file argv[2].
// Each workgroup repeatedly reads one chunk of the file, splits it into
// per-thread blocks that overlap by the pattern length, searches the blocks
// with grepBuffer, and has local thread 0 print the hits.
void main() {

    if (arrLen(argv) < 3) {
        if (ThreadId == 0) eprintln("USAGE: grep.glsl pattern file");
        return;
    }

    string pattern = aGet(argv, 1);
    string filename = aGet(argv, 2);

    // Thread 0 stats the file and publishes the result through
    // programReturnValue (1 = ok, 2 = error); all other threads spin on it.
    if (ThreadId == 0) {
        Stat st = statSync(filename);
        programReturnValue = (st.error == 0) ? 1 : 2;
        // readaheadSync(filename, 0, st.st_size);
    }
    while (programReturnValue == 0); // Wait for first thread.

    if (programReturnValue == 2) {
        if (ThreadId == 0) eprintln(concat("File not found: ", filename));
        return;
    }

    int32_t patternLength = strLen(pattern);
    // Per-thread block: HeapSize minus the pattern length rounded up to 32.
    int32_t blockSize = HeapSize - (((patternLength+31) / 32) * 32);
    int32_t wgBufSize = ThreadLocalCount * blockSize + patternLength;

    if (ThreadLocalId == 0) {
        done = 0;
        wgOff = int64_t(ThreadGroupId * ThreadLocalCount) * int64_t(blockSize);
        isCompressed = true;
    }

    // NOTE(review): found is accumulated below but never read; the sibling
    // grep.glsl ends with atomicMin(programReturnValue, found ? 0 : 1).
    // Confirm whether leaving that out here is intentional.
    bool found = false;
    char p = heap[pattern.x];

    ptr_t hitStart = 0;

    while (done == 0) {
        FREE(FREE_IO(
            barrier(); memoryBarrier();

            // Local thread 0 reads the next chunk for the whole workgroup.
            if (ThreadLocalId == 0) {
                fromIOPtr = groupHeapStart;
                toIOPtr = groupHeapStart;

                io r = read(filename, wgOff, wgBufSize, string(groupHeapStart, groupHeapStart + (HeapSize * ThreadLocalCount)));
                wgBuf = awaitIO(r, true, decompressedSize, isCompressed);

                // A short read ends the loop after this pass; an empty read
                // ends it immediately (see the break below).
                if (decompressedSize != wgBufSize) {
                    done = (decompressedSize == 0) ? 2 : 1;
                }
                groupHeapPtr = groupHeapStart;
                hitStart = groupHeapPtr;
            }

            barrier(); memoryBarrier();

            if (done == 2) break;

            // if (isCompressed) {
            //     for (int32_t i = 0; i < 128; i += ThreadLocalCount/LZ4_GROUP_SIZE) {
            //         lz4DecompressBlockStreamFromIOToHeap(i + ThreadLocalId/LZ4_GROUP_SIZE, LZ4_STREAM_BLOCK_SIZE, wgBuf, string(groupHeapStart, groupHeapStart + decompressedSize));
            //     }
            // } else {
            // Each thread copies its own HeapSize-sized slice from IO to heap.
                copyFromIOToHeap(
                    string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize),
                    string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize)
                );
            // }

            if (ThreadLocalId == 0) {
                wgBuf = string(groupHeapStart, groupHeapStart + decompressedSize);
            }

            barrier(); memoryBarrier();

            // This thread's block, extended by patternLength and clamped to
            // the end of the data.
            string buf = string(
                min(wgBuf.y, wgBuf.x + ThreadLocalId * blockSize),
                min(wgBuf.y, wgBuf.x + (ThreadLocalId+1) * blockSize + patternLength)
            );

            bool blockFound = grepBuffer(blockSize, buf, pattern, p, ThreadLocalId * blockSize);
            found = found || blockFound;

            barrier(); memoryBarrier();

            // Local thread 0 formats the recorded hits as absolute file
            // offsets, one per line, and prints them.
            if (ThreadLocalId == 0) {
                fromIOPtr = groupHeapStart;
                toIOPtr = groupHeapStart;
                ptr_t start = hitStart / 4;
                ptr_t end = groupHeapPtr / 4;

                if (start != end) {
                    heapPtr = groupHeapStart;
                    for (int j = start; j < end; j++) {
                        str(int64_t(i32fromIO[j]) + wgOff);
                        _w('\n');
                    }
                    print(string(groupHeapStart, heapPtr));
                }

                wgOff += int64_t(ThreadCount * blockSize);
            }

            barrier(); memoryBarrier();
        ))
    }

}


--------------------------------------------------------------------------------
/spirv-io/examples/grep.glsl:
--------------------------------------------------------------------------------
#!/usr/bin/env gls

ThreadLocalCount = 256;
ThreadGroupCount = 64;

#define LZ4_GROUP_SIZE 8
#define LZ4_STREAM_BLOCK_SIZE 8192

// NOTE(review): both include targets are missing in this copy of the file
// (apparently stripped during extraction) - restore them from the repository.
#include
#include

// Workgroup-shared state, written by local thread 0 in main().
shared int done;                 // 0: keep reading, 1: short read, 2: nothing read
shared int64_t wgOff;            // file offset of this workgroup's next read
shared string wgBuf;             // the bytes the workgroup is currently searching
shared int32_t decompressedSize; // set by awaitIO in main()
shared bool isCompressed;        // set by awaitIO in main()

// Records one hit: atomically reserves a 4-byte slot at groupHeapPtr, stores
// k + off in i32fromIO at that slot, and sets found.
void addHit(int32_t k, int32_t off, inout bool found) {
    i32fromIO[atomicAdd(groupHeapPtr, 4)/4] = int32_t(k) + off;
    found = true;
}

// Searches buf for pattern and records each hit through addHit with off added.
// The buffer is read 32 bytes at a time as an i64vec4. For each of the 8 byte
// positions of a 64-bit lane, that byte of all four lanes is compared with p,
// the first pattern character. Lanes x/y/z/w sit at byte offsets +0/+8/+16/+24,
// and a full startsWith check runs only where a lane matched inside the buffer
// length. Returns true if any hit was recorded.
bool grepBuffer(int32_t blockSize, string buf, string pattern, char p, int32_t off) {
    bool found = false;
    for (size_t i = 0, l = strLen(buf); i < blockSize; i+=32) {
        ptr_t idx = buf.x + i;
        i64vec4 v = i64v4heap[idx / 32];
        for (size_t j = 0, k = i, jdx = idx; j < 64; j += 8, idx++, k++, jdx++) {
            i8vec4 u = i8vec4((v >> j) & 0xff);
            if (any(equal(u, i8vec4(p)))) {
                if (k < l && p == u.x && startsWith(string(jdx, buf.y), pattern)) addHit(k, off, found);
                if (k+8 < l && p == u.y && startsWith(string(jdx+8, buf.y), pattern)) addHit(k + 8, off, found);
                if (k+16 < l && p == u.z && startsWith(string(jdx+16, buf.y), pattern)) addHit(k + 16, off, found);
                if (k+24 < l && p == u.w && startsWith(string(jdx+24, buf.y), pattern)) addHit(k + 24, off, found);
            }
        }
    }
    return found;
}


// Prints the offset of every occurrence of argv[1] in the file argv[2] and
// lowers programReturnValue to 0 when at least one thread found a match.
// Each workgroup repeatedly reads one chunk of the file (requested with the
// LZ4 block-stream flag), decompresses or copies it to the heap, splits it
// into per-thread blocks that overlap by the pattern length, searches them
// with grepBuffer, and has local thread 0 print the hits.
void main() {

    if (arrLen(argv) < 3) {
        if (ThreadId == 0) eprintln("USAGE: grep.glsl pattern file");
        return;
    }

    string pattern = aGet(argv, 1);
    string filename = aGet(argv, 2);

    // Thread 0 stats the file and publishes the result through
    // programReturnValue (1 = ok, 2 = error); all other threads spin on it.
    if (ThreadId == 0) {
        Stat st = statSync(filename);
        programReturnValue = (st.error == 0) ? 1 : 2;
        // readaheadSync(filename, 0, st.st_size);
    }
    while (programReturnValue == 0); // Wait for first thread.

    if (programReturnValue == 2) {
        if (ThreadId == 0) eprintln(concat("File not found: ", filename));
        return;
    }

    int32_t patternLength = strLen(pattern);
    // Per-thread block: HeapSize minus the pattern length rounded up to 32.
    int32_t blockSize = HeapSize - (((patternLength+31) / 32) * 32);
    int32_t wgBufSize = ThreadLocalCount * blockSize + patternLength;

    if (ThreadLocalId == 0) {
        done = 0;
        wgOff = int64_t(ThreadGroupId * ThreadLocalCount) * int64_t(blockSize);
        isCompressed = true;
    }

    bool found = false;
    char p = heap[pattern.x];

    ptr_t hitStart = 0;

    while (done == 0) {
        FREE(FREE_IO(
            barrier(); memoryBarrier();

            // Local thread 0 reads the next chunk for the whole workgroup.
            if (ThreadLocalId == 0) {
                fromIOPtr = groupHeapStart;
                toIOPtr = groupHeapStart;

                io r = read(filename, wgOff, wgBufSize, string(groupHeapStart, groupHeapStart + (HeapSize * ThreadLocalCount)), IO_COMPRESS_LZ4_BLOCK_STREAM | LZ4_STREAM_BLOCK_SIZE);
                wgBuf = awaitIO(r, true, decompressedSize, isCompressed);

                // A short read ends the loop after this pass; an empty read
                // ends it immediately (see the break below).
                if (decompressedSize != wgBufSize) {
                    done = (decompressedSize == 0) ? 2 : 1;
                }
                groupHeapPtr = groupHeapStart;
                hitStart = groupHeapPtr;
            }

            barrier(); memoryBarrier();

            if (done == 2) break;

            // Move the chunk from the IO buffer to the heap: groups of
            // LZ4_GROUP_SIZE threads decompress stream blocks when the data
            // arrived compressed, otherwise each thread copies its own slice.
            if (isCompressed) {
                for (int32_t i = 0; i < 128; i += ThreadLocalCount/LZ4_GROUP_SIZE) {
                    lz4DecompressBlockStreamFromIOToHeap(i + ThreadLocalId/LZ4_GROUP_SIZE, LZ4_STREAM_BLOCK_SIZE, wgBuf, string(groupHeapStart, groupHeapStart + decompressedSize));
                }
            } else {
                copyFromIOToHeap(
                    string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize),
                    string(groupHeapStart + ThreadLocalId * HeapSize, groupHeapStart + (ThreadLocalId+1) * HeapSize)
                );
            }

            if (ThreadLocalId == 0) {
                wgBuf = string(groupHeapStart, groupHeapStart + decompressedSize);
            }

            barrier(); memoryBarrier();

            // This thread's block, extended by patternLength and clamped to
            // the end of the data.
            string buf = string(
                min(wgBuf.y, wgBuf.x + ThreadLocalId * blockSize),
                min(wgBuf.y, wgBuf.x + (ThreadLocalId+1) * blockSize + patternLength)
            );

            bool blockFound = grepBuffer(blockSize, buf, pattern, p, ThreadLocalId * blockSize);
            found = found || blockFound;

            barrier(); memoryBarrier();

            // Local thread 0 formats the recorded hits as absolute file
            // offsets, one per line, and prints them.
            if (ThreadLocalId == 0) {
                fromIOPtr = groupHeapStart;
                toIOPtr = groupHeapStart;
                ptr_t start = hitStart / 4;
                ptr_t end = groupHeapPtr / 4;

                if (start != end) {
                    heapPtr = groupHeapStart;
                    for (int j = start; j < end; j++) {
                        str(int64_t(i32fromIO[j]) + wgOff);
                        _w('\n');
                    }
                    print(string(groupHeapStart, heapPtr));
                }

                wgOff += int64_t(ThreadCount * blockSize);
            }

            barrier(); memoryBarrier();
        ))
    }

    // programReturnValue is 1 at this point (set after the stat above); a
    // thread that found a match lowers it to 0.
    atomicMin(programReturnValue, found ?
0 : 1); 151 | } 152 | 153 | -------------------------------------------------------------------------------- /http_shader/cpp/cppRunner.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015-2017 ARM Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | 19 | #include "spirv_cross/external_interface.h" 20 | #include 21 | #include 22 | #include 23 | 24 | inline int _atomicCompSwap(int *ptr, int val, int newval) { 25 | __atomic_compare_exchange(ptr, &val, &newval, false, 0, 0); 26 | return val; 27 | } 28 | 29 | #define atomicCompSwap(ptr, val, newval) _atomicCompSwap(&(ptr), (val), (newval)) 30 | 31 | #include "httpd.cpp" 32 | 33 | #ifndef GLM_FORCE_SWIZZLE 34 | #define GLM_FORCE_SWIZZLE 35 | #endif 36 | 37 | #ifndef GLM_FORCE_RADIANS 38 | #define GLM_FORCE_RADIANS 39 | #endif 40 | 41 | #include 42 | using namespace glm; 43 | 44 | // Build some input data for our compute shader. 
45 | #define NUM_WORKGROUPS_X 32 46 | #define NUM_WORKGROUPS_Y 1 47 | 48 | static const uint requestCount = NUM_WORKGROUPS_X * NUM_WORKGROUPS_Y * 1024 * 16; 49 | 50 | static const int requestSize = 1024; 51 | 52 | static int inputBuffer[(requestSize / 4) * requestCount] = {}; 53 | static int outputBuffer[(requestSize / 4) * requestCount] = {}; 54 | static int heapBuffer[(requestSize / 4) * requestCount] = {}; 55 | 56 | int main() 57 | { 58 | // First, we get the C interface to the shader. 59 | // This can be loaded from a dynamic library, or as here, 60 | // linked in as a static library. 61 | auto *iface = spirv_cross_get_interface(); 62 | 63 | // Create an instance of the shader interface. 64 | auto *shader = iface->construct(); 65 | 66 | void *inputs_ptr = inputBuffer; 67 | void *outputs_ptr = outputBuffer; 68 | void *heap_ptr = heapBuffer; 69 | 70 | int requestTemplate[(requestSize / 4)]; 71 | for (int i = 0; i < requestCount; i++) { 72 | if (i % 2 == 0) { 73 | snprintf((char*)(&inputBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\nThis is post number %d.", i*2/3, i); 74 | } else { 75 | snprintf((char*)(&inputBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "GET /%07d HTTP/1.1\r\nhost: localhost\r\n\r\n", i); 76 | } 77 | if (i % 11 == 10) { 78 | int j = i % 10; 79 | snprintf((char*)(&inputBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "POST /%07d HTTP/1.1\r\nhost: localhost\r\n\r\ntext/html\r\n\r\nThis is %d spam-post %d number %d.", j, i, i, i); 80 | } 81 | inputBuffer[(requestSize / 4) * i] = strlen((char*)(&inputBuffer[(requestSize / 4) * i + 4])); 82 | // if (i < 10) printf("%d\n%s\n", inputBuffer[(requestSize / 4) * i], (char*)(&inputBuffer[(requestSize / 4) * i + 4])); 83 | 84 | snprintf((char*)(&heapBuffer[(requestSize / 4) * i + 4]), ((requestSize / 16) - 1) * 16, "text/html\r\n\r\nThis is document number %d.", i); 85 | 
heapBuffer[(requestSize / 4) * i] = strlen((char*)(&heapBuffer[(requestSize / 4) * i + 4])); 86 | // if (i < 10) printf("%d\n%s\n", heapBuffer[(requestSize / 4) * i], (char*)(&heapBuffer[(requestSize / 4) * i + 4])); 87 | } 88 | 89 | std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); 90 | for (int i = 0; i < 100; i++) { 91 | 92 | // Bind resources to the shader. 93 | // For resources like samplers and buffers, we provide a list of pointers, 94 | // since UBOs, SSBOs and samplers can be arrays, and can point to different types, 95 | // which is especially true for samplers. 96 | spirv_cross_set_resource(shader, 0, 0, &inputs_ptr, sizeof(inputs_ptr)); 97 | spirv_cross_set_resource(shader, 0, 1, &outputs_ptr, sizeof(outputs_ptr)); 98 | spirv_cross_set_resource(shader, 0, 2, &heap_ptr, sizeof(heap_ptr)); 99 | 100 | // We also have to set builtins. 101 | // The relevant builtins will depend on the shader, 102 | // but for compute, there are few builtins, which are gl_NumWorkGroups and gl_WorkGroupID. 103 | // LocalInvocationID and GlobalInvocationID are inferred when executing the invocation. 104 | uvec3 num_workgroups(NUM_WORKGROUPS_X, NUM_WORKGROUPS_Y, 1); 105 | uvec3 work_group_id(0, 0, 0); 106 | spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_NUM_WORK_GROUPS, &num_workgroups, sizeof(num_workgroups)); 107 | spirv_cross_set_builtin(shader, SPIRV_CROSS_BUILTIN_WORK_GROUP_ID, &work_group_id, sizeof(work_group_id)); 108 | 109 | // Execute work groups. 110 | for (unsigned x = 0; x < NUM_WORKGROUPS_X; x++) 111 | for (unsigned y = 0; y < NUM_WORKGROUPS_Y; y++) 112 | { 113 | work_group_id.x = x; 114 | work_group_id.y = y; 115 | iface->invoke(shader); 116 | } 117 | 118 | } 119 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 120 | 121 | // Call destructor. 
122 | iface->destruct(shader); 123 | 124 | for (int i = 0; i < 10; i++) { 125 | write(1, ((char*)outputBuffer)+requestSize*i+16, outputBuffer[(requestSize / 4)*i]); 126 | printf("\n"); 127 | } 128 | 129 | printf("\nElapsed: %ld ms\n", std::chrono::duration_cast(end - begin).count()); 130 | printf("Million requests per second: %.3f\n\n", 1e-6 * (requestCount * 100.0) / (0.001 * std::chrono::duration_cast(end - begin).count())); 131 | 132 | return 0; 133 | } 134 | -------------------------------------------------------------------------------- /http_shader/ispc_char/httpd.glsl: -------------------------------------------------------------------------------- 1 | #define version #version 2 | 3 | version 450 4 | 5 | #include "../chr.glsl" 6 | 7 | #define BLK_SZ 1024 8 | 9 | #define REQUESTS_PER_INVOCATION 512 10 | 11 | #define strCopy(SRC, DST, i, start, end) uint _s = start; uint _e = end; while (_s < _e) (DST)[i++] = (SRC)[_s++]; 12 | #define strCopyAll(SRC, DST, i) uint _str[] = SRC; strCopy(_str, DST, i, 0, _str.length()) 13 | 14 | #define A_OK if (i > BLK_SZ) { error(index); return; } 15 | 16 | layout (local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in; 17 | 18 | layout(std430, binding = 0) readonly buffer inputBuffer { lowp uint inputBytes[]; }; 19 | layout(std430, binding = 1) buffer outputBuffer { lowp uint outputBytes[]; }; 20 | layout(std430, binding = 2) buffer heapBuffer { lowp uint heap[]; }; 21 | 22 | const uint METHOD_UNKNOWN = 0; 23 | const uint METHOD_GET = 1; 24 | const uint METHOD_POST = 2; 25 | const uint METHOD_OPTION = 3; 26 | 27 | const uint PROTOCOL_UNKNOWN = 0; 28 | const uint PROTOCOL_HTTP10 = 1; 29 | const uint PROTOCOL_HTTP11 = 2; 30 | 31 | const uint MIME_TEXT_PLAIN = 0; 32 | const uint MIME_TEXT_HTML = 1; 33 | 34 | struct header { 35 | uvec2 name; 36 | uvec2 value; 37 | }; 38 | 39 | void readRequestUntilChar(inout uint i, uint index, uint endChar, out uvec2 str) { 40 | str.x = index + i; 41 | while (i < BLK_SZ && inputBytes[index+i] 
!= endChar) { 42 | i++; 43 | } 44 | str.y = index + i; 45 | i++; 46 | } 47 | 48 | void readMethod(inout uint i, uint index, out uint method) { 49 | uint j = index + i; 50 | uint c = inputBytes[j]; 51 | if ( 52 | inputBytes[j] == CHR_G && 53 | inputBytes[j+1] == CHR_E && 54 | inputBytes[j+2] == CHR_T && 55 | inputBytes[j+3] == CHR_SPACE 56 | ) { 57 | method = METHOD_GET; 58 | i += 4; 59 | return; 60 | } else if ( 61 | inputBytes[j] == CHR_P && 62 | inputBytes[j+1] == CHR_O && 63 | inputBytes[j+2] == CHR_S && 64 | inputBytes[j+3] == CHR_T && 65 | inputBytes[j+4] == CHR_SPACE 66 | ) { 67 | method = METHOD_POST; 68 | i += 5; 69 | return; 70 | } else if (inputBytes[j] == CHR_O && inputBytes[j+6] == CHR_SPACE) { 71 | method = METHOD_OPTION; 72 | i += 7; 73 | return; 74 | } 75 | method = METHOD_UNKNOWN; 76 | i = BLK_SZ+1; 77 | } 78 | 79 | void readPath(inout uint i, uint index, out uvec2 path) { 80 | readRequestUntilChar(i, index, CHR_SPACE, path); 81 | } 82 | 83 | void readProtocol(inout uint i, uint index, out uint protocol) { 84 | uvec2 protocolString; 85 | readRequestUntilChar(i, index, CHR_CR, protocolString); 86 | if (i < 1024 && inputBytes[index+i] == CHR_LF) { 87 | i++; 88 | if (inputBytes[protocolString.y-1] == CHR_1) { 89 | protocol = PROTOCOL_HTTP11; 90 | } else { 91 | protocol = PROTOCOL_HTTP10; 92 | } 93 | } else { 94 | protocol = PROTOCOL_UNKNOWN; 95 | i = BLK_SZ+1; 96 | } 97 | } 98 | 99 | bool readHeader(inout uint i, uint index, out header hdr) { 100 | if (inputBytes[index+i] == CHR_CR) { 101 | i += 2; 102 | return true; 103 | } 104 | readRequestUntilChar(i, index, CHR_COLON, hdr.name); 105 | while (i < 1024 && inputBytes[index+i] == CHR_SPACE) i++; 106 | readRequestUntilChar(i, index, CHR_CR, hdr.value); 107 | i++; 108 | return false; 109 | } 110 | 111 | void writeStatus(inout uint i, uint index, uint statusCode) { 112 | uint j = i + index; 113 | strCopyAll("HTTP/1.1 ", outputBytes, j); 114 | if (statusCode == 200) { 115 | strCopyAll("200 OK", 
outputBytes, j); 116 | } else { 117 | strCopyAll("500 Error", outputBytes, j); 118 | } 119 | outputBytes[j++] = CHR_CR; 120 | outputBytes[j++] = CHR_LF; 121 | i = j - index; 122 | } 123 | 124 | void writeContentType(inout uint i, uint index, uint contentType) { 125 | uint j = i + index; 126 | 127 | uint contentTypeString[] = "Content-Type: "; 128 | strCopyAll(contentTypeString, outputBytes, j); 129 | if (contentType == MIME_TEXT_PLAIN) { 130 | strCopyAll("text/plain", outputBytes, j); 131 | } else { 132 | strCopyAll("text/html", outputBytes, j); 133 | } 134 | outputBytes[j++] = CHR_CR; 135 | outputBytes[j++] = CHR_LF; 136 | 137 | i = j - index; 138 | } 139 | 140 | void writeEndHeaders(inout uint i, uint index) { 141 | uint j = i + index; 142 | outputBytes[j++] = CHR_CR; 143 | outputBytes[j++] = CHR_LF; 144 | i = j - index; 145 | } 146 | 147 | void writeBody(inout uint i, uint index, uvec2 path) { 148 | uint j = i + index; 149 | strCopyAll("Hello, World!", outputBytes, j); 150 | outputBytes[j++] = CHR_LF; 151 | i = j - index; 152 | } 153 | 154 | void error(uint index) { 155 | uint i = 16; 156 | writeStatus(i, index, 500); 157 | writeContentType(i, index, MIME_TEXT_PLAIN); 158 | writeEndHeaders(i, index); 159 | outputBytes[index+0] = ((i-16) >> 0) & 0xFF; // response length (bytes written after offset 16), low byte first 160 | outputBytes[index+1] = ((i-16) >> 8) & 0xFF; // right shift: a left shift here always masked to 0 161 | outputBytes[index+2] = ((i-16) >> 16) & 0xFF; 162 | outputBytes[index+3] = ((i-16) >> 24) & 0xFF; 163 | } 164 | 165 | void handleRequest(uint index) { 166 | uint i = 16; 167 | uint method; 168 | uvec2 path; 169 | uint protocol; 170 | header headers[32]; 171 | uint headerCount = 0; 172 | 173 | readMethod(i, index, method); A_OK; 174 | readPath(i, index, path); A_OK; 175 | readProtocol(i, index, protocol); A_OK; 176 | 177 | for (uint j = 0; j < 32; j++) { 178 | bool done = readHeader(i, index, headers[j]); A_OK; 179 | if (done) break; 180 | headerCount++; 181 | } 182 | 183 | i = 16; 184 | writeStatus(i, index, 200); 185 | writeContentType(i, index, 
MIME_TEXT_PLAIN); 186 | writeEndHeaders(i, index); 187 | writeBody(i, index, path); 188 | outputBytes[index+0] = ((i-16) >> 0) & 0xFF; // response length (bytes written after offset 16), low byte first 189 | outputBytes[index+1] = ((i-16) >> 8) & 0xFF; // right shift: a left shift here always masked to 0 190 | outputBytes[index+2] = ((i-16) >> 16) & 0xFF; 191 | outputBytes[index+3] = ((i-16) >> 24) & 0xFF; 192 | } 193 | 194 | void main() { 195 | uint index = BLK_SZ * REQUESTS_PER_INVOCATION * (gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * (gl_NumWorkGroups.x * gl_WorkGroupSize.x)); 196 | for (uint i = 0; i < REQUESTS_PER_INVOCATION; i++) { 197 | handleRequest(index + BLK_SZ * i); // step one BLK_SZ block per iteration; the old offset ignored i and re-ran the same block 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /spirv-io/lib/malloc.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define ptr_t int32_t 4 | #define size_t int32_t 5 | 6 | struct alloc_t { ptr_t x; ptr_t y; }; 7 | struct pair_t { alloc_t x; alloc_t y; }; 8 | 9 | #define INDEX_SIZE 4 10 | 11 | #define FREE(f) { int32_t _hp_ = heapPtr; f; heapPtr = _hp_; } 12 | 13 | layout(std430, binding = 0) buffer indexBuf { ptr_t indexHeap[]; }; 14 | 15 | layout(std430, binding = 0) buffer f32Buffer { float32_t f32heap[]; }; 16 | layout(std430, binding = 0) buffer f64Buffer { float64_t f64heap[]; }; 17 | 18 | layout(std430, binding = 0) buffer u8Buffer { uint8_t u8heap[]; }; 19 | layout(std430, binding = 0) buffer u16Buffer { uint16_t u16heap[]; }; 20 | layout(std430, binding = 0) buffer u32Buffer { uint32_t u32heap[]; }; 21 | layout(std430, binding = 0) buffer u64Buffer { uint64_t u64heap[]; }; 22 | 23 | layout(std430, binding = 0) buffer i8Buffer { int8_t i8heap[]; }; 24 | layout(std430, binding = 0) buffer i16Buffer { int16_t i16heap[]; }; 25 | layout(std430, binding = 0) buffer i32Buffer { int32_t i32heap[]; }; 26 | layout(std430, binding = 0) buffer i64Buffer { int64_t i64heap[]; }; 27 | 28 | #ifdef FLOAT16 29 | layout(std430, binding = 0) buffer f16Buffer { float16_t f16heap[]; }; 30 | 
layout(std430, binding = 0) buffer f16v2Buffer { f16vec2 f16v2heap[]; }; 31 | layout(std430, binding = 0) buffer f16v3Buffer { f16vec3 f16v3heap[]; }; 32 | layout(std430, binding = 0) buffer f16v4Buffer { f16vec4 f16v4heap[]; }; 33 | layout(std430, binding = 0) buffer f16m2Buffer { f16mat2 f16m2heap[]; }; 34 | layout(std430, binding = 0) buffer f16m3Buffer { f16mat3 f16m3heap[]; }; 35 | layout(std430, binding = 0) buffer f16m4Buffer { f16mat4 f16m4heap[]; }; 36 | #endif 37 | 38 | layout(std430, binding = 0) buffer f32v2Buffer { f32vec2 f32v2heap[]; }; 39 | layout(std430, binding = 0) buffer f64v2Buffer { f64vec2 f64v2heap[]; }; 40 | 41 | layout(std430, binding = 0) buffer f32v3Buffer { f32vec3 f32v3heap[]; }; 42 | layout(std430, binding = 0) buffer f64v3Buffer { f64vec3 f64v3heap[]; }; 43 | 44 | layout(std430, binding = 0) buffer f32v4Buffer { f32vec4 f32v4heap[]; }; 45 | layout(std430, binding = 0) buffer f64v4Buffer { f64vec4 f64v4heap[]; }; 46 | 47 | layout(std430, binding = 0) buffer u8v2Buffer { u8vec2 u8v2heap[]; }; 48 | layout(std430, binding = 0) buffer u16v2Buffer { u16vec2 u16v2heap[]; }; 49 | layout(std430, binding = 0) buffer u32v2Buffer { u32vec2 u32v2heap[]; }; 50 | layout(std430, binding = 0) buffer u64v2Buffer { u64vec2 u64v2heap[]; }; 51 | 52 | layout(std430, binding = 0) buffer u8v3Buffer { u8vec3 u8v3heap[]; }; 53 | layout(std430, binding = 0) buffer u16v3Buffer { u16vec3 u16v3heap[]; }; 54 | layout(std430, binding = 0) buffer u32v3Buffer { u32vec3 u32v3heap[]; }; 55 | layout(std430, binding = 0) buffer u64v3Buffer { u64vec3 u64v3heap[]; }; 56 | 57 | layout(std430, binding = 0) buffer u8v4Buffer { u8vec4 u8v4heap[]; }; 58 | layout(std430, binding = 0) buffer u16v4Buffer { u16vec4 u16v4heap[]; }; 59 | layout(std430, binding = 0) buffer u32v4Buffer { u32vec4 u32v4heap[]; }; 60 | layout(std430, binding = 0) buffer u64v4Buffer { u64vec4 u64v4heap[]; }; 61 | 62 | layout(std430, binding = 0) buffer i8v2Buffer { i8vec2 i8v2heap[]; }; 63 | layout(std430, 
binding = 0) buffer i16v2Buffer { i16vec2 i16v2heap[]; }; 64 | layout(std430, binding = 0) buffer i32v2Buffer { i32vec2 i32v2heap[]; }; 65 | layout(std430, binding = 0) buffer i64v2Buffer { i64vec2 i64v2heap[]; }; 66 | 67 | layout(std430, binding = 0) buffer i8v3Buffer { i8vec3 i8v3heap[]; }; 68 | layout(std430, binding = 0) buffer i16v3Buffer { i16vec3 i16v3heap[]; }; 69 | layout(std430, binding = 0) buffer i32v3Buffer { i32vec3 i32v3heap[]; }; 70 | layout(std430, binding = 0) buffer i64v3Buffer { i64vec3 i64v3heap[]; }; 71 | 72 | layout(std430, binding = 0) buffer i8v4Buffer { i8vec4 i8v4heap[]; }; 73 | layout(std430, binding = 0) buffer i16v4Buffer { i16vec4 i16v4heap[]; }; 74 | layout(std430, binding = 0) buffer i32v4Buffer { i32vec4 i32v4heap[]; }; 75 | layout(std430, binding = 0) buffer i64v4Buffer { i64vec4 i64v4heap[]; }; 76 | 77 | layout(std430, binding = 0) buffer f32m2Buffer { f32mat2 f32m2heap[]; }; 78 | layout(std430, binding = 0) buffer f64m2Buffer { f64mat2 f64m2heap[]; }; 79 | 80 | layout(std430, binding = 0) buffer f32m3Buffer { f32mat3 f32m3heap[]; }; 81 | layout(std430, binding = 0) buffer f64m3Buffer { f64mat3 f64m3heap[]; }; 82 | 83 | layout(std430, binding = 0) buffer f32m4Buffer { f32mat4 f32m4heap[]; }; 84 | layout(std430, binding = 0) buffer f64m4Buffer { f64mat4 f64m4heap[]; }; 85 | 86 | 87 | ptr_t heapStart = ThreadId * HeapSize; 88 | ptr_t heapEnd = heapStart + HeapSize; 89 | 90 | ptr_t heapPtr = heapStart; 91 | 92 | ptr_t groupHeapStart = ThreadGroupId * GroupHeapSize; 93 | ptr_t groupHeapEnd = groupHeapStart + GroupHeapSize; 94 | 95 | shared ptr_t groupHeapPtr; 96 | 97 | size_t allocSize(alloc_t a) { 98 | return a.y - a.x; 99 | } 100 | 101 | alloc_t malloc(size_t len) { 102 | ptr_t ptr = heapPtr; 103 | heapPtr += len; 104 | return alloc_t(ptr, heapPtr); 105 | } 106 | 107 | alloc_t malloc(size_t len, size_t align) { 108 | ptr_t ptr = ((heapPtr+(align-1)) / align) * align; 109 | heapPtr = ptr + len; 110 | return alloc_t(ptr, heapPtr); 
111 | } 112 | 113 | alloc_t malloc(uint64_t len) { 114 | ptr_t ptr = heapPtr; 115 | heapPtr += ptr_t(len); 116 | return alloc_t(ptr, heapPtr); 117 | } 118 | 119 | alloc_t malloc(uint64_t len, size_t align) { 120 | ptr_t ptr = ((heapPtr+(align-1)) / align) * align; 121 | heapPtr = ptr + ptr_t(len); 122 | return alloc_t(ptr, heapPtr); 123 | } 124 | 125 | alloc_t malloc(uint32_t len) { 126 | ptr_t ptr = heapPtr; 127 | heapPtr += ptr_t(len); 128 | return alloc_t(ptr, heapPtr); 129 | } 130 | 131 | alloc_t malloc(uint32_t len, size_t align) { 132 | ptr_t ptr = ((heapPtr+(align-1)) / align) * align; 133 | heapPtr = ptr + ptr_t(len); 134 | return alloc_t(ptr, heapPtr); 135 | } 136 | 137 | ptr_t toIndexPtr(ptr_t ptr) { 138 | return ((ptr+(INDEX_SIZE-1)) / INDEX_SIZE); 139 | } 140 | 141 | ptr_t fromIndexPtr(ptr_t ptr) { 142 | return ptr * INDEX_SIZE; 143 | } 144 | -------------------------------------------------------------------------------- /http_shader/cpp/httpd_ivec4.glsl: -------------------------------------------------------------------------------- 1 | #define version #version 2 | 3 | version 450 4 | 5 | #include "../chr.glsl" 6 | 7 | #define REQUEST_SIZE 1024 8 | #define RESPONSE_SIZE 1024 9 | #define HEAP_SIZE 1024 10 | 11 | #define REQUESTS_PER_INVOCATION 1024 12 | 13 | #define HEAP_TOTAL_SZ (32 * 16 * 1024 * (HEAP_SIZE / 16)) 14 | 15 | layout ( local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in; 16 | 17 | layout(std430, binding = 0) readonly buffer inputBuffer { highp ivec4 inputBytes[]; }; 18 | layout(std430, binding = 1) buffer outputBuffer { highp ivec4 outputBytes[]; }; 19 | layout(std430, binding = 2) buffer heapBuffer { highp ivec4 heap[]; }; 20 | 21 | const highp int METHOD_GET = 'GET '; 22 | const highp int METHOD_POST = 'POST'; 23 | const highp int METHOD_OPTION = 'OPTI'; 24 | const highp int METHOD_UNKNOWN = 0; 25 | 26 | void setE(inout ivec4 v, int i, int value) { 27 | if (i == 0) v.x = value; 28 | else if (i == 1) v.y = value; 29 | else if (i 
== 2) v.z = value; 30 | else v.w = value; 31 | } 32 | 33 | int getE(ivec4 v, int i) { 34 | int value = v.x; 35 | if (i == 1) value = v.y; 36 | else if (i == 2) value = v.z; 37 | else if (i == 3) value = v.w; 38 | return value; 39 | } 40 | 41 | void main() { 42 | int wgId = int(gl_GlobalInvocationID.x) * REQUESTS_PER_INVOCATION; 43 | 44 | for (int j = 0; j < REQUESTS_PER_INVOCATION; j++) { 45 | int reqOff = (wgId+j) * (REQUEST_SIZE / 16); 46 | int resOff = (wgId+j) * (RESPONSE_SIZE / 16); 47 | 48 | // Parse request in format 49 | // [GET |/xxx|xxxx| HTT|P/1.|1\r\n.|...] 50 | // [POST| /xx|xxxx|x HT|TP/1|.1\r\n|....\r\nmimetype\r\n\r\npost body] 51 | ivec4 requestInfo = inputBytes[reqOff]; 52 | if (requestInfo.x == 0) { // skip empty requests 53 | continue; 54 | } 55 | ivec4 req = inputBytes[reqOff+1]; 56 | ivec4 req2 = inputBytes[reqOff+2]; 57 | int method = req.x; 58 | 59 | int i = resOff; 60 | 61 | if (method == METHOD_GET) { 62 | // Parse key from path /xxxxxxx 63 | int key = ( 64 | (((req.y >> 8) & 0xFF) - 48) * 1000000 + 65 | (((req.y >> 16) & 0xFF) - 48) * 100000 + 66 | (((req.y >> 24) & 0xFF) - 48) * 10000 + 67 | (((req.z >> 0) & 0xFF) - 48) * 1000 + 68 | (((req.z >> 8) & 0xFF) - 48) * 100 + 69 | (((req.z >> 16) & 0xFF) - 48) * 10 + 70 | (((req.z >> 24) & 0xFF) - 48) * 1 71 | ) * (HEAP_SIZE / 16); 72 | // Check that the key is valid and fetch the content from the heap buffer if so. 73 | if (key >= 0 && key < HEAP_TOTAL_SZ && heap[key].x > 0 && heap[key].x <= RESPONSE_SIZE - 3 * 16) { 74 | int locked = atomicCompSwap(heap[key].w, 0, 1); // heap[key].w: 0 = free, > 0 = active readers, -1 = held by a POST 75 | if (locked >= 0) { 76 | if (locked > 0) atomicAdd(heap[key].w, 1); // a successful CAS (0 -> 1) already counted this reader; adding again left the slot locked for POSTs 77 | outputBytes[i+0] = ivec4(2*16 + heap[key].x, 0, 0, 0); 78 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 79 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: '); 80 | int len = heap[key].x / 16 + (heap[key].x % 16 > 0 ? 
1 : 0); 81 | for (int k = 0; k < len; k++) { 82 | outputBytes[i+3+k] = heap[key+1+k]; 83 | } 84 | atomicAdd(heap[key].w, -1); 85 | } else { 86 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 87 | outputBytes[i+1] = ivec4('400 ', 'NO H', 'TTP/', '1.1\r'); 88 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 89 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK'); 90 | } 91 | continue; 92 | } 93 | } else if (method == METHOD_POST) { 94 | // Parse key from path /xxxxxxx 95 | int key = ( 96 | (((req.y >> 16) & 0xFF) - 48) * 1000000 + 97 | (((req.y >> 24) & 0xFF) - 48) * 100000 + 98 | (((req.z >> 0) & 0xFF) - 48) * 10000 + 99 | (((req.z >> 8) & 0xFF) - 48) * 1000 + 100 | (((req.z >> 16) & 0xFF) - 48) * 100 + 101 | (((req.z >> 24) & 0xFF) - 48) * 10 + 102 | (((req.w >> 0) & 0xFF) - 48) * 1 103 | ) * (HEAP_SIZE / 16); 104 | // If the key is valid, replace the content in the heap buffer with the post body. 105 | if (key >= 0 && key < HEAP_TOTAL_SZ) { 106 | int locked = atomicCompSwap(heap[key].w, 0, -1); 107 | if (locked == 0) { 108 | int rnrn = 0; 109 | int readStart = 0; 110 | int readEnd = 512; 111 | ivec4 w = ivec4(0); 112 | int l = 0; 113 | int hi = 0; 114 | for (int k = 13; k < REQUEST_SIZE && k < HEAP_SIZE; k++) { 115 | int v4i = k / 16; 116 | int vi = k - (v4i * 16); 117 | int c = vi / 4; 118 | int b = vi - (c * 4); 119 | int chr = (getE(inputBytes[reqOff + 1 + v4i], c) >> (b * 8)) & 0xFF; 120 | if (readStart > 0) { 121 | if (chr == 0) { 122 | readEnd = k; 123 | break; 124 | } 125 | int wc = l / 4; 126 | int wb = l - (wc * 4); 127 | setE(w, wc, getE(w, wc) | (chr << (wb * 8))); 128 | l++; 129 | if (l == 16) { 130 | heap[key+1+hi] = w; 131 | hi++; 132 | w *= 0; 133 | l = 0; 134 | } 135 | } else if (chr == CHR_CR && (rnrn & 1) == 0) { 136 | rnrn++; 137 | } else if (chr == CHR_LF && (rnrn & 1) == 1) { 138 | rnrn++; 139 | if (rnrn == 4) { 140 | readStart = k; 141 | } 142 | } else { 143 | rnrn = 0; 144 | } 145 | } 146 | if (l > 0 && (1 + hi) < 
(HEAP_SIZE/16)) { 147 | heap[key+1+hi] = w; 148 | } 149 | heap[key].x = readEnd - readStart; 150 | heap[key].w = 0; 151 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 152 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 153 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 154 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nOK.'); 155 | } else { 156 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 157 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 158 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 159 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK'); 160 | } 161 | continue; 162 | } 163 | } 164 | outputBytes[i+0] = ivec4(3*16 - 3*4 - 2, 0, 0, 0); 165 | outputBytes[i+1] = ivec4('500 ', 'BAD ', 'HTTP', '/1.1'); 166 | outputBytes[i+2] = ivec4('\r\n\r\n', req.x, req.y, req.z); 167 | outputBytes[i+3] = ivec4(req.w, req2.x, req2.y, req2.z); 168 | 169 | } 170 | 171 | } 172 | -------------------------------------------------------------------------------- /http_shader/vulkan/httpd_ivec4.glsl: -------------------------------------------------------------------------------- 1 | #define version #version 2 | 3 | version 450 4 | 5 | #include "../chr.glsl" 6 | 7 | #define REQUEST_SIZE 1024 8 | #define RESPONSE_SIZE 1024 9 | #define HEAP_SIZE 1024 10 | 11 | #define REQUESTS_PER_INVOCATION 1024 12 | 13 | #define HEAP_TOTAL_SZ (32 * 16 * 1024 * (HEAP_SIZE / 16)) 14 | 15 | layout ( local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in; 16 | 17 | layout(std430, binding = 0) readonly buffer inputBuffer { highp ivec4 inputBytes[]; }; 18 | layout(std430, binding = 1) buffer outputBuffer { highp ivec4 outputBytes[]; }; 19 | layout(std430, binding = 2) buffer heapBuffer { highp ivec4 heap[]; }; 20 | 21 | const highp int METHOD_GET = 'GET '; 22 | const highp int METHOD_POST = 'POST'; 23 | const highp int METHOD_OPTION = 'OPTI'; 24 | const highp int METHOD_UNKNOWN = 0; 25 | 26 | void setE(inout ivec4 v, int i, int value) { 
27 | if (i == 0) v.x = value; 28 | else if (i == 1) v.y = value; 29 | else if (i == 2) v.z = value; 30 | else v.w = value; 31 | } 32 | 33 | int getE(ivec4 v, int i) { 34 | int value = v.x; 35 | if (i == 1) value = v.y; 36 | else if (i == 2) value = v.z; 37 | else if (i == 3) value = v.w; 38 | return value; 39 | } 40 | 41 | void main() { 42 | int wgId = int(gl_GlobalInvocationID.x) * REQUESTS_PER_INVOCATION; 43 | 44 | for (int j = 0; j < REQUESTS_PER_INVOCATION; j++) { 45 | int reqOff = (wgId+j) * (REQUEST_SIZE / 16); 46 | int resOff = (wgId+j) * (RESPONSE_SIZE / 16); 47 | 48 | // Parse request in format 49 | // [GET |/xxx|xxxx| HTT|P/1.|1\r\n.|...] 50 | // [POST| /xx|xxxx|x HT|TP/1|.1\r\n|....\r\nmimetype\r\n\r\npost body] 51 | ivec4 requestInfo = inputBytes[reqOff]; 52 | if (requestInfo.x == 0) { // skip empty requests 53 | continue; 54 | } 55 | ivec4 req = inputBytes[reqOff+1]; 56 | ivec4 req2 = inputBytes[reqOff+2]; 57 | int method = req.x; 58 | 59 | int i = resOff; 60 | 61 | if (method == METHOD_GET) { 62 | // Parse key from path /xxxxxxx 63 | int key = ( 64 | (((req.y >> 8) & 0xFF) - 48) * 1000000 + 65 | (((req.y >> 16) & 0xFF) - 48) * 100000 + 66 | (((req.y >> 24) & 0xFF) - 48) * 10000 + 67 | (((req.z >> 0) & 0xFF) - 48) * 1000 + 68 | (((req.z >> 8) & 0xFF) - 48) * 100 + 69 | (((req.z >> 16) & 0xFF) - 48) * 10 + 70 | (((req.z >> 24) & 0xFF) - 48) * 1 71 | ) * (HEAP_SIZE / 16); 72 | // Check that the key is valid and fetch the content from the heap buffer if so. 73 | if (key >= 0 && key < HEAP_TOTAL_SZ && heap[key].x > 0 && heap[key].x <= RESPONSE_SIZE - 3 * 16) { 74 | int locked = atomicCompSwap(heap[key].w, 0, 1); // heap[key].w: 0 = free, > 0 = active readers, -1 = held by a POST 75 | if (locked >= 0) { 76 | if (locked > 0) atomicAdd(heap[key].w, 1); // a successful CAS (0 -> 1) already counted this reader; adding again left the slot locked for POSTs 77 | outputBytes[i+0] = ivec4(2*16 + heap[key].x, 0, 0, 0); 78 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 79 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: '); 80 | int len = heap[key].x / 16 + (heap[key].x % 16 > 0 ? 
1 : 0); 81 | for (int k = 0; k < len; k++) { 82 | outputBytes[i+3+k] = heap[key+1+k]; 83 | } 84 | atomicAdd(heap[key].w, -1); 85 | } else { 86 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 87 | outputBytes[i+1] = ivec4('400 ', 'NO H', 'TTP/', '1.1\r'); 88 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 89 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK'); 90 | } 91 | continue; 92 | } 93 | } else if (method == METHOD_POST) { 94 | // Parse key from path /xxxxxxx 95 | int key = ( 96 | (((req.y >> 16) & 0xFF) - 48) * 1000000 + 97 | (((req.y >> 24) & 0xFF) - 48) * 100000 + 98 | (((req.z >> 0) & 0xFF) - 48) * 10000 + 99 | (((req.z >> 8) & 0xFF) - 48) * 1000 + 100 | (((req.z >> 16) & 0xFF) - 48) * 100 + 101 | (((req.z >> 24) & 0xFF) - 48) * 10 + 102 | (((req.w >> 0) & 0xFF) - 48) * 1 103 | ) * (HEAP_SIZE / 16); 104 | // If the key is valid, replace the content in the heap buffer with the post body. 105 | if (key >= 0 && key < HEAP_TOTAL_SZ) { 106 | int locked = atomicCompSwap(heap[key].w, 0, -1); 107 | if (locked == 0) { 108 | int rnrn = 0; 109 | int readStart = 0; 110 | int readEnd = 512; 111 | ivec4 w = ivec4(0); 112 | int l = 0; 113 | int hi = 0; 114 | for (int k = 13; k < REQUEST_SIZE && k < HEAP_SIZE; k++) { 115 | int v4i = k / 16; 116 | int vi = k - (v4i * 16); 117 | int c = vi / 4; 118 | int b = vi - (c * 4); 119 | int chr = (getE(inputBytes[reqOff + 1 + v4i], c) >> (b * 8)) & 0xFF; 120 | if (readStart > 0) { 121 | if (chr == 0) { 122 | readEnd = k; 123 | break; 124 | } 125 | int wc = l / 4; 126 | int wb = l - (wc * 4); 127 | setE(w, wc, getE(w, wc) | (chr << (wb * 8))); 128 | l++; 129 | if (l == 16) { 130 | heap[key+1+hi] = w; 131 | hi++; 132 | w *= 0; 133 | l = 0; 134 | } 135 | } else if (chr == CHR_CR && (rnrn & 1) == 0) { 136 | rnrn++; 137 | } else if (chr == CHR_LF && (rnrn & 1) == 1) { 138 | rnrn++; 139 | if (rnrn == 4) { 140 | readStart = k; 141 | } 142 | } else { 143 | rnrn = 0; 144 | } 145 | } 146 | if (l > 0 && (1 + hi) < 
(HEAP_SIZE/16)) { 147 | heap[key+1+hi] = w; 148 | } 149 | heap[key].x = readEnd - readStart; 150 | heap[key].w = 0; 151 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 152 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 153 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 154 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nOK.'); 155 | } else { 156 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 157 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 158 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 159 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK'); 160 | } 161 | continue; 162 | } 163 | } 164 | outputBytes[i+0] = ivec4(3*16 - 3*4 - 2, 0, 0, 0); 165 | outputBytes[i+1] = ivec4('500 ', 'BAD ', 'HTTP', '/1.1'); 166 | outputBytes[i+2] = ivec4('\r\n\r\n', req.x, req.y, req.z); 167 | outputBytes[i+3] = ivec4(req.w, req2.x, req2.y, req2.z); 168 | 169 | } 170 | 171 | } 172 | -------------------------------------------------------------------------------- /http_shader/ispc_ivec4/httpd_ivec4.glsl: -------------------------------------------------------------------------------- 1 | #define version #version 2 | 3 | version 450 4 | 5 | #include "../chr.glsl" 6 | 7 | #define REQUEST_SIZE 1024 8 | #define RESPONSE_SIZE 1024 9 | #define HEAP_SIZE 1024 10 | 11 | #define REQUESTS_PER_INVOCATION 1024 12 | 13 | #define HEAP_TOTAL_SZ (32 * 16 * 1024 * (HEAP_SIZE / 16)) 14 | 15 | layout ( local_size_x = 16, local_size_y = 1, local_size_z = 1 ) in; 16 | 17 | layout(std430, binding = 0) readonly buffer inputBuffer { highp ivec4 inputBytes[]; }; 18 | layout(std430, binding = 1) buffer outputBuffer { highp ivec4 outputBytes[]; }; 19 | layout(std430, binding = 2) buffer heapBuffer { highp ivec4 heap[]; }; 20 | 21 | const highp int METHOD_GET = 'GET '; 22 | const highp int METHOD_POST = 'POST'; 23 | const highp int METHOD_OPTION = 'OPTI'; 24 | const highp int METHOD_UNKNOWN = 0; 25 | 26 | void setE(inout ivec4 v, int i, int 
value) { 27 | if (i == 0) v.x = value; 28 | else if (i == 1) v.y = value; 29 | else if (i == 2) v.z = value; 30 | else v.w = value; 31 | } 32 | 33 | int getE(ivec4 v, int i) { 34 | int value = v.x; 35 | if (i == 1) value = v.y; 36 | else if (i == 2) value = v.z; 37 | else if (i == 3) value = v.w; 38 | return value; 39 | } 40 | 41 | void main() { 42 | int wgId = int(gl_GlobalInvocationID.x) * REQUESTS_PER_INVOCATION; 43 | 44 | for (int j = 0; j < REQUESTS_PER_INVOCATION; j++) { 45 | int reqOff = (wgId+j) * (REQUEST_SIZE / 16); 46 | int resOff = (wgId+j) * (RESPONSE_SIZE / 16); 47 | 48 | // Parse request in format 49 | // [GET |/xxx|xxxx| HTT|P/1.|1\r\n.|...] 50 | // [POST| /xx|xxxx|x HT|TP/1|.1\r\n|....\r\nmimetype\r\n\r\npost body] 51 | ivec4 requestInfo = inputBytes[reqOff]; 52 | if (requestInfo.x == 0) { // skip empty requests 53 | continue; 54 | } 55 | ivec4 req = inputBytes[reqOff+1]; 56 | ivec4 req2 = inputBytes[reqOff+2]; 57 | int method = req.x; 58 | 59 | int i = resOff; 60 | 61 | if (method == METHOD_GET) { 62 | // Parse key from path /xxxxxxx 63 | int key = ( 64 | (((req.y >> 8) & 0xFF) - 48) * 1000000 + 65 | (((req.y >> 16) & 0xFF) - 48) * 100000 + 66 | (((req.y >> 24) & 0xFF) - 48) * 10000 + 67 | (((req.z >> 0) & 0xFF) - 48) * 1000 + 68 | (((req.z >> 8) & 0xFF) - 48) * 100 + 69 | (((req.z >> 16) & 0xFF) - 48) * 10 + 70 | (((req.z >> 24) & 0xFF) - 48) * 1 71 | ) * (HEAP_SIZE / 16); 72 | // Check that the key is valid and fetch the content from the heap buffer if so. 73 | if (key >= 0 && key < HEAP_TOTAL_SZ && heap[key].x > 0 && heap[key].x <= RESPONSE_SIZE - 3 * 16) { 74 | int locked = atomicCompSwap(heap[key].w, 0, 1); // heap[key].w: 0 = free, > 0 = active readers, -1 = held by a POST 75 | if (locked >= 0) { 76 | if (locked > 0) atomicAdd(heap[key].w, 1); // a successful CAS (0 -> 1) already counted this reader; adding again left the slot locked for POSTs 77 | outputBytes[i+0] = ivec4(2*16 + heap[key].x, 0, 0, 0); 78 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 79 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: '); 80 | int len = heap[key].x / 16 + (heap[key].x % 16 > 0 ? 
1 : 0); 81 | for (int k = 0; k < len; k++) { 82 | outputBytes[i+3+k] = heap[key+1+k]; 83 | } 84 | atomicAdd(heap[key].w, -1); 85 | } else { 86 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 87 | outputBytes[i+1] = ivec4('400 ', 'NO H', 'TTP/', '1.1\r'); 88 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 89 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK'); 90 | } 91 | continue; 92 | } 93 | } else if (method == METHOD_POST) { 94 | // Parse key from path /xxxxxxx 95 | int key = ( 96 | (((req.y >> 16) & 0xFF) - 48) * 1000000 + 97 | (((req.y >> 24) & 0xFF) - 48) * 100000 + 98 | (((req.z >> 0) & 0xFF) - 48) * 10000 + 99 | (((req.z >> 8) & 0xFF) - 48) * 1000 + 100 | (((req.z >> 16) & 0xFF) - 48) * 100 + 101 | (((req.z >> 24) & 0xFF) - 48) * 10 + 102 | (((req.w >> 0) & 0xFF) - 48) * 1 103 | ) * (HEAP_SIZE / 16); 104 | // If the key is valid, replace the content in the heap buffer with the post body. 105 | if (key >= 0 && key < HEAP_TOTAL_SZ) { 106 | int locked = atomicCompSwap(heap[key].w, 0, -1); 107 | if (locked == 0) { 108 | int rnrn = 0; 109 | int readStart = 0; 110 | int readEnd = 512; 111 | ivec4 w = ivec4(0); 112 | int l = 0; 113 | int hi = 0; 114 | for (int k = 13; k < REQUEST_SIZE && k < HEAP_SIZE; k++) { 115 | int v4i = k / 16; 116 | int vi = k - (v4i * 16); 117 | int c = vi / 4; 118 | int b = vi - (c * 4); 119 | int chr = (getE(inputBytes[reqOff + 1 + v4i], c) >> (b * 8)) & 0xFF; 120 | if (readStart > 0) { 121 | if (chr == 0) { 122 | readEnd = k; 123 | break; 124 | } 125 | int wc = l / 4; 126 | int wb = l - (wc * 4); 127 | setE(w, wc, getE(w, wc) | (chr << (wb * 8))); 128 | l++; 129 | if (l == 16) { 130 | heap[key+1+hi] = w; 131 | hi++; 132 | w *= 0; 133 | l = 0; 134 | } 135 | } else if (chr == CHR_CR && (rnrn & 1) == 0) { 136 | rnrn++; 137 | } else if (chr == CHR_LF && (rnrn & 1) == 1) { 138 | rnrn++; 139 | if (rnrn == 4) { 140 | readStart = k; 141 | } 142 | } else { 143 | rnrn = 0; 144 | } 145 | } 146 | if (l > 0 && (1 + hi) < 
(HEAP_SIZE/16)) { 147 | heap[key+1+hi] = w; 148 | } 149 | heap[key].x = readEnd - readStart; 150 | heap[key].w = 0; 151 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 152 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 153 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 154 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nOK.'); 155 | } else { 156 | outputBytes[i+0] = ivec4(3*16, 0, 0, 0); 157 | outputBytes[i+1] = ivec4('200 ', 'OK H', 'TTP/', '1.1\r'); 158 | outputBytes[i+2] = ivec4('\ncon', 'tent', '-typ', 'e: t'); 159 | outputBytes[i+3] = ivec4('ext/', 'plai', 'n\r\n\r', '\nBLK'); 160 | } 161 | continue; 162 | } 163 | } 164 | outputBytes[i+0] = ivec4(3*16 - 3*4 - 2, 0, 0, 0); 165 | outputBytes[i+1] = ivec4('500 ', 'BAD ', 'HTTP', '/1.1'); 166 | outputBytes[i+2] = ivec4('\r\n\r\n', req.x, req.y, req.z); 167 | outputBytes[i+3] = ivec4(req.w, req2.x, req2.y, req2.z); 168 | 169 | } 170 | 171 | } 172 | -------------------------------------------------------------------------------- /http_shader/ispc_int/httpd_int.glsl: -------------------------------------------------------------------------------- 1 | #define version #version 2 | 3 | version 450 4 | 5 | #include "../chr.glsl" 6 | 7 | #define STRIDE 32 8 | 9 | #define BSZ 1024 10 | 11 | #define heapStrCopy(str, SRC, DST, i, index) {int _s = (str).x; int _e = (str).y; while (_s < _e) {(DST)[index+i*STRIDE] = (SRC)[index+_s*STRIDE]; i++; _s++;}} 12 | #define strCopySlice(SRC, DST, i, index, start, end) {int _s = start; int _e = end; while (_s < _e) {(DST)[index+i*STRIDE] = (SRC)[_s++]; i++;}} 13 | #define strCopy(SRC, DST, i, index) {int _str[] = SRC; strCopySlice(_str, DST, i, index, 0, _str.length())} 14 | #define W(chr) response[index + i*STRIDE] = (chr); i++; 15 | 16 | #define A_OK if (i > BSZ) { return error(index); } 17 | 18 | layout ( local_size_x = STRIDE, local_size_y = 1, local_size_z = 1 ) in; 19 | 20 | layout(std430, binding = 0) readonly buffer inputBuffer { highp int 
inputBytes[]; }; 21 | layout(std430, binding = 1) buffer outputBuffer { highp int outputBytes[]; }; 22 | layout(std430, binding = 2) buffer heapBuffer { lowp int heap[]; }; 23 | layout(std430, binding = 3) buffer requestBuffer { lowp int request[]; }; 24 | layout(std430, binding = 4) buffer responseBuffer { lowp int response[]; }; 25 | 26 | const highp int METHOD_GET = 'GET '; 27 | const highp int METHOD_POST = 'POST'; 28 | const highp int METHOD_OPTION = 'OPTI'; 29 | const highp int METHOD_UNKNOWN = 0; 30 | 31 | const lowp int PROTOCOL_UNKNOWN = 0; 32 | const lowp int PROTOCOL_HTTP10 = '/1.0'; 33 | const lowp int PROTOCOL_HTTP11 = '/1.1'; 34 | 35 | const lowp int MIME_TEXT_PLAIN = 0; 36 | const lowp int MIME_TEXT_HTML = 1; 37 | 38 | 39 | int strLen(ivec2 str) { 40 | return str.y - str.x; 41 | } 42 | 43 | 44 | struct header { 45 | ivec2 name; 46 | ivec2 value; 47 | }; 48 | 49 | void readRequestUntilChar(inout int i, int index, int endChar, out ivec2 str) { 50 | str.x = i; 51 | while (i < BSZ && request[index+i*STRIDE] != endChar) { 52 | i++; 53 | } 54 | str.y = i; 55 | i++; 56 | } 57 | 58 | void readMethod(inout int i, int index, out int method) { 59 | int j = index + i * STRIDE; 60 | if ( 61 | request[j+0*STRIDE] == CHR_G && 62 | request[j+1*STRIDE] == CHR_E && 63 | request[j+2*STRIDE] == CHR_T && 64 | request[j+3*STRIDE] == CHR_SPACE 65 | ) { 66 | method = METHOD_GET; 67 | i += 4; 68 | return; 69 | } else if ( 70 | request[j+0*STRIDE] == CHR_P && 71 | request[j+1*STRIDE] == CHR_O && 72 | request[j+2*STRIDE] == CHR_S && 73 | request[j+3*STRIDE] == CHR_T && 74 | request[j+4*STRIDE] == CHR_SPACE 75 | ) { 76 | method = METHOD_POST; 77 | i += 5; 78 | return; 79 | } else if (request[j+0*STRIDE] == CHR_O && request[j+6*STRIDE] == CHR_SPACE) { 80 | method = METHOD_OPTION; 81 | i += 7; 82 | return; 83 | } 84 | method = METHOD_UNKNOWN; 85 | i = BSZ+1; 86 | } 87 | 88 | void readPath(inout int i, int index, out ivec2 path) { 89 | readRequestUntilChar(i, index, CHR_SPACE, 
path); 90 | } 91 | 92 | void readProtocol(inout int i, int index, out int protocol) { 93 | ivec2 protocolString; 94 | readRequestUntilChar(i, index, CHR_CR, protocolString); 95 | if (i < BSZ && request[index+i*STRIDE] == CHR_LF) { 96 | i++; 97 | if (request[index+(protocolString.y-1)*STRIDE] == CHR_1) { 98 | protocol = PROTOCOL_HTTP11; 99 | } else { 100 | protocol = PROTOCOL_HTTP10; 101 | } 102 | } else { 103 | protocol = PROTOCOL_UNKNOWN; 104 | i = BSZ+1; 105 | } 106 | } 107 | 108 | bool readHeader(inout int i, int index, out header hdr) { 109 | if (request[index+i*STRIDE] == CHR_CR) { 110 | i += 2; 111 | return true; 112 | } 113 | readRequestUntilChar(i, index, CHR_COLON, hdr.name); 114 | while (i < BSZ && request[index+i*STRIDE] == CHR_SPACE) i++; 115 | readRequestUntilChar(i, index, CHR_CR, hdr.value); 116 | i++; 117 | return false; 118 | } 119 | 120 | void writeCRLF(inout int i, int index) { 121 | W(CHR_CR); 122 | W(CHR_LF); 123 | } 124 | 125 | void writeStatus(inout int i, int index, int statusCode) { 126 | strCopy("HTTP/1.1 ", response, i, index); 127 | if (statusCode == 200) { 128 | strCopy("200 OK", response, i, index); 129 | } else { 130 | strCopy("500 Error", response, i, index); 131 | } 132 | writeCRLF(i, index); 133 | } 134 | 135 | void writeContentType(inout int i, int index, int contentType) { 136 | int contentTypeString[] = "Content-Type: "; 137 | strCopy(contentTypeString, response, i, index); 138 | if (contentType == MIME_TEXT_PLAIN) { 139 | strCopy("text/plain", response, i, index); 140 | } else { 141 | strCopy("text/html", response, i, index); 142 | } 143 | writeCRLF(i, index); 144 | } 145 | 146 | void writeBody(inout int i, int index, ivec2 path, header headers[32], int headerCount) { 147 | strCopy("Hello, World!", response, i, index); 148 | W(CHR_LF); 149 | for (int j = 0; j < 32; j++) { 150 | if (j >= headerCount) break; 151 | ivec2 name = headers[j].name; 152 | ivec2 value = headers[j].value; 153 | if (strLen(name) + 3 + strLen(value) + i > 
1023) break; 154 | heapStrCopy(name, request, response, i, index); 155 | strCopy(": ", response, i, index); 156 | heapStrCopy(value, request, response, i, index); 157 | W(CHR_LF); 158 | } 159 | } 160 | 161 | int error(int index) { 162 | int i = 0; 163 | writeStatus(i, index, 500); 164 | writeContentType(i, index, MIME_TEXT_PLAIN); 165 | writeCRLF(i, index); 166 | return i; 167 | } 168 | 169 | void unpackRequest(int byteIndex, int index) { 170 | int len = inputBytes[byteIndex]; 171 | for (int j = 0; j < min(255, len/4+1); j++) { // a slot is BSZ/4 words: 1 length word + at most 255 data words, so never read word 256 172 | int v = inputBytes[byteIndex + j + 1]; 173 | int off = index + (j * 4) * STRIDE; 174 | request[off + 0*STRIDE] = (v >> 0) & 0xFF; 175 | request[off + 1*STRIDE] = (v >> 8) & 0xFF; 176 | request[off + 2*STRIDE] = (v >> 16) & 0xFF; 177 | request[off + 3*STRIDE] = (v >> 24) & 0xFF; 178 | } 179 | } 180 | 181 | void packResponse(int byteIndex, int index, int len) { 182 | outputBytes[byteIndex] = len; 183 | for (int j = 1; j < min(256, (len+3)/4+1); j++) { // round the word count up so a trailing 1-3 byte remainder is still packed 184 | int off = index + (j * 4 - 4) * STRIDE; 185 | ivec4 v = ivec4( 186 | ((response[off + 0*STRIDE] & 0xFF) << 0), 187 | ((response[off + 1*STRIDE] & 0xFF) << 8), 188 | ((response[off + 2*STRIDE] & 0xFF) << 16), 189 | ((response[off + 3*STRIDE] & 0xFF) << 24) 190 | ); 191 | outputBytes[byteIndex + j] = (v.x | v.y | v.z | v.w); 192 | } 193 | } 194 | 195 | int handleRequest(int index) { 196 | int method; 197 | ivec2 path; 198 | int protocol; 199 | header headers[32]; 200 | int headerCount = 0; 201 | 202 | int i = 0; 203 | readMethod(i, index, method); A_OK; // bail out as soon as a stage fails instead of parsing on past the block 204 | readPath(i, index, path); A_OK; 205 | readProtocol(i, index, protocol); A_OK; 206 | for (int j = 0; j < 32; j++) { 207 | if (readHeader(i, index, headers[j])) { 208 | break; 209 | } 210 | headerCount++; A_OK; 211 | } 212 | A_OK; 213 | 214 | i = 0; 215 | writeStatus(i, index, 200); 216 | writeContentType(i, index, MIME_TEXT_PLAIN); 217 | writeCRLF(i, index); 218 | writeBody(i, index, path, headers, headerCount); 219 | return i; 220 | } 221 | 222 | void main() 
{ 223 | int wgId = int(gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * (gl_NumWorkGroups.x * gl_WorkGroupSize.x)); 224 | int index = STRIDE * BSZ * (wgId / STRIDE); 225 | index += wgId & (STRIDE-1); 226 | unpackRequest(wgId*(BSZ/4), index); 227 | int len = handleRequest(index); 228 | packResponse(wgId*(BSZ/4), index, len); 229 | } 230 | -------------------------------------------------------------------------------- /spirv-io/lib/errno.glsl: -------------------------------------------------------------------------------- 1 | 2 | #define EPERM 1 /* Operation not permitted */ 3 | #define ENOENT 2 /* No such file or directory */ 4 | #define ESRCH 3 /* No such process */ 5 | #define EINTR 4 /* Interrupted system call */ 6 | #define EIO 5 /* I/O error */ 7 | #define ENXIO 6 /* No such device or address */ 8 | #define E2BIG 7 /* Argument list too long */ 9 | #define ENOEXEC 8 /* Exec format error */ 10 | #define EBADF 9 /* Bad file number */ 11 | #define ECHILD 10 /* No child processes */ 12 | #define EAGAIN 11 /* Try again */ 13 | #define ENOMEM 12 /* Out of memory */ 14 | #define EACCES 13 /* Permission denied */ 15 | #define EFAULT 14 /* Bad address */ 16 | #define ENOTBLK 15 /* Block device required */ 17 | #define EBUSY 16 /* Device or resource busy */ 18 | #define EEXIST 17 /* File exists */ 19 | #define EXDEV 18 /* Cross-device link */ 20 | #define ENODEV 19 /* No such device */ 21 | #define ENOTDIR 20 /* Not a directory */ 22 | #define EISDIR 21 /* Is a directory */ 23 | #define EINVAL 22 /* Invalid argument */ 24 | #define ENFILE 23 /* File table overflow */ 25 | #define EMFILE 24 /* Too many open files */ 26 | #define ENOTTY 25 /* Not a typewriter */ 27 | #define ETXTBSY 26 /* Text file busy */ 28 | #define EFBIG 27 /* File too large */ 29 | #define ENOSPC 28 /* No space left on device */ 30 | #define ESPIPE 29 /* Illegal seek */ 31 | #define EROFS 30 /* Read-only file system */ 32 | #define EMLINK 31 /* Too many links */ 33 | #define EPIPE 32 /* Broken 
pipe */ 34 | #define EDOM 33 /* Math argument out of domain of func */ 35 | #define ERANGE 34 /* Math result not representable */ 36 | 37 | 38 | #define EDEADLK 35 /* Resource deadlock would occur */ 39 | #define ENAMETOOLONG 36 /* File name too long */ 40 | #define ENOLCK 37 /* No record locks available */ 41 | 42 | /* 43 | * This error code is special: arch syscall entry code will return 44 | * -ENOSYS if users try to call a syscall that doesn't exist. To keep 45 | * failures of syscalls that really do exist distinguishable from 46 | * failures due to attempts to use a nonexistent syscall, syscall 47 | * implementations should refrain from returning -ENOSYS. 48 | */ 49 | #define ENOSYS 38 /* Invalid system call number */ 50 | 51 | #define ENOTEMPTY 39 /* Directory not empty */ 52 | #define ELOOP 40 /* Too many symbolic links encountered */ 53 | #define EWOULDBLOCK EAGAIN /* Operation would block */ 54 | #define ENOMSG 42 /* No message of desired type */ 55 | #define EIDRM 43 /* Identifier removed */ 56 | #define ECHRNG 44 /* Channel number out of range */ 57 | #define EL2NSYNC 45 /* Level 2 not synchronized */ 58 | #define EL3HLT 46 /* Level 3 halted */ 59 | #define EL3RST 47 /* Level 3 reset */ 60 | #define ELNRNG 48 /* Link number out of range */ 61 | #define EUNATCH 49 /* Protocol driver not attached */ 62 | #define ENOCSI 50 /* No CSI structure available */ 63 | #define EL2HLT 51 /* Level 2 halted */ 64 | #define EBADE 52 /* Invalid exchange */ 65 | #define EBADR 53 /* Invalid request descriptor */ 66 | #define EXFULL 54 /* Exchange full */ 67 | #define ENOANO 55 /* No anode */ 68 | #define EBADRQC 56 /* Invalid request code */ 69 | #define EBADSLT 57 /* Invalid slot */ 70 | 71 | #define EDEADLOCK EDEADLK 72 | 73 | #define EBFONT 59 /* Bad font file format */ 74 | #define ENOSTR 60 /* Device not a stream */ 75 | #define ENODATA 61 /* No data available */ 76 | #define ETIME 62 /* Timer expired */ 77 | #define ENOSR 63 /* Out of streams resources */ 78 | 
#define ENONET 64 /* Machine is not on the network */ 79 | #define ENOPKG 65 /* Package not installed */ 80 | #define EREMOTE 66 /* Object is remote */ 81 | #define ENOLINK 67 /* Link has been severed */ 82 | #define EADV 68 /* Advertise error */ 83 | #define ESRMNT 69 /* Srmount error */ 84 | #define ECOMM 70 /* Communication error on send */ 85 | #define EPROTO 71 /* Protocol error */ 86 | #define EMULTIHOP 72 /* Multihop attempted */ 87 | #define EDOTDOT 73 /* RFS specific error */ 88 | #define EBADMSG 74 /* Not a data message */ 89 | #define EOVERFLOW 75 /* Value too large for defined data type */ 90 | #define ENOTUNIQ 76 /* Name not unique on network */ 91 | #define EBADFD 77 /* File descriptor in bad state */ 92 | #define EREMCHG 78 /* Remote address changed */ 93 | #define ELIBACC 79 /* Can not access a needed shared library */ 94 | #define ELIBBAD 80 /* Accessing a corrupted shared library */ 95 | #define ELIBSCN 81 /* .lib section in a.out corrupted */ 96 | #define ELIBMAX 82 /* Attempting to link in too many shared libraries */ 97 | #define ELIBEXEC 83 /* Cannot exec a shared library directly */ 98 | #define EILSEQ 84 /* Illegal byte sequence */ 99 | #define ERESTART 85 /* Interrupted system call should be restarted */ 100 | #define ESTRPIPE 86 /* Streams pipe error */ 101 | #define EUSERS 87 /* Too many users */ 102 | #define ENOTSOCK 88 /* Socket operation on non-socket */ 103 | #define EDESTADDRREQ 89 /* Destination address required */ 104 | #define EMSGSIZE 90 /* Message too long */ 105 | #define EPROTOTYPE 91 /* Protocol wrong type for socket */ 106 | #define ENOPROTOOPT 92 /* Protocol not available */ 107 | #define EPROTONOSUPPORT 93 /* Protocol not supported */ 108 | #define ESOCKTNOSUPPORT 94 /* Socket type not supported */ 109 | #define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ 110 | #define EPFNOSUPPORT 96 /* Protocol family not supported */ 111 | #define EAFNOSUPPORT 97 /* Address family not supported by protocol */ 112 
| #define EADDRINUSE 98 /* Address already in use */ 113 | #define EADDRNOTAVAIL 99 /* Cannot assign requested address */ 114 | #define ENETDOWN 100 /* Network is down */ 115 | #define ENETUNREACH 101 /* Network is unreachable */ 116 | #define ENETRESET 102 /* Network dropped connection because of reset */ 117 | #define ECONNABORTED 103 /* Software caused connection abort */ 118 | #define ECONNRESET 104 /* Connection reset by peer */ 119 | #define ENOBUFS 105 /* No buffer space available */ 120 | #define EISCONN 106 /* Transport endpoint is already connected */ 121 | #define ENOTCONN 107 /* Transport endpoint is not connected */ 122 | #define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ 123 | #define ETOOMANYREFS 109 /* Too many references: cannot splice */ 124 | #define ETIMEDOUT 110 /* Connection timed out */ 125 | #define ECONNREFUSED 111 /* Connection refused */ 126 | #define EHOSTDOWN 112 /* Host is down */ 127 | #define EHOSTUNREACH 113 /* No route to host */ 128 | #define EALREADY 114 /* Operation already in progress */ 129 | #define EINPROGRESS 115 /* Operation now in progress */ 130 | #define ESTALE 116 /* Stale file handle */ 131 | #define EUCLEAN 117 /* Structure needs cleaning */ 132 | #define ENOTNAM 118 /* Not a XENIX named type file */ 133 | #define ENAVAIL 119 /* No XENIX semaphores available */ 134 | #define EISNAM 120 /* Is a named type file */ 135 | #define EREMOTEIO 121 /* Remote I/O error */ 136 | #define EDQUOT 122 /* Quota exceeded */ 137 | 138 | #define ENOMEDIUM 123 /* No medium found */ 139 | #define EMEDIUMTYPE 124 /* Wrong medium type */ 140 | #define ECANCELED 125 /* Operation Canceled */ 141 | #define ENOKEY 126 /* Required key not available */ 142 | #define EKEYEXPIRED 127 /* Key has expired */ 143 | #define EKEYREVOKED 128 /* Key has been revoked */ 144 | #define EKEYREJECTED 129 /* Key was rejected by service */ 145 | 146 | /* for robust mutexes */ 147 | #define EOWNERDEAD 130 /* Owner died */ 148 | #define 
ENOTRECOVERABLE 131 /* State not recoverable */

#define ERFKILL 132 /* Operation not possible due to RF-kill */

#define EHWPOISON 133 /* Memory page has hardware error */

--------------------------------------------------------------------------------
/spirv-io/lib/hashtable.glsl:
--------------------------------------------------------------------------------
// Based on https://github.com/nosferalatu/SimpleGPUHashTable/

// NOTE(review): the include target is missing in this copy of the file (it
// looks like it was stripped during extraction). Presumably it is the header
// that provides malloc/alloc_t/heapPtr -- confirm against the repository.
#include

// Open-addressing hash table stored in i32heap.
// Every slot is three consecutive int32 values: [key, hash, value].
//   key  == -1 : slot has never been used (so -1 cannot be stored as a key)
//   hash == -1 : slot is empty or its entry has been deleted (tombstone)
struct i32map {
    alloc_t table;     // [x, y) range of the slots, in i32heap element indices
    int32_t capacity;  // number of slots, always a power of two
    int32_t count;     // slots whose key has been claimed (tombstones included)
};

// 32 bit Murmur3 hash
// k is signed, so the >> steps sign-extend; the result is still deterministic.
// A result of -1 is remapped to 0 because -1 marks an empty/deleted hash slot.
int32_t murmur3hash(int32_t k)
{
    k ^= k >> 16;
    k *= 0x85ebca6b;
    k ^= k >> 13;
    k *= 0xc2b2ae35;
    k ^= k >> 16;
    if (k == -1) k = 0;
    return k;
}

/*T
    i32map ht = i32hAlloc(300);
    512 == ht.capacity;
    512*3 == strLen(ht.table);
    0 == ht.count;

    ht = i32hAlloc(256);
    256 == ht.capacity;
    256*3 == strLen(ht.table);
    0 == ht.count;

    ht = i32hAlloc(257);
    512 == ht.capacity;
    512*3 == strLen(ht.table);
    0 == ht.count;
*/
// Allocates an empty table with room for at least `size` slots; the capacity
// is `size` rounded up to the next power of two (minimum 1).
i32map i32hAlloc(int32_t size) {
    // Integer rounding instead of ceil(log2(float(size))): float log2 is not
    // guaranteed to be exact for powers of two on every GPU and is undefined
    // for size <= 0. The 1 << 30 bound keeps the shift from overflowing.
    int32_t capacity = 1;
    while (capacity < size && capacity < (1 << 30)) {
        capacity <<= 1;
    }
    i32map ht = i32map(malloc(4 * (capacity * 3), 4), capacity, 0);
    // malloc returns a byte range; convert it to i32heap element indices.
    ht.table.x /= 4;
    ht.table.y /= 4;
    for (uint32_t i = ht.table.x; i < ht.table.y; i += 3) {
        i32heap[i] = -1;    // key:   unused
        i32heap[i+1] = -1;  // hash:  empty
        i32heap[i+2] = 0;   // value
    }
    return ht;
}

// Allocates an empty table with the default capacity of 16 slots.
i32map i32hAlloc() {
    return i32hAlloc(16);
}

// Runs `body` for every live entry of `ht` (hash slot != -1), after assigning
// the entry's key and value to the `key` and `value` expressions.
#define i32hIter(ht, key, value, body) { \
    for (uint32_t _i_ = ht.table.x; _i_ < ht.table.y; _i_ += 3) {\
        if (i32heap[_i_+1] != -1) {\
            key = i32heap[_i_];\
            value = i32heap[_i_+2];\
            body;\
        }\
    }\
}

// Same as i32hIter, but the stored value bits are reinterpreted as a float.
#define f32hIter(ht, key, value, body) { \
    for (uint32_t _i_ = ht.table.x; _i_ < ht.table.y; _i_ += 3) {\
        if (i32heap[_i_+1] != -1) {\
            key = i32heap[_i_];\
            value = intBitsToFloat(i32heap[_i_+2]);\
            body;\
        }\
    }\
}

// Returns the keys of all live entries as a new array that is carved directly
// out of the heap at heapPtr.
i32array i32hKeys(i32map ht) {
    // Align heapPtr up to the next multiple of 4 bytes. The previous
    // expression, 3 - (3 - (heapPtr & 3)), reduced to adding (heapPtr & 3),
    // which left the pointer misaligned for remainders 1 and 3.
    heapPtr += (4 - (heapPtr & 3)) & 3;
    ptr_t start = heapPtr;
    i32hIter(ht, int32_t k, int32_t v, {
        i32heap[heapPtr/4] = k;
        heapPtr += 4;
    })
    return i32array(start/4, heapPtr/4);
}

// Float-valued tables share the key layout, so the keys are collected the
// same way.
i32array f32hKeys(i32map ht) {
    return i32hKeys(ht);
}

/*T
    i32map ht = i32hAlloc(256);
    int32_t v = 0;

    i32hSet(ht, 45, 1);
    i32hSet(ht, 46, 2);
    i32hSet(ht, 47, 3);

    true == i32hGet(ht, 45, v);
    1 == v;

    i32hSet(ht, 45, 4);
    i32hSet(ht, 248, 5);

    true == i32hGet(ht, 46, v);
    2 == v;
    true == i32hGet(ht, 47, v);
    3 == v;
    true == i32hGet(ht, 45, v);
    4 == v;
    true == i32hGet(ht, 248, v);
    5 == v;

    256 == ht.capacity;

    log("Adding 260 keys");
    for (int32_t i = 0; i < 260; i++) {
        i32hSet(ht, i, i);
    }

    // Resized table
    512 == ht.capacity;

    log("Checking for keys");
    // Check if all the keys are still there
    for (int32_t i = 0; i < 260; i++) {
        true == i32hGet(ht, i, v);
        i == v;
    }

*/
// Inserts `key` with `value`, or overwrites the value of an existing key.
// When the insert would push the table past 70% occupancy, the live entries
// are first moved into a new table of twice the capacity (tombstones are
// dropped) and `ht` is replaced by it. The key -1 is reserved as the
// "unused slot" marker and must not be stored.
void i32hSet(inout i32map ht, int32_t key, int32_t value) {
    if ((ht.count + 1) * 100 > ht.capacity * 70) {
        i32map nt = i32hAlloc(ht.capacity*2);
        //FREE_ALL( log(concat("Resize ", str(ivec2(ht.capacity, nt.capacity)))) );
        for (uint32_t i = ht.table.x; i < ht.table.y; i += 3) {
            if (i32heap[i+1] != -1) {
                // Re-use the stored hash; only the mask changes.
                int32_t idx = i32heap[i+1] & (nt.capacity-1);
                while (i32heap[nt.table.x + idx*3] != -1) {
                    idx = (idx + 1) & (nt.capacity-1);
                }
                i32heap[nt.table.x + idx*3    ] = i32heap[i];
                i32heap[nt.table.x + idx*3 + 1] = i32heap[i+1];
                i32heap[nt.table.x + idx*3 + 2] = i32heap[i+2];
                nt.count++;
            }
        }
        ht = nt;
    }
    int32_t h = murmur3hash(key);
    int32_t idx = h & (ht.capacity-1);
    // Linear probing: stop at the first unused slot or at the slot that
    // already carries this key (live or deleted).
    while (i32heap[ht.table.x + idx*3] != -1 && i32heap[ht.table.x + idx*3] != key) {
        idx = (idx + 1) & (ht.capacity-1);
    }
    // Only a previously unused slot increases the occupancy count.
    if (i32heap[ht.table.x + idx*3] == -1) ht.count++;
    i32heap[ht.table.x + idx*3] = key;
    i32heap[ht.table.x + idx*3 + 1] = h;
    i32heap[ht.table.x + idx*3 + 2] = value;
}

/*T
    i32map ht = i32hAlloc(256);
    int32_t v = 123;

    false == i32hGet(ht, 30, v);

    i32hSet(ht, 30, 321);

    true == i32hGet(ht, 30, v);
    321 == v;

    false == i32hGet(ht, 31, v);

    for (int32_t i = 32; i < 512; i++) {
        false == i32hGet(ht, i, v);
    }

*/
// Looks up `key`. Returns true and stores the entry's value in `value` when a
// live entry exists; returns false (with `value` set to 0) otherwise.
bool i32hGet(i32map ht, int32_t key, out int32_t value) {
    // An `out` parameter that is never assigned is undefined in the caller,
    // so give it a defined value for the miss paths.
    value = 0;
    int32_t idx = murmur3hash(key) & (ht.capacity-1);
    while (true) {
        int32_t k = i32heap[ht.table.x + idx * 3];
        if (k == key) {
            int32_t kh = i32heap[ht.table.x + idx * 3 + 1];
            // Key slot matches but the hash was cleared: entry was deleted.
            if (kh == -1) return false;
            value = i32heap[ht.table.x + idx * 3 + 2];
            return true;
        } else if (k == -1) {
            // Reached an unused slot: the probe chain ends here.
            return false;
        }
        idx = (idx + 1) & (ht.capacity-1);
    }
    return false;
}

/*T
    i32map ht = i32hAlloc(256);
    int32_t v = 0;

    i32hSet(ht, 30, 321);

    true == i32hGet(ht, 30, v);
    321 == v;

    true == i32hDelete(ht, 30);

    false == i32hGet(ht, 30, v);

    i32hSet(ht, 30, 321);

    log("i32hDelete: Adding and deleting 468 keys");

    for (int32_t i = 32; i < 500; i++) {
        i32hSet(ht, i, i);
        true == i32hGet(ht, i, v);
        true == i32hDelete(ht, i);
    }

    log("i32hDelete: Checking that none of the keys exist");

    for (int32_t i = 32; i < 500; i++) {
        false == i32hGet(ht, i, v);
        false == i32hDelete(ht, i);
    }

    true == i32hGet(ht, 30, v);
    321 == v;

    log("i32hDelete: Check 
sequences of gets, sets and deletes");

    for (int32_t i = 0; i < 500; i+=3) {
        i32hSet(ht, i, i);
    }
    for (int32_t i = 0; i < 500; i+=7) {
        i32hDelete(ht, i);
    }
    for (int32_t i = 0; i < 500; i+=3) {
        if (i % 7 != 0) {
            true == i32hGet(ht, i, v);
            i == v;
            if (!i32hGet(ht, i, v)) {
                log(concat("err 1.1: ", str(i)));
            }
        } else {
            false == i32hGet(ht, i, v);
            if (i32hGet(ht, i, v)) {
                log(concat("err 1.2: ", str(i)));
            }
        }
    }

    for (int32_t i = 0; i < 500; i+=11) {
        i32hSet(ht, i, i);
    }
    for (int32_t i = 0; i < 500; i+=3) {
        i32hDelete(ht, i);
    }
    for (int32_t i = 0; i < 500; i+=11) {
        if (i % 3 != 0) {
            true == i32hGet(ht, i, v);
            i == v;
            if (!i32hGet(ht, i, v)) {
                log(concat("err 2.1: ", str(i)));
            }
        } else {
            false == i32hGet(ht, i, v);
            if (i32hGet(ht, i, v)) {
                log(concat("err 2.2: ", str(i)));
            }
        }
    }

*/
// Removes `key` from the table. Returns true when a live entry was removed and
// false when the key is absent or was already deleted.
// The slot keeps its key and only the hash is set to -1 (a tombstone), so
// probe sequences that pass through this slot still reach later entries.
bool i32hDelete(inout i32map ht, int32_t key) {
    int32_t idx = murmur3hash(key) & (ht.capacity-1);
    while (true) {
        int32_t k = i32heap[ht.table.x + idx * 3];
        if (k == key) {
            // Already a tombstone: nothing to delete.
            if (i32heap[ht.table.x + idx * 3 + 1] == -1) return false;
            i32heap[ht.table.x + idx * 3 + 1] = -1;
            return true;
        } else if (k == -1) {
            // Unused slot ends the probe chain: key was never inserted.
            return false;
        }
        idx = (idx + 1) & (ht.capacity-1);
    }
    return false;
}

// Float-valued tables use the same storage as i32map; values are kept as
// their raw bit patterns.
i32map f32hAlloc(int32_t size) {
    return i32hAlloc(size);
}

// Allocates a float-valued table with the default capacity of 16 slots.
i32map f32hAlloc() {
    return f32hAlloc(16);
}

// Removes `key` from a float-valued table; the result of the underlying
// delete is not reported.
void f32hDelete(inout i32map ht, int32_t key) {
    i32hDelete(ht, key);
}

// Looks up `key` in a float-valued table. Returns true and stores the value in
// `value` on a hit; returns false and stores 0.0 on a miss.
bool f32hGet(i32map ht, int32_t key, out float value) {
    int32_t bits;
    bool found = i32hGet(ht, key, bits);
    // Only reinterpret the bits on a hit; on a miss report 0.0 instead of
    // converting whatever `bits` happens to hold.
    value = found ? intBitsToFloat(bits) : 0.0;
    return found;
}

// Stores a float under `key` by saving its raw bit pattern.
void f32hSet(inout i32map ht, int32_t key, float value) {
    i32hSet(ht, key, floatBitsToInt(value));
}


--------------------------------------------------------------------------------