├── .coveragerc ├── .dockerignore ├── .github └── workflows │ └── vulkpy.yaml ├── .gitignore ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── debug └── 01-nn-dense.py ├── doc ├── api.md ├── broadcasting.md ├── conf.py ├── development.md ├── example.md ├── index.md └── synchronization.md ├── example ├── 00-arithmetic.py ├── 01-random.py └── 02-nn.py ├── mypy.ini ├── pyproject.toml ├── setup.py ├── test ├── test_nn.py ├── test_random.py └── test_vulkpy.py └── vulkpy ├── __init__.py ├── _vkarray.cc ├── _vkutil.hh ├── nn ├── __init__.py ├── core.py ├── initializers.py ├── layers.py ├── losses.py ├── models.py ├── optimizers.py ├── parameters.py └── regularizers.py ├── random.py ├── shader ├── abs.comp ├── acos.comp ├── acosh.comp ├── add.comp ├── add_broadcast.comp ├── add_scalar.comp ├── asin.comp ├── asinh.comp ├── atan.comp ├── atanh.comp ├── batch_affine.comp ├── broadcast.comp ├── clamp.comp ├── clamp_ss.comp ├── clamp_sv.comp ├── clamp_vs.comp ├── cos.comp ├── cosh.comp ├── div.comp ├── div_broadcast.comp ├── div_scalar.comp ├── exp.comp ├── exp2.comp ├── gather.comp ├── gather_axis.comp ├── iabs.comp ├── iacos.comp ├── iacosh.comp ├── iadd.comp ├── iadd_broadcast.comp ├── iadd_scalar.comp ├── iasin.comp ├── iasinh.comp ├── iatan.comp ├── iatanh.comp ├── iclamp.comp ├── iclamp_ss.comp ├── iclamp_sv.comp ├── iclamp_vs.comp ├── icos.comp ├── icosh.comp ├── idiv.comp ├── idiv_broadcast.comp ├── idiv_scalar.comp ├── iexp.comp ├── iexp2.comp ├── iinvsqrt.comp ├── ilog.comp ├── ilog2.comp ├── imax.comp ├── imax_broadcast.comp ├── imax_scalar.comp ├── imin.comp ├── imin_broadcast.comp ├── imin_scalar.comp ├── imul.comp ├── imul_broadcast.comp ├── imul_scalar.comp ├── invsqrt.comp ├── ipow.comp ├── ipow_broadcast.comp ├── ipow_scalar.comp ├── isign.comp ├── isin.comp ├── isinh.comp ├── isqrt.comp ├── isub.comp ├── isub_broadcast.comp ├── isub_scalar.comp ├── itan.comp ├── itanh.comp ├── log.comp ├── log2.comp ├── matmul.comp ├── max.comp ├── 
max_broadcast.comp ├── max_scalar.comp ├── maximum.comp ├── maximum_axis.comp ├── maximum_axis_rebroadcast.comp ├── maximum_v1.3.comp ├── min.comp ├── min_broadcast.comp ├── min_scalar.comp ├── minimum.comp ├── minimum_axis.comp ├── minimum_axis_rebroadcast.comp ├── minimum_v1.3.comp ├── mul.comp ├── mul_broadcast.comp ├── mul_scalar.comp ├── nn_cross_entropy.comp ├── nn_cross_entropy_backward.comp ├── pow.comp ├── pow_broadcast.comp ├── pow_scalar.comp ├── prng_box_muller.comp ├── prng_ibox_muller.comp ├── prng_randrange.comp ├── prng_xoshiro128pp_float.comp ├── prng_xoshiro128pp_uint32.comp ├── prod.comp ├── prod_axis.comp ├── prod_axis_rebroadcast.comp ├── prod_v1.3.comp ├── rdiv_scalar.comp ├── rpow_scalar.comp ├── rsub_scalar.comp ├── sign.comp ├── sin.comp ├── sinh.comp ├── sqrt.comp ├── sub.comp ├── sub_broadcast.comp ├── sub_scalar.comp ├── sum.comp ├── sum_axis.comp ├── sum_axis_rebroadcast.comp ├── sum_v1.3.comp ├── tan.comp └── tanh.comp ├── util.py ├── vkarray.py └── vktyping.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | parallel = True 4 | relative_files = True 5 | 6 | [report] 7 | show_missing = True 8 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *# 2 | #* 3 | *~ 4 | ~* 5 | *.out 6 | *.o 7 | *.spv 8 | *.egg-info 9 | *.so 10 | .DS_Store 11 | __pycache__ 12 | -------------------------------------------------------------------------------- /.github/workflows/vulkpy.yaml: -------------------------------------------------------------------------------- 1 | name: vulkpy 2 | on: [push, pull_request] 3 | jobs: 4 | ci: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v3 8 | - uses: docker/setup-buildx-action@v2 9 | - uses: docker/build-push-action@v3 10 | with: 11 | context: . 
12 | push: false 13 | tags: vulkpy/results:latest 14 | cache-to: type=gha,mode=max,scope=${{github.ref_name}} 15 | cache-from: type=gha,scope=${{github.ref_name}} 16 | load: true 17 | file: Dockerfile 18 | - run: | 19 | docker create --name results vulkpy/results:latest 20 | docker cp results:/coverage/. coverage/ 21 | docker cp results:/unittest/. unittest/ 22 | docker cp results:/dist/. dist/ 23 | docker cp results:/html/. html/ 24 | name: Extract Results 25 | - uses: actions/upload-artifact@v3 26 | with: 27 | name: coverage 28 | path: coverage 29 | - uses: actions/upload-artifact@v3 30 | with: 31 | name: unittest 32 | path: unittest 33 | - uses: actions/upload-artifact@v3 34 | with: 35 | name: dist 36 | path: dist 37 | - uses: actions/upload-artifact@v3 38 | with: 39 | name: html 40 | path: html 41 | - uses: EnricoMi/publish-unit-test-result-action@v2 42 | with: 43 | junit_files: "unittest/**/*.xml" 44 | - name: Add Coverage PR Comment 45 | uses: marocchino/sticky-pull-request-comment@v2 46 | if: github.event_name == 'pull_request' 47 | with: 48 | recreate: true 49 | path: coverage/summary.md 50 | - name: Write to Job Summary 51 | run: cat coverage/summary.md >> $GITHUB_STEP_SUMMARY 52 | - name: Publish package 53 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 54 | uses: pypa/gh-action-pypi-publish@release/v1 55 | with: 56 | password: ${{ secrets.PYPI_PASS }} 57 | - uses: actions/configure-pages@v2 58 | - uses: actions/upload-pages-artifact@v1 59 | with: 60 | path: html 61 | deploy: 62 | concurrency: 63 | group: vulkpy-deploy 64 | cancel-in-progress: true 65 | if: github.ref_name == 'master' 66 | needs: ci 67 | permissions: 68 | contents: read 69 | pages: write 70 | id-token: write 71 | environment: 72 | name: github-pages 73 | url: ${{ steps.deployment.outputs.page_url }} 74 | runs-on: ubuntu-latest 75 | steps: 76 | - id: deployment 77 | uses: actions/deploy-pages@v1 78 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | *.spv 3 | *.egg-info 4 | *.so 5 | .DS_Store 6 | __pycache__ 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:bullseye AS vulkpy-env 2 | RUN --mount=type=cache,target=/var/lib/apt/lists \ 3 | wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | \ 4 | apt-key add - && \ 5 | wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list \ 6 | http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list && \ 7 | apt update && \ 8 | apt install -y --no-install-recommends \ 9 | libvulkan1 libvulkan-dev vulkan-headers shaderc \ 10 | vulkan-validationlayers lunarg-vulkan-layers mesa-vulkan-drivers 11 | RUN --mount=type=cache,target=/root/.cache/pip \ 12 | pip install numpy pybind11 well-behaved-logging 13 | 14 | 15 | FROM vulkpy-env AS vulkpy-install 16 | WORKDIR /vulkpy-ci 17 | RUN --mount=type=cache,target=/root/.cache/pip \ 18 | pip install coverage unittest-xml-reporting 19 | COPY setup.py pyproject.toml MANIFEST.in mypy.ini . 20 | COPY vulkpy vulkpy 21 | RUN --mount=type=cache,target=/root/.cache/pip pip install .[test] && \ 22 | mypy -p vulkpy && \ 23 | rm -rf vulkpy && \ 24 | rm setup.py pyproject.toml MANIFEST.in mypy.ini 25 | 26 | 27 | FROM vulkpy-install AS vulkpy-test 28 | COPY test test 29 | WORKDIR /vulkpy-ci/test 30 | COPY .coveragerc . 
31 | RUN coverage run --source vulkpy -m xmlrunner discover || true 32 | RUN mkdir -p /coverage && cp -v .coverage.* /coverage && \ 33 | mkdir -p /unittest && cp *.xml /unittest 34 | 35 | 36 | FROM vulkpy-install AS vulkpy-combine 37 | WORKDIR /coverage 38 | RUN --mount=type=cache,target=/root/.cache/pip pip install coverage 39 | COPY vulkpy /vulkpy-ci/vulkpy 40 | COPY .coveragerc .coveragerc 41 | COPY --from=vulkpy-test /coverage /coverage 42 | RUN coverage combine && \ 43 | echo "## Test Coverage\n\`\`\`\n" >> summary.md && \ 44 | coverage report | tee -a summary.md && \ 45 | echo "\n\`\`\`" >> summary.md && \ 46 | mkdir -p /coverage/html && coverage html -d /coverage/html 47 | 48 | 49 | FROM vulkpy-install AS vulkpy-example 50 | WORKDIR /vulkpy-ci/example 51 | RUN --mount=type=cache,target=/root/.cache/pip pip install scikit-learn 52 | COPY example . 53 | RUN python 00-arithmetic.py && \ 54 | python 01-random.py && \ 55 | python 02-nn.py --debug --optimizer sgd --nepoch 1 && \ 56 | python 02-nn.py --debug --optimizer adam --nepoch 1 && \ 57 | touch /vulkpy-ci/example/example-ok 58 | 59 | 60 | FROM vulkpy-env AS vulkpy-build 61 | WORKDIR /build 62 | RUN --mount=type=cache,target=/root/.cache/pip pip install wheel 63 | COPY LICENSE setup.py README.md MANIFEST.in pyproject.toml . 
64 | COPY vulkpy vulkpy 65 | RUN python setup.py sdist -d /dist 66 | 67 | 68 | FROM vulkpy-env AS vulkpy-doc 69 | WORKDIR /ci 70 | RUN --mount=type=cache,target=/var/lib/apt/lists \ 71 | apt update && apt -y --no-install-recommends install graphviz 72 | RUN --mount=type=cache,target=/root/.cache/pip pip install \ 73 | sphinx \ 74 | furo \ 75 | sphinx-automodapi \ 76 | myst-parser 77 | COPY LICENSE LICENSE 78 | COPY setup.py setup.py 79 | COPY README.md README.md 80 | COPY vulkpy vulkpy 81 | RUN --mount=type=cache,target=/root/.cache/pip pip install .[doc] 82 | COPY doc doc 83 | COPY example example 84 | RUN sphinx-build -W -b html doc /html 85 | 86 | 87 | FROM scratch AS results 88 | COPY --from=vulkpy-test /unittest /unittest/3.11 89 | COPY --from=vulkpy-combine /coverage/html /coverage/html 90 | COPY --from=vulkpy-combine /coverage/summary.md /coverage/summary.md 91 | COPY --from=vulkpy-build /dist /dist 92 | COPY --from=vulkpy-doc /html /html 93 | COPY --from=vulkpy-example /vulkpy-ci/example/example-ok /example/example-ok 94 | CMD [""] 95 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 H.Yamada 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include vulkpy/shader/*.spv 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vulkpy: GPGPU array on Vulkan 2 | 3 | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/ymd-h/vulkpy/vulkpy.yaml) 4 | ![PyPI](https://img.shields.io/pypi/v/vulkpy) 5 | ![PyPI - License](https://img.shields.io/pypi/l/vulkpy) 6 | 7 | vulkpy is a Python package providing GPGPU computation based on Vulkan. 8 | 9 | 10 | ## Requirements 11 | 12 | * C++20 compatible compiler 13 | * `libvulkan` 14 | * Vulkan SDK 15 | * Headers (`vulkan/vulkan.hpp` and so on) 16 | * Shaderc (`glslc`) 17 | 18 | 19 | On Ubuntu 22.0, 20 | ```shell 21 | wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - 22 | wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list 23 | apt update 24 | apt install -y libvulkan1 libvulkan-dev vulkan-headers shaderc vulkan-validationlayers 25 | ``` 26 | 27 | > **Note** 28 | > `vulkan-sdk` cannot be installed because it requires obsolete package `qt5-default`. 
29 | 30 | 31 | ## Example 32 | 33 | ```python 34 | import vulkpy as vk 35 | 36 | gpu = vk.GPU() 37 | 38 | a = vk.Array(gpu, data=[10, 10, 10]) 39 | b = vk.Array(gpu, data=[5, 5, 5]) 40 | 41 | c = a + b 42 | c.wait() 43 | 44 | print(c) 45 | # [15, 15, 15] 46 | ``` 47 | 48 | ## Features 49 | 50 | * Element-wise Arithmetic Operators between 2 `Array`s. 51 | * [x] `+`, `-`, `*`, `/`, `**`, `+=`, `-=`, `*=`, `/=`, `**=` 52 | * Arithmetic Operators between `Array` and `float`. 53 | * [x] `+`, `-`, `*`, `/`, `**`, `+=`, `-=`, `*=`, `/=`, `**=` 54 | * Arithmetic Operators between `float` and `Array`. 55 | * [x] `+`, `-`, `*`, `/`, `**` 56 | * Matrix Multiplication Operator between 1d/2d `Array`s. 57 | * [x] `@` 58 | * Element-wise math functions as `Array`'s member function 59 | * [x] `max(other, inplace=False)`, `min(other, inplace=False)` 60 | * [x] `abs(inplace=False)`, `sign(inplace=False)` 61 | * [x] `sin(inplace=False)`, `cos(inplace=False)`, `tan(inplace=False)` 62 | * [x] `asin(inplace=False)`, `acos(inplace=False)`, `atan(inplace=False)` 63 | * [x] `sinh(inplace=False)`, `cosh(inplace=False)`, `tanh(inplace=False)` 64 | * [x] `asinh(inplace=False)`, `acosh(inplace=False)`, `atanh(inplace=False)` 65 | * [x] `exp(inplace=False)`, `log(inplace=False)` 66 | * [x] `exp2(inplace=False)`, `log2(inplace=False)` 67 | * [x] `sqrt(inplace=False)`, `invsqrt(inplace=False)` 68 | * [x] `clamp(min, max, inplace=False)` 69 | * Reduction as `Array`'s member function 70 | * [x] `sum(axis=None)`, `prod(axis=None)` 71 | * [x] `maximum(axis=None)`, `minimum(axis=None)` 72 | * [x] `mean(axis=None)` 73 | * [ ] argmax, argmin 74 | * [ ] ... 75 | * Other `Array` method 76 | * [x] `gather(idx: U32Array) -> Array` 77 | * [ ] tensordot, shuffle 78 | * [ ] ... 
79 | * Broadcast 80 | * [x] Explicit broadcast copy (memory inefficient, fallback option) 81 | * `broadcast_to(shape)` (used at `clamp`) 82 | * [x] Special implementations for element-wise arithmetic operators 83 | * `+`, `-`, `*`, `/`, `**`, `+=`, `-=`, `*=`, `/=`, `**=` 84 | * [x] Reduction with re-broadcast 85 | * `sum`, `prod`, `maximum`, `minimum`, `mean` 86 | * Pseudo Random Number Generator (PRNG) 87 | * [x] xoshiro128++ (`vulkpy.random.Xoshiro128pp(gpu, *, size=None, data=None)`) 88 | * `[0, 1)` uniform (`.random(shape=None, buffer=None)`) 89 | * Gaussian with Box-Muller (`.normal(shape=None, buffer=None, mean=0.0, stddev=1.0)`) 90 | * [ ] pcg32 91 | * Neural Network 92 | * Layers 93 | * [x] `Dense`, `ReLU`, `Sigmoid`, `Softmax` 94 | * [ ] conv, batch norm, layer norm, ... 95 | * Optimizers 96 | * [x] `SGD`, `Adam`, `AdaGrad` 97 | * [ ] rmsprop, ... 98 | * Losses 99 | * [x] `CrossEntropyLoss`, `SoftmaxCrossEntropyLoss`, `MSELoss`, `HuberLoss` 100 | * [ ] ... 101 | * Initializers 102 | * [x] `Constant`, `HeNormal` 103 | * [ ] ... 104 | * Models 105 | * [x] `Sequence` 106 | * [ ] ... 107 | * [x] Regularization 108 | * `Lasso(coeff=1.0)`, `Ridge(coeff=1.0)`, `Elastic(L1=1.0, L2=1.0)` 109 | * [ ] ONNX support 110 | * [ ] Custom user layer with automatic `backward()` definition.
def debug01(epoch):
    """
    Fit a single 1-in/1-out ``Dense`` layer to ``y = x ** 2`` and print the loss.

    Parameters
    ----------
    epoch : int
        Number of full-batch training iterations to run.
    """
    device = vk.GPU()
    layer = vk.nn.Dense(device, 1, 1)
    criterion = vk.nn.MSELoss()

    # 100 inputs from -1.0 to 0.98 (step 1/50), shaped as a single column.
    host_x = np.arange(100).reshape((-1, 1)) / 50 - 1.0
    host_y = host_x ** 2

    x = vk.Array(device, data=host_x)
    y = vk.Array(device, data=host_y)

    for e in range(epoch):
        L = criterion(layer(x), y)

        layer.zero_grad()
        # The value returned by backward() is bound but not used further here.
        dx = layer.backward(criterion.grad())
        layer.update()

        print(f"Epoch: {e:4d}, Loss: {L:.6f}")
11 | Usually broadcasting is executed just before other operations, 12 | so that this implementation might allocate an unnecessary temporary array. 13 | This is memory- and computationally-inefficient, 14 | but it works fine in most cases. 15 | Actually, we still use this in the `clamp()` method. 16 | 17 | Users can execute this broadcasting with the `broadcast_to(shape)` method. 18 | 19 | ```python 20 | import vulkpy as vk 21 | 22 | gpu = vk.GPU() 23 | a = vk.Array(gpu, data=[1, 2]) 24 | 25 | b = a.broadcast_to((2, 2)) 26 | # => [[1, 2], [1, 2]] 27 | ``` 28 | 29 | ````{note} 30 | In a Vulkan compute shader, we can use only 3 global indices at most. 31 | They are not sufficient to point to elements of 32 | an `N`-dimensional array directly. 33 | Instead, we utilize a linearly flattened index 34 | and calculate the position from it on the GPU. 35 | We assume this index calculation is computationally inefficient. 36 | 37 | The following is a partial code of `broadcast.comp`. 38 | 39 | ```glsl 40 | void main(){ 41 | uint i = gl_GlobalInvocationID.x; 42 | if(i >= params.size[1]){ return; } 43 | 44 | uint i_tmp = i; 45 | uint j = 0; 46 | uint sizeA = params.size[0]; 47 | uint sizeB = params.size[1]; 48 | for(uint dim = 0; dim < params.ndim; dim++){ 49 | sizeA = sizeA / a_shape[dim]; 50 | sizeB = sizeB / b_shape[dim]; 51 | 52 | uint d = min(i_tmp / sizeB, a_shape[dim]-1); 53 | j += d * sizeA; 54 | 55 | i_tmp = i_tmp % sizeB; 56 | } 57 | 58 | b[i] = a[j]; 59 | } 60 | ``` 61 | ```` 62 | 63 | ## 2. Special Implementation 64 | We also provide special implementations for some operations. 65 | For example, a compute shader `add_broadcast.comp` implements 66 | a fused operation of broadcasting and addition. 67 | Although we still need index calculation, we can omit temporary memory allocation. 68 | 69 | Users don't need to call these special implementations explicitly; 70 | if an operation is supported, its special implementation is used automatically.
71 | 72 | ```python 73 | import vulkpy as vk 74 | 75 | gpu = vk.GPU() 76 | a = vk.Array(gpu, data=[1, 2]) 77 | b = vk.Array(gpu, data=[[1, 2], [3, 4]]) 78 | 79 | c = a + b 80 | # => [[2, 4], [4, 6]] 81 | ``` 82 | 83 | ```{note} 84 | For inplace operations, only `other` (non-inplaced) array 85 | can be broadcasted because we cannot grow already allocated memory. 86 | 87 | Since we can skip index computation for inplaced array, 88 | inplace broadcasting is more efficient in terms of not only memory 89 | but also computation. 90 | ``` 91 | 92 | 93 | ## 3. Re-broadcasting of Reduction 94 | 95 | For specific use cases like softmax, broadcasting is executed just after reduction. 96 | We call such a use case re-broadcasting. 97 | 98 | In re-broadcasting, inefficient index calculation is not necessary, so 99 | that it is more efficient in terms of computation. 100 | 101 | Users can pass `rebroadcast=True` to reduction methods; 102 | 103 | ```python 104 | import vulkpy as vk 105 | 106 | gpu = vk.GPU() 107 | a = vk.Array(gpu, data=[[1, 2, 3], [4, 5, 6]]) 108 | 109 | b = a.mean(axis=0, rebroadcast=True) 110 | # => [[2.5, 3.5, 4.5], [2.5, 3.5, 4.5]] 111 | ``` 112 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | project = "vulkpy" 2 | author = "Hiroyuki Yamada" 3 | copyright = "2023, Hiroyuki Yamada" 4 | 5 | extensions = [ 6 | 'sphinx.ext.napoleon', 7 | "sphinx_automodapi.automodapi", 8 | 'sphinx_automodapi.smart_resolver', 9 | 'myst_parser' 10 | ] 11 | 12 | html_title = "vulkpy" 13 | html_theme = "furo" 14 | html_logo = "" 15 | html_favicon = "" 16 | html_show_sourcelink = False 17 | 18 | html_css_files = [ 19 | "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/fontawesome.min.css", 20 | "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/solid.min.css", 21 |
"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/brands.min.css", 22 | ] 23 | 24 | html_theme_options = { 25 | "footer_icons": [ 26 | { 27 | "name": "GitHub", 28 | "url": "https://github.com/ymd-h/vulkpy", 29 | "html": "", 30 | "class": "fa-brands fa-github fa-2x", 31 | }, 32 | ], 33 | } 34 | 35 | napoleon_include_init_with_doc = True 36 | napoleon_use_admonition_for_examples = True 37 | napoleon_use_admonition_for_notes = True 38 | napoleon_use_admonition_for_references = True 39 | 40 | numpydoc_show_class_members=False 41 | 42 | autodoc_class_signature = "separated" 43 | autodoc_default_options = { 44 | 'member-order': 'bysource', 45 | 'class-doc-from':'class', 46 | 'exclude-members': '__dict__, __weakref__, __module__, __new__, __reduce__, __setstate__', 47 | } 48 | 49 | automodsumm_inherited_members = True 50 | -------------------------------------------------------------------------------- /doc/development.md: -------------------------------------------------------------------------------- 1 | # Contributing and Developer's Guide 2 | 3 | ## Contributing 4 | Any contributions are welcome. 5 | 6 | ### Run on Actual GPUs 7 | One of the most lacking parts is running on various GPUs. 8 | Any feedback is appreciated. 9 | 10 | 11 | ### When You Find Any Problems / Bugs 12 | Check [issues](https://github.com/ymd-h/vulkpy/issues) first, 13 | and open a new one unless the same problem has been reported.
14 | 15 | 16 | 17 | ## Developer's Guide 18 | 19 | ### Code Layout 20 | 21 | - `vulkpy/` (Main Package) 22 | - `vkarray.py` 23 | - Python core implementation 24 | - `_vkarray.cc`, `_vkutil.hh` 25 | - C++ internal implementation 26 | - `shader/` 27 | - GPU shaders 28 | - `nn/` 29 | - Neural Network implementation 30 | - `random.py` 31 | - Pseudo Random Number Generator (PRNG) implementation 32 | - `util.py` 33 | - Utility Function implementation 34 | - `vktyping.py` 35 | - Type implementation 36 | - `doc/` 37 | - Document Site 38 | - `example/` 39 | - Example Codes 40 | - `test/` 41 | - Test Codes 42 | - `.github/` 43 | - CI configuration 44 | - `README.md`, `LICENSE` 45 | - Project-wide information. 46 | - `setup.py`, `pyproject.toml`, `MANIFEST.in` 47 | - Configuration for Package Build 48 | - `.coveragerc` 49 | - Configuration for Coverage 50 | - `Dockerfile`, `.dockerignore` 51 | - Configuration for CI tasks (See below) 52 | - `mypy.ini` 53 | - Configuration for Type Check 54 | - `.gitignore` 55 | 56 | ### Continuous Integration (CI) & Continuous Delivery (CD) 57 | We use GitHub Actions for CI/CD, and its configuration is defined at 58 | `.github/workflows/vulkpy.yaml` 59 | 60 | To make CI independent from the platform as much as possible, 61 | we define actual CI tasks inside `Dockerfile`. 62 | 63 | - Type Check with [Mypy](https://mypy.readthedocs.io/) 64 | - Build wheel 65 | - Run Unit Test & Regression Test 66 | - Report Coverage with [unittest-xml-reporting](https://github.com/xmlrunner/unittest-xml-reporting) & [coverage.py](https://coverage.readthedocs.io/) 67 | - Build Document Site with [Sphinx](https://www.sphinx-doc.org/) 68 | 69 | 70 | ### Document Site 71 | Document site is generated by [Sphinx](https://www.sphinx-doc.org/) during CI/CD. 72 | We adopt [furo](https://github.com/pradyunsg/furo) theme. 73 | 74 | Most documents are written in markdown (`.md`) and parsed by [MyST](https://myst-parser.readthedocs.io/).
75 | 76 | All markdown files are located flat in the `doc/` directory. Even if we 77 | restructure the document site in the future, a flat layout can prevent broken links. 78 | 79 | 80 | API reference is automatically generated from docstrings with 81 | [sphinx-automodapi](https://sphinx-automodapi.readthedocs.io/). 82 | 83 | 84 | ### docstring 85 | To make the usage understandable, all public classes and methods 86 | should have a docstring. 87 | 88 | [Sphinx](https://www.sphinx-doc.org/) generates 89 | [API reference](https://ymd-h.github.io/vulkpy/api.html) 90 | from these docstrings. 91 | 92 | 93 | Basically we obey 94 | [Numpy's style guide](https://numpydoc.readthedocs.io/en/latest/format.html), 95 | however, we adopt the following [PEP-257](https://peps.python.org/pep-0257/) 96 | statement for class docstrings; 97 | 98 | > The docstring for a class should summarize its behavior and list the 99 | > public methods and instance variables. If the class is intended to 100 | > be subclassed, and has an additional interface for subclasses, this 101 | > interface should be listed separately (in the docstring). The class 102 | > constructor should be documented in the docstring for its __init__ 103 | > method. Individual methods should be documented by their own 104 | > docstring. 105 | 106 | 107 | To separate class docstring and `__init__()` docstring, 108 | we configure Sphinx as follows; 109 | 110 | ```python 111 | autodoc_class_signature = "separated" 112 | autodoc_default_options = { 113 | "class-doc-from": "class" 114 | } 115 | ``` 116 | -------------------------------------------------------------------------------- /doc/example.md: -------------------------------------------------------------------------------- 1 | # Example 2 | 3 | ## 00. Arithmetic 4 | ```{literalinclude} ../example/00-arithmetic.py 5 | :language: python 6 | ``` 7 | 8 | 9 | ## 01. Random 10 | ```{literalinclude} ../example/01-random.py 11 | :language: python 12 | ``` 13 | 14 | ## 02.
Neural Network 15 | ```{literalinclude} ../example/02-nn.py 16 | :language: python 17 | ``` 18 | -------------------------------------------------------------------------------- /doc/index.md: -------------------------------------------------------------------------------- 1 | # vulkpy: GPGPU array on Vulkan 2 | 3 | vulkpy is a Python package providing GPGPU computation based on Vulkan. 4 | 5 | ```{warning} 6 | vulkpy is still under development, so the API might still break without notice. 7 | ``` 8 | 9 | ```{toctree} 10 | :caption: Contents 11 | :maxdepth: 1 12 | 13 | ./broadcasting.md 14 | ./synchronization.md 15 | ./example.md 16 | ./development.md 17 | ./api.md 18 | ``` 19 | -------------------------------------------------------------------------------- /doc/synchronization.md: -------------------------------------------------------------------------------- 1 | # Synchronization 2 | 3 | With Vulkan, GPU operations are executed asynchronously. 4 | 5 | In principle, vulkpy automatically `wait()`s for the dependent `Job` 6 | before reading or destructing, and users don't need to `wait()` explicitly. 7 | 8 | In order to keep necessary resources during GPU execution, 9 | the result `Array` holds them, too. 10 | 11 | Just in case some arrays get circular references and memory won't be released, 12 | users might call `wait()` explicitly to clear references to the dependent resources.
def main():
    """Chain a few element-wise GPU operations and print each result."""

    def show(array):
        # Synchronize with the GPU before printing the array.
        array.wait()
        print(array)

    device = vk.GPU()

    n = (100,)
    threes = vk.Array(device, data=np.full(n, 3))
    fives = vk.Array(device, data=np.full(n, 5))

    total = threes + fives
    show(total)

    # Two chained subtractions; only the final result is waited on and printed.
    diff = total - threes
    rest = diff - fives
    show(rest)

    rest += threes
    show(rest)

    shifted = rest + 5
    show(shifted)

    shifted /= 4
    show(shifted)
def example02(nepoch, batch_size, opt, lr, l1, l2, *, debug = False):
    """
    Train and evaluate a 4 -> 128 -> 128 -> 3 dense network on the Iris dataset

    Parameters
    ----------
    nepoch : int
        Number of training epochs.
    batch_size : int
        Number of training samples per mini-batch.
        The last batch of an epoch may be smaller.
    opt : {"adam", "sgd"}
        Name of the optimizer.
    lr : float
        Learning rate passed to the optimizer.
    l1 : float or None
        L1 regularization coefficient. ``None`` disables it.
    l2 : float or None
        L2 regularization coefficient. ``None`` disables it.
    debug : bool, optional
        If ``True``, ``enable_debug(api_dump=False)`` is called first.

    Raises
    ------
    KeyError
        If ``opt`` is neither ``"adam"`` nor ``"sgd"``.
    """
    if debug:
        enable_debug(api_dump=False)

    gpu = vk.GPU()
    rng = np.random.default_rng()

    train_x, test_x, train_y, test_y = train_test_split(*load_iris(return_X_y=True),
                                                        random_state = 777,
                                                        test_size = 0.2)

    # Convert to one_hot vector
    train_y = np.identity(3)[train_y]
    test_y = np.identity(3)[test_y]

    print(f"train_x.shape: {train_x.shape}, test_x.shape: {test_x.shape}")
    print(f"train_y.shape: {train_y.shape}, test_y.shape: {test_y.shape}")
    assert ((train_x.shape[0] == train_y.shape[0]) and
            ( test_x.shape[0] ==  test_y.shape[0]))
    assert train_x.shape[1] == test_x.shape[1] == 4
    assert train_y.shape[1] == test_y.shape[1] == 3

    opt = {
        "adam": lambda lr: nn.Adam(gpu, lr=lr),
        "sgd": lambda lr: nn.SGD(lr)
    }[opt](lr)

    R = None
    if (l1 is not None) and (l2 is not None):
        R = nn.Elastic(l1, l2)
    elif (l1 is not None):
        R = nn.Lasso(l1)
    elif (l2 is not None):
        R = nn.Ridge(l2)

    # Sequential Model: 4 -> 128 -> 128 -> 3
    net = nn.Sequence(
        [
            nn.Dense(gpu,   4, 128, w_opt=opt, b_opt=opt, w_reg=R, b_reg=R),
            nn.ReLU(),
            nn.Dense(gpu, 128, 128, w_opt=opt, b_opt=opt, w_reg=R, b_reg=R),
            nn.ReLU(),
            nn.Dense(gpu, 128,   3, w_opt=opt, b_opt=opt, w_reg=R, b_reg=R),
            nn.Softmax(),
        ],
        nn.CrossEntropyLoss(reduce="sum")
    )

    n_train = train_x.shape[0]
    n_test = test_x.shape[0]
    idx = np.arange(n_train)

    X = vk.Array(gpu, data=test_x)
    Y = vk.Array(gpu, data=test_y)

    train_loss = vk.Array(gpu, shape=(1,))
    for e in range(nepoch):
        t = time.perf_counter()

        rng.shuffle(idx) # TODO: Implement GPU shuffle()
        train_loss[:] = 0

        # Step through the shuffled index array by *position*, in consecutive
        # non-overlapping windows, so every sample is used exactly once per epoch.
        # (Using the shuffled values themselves as offsets would give
        #  overlapping / incomplete batches.)
        for start in range(0, n_train, batch_size):
            bidx = idx[start:start+batch_size]

            x = vk.Array(gpu, data=train_x[bidx])
            y = vk.Array(gpu, data=train_y[bidx])

            _, loss = net.train(x, y)
            train_loss += loss

        # The loss is built with reduce="sum", so this presumably turns the
        # accumulated sum into a per-sample mean.
        train_loss /= n_train

        pred_y, eval_loss = net.predict(X, Y)
        pred_class = np.argmax(pred_y, axis=1) # TODO: Implement GPU argmax()
        accuracy = (np.identity(3)[pred_class] * test_y).sum(axis=1).mean()

        # Evaluation runs on the test set, so normalize by the test set size,
        # not by the number of training samples.
        eval_loss /= n_test

        dt = time.perf_counter() - t
        print(f"Epoch: {e:3d}, " +
              f"Train Loss: {train_loss[0]:.6f}, " +
              f"Eval Loss: {float(eval_loss.array):.6f}, " +
              f"Eval Acc: {accuracy:.6f} " +
              f"Elapsed: {dt:.6f}s")
import os
import platform
from setuptools import setup, find_packages, Extension
import subprocess

import pybind11

pkg = "vulkpy"

# Compile Compute Shader
#
# Each entry is either a shader base name, or a tuple of
# (base name, extra glslc flag) for shaders that need a specific target.
for entry in [
        "add", "sub", "mul", "div",
        "iadd", "isub", "imul", "idiv",
        "add_scalar", "sub_scalar", "mul_scalar", "div_scalar",
        "iadd_scalar", "isub_scalar", "imul_scalar", "idiv_scalar",
        "rsub_scalar", "rdiv_scalar",
        "add_broadcast", "sub_broadcast", "mul_broadcast", "div_broadcast",
        "iadd_broadcast", "isub_broadcast", "imul_broadcast", "idiv_broadcast",
        "matmul",
        "max", "min", "imax", "imin",
        "max_scalar", "min_scalar", "imax_scalar", "imin_scalar",
        "max_broadcast", "min_broadcast", "imax_broadcast", "imin_broadcast",
        "abs", "sign", "iabs", "isign",
        "sin", "cos", "tan", "asin", "acos", "atan",
        "isin", "icos", "itan", "iasin", "iacos", "iatan",
        "sinh", "cosh", "tanh", "asinh", "acosh", "atanh",
        "isinh", "icosh", "itanh", "iasinh", "iacosh", "iatanh",
        "exp", "log", "exp2", "log2",
        "iexp", "ilog", "iexp2", "ilog2",
        "sqrt", "invsqrt", "isqrt", "iinvsqrt",
        "pow", "ipow", "pow_scalar", "ipow_scalar", "rpow_scalar",
        "pow_broadcast", "ipow_broadcast",
        "clamp", "clamp_sv", "clamp_vs", "clamp_ss",
        "iclamp", "iclamp_sv", "iclamp_vs", "iclamp_ss",
        "prng_xoshiro128pp_uint32", "prng_xoshiro128pp_float",
        "prng_box_muller", "prng_ibox_muller",
        "prng_randrange",
        "sum", ("sum_v1.3", "--target-env=vulkan1.1"), "sum_axis",
        "prod", ("prod_v1.3", "--target-env=vulkan1.1"), "prod_axis",
        "sum_axis_rebroadcast", "prod_axis_rebroadcast",
        "maximum", ("maximum_v1.3", "--target-env=vulkan1.1"), "maximum_axis",
        "minimum", ("minimum_v1.3", "--target-env=vulkan1.1"), "minimum_axis",
        "maximum_axis_rebroadcast", "minimum_axis_rebroadcast",
        "broadcast",
        "batch_affine",
        "gather", "gather_axis",
        "nn_cross_entropy", "nn_cross_entropy_backward",
]:
    if isinstance(entry, tuple):
        shader, flag = entry
        flag = (flag,)
    else:
        shader = entry
        flag = tuple()
    s = os.path.join(pkg, "shader", shader)
    spv = s+".spv"
    comp = s+".comp"

    # Rebuild when the binary is missing, or when the GLSL source is present
    # and newer than the binary. (A source distribution may ship only .spv.)
    if ((not os.path.exists(spv)) or
        (os.path.exists(comp) and (os.stat(comp).st_mtime > os.stat(spv).st_mtime))):
        cmd = subprocess.run(["glslc", *flag, "-o", spv, comp],
                             capture_output=True, text=True)
        # Surface compiler diagnostics before failing the build
        if cmd.stdout:
            print(cmd.stdout)
        if cmd.stderr:
            print(cmd.stderr)
        cmd.check_returncode()


if platform.system() != "Windows":
    # NOTE(review): "-march=native" ties the binary to the build machine's
    # CPU; confirm this is intended for distributed wheels.
    extra_args = {
        "extra_compile_args": ["-std=c++2a", "-O3", "-march=native", "-Wall"],
        "extra_link_args": ["-std=c++2a"],
    }
else:
    extra_args = {
        "extra_compile_args": ["/std:c++20", "/O2", "/Wall"],
        "extra_link_args": None,
    }

ext = [Extension(f"{pkg}._vkarray",
                 [os.path.join(f"{pkg}", "_vkarray.cc")],
                 include_dirs=[pybind11.get_include()],
                 libraries=["vulkan"],
                 **extra_args)]

# Use README as long description when it is available
desc = {}
README = "README.md"
if os.path.exists(README):
    # Read explicitly as UTF-8 instead of relying on the locale encoding,
    # which is not UTF-8 on every platform.
    with open(README, encoding="utf-8") as f:
        desc["long_description"] = f.read()
        desc["long_description_content_type"] = "text/markdown"

setup(name="vulkpy",
      version="0.0.8",
      author="H. Yamada",
      description="GPGPU array on Vulkan",
      **desc,
      url="https://github.com/ymd-h/vulkpy",
      packages=find_packages(),
      ext_modules=ext,
      include_package_data=True,
      install_requires=[
          "typing_extensions",
          "numpy",
          "well-behaved-logging"
      ],
      extras_require={
          "test": ["coverage", "unittest-xml-reporting", "mypy"],
          "doc": ["sphinx", "sphinx-rtd-theme", "myst-parser"],
      },
      classifiers=[
          "Development Status :: 4 - Beta",
          "Environment :: GPU",
          "License :: OSI Approved :: MIT License",
          "Programming Language :: Python :: 3 :: Only",
          "Programming Language :: Python :: Implementation :: CPython",
          "Topic :: Scientific/Engineering :: Artificial Intelligence",
      ])
class TestLayers(unittest.TestCase):
    """
    Forward / backward tests for ``nn.ReLU``, ``nn.Sigmoid``,
    ``nn.Softmax`` and ``nn.Dense``

    Backward tests always run the forward pass first, then call
    ``backward()`` on the same layer instance.
    """
    @classmethod
    def setUpClass(cls):
        # A single GPU handle is shared by all tests in this class
        cls.gpu = vk.GPU()

    def test_relu_forward(self):
        relu = nn.ReLU()

        x = vk.Array(self.gpu, data=[[-0.2, 0.0, 0.2]])
        y = relu(x)

        np.testing.assert_allclose(y, [[0.0, 0.0, 0.2]])

    def test_relu_backward(self):
        relu = nn.ReLU()

        x = vk.Array(self.gpu, data=[[-0.2, 0.0, 0.2]])
        y = relu(x)

        dy = vk.Array(self.gpu, data=[[0.7, 0.8, 0.9]])
        dx = relu.backward(dy)

        # Gradient passes only where the input was strictly positive
        np.testing.assert_allclose(dx, [[0.0, 0.0, 0.9]])

    def test_sigmoid_forward(self):
        sigmoid = nn.Sigmoid()

        d = np.asarray([[-100, -0.1, 0, 10, 100]])
        x = vk.Array(self.gpu, data=d)

        y = sigmoid(x)

        np.testing.assert_allclose(y, 1/(1+np.exp(-d)), rtol=1e-7, atol=1e-7)

    def test_sigmoid_backward(self):
        sigmoid = nn.Sigmoid()

        _x = np.asarray([[-100, -0.1, 0, 10, 100]])
        x = vk.Array(self.gpu, data=_x)
        y = sigmoid(x)

        _dy = np.asarray([[0.1, 0.2, 0.3, 0.5, 0.7]])
        dy = vk.Array(self.gpu, data=_dy)

        dx = sigmoid.backward(dy)
        np.testing.assert_allclose(dx, dy * y * (1 - y))

    def test_softmax(self):
        softmax = nn.Softmax()

        x = vk.Array(self.gpu, data=[[1.0, 1.0]])
        y = softmax(x)

        np.testing.assert_allclose(y, [[0.5, 0.5]])

    def test_softmax_skew(self):
        softmax = nn.Softmax()

        x = vk.Array(self.gpu, data=[[100.0, 0.0]])
        y = softmax(x)

        np.testing.assert_allclose(y, [[1.0, 0]])

    def test_softmax_forward(self):
        softmax = nn.Softmax()

        _x = np.asarray([[-100, -0.1, 0, 10, 100]])
        x = vk.Array(self.gpu, data=_x)

        y = softmax(x)

        # keepdims=True keeps the row-wise maximum broadcastable against
        # ``_x`` for any batch size, not only for a single row.
        exp_x = np.exp(_x - _x.max(axis=1, keepdims=True))
        np.testing.assert_allclose(y, exp_x / exp_x.sum(axis=1, keepdims=True),
                                   rtol=1e-7, atol=1e-7)

    def test_softmax_backward(self):
        softmax = nn.Softmax()

        _x = np.asarray([[-100, -0.1, 0, 10, 100]])
        x = vk.Array(self.gpu, data=_x)

        y = softmax(x)

        _dy = np.asarray([[0.1, 0.2, 0.3, 0.5, 0.7]])
        dy = vk.Array(self.gpu, data=_dy)

        dx = softmax.backward(dy)

        # NOTE(review): the expected value uses only the element-wise
        # y * (1 - y) term; the full softmax Jacobian also has off-diagonal
        # terms. Confirm this matches the intended Softmax.backward contract.
        np.testing.assert_allclose(dx, dy * y * (1 - y))

    def test_dense_zero(self):
        dense = nn.Dense(self.gpu, 2, 2, w_init=nn.Constant(0.0))

        x = vk.Array(self.gpu, data=[[1, 2], [3, 4]])
        y = dense(x)

        np.testing.assert_allclose(y, [[0, 0], [0, 0]])

    def test_dense_bias(self):
        dense = nn.Dense(self.gpu, 2, 2,
                         w_init=nn.Constant(0.0),
                         b_init=nn.Constant(1.0))

        x = vk.Array(self.gpu, data=[[1,2], [3,4]])
        y = dense(x)

        np.testing.assert_allclose(y, [[1, 1], [1, 1]])

    def test_dense(self):
        dense = nn.Dense(self.gpu, 2, 2)

        # Identical input rows must produce identical output rows
        x = vk.Array(self.gpu, data=[[2, 3], [2, 3]])
        y = dense(x)

        np.testing.assert_allclose(y[0,:], y[1,:])

    def test_dense_backward(self):
        dense = nn.Dense(self.gpu, 2, 2)
        np.testing.assert_allclose(dense.w.grad, [[0, 0], [0, 0]])
        np.testing.assert_allclose(dense.b.grad, [0, 0])

        x = vk.Array(self.gpu, data=[[1, 2], [3, 4]])
        y = dense(x)

        dy = vk.Array(self.gpu, data=[[4, 2], [1, 3]])
        dx = dense.backward(dy)

        np.testing.assert_allclose(dense.w.grad, [[7, 12], [11, 16]])
        np.testing.assert_allclose(dense.b.grad, [5, 5])

        _w = dense.w.value
        np.testing.assert_allclose(dx,
                                   [[_w[0,0] * dy[0,0] + _w[1,0] * dy[0,1],
                                     _w[0,1] * dy[0,0] + _w[1,1] * dy[0,1]],
                                    [_w[0,0] * dy[1,0] + _w[1,0] * dy[1,1],
                                     _w[0,1] * dy[1,0] + _w[1,1] * dy[1,1]]])
[1.0, 0.0]]) 246 | _y = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]]) 247 | 248 | x = vk.Array(self.gpu, data=_x) 249 | y = vk.Array(self.gpu, data=_y) 250 | 251 | _L = np.sum(-_y * np.log(_x + 1e-8), axis=1) 252 | L = loss(x, y) 253 | np.testing.assert_allclose(L, _L.mean(), atol=1e-7, rtol=1e-7) 254 | 255 | dx = loss.grad() 256 | _dx = - _y / (_x + 1e-8) 257 | np.testing.assert_allclose(dx, _dx / _dx.shape[0]) 258 | 259 | def test_cross_entropy_sum(self): 260 | loss = nn.CrossEntropyLoss(reduce="sum") 261 | 262 | _x = np.asarray([[0.7, 0.3], [0.2, 0.8], [1.0, 0.0]]) 263 | _y = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]]) 264 | 265 | x = vk.Array(self.gpu, data=_x) 266 | y = vk.Array(self.gpu, data=_y) 267 | 268 | _L = np.sum(-_y * np.log(_x + 1e-8), axis=1) 269 | L = loss(x, y) 270 | np.testing.assert_allclose(L, _L.sum(), atol=1e-7, rtol=1e-7) 271 | 272 | dx = loss.grad() 273 | _dx = - _y / (_x + 1e-8) 274 | np.testing.assert_allclose(dx, _dx) 275 | 276 | def test_softmax_crossentropy(self): 277 | sce = nn.SoftmaxCrossEntropyLoss() 278 | 279 | _x = np.asarray([[100.0, 0.0]]) 280 | x = vk.Array(self.gpu, data=_x) 281 | 282 | _y = np.asarray([[1.0, 0.0]]) 283 | y = vk.Array(self.gpu, data=_y) 284 | 285 | L = sce(x, y) 286 | np.testing.assert_allclose(L, [0.0]) 287 | 288 | def test_softmax_crossentropy_forward_default(self): 289 | sce = nn.SoftmaxCrossEntropyLoss() 290 | 291 | _x = np.asarray([[-1, 0], [10, 15]]) 292 | x = vk.Array(self.gpu, data=_x) 293 | 294 | _y = np.asarray([[1, 0], [0, 1]]) 295 | y = vk.Array(self.gpu, data=_y) 296 | 297 | L = sce(x, y) 298 | 299 | exp_x = np.exp(_x - _x.max(axis=1, keepdims=True)) 300 | _L = (-_y * np.log(exp_x / exp_x.sum(axis=1, keepdims=True))).sum(axis=1) 301 | np.testing.assert_allclose(L, _L.mean(axis=0), atol=1e-7, rtol=1e-7) 302 | 303 | def test_softmax_crossentropy_forward_mean(self): 304 | sce = nn.SoftmaxCrossEntropyLoss(reduce="mean") 305 | 306 | _x = np.asarray([[-1, 0], [10, 15]]) 307 | x = 
vk.Array(self.gpu, data=_x) 308 | 309 | _y = np.asarray([[1, 0], [0, 1]]) 310 | y = vk.Array(self.gpu, data=_y) 311 | 312 | L = sce(x, y) 313 | 314 | exp_x = np.exp(_x - _x.max(axis=1, keepdims=True)) 315 | _L = (-_y * np.log(exp_x / exp_x.sum(axis=1, keepdims=True))).sum(axis=1) 316 | np.testing.assert_allclose(L, _L.mean(axis=0), atol=1e-7, rtol=1e-7) 317 | 318 | def test_softmax_crossentropy_forward_sum(self): 319 | sce = nn.SoftmaxCrossEntropyLoss(reduce="sum") 320 | 321 | _x = np.asarray([[-1, 0], [10, 15]]) 322 | x = vk.Array(self.gpu, data=_x) 323 | 324 | _y = np.asarray([[1, 0], [0, 1]]) 325 | y = vk.Array(self.gpu, data=_y) 326 | 327 | L = sce(x, y) 328 | 329 | exp_x = np.exp(_x - _x.max(axis=1, keepdims=True)) 330 | _L = (-_y * np.log(exp_x / exp_x.sum(axis=1, keepdims=True))).sum(axis=1) 331 | np.testing.assert_allclose(L, _L.sum(axis=0), atol=1e-7, rtol=1e-7) 332 | 333 | def test_softmax_crossentropy_backward_default(self): 334 | sce = nn.SoftmaxCrossEntropyLoss() 335 | 336 | _x = np.asarray([[-1, 0], [10, 15]]) 337 | x = vk.Array(self.gpu, data=_x) 338 | 339 | _y = np.asarray([[1, 0], [0, 1]]) 340 | y = vk.Array(self.gpu, data=_y) 341 | 342 | L = sce(x, y) 343 | 344 | dx = sce.grad() 345 | 346 | exp_x = np.exp(_x - _x.max(axis=1, keepdims=True)) 347 | _L = exp_x / exp_x.sum(axis=1, keepdims=True) 348 | np.testing.assert_allclose(dx, (_L - _y) / _y.shape[0], atol=1e-7, rtol=1e-7) 349 | 350 | def test_softmax_crossentropy_backward_mean(self): 351 | sce = nn.SoftmaxCrossEntropyLoss(reduce="mean") 352 | 353 | _x = np.asarray([[-1, 0], [10, 15]]) 354 | x = vk.Array(self.gpu, data=_x) 355 | 356 | _y = np.asarray([[1, 0], [0, 1]]) 357 | y = vk.Array(self.gpu, data=_y) 358 | 359 | L = sce(x, y) 360 | 361 | dx = sce.grad() 362 | 363 | exp_x = np.exp(_x - _x.max(axis=1, keepdims=True)) 364 | _L = exp_x / exp_x.sum(axis=1, keepdims=True) 365 | np.testing.assert_allclose(dx, (_L - _y) / _y.shape[0], atol=1e-7, rtol=1e-7) 366 | 367 | def 
test_softmax_crossentropy_backward_sum(self): 368 | sce = nn.SoftmaxCrossEntropyLoss(reduce="sum") 369 | 370 | _x = np.asarray([[-1, 0], [10, 15]]) 371 | x = vk.Array(self.gpu, data=_x) 372 | 373 | _y = np.asarray([[1, 0], [0, 1]]) 374 | y = vk.Array(self.gpu, data=_y) 375 | 376 | L = sce(x, y) 377 | 378 | dx = sce.grad() 379 | 380 | exp_x = np.exp(_x - _x.max(axis=1, keepdims=True)) 381 | _L = exp_x / exp_x.sum(axis=1, keepdims=True) 382 | np.testing.assert_allclose(dx, _L - _y, atol=1e-7, rtol=1e-7) 383 | 384 | def test_mse_loss_default(self): 385 | mse = nn.MSELoss() 386 | 387 | _x = np.asarray([[4, 2], [1, 1.5]]) 388 | x = vk.Array(self.gpu, data=_x) 389 | 390 | _y = np.asarray([[3, 2.2], [0.7, 1.5]]) 391 | y = vk.Array(self.gpu, data=_y) 392 | 393 | L = mse(x, y) 394 | dx = mse.grad() 395 | 396 | np.testing.assert_allclose(L, np.square(_y - _x).sum(axis=1).mean(axis=0), 397 | atol=1e-7, rtol=1e-7) 398 | np.testing.assert_allclose(dx, (_x - _y), 399 | atol=1e-7, rtol=1e-7) 400 | 401 | def test_mse_loss_mean(self): 402 | mse = nn.MSELoss(reduce="mean") 403 | 404 | _x = np.asarray([[4, 2], [1, 1.5]]) 405 | x = vk.Array(self.gpu, data=_x) 406 | 407 | _y = np.asarray([[3, 2.2], [0.7, 1.5]]) 408 | y = vk.Array(self.gpu, data=_y) 409 | 410 | L = mse(x, y) 411 | dx = mse.grad() 412 | 413 | np.testing.assert_allclose(L, np.square(_y - _x).sum(axis=1).mean(axis=0), 414 | atol=1e-7, rtol=1e-7) 415 | np.testing.assert_allclose(dx, (_x - _y), 416 | atol=1e-7, rtol=1e-7) 417 | 418 | def test_mse_loss_sum(self): 419 | mse = nn.MSELoss(reduce="sum") 420 | 421 | _x = np.asarray([[4, 2], [1, 1.5]]) 422 | x = vk.Array(self.gpu, data=_x) 423 | 424 | _y = np.asarray([[3, 2.2], [0.7, 1.5]]) 425 | y = vk.Array(self.gpu, data=_y) 426 | 427 | L = mse(x, y) 428 | dx = mse.grad() 429 | 430 | np.testing.assert_allclose(L, np.square(_y - _x).sum(axis=1).sum(axis=0), 431 | atol=1e-7, rtol=1e-7) 432 | np.testing.assert_allclose(dx, 2 * (_x - _y), 433 | atol=1e-7, rtol=1e-7) 434 | 435 | def 
class TestRegularizer(unittest.TestCase):
    """
    Loss / gradient tests for ``nn.Ridge``, ``nn.Lasso`` and ``nn.Elastic``

    Every case evaluates a regularizer on a one-element parameter that is
    initialized to a constant value.
    """
    @classmethod
    def setUpClass(cls):
        from vulkpy.nn.parameters import Parameter
        cls.gpu = vk.GPU()
        cls.P = Parameter

    def _check(self, value, regularizer, loss, grad):
        # Build a (1,)-shaped parameter holding ``value``, then compare the
        # regularizer's loss and gradient with the expected arrays.
        param = self.P(self.gpu, (1,), initializer=nn.Constant(value))

        np.testing.assert_allclose(regularizer.loss(param.value), loss)
        np.testing.assert_allclose(regularizer.grad(param.value), grad)

    def test_ridge_zero(self):
        self._check(0.0, nn.Ridge(1.0),
                    np.asarray(0.0), np.asarray((0.0, )))

    def test_ridge(self):
        self._check(3.5, nn.Ridge(1.0),
                    np.asarray(3.5 ** 2), np.asarray((2 * 3.5,)))

    def test_ridge_negative(self):
        self._check(-3.5, nn.Ridge(1.0),
                    np.asarray((-3.5) ** 2), np.asarray((2 * -3.5,)))

    def test_lasso_zero(self):
        self._check(0.0, nn.Lasso(1.0),
                    np.asarray(0.0), np.asarray((0.0, )))

    def test_lasso(self):
        self._check(3.5, nn.Lasso(1.0),
                    np.asarray(3.5), np.asarray((1.0,)))

    def test_lasso_negative(self):
        self._check(-3.5, nn.Lasso(1.0),
                    np.asarray(3.5), np.asarray((-1.0,)))

    def test_elastic_zero(self):
        self._check(0.0, nn.Elastic(1.0, 1.0),
                    np.asarray(0.0), np.asarray(0.0,))

    def test_elastic(self):
        self._check(3.5, nn.Elastic(1.0, 1.0),
                    np.asarray(3.5 ** 2 + 3.5), np.asarray((2 * 3.5 + 1.0,)))

    def test_elastic_negative(self):
        self._check(-3.5, nn.Elastic(1.0, 1.0),
                    np.asarray(3.5 ** 2 + 3.5), np.asarray((2 * -3.5 - 1.0,)))
np.testing.assert_allclose(np.asarray(a).shape, (5, 5, 5)) 61 | self.assertTrue((0 <= np.asarray(a)).all()) 62 | self.assertTrue((np.asarray(a) < 1.0).all()) 63 | 64 | def test_buffer(self): 65 | rng = vk.random.Xoshiro128pp(self.gpu) 66 | a = vk.Array(self.gpu, shape=(5,)) 67 | a = rng.random(buffer=a) 68 | a.wait() 69 | np.testing.assert_allclose(np.asarray(a).shape, (5,)) 70 | self.assertTrue((0 <= np.asarray(a)).all()) 71 | self.assertTrue((np.asarray(a) < 1.0).all()) 72 | 73 | def test_normal_even(self): 74 | rng1 = vk.random.Xoshiro128pp(self.gpu, seed=0) 75 | rng2 = vk.random.Xoshiro128pp(self.gpu, seed=0) 76 | 77 | a1 = rng1.normal(shape=(10,)) 78 | a2 = rng2.normal(shape=(10,), mean=5, stddev=3) 79 | 80 | np.testing.assert_allclose((a2 - 5) / a1, np.full((10,), 3), rtol=1e-5) 81 | 82 | def test_normal_odd(self): 83 | rng1 = vk.random.Xoshiro128pp(self.gpu, seed=0) 84 | rng2 = vk.random.Xoshiro128pp(self.gpu, seed=0) 85 | 86 | a1 = rng1.normal(shape=(11,)) 87 | a2 = rng2.normal(shape=(11,), mean=5, stddev=3) 88 | 89 | np.testing.assert_allclose((a2 - 5) / a1, np.full((11,), 3), rtol=1e-5) 90 | 91 | def test_randint(self): 92 | rng = vk.random.Xoshiro128pp(self.gpu) 93 | a = rng.randint(shape=(5,)) 94 | 95 | np.testing.assert_allclose(a.shape, (5,)) 96 | 97 | self.assertTrue(np.all((0 <= np.asarray(a)) & (np.asarray(a) < (2 ** 32)))) 98 | 99 | def test_randrange(self): 100 | rng = vk.random.Xoshiro128pp(self.gpu) 101 | a = rng.randrange(shape=(5,), low=3, high=4) 102 | 103 | np.testing.assert_allclose(a, [3, 3, 3, 3, 3]) 104 | 105 | if __name__ == "__main__": 106 | enable_debug(api_dump=False) 107 | unittest.main() 108 | -------------------------------------------------------------------------------- /vulkpy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | vulkpy: GPGPU array on Vulkan 3 | ============================= 4 | 5 | vulkpy provides GPGPU computations. 
6 | 7 | See Also 8 | -------- 9 | vulkpy.random : Random Module 10 | vulkpy.nn : Neural Network Module 11 | vulkpy.util : Utility Module 12 | 13 | 14 | Examples 15 | -------- 16 | >>> import vulkpy as vk 17 | 18 | >>> gpu = vk.GPU() 19 | >>> a = vk.Array(gpu, data=[1, 2, 3]) 20 | >>> b = vk.Array(gpu, data=[3, 3, 3]) 21 | 22 | >>> c = a + b 23 | >>> print(c) 24 | [4., 5., 6.] 25 | """ 26 | from .vkarray import GPU, U32Array, Shape, Array, zeros 27 | from . import random 28 | from . import nn 29 | -------------------------------------------------------------------------------- /vulkpy/_vkutil.hh: -------------------------------------------------------------------------------- 1 | #ifndef VKUTIL_HH 2 | #define VKUTIL_HH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace util { 10 | constexpr inline std::uint32_t VK_API_VERSION(std::uint32_t major, 11 | std::uint32_t minor, 12 | std::uint32_t patch) noexcept { 13 | return (major << 22) | (minor << 12) | (patch); 14 | } 15 | 16 | template 17 | auto generate_from_range(F&& f, std::uint32_t n) { 18 | auto v = std::vector>{}; 19 | v.reserve(n); 20 | 21 | auto g = [&f, i=std::uint32_t(0)]() mutable { return f(i++); }; 22 | std::generate_n(std::back_inserter(v), n, g); 23 | 24 | return v; 25 | } 26 | 27 | std::vector readCode(std::string_view name){ 28 | auto f = std::ifstream(name.data(), std::ios::ate | std::ios::binary); 29 | if(!f.is_open()){ 30 | throw std::runtime_error("failed to open file"); 31 | } 32 | auto size = f.tellg(); 33 | f.seekg(0); 34 | 35 | auto v = std::vector(size); 36 | f.read(v.data(), size); 37 | 38 | f.close(); 39 | return v; 40 | } 41 | 42 | 43 | template 44 | auto pylist2array(F&& f, const pybind11::list& pylist, 45 | std::integer_sequence){ 46 | T array[]{ 47 | pylist[pybind11::size_t(I)].cast()... 
48 | }; 49 | return f(array); 50 | } 51 | } 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /vulkpy/nn/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Module (:mod:`vulkpy.nn`) 3 | ======================================== 4 | 5 | Examples 6 | -------- 7 | >>> import vulkpy as vk 8 | >>> from vulkpy import nn 9 | >>> gpu = vk.GPU() 10 | >>> x = vk.Array(gpu, data=[ ... ]) # Features 11 | >>> y = vk.Array(gpu, data=[ ... ]) # Labels 12 | 13 | Create Optimizer and Model 14 | 15 | >>> opt = nn.Adam(gpu, lr=1e-4) 16 | >>> net = nn.Sequence( 17 | ... [ 18 | ... nn.Dense(gpu, 3, 32, w_opt=opt, b_opt=opt), 19 | ... nn.ReLU(), 20 | ... nn.Dense(gpu, 32, 4, w_opt=opt, b_opt=opt), 21 | ... nn.Softmax(), 22 | ... ], 23 | ... nn.CrossEntropyLoss() 24 | ... ) 25 | 26 | Training Model 27 | 28 | >>> pred_y, loss = net.train(x, y) 29 | 30 | Predict with Model 31 | 32 | >>> pred_y = net.predict(x) 33 | """ 34 | 35 | from .core import ( 36 | Optimizer, 37 | OptimizerState, 38 | Loss, 39 | Regularizer, 40 | Module, 41 | ) 42 | from .initializers import Constant, HeNormal 43 | from .optimizers import ( 44 | SGD, SGDState, 45 | Adam, AdamState, 46 | AdaGrad, AdaGradState, 47 | ) 48 | from .layers import Dense, ReLU, Sigmoid, Softmax 49 | from .losses import ( 50 | CrossEntropyLoss, 51 | SoftmaxCrossEntropyLoss, 52 | MSELoss, 53 | HuberLoss, 54 | ) 55 | from .regularizers import ( 56 | Lasso, 57 | Ridge, 58 | Elastic, 59 | ) 60 | from .models import Sequence 61 | -------------------------------------------------------------------------------- /vulkpy/nn/core.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Core Module (:mod:`vulkpy.nn.core`) 3 | ================================================== 4 | 5 | This module provides abstract base classes for Neural Network. 
class OptimizerState:
    """
    Base class for the per-parameter state of an optimizer

    See Also
    --------
    vulkpy.nn.Optimizer : Optimizer
    vulkpy.nn.SGDState : OptimizerState subclass for SGD
    vulkpy.nn.AdamState : OptimizerState subclass for Adam

    Notes
    -----
    An instance of this class keeps the mutable values that belong to
    a single parameter, whereas static global settings (e.g. learning rate)
    are kept on the ``Optimizer`` class.

    A subclass should implement ``OptimizerState.grad2diff()``, which
    receives the accumulated gradients and returns the update difference.

    In the standard design an ``OptimizerState`` keeps a reference to its
    parent ``Optimizer`` so that it can read those global settings.
    """
    def grad2diff(self, grad: Array) -> Array:
        """
        Convert an accumulated gradient into an update difference

        Parameters
        ----------
        grad : vulkpy.Array
            Accumulated gradient

        Returns
        -------
        diff : vulkpy.Array
            Update diff. (``v += opt_state.grad2diff(grad)``)

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        Notes
        -----
        Subclass must implement this method.
        """
        raise NotImplementedError()
80 | 81 | Mutable per-parameter values are stored at ``OptimizerState`` class instance, 82 | although static global parameters (e.g. learning rate) are 83 | stored at this class. 84 | 85 | To implement a specific optimizer, a subclass of ``Optimizer`` should implement 86 | ``Optimizer.init_state()`` method, which returns corresponding subclass of 87 | ``OptimizerState``. 88 | 89 | Examples 90 | -------- 91 | >>> import vulkpy as vk 92 | >>> gpu = vk.GPU() 93 | >>> 94 | >>> adam = vk.nn.Adam(gpu) # Optimizer 95 | >>> dense = vk.nn.Dense(gpu, 1, 1, w_opt=adam, b_opt=adam) # Module 96 | """ 97 | def init_state(self, shape: Iterable[int]) -> OptimizerState: 98 | """ 99 | Create OptimizerState 100 | 101 | Parameters 102 | ---------- 103 | shape : iterable of ints 104 | Parameter Shape 105 | 106 | Returns 107 | ------- 108 | opt_state : vulkpy.nn.OptimizerState 109 | Optimizer State 110 | 111 | Notes 112 | ----- 113 | Subclass must implement this method. 114 | """ 115 | raise NotImplementedError 116 | 117 | class Loss: 118 | """ 119 | Abstract base class for Loss 120 | 121 | See Also 122 | -------- 123 | vulkpy.nn.CrossEntropyLoss : Cross Entropy Loss 124 | vulkpy.nn.SoftmaxCrossEntropyLoss : Softmax Cross Entropy Loss 125 | vulkpy.nn.HuberLoss : Huber Loss 126 | vulkpy.nn.MSELoss : MSE Loss 127 | vulkpy.nn.MixLoss : Mixing Loss 128 | 129 | Notes 130 | ----- 131 | ``Loss`` is designed to compute a loss with ``__call__()`` and its gradient with ``grad()``. 132 | 133 | Subclass of ``Loss`` must implement ``__call__()`` and ``grad()``. 134 | """ 135 | def __call__(self, x: Array, y: Array) -> Array: 136 | """ 137 | Compute Loss 138 | 139 | Parameters 140 | ---------- 141 | x : vulkpy.Array 142 | Input features 143 | y : vulkpy.Array 144 | Output target/label 145 | 146 | Returns 147 | ------- 148 | loss : vulkpy.Array 149 | Loss 150 | 151 | Notes 152 | ----- 153 | Subclass must implement this method.
154 | """ 155 | raise NotImplementedError 156 | 157 | def grad(self) -> Array: 158 | """ 159 | Compute Gradient 160 | 161 | Returns 162 | ------- 163 | grad : vulkpy.Array 164 | Gradient 165 | 166 | Notes 167 | ----- 168 | Subclass must implement this method. 169 | """ 170 | raise NotImplementedError 171 | 172 | class Regularizer: 173 | """ 174 | Abstract base class for Regularizer 175 | 176 | See Also 177 | -------- 178 | vulkpy.nn.Lasso : Lasso (L1) Regularizer 179 | vulkpy.nn.Ridge : Ridge (L2) Regularizer 180 | vulkpy.nn.Elastic : Elastic (L1 + L2) Regularizer 181 | 182 | Notes 183 | ----- 184 | Subclass must implement ``loss()`` and ``grad()``. 185 | """ 186 | def loss(self, param: Array) -> Array: 187 | """ 188 | Compute Regularizer Loss 189 | 190 | Parameters 191 | ---------- 192 | param : vulkpy.Array 193 | Parameters 194 | 195 | Returns 196 | ------- 197 | loss : vulkpy.Array 198 | Loss 199 | 200 | Notes 201 | ----- 202 | Subclass must implement this method. 203 | """ 204 | raise NotImplementedError 205 | 206 | def grad(self, param: Array) -> Array: 207 | """ 208 | Compute Gradient 209 | 210 | Parameters 211 | ---------- 212 | param : vulkpy.Array 213 | Parameters 214 | 215 | Returns 216 | ------- 217 | grad : vulkpy.Array 218 | Gradient 219 | 220 | Notes 221 | ----- 222 | Subclass must implement this method. 223 | """ 224 | raise NotImplementedError 225 | 226 | 227 | class Module: 228 | """ 229 | Abstract base class for Module 230 | 231 | See Also 232 | -------- 233 | vulkpy.nn.Dense : Dense Layer (subclass) 234 | vulkpy.nn.ReLU : ReLU Layer (subclass) 235 | vulkpy.nn.Sigmoid : Sigmoid Layer (subclass) 236 | vulkpy.nn.Softmax : Softmax Layer (subclass) 237 | vulkpy.nn.Sequence : Sequential Model 238 | 239 | Notes 240 | ----- 241 | ``Module`` is designed for Neural Network Layer. 242 | 243 | Subclass must implement ``forward()`` and ``backward()``, and can implement 244 | ``zero_grad()`` and ``update()`` when it is necessary.
245 | """ 246 | 247 | def __call__(self, x: Array) -> Array: 248 | """ 249 | Call Module 250 | 251 | Parameters 252 | ---------- 253 | x : vulkpy.Array 254 | Input 255 | 256 | Returns 257 | ------- 258 | y : vulkpy.Array 259 | Output 260 | 261 | Raises 262 | ------ 263 | ValueError 264 | If input (``x``) shape doesn't have at least 2-dimensions. 265 | 266 | Notes 267 | ----- 268 | This function stores input (``x``) and output (``y``) for training. 269 | """ 270 | if len(x.shape) < 2: 271 | raise ValueError("Input must have at least 2-dimensions.") 272 | 273 | self._x = x 274 | self._y = self.forward(x) 275 | return self._y 276 | 277 | def forward(self, x: Array) -> Array: 278 | """ 279 | Forward Calculation 280 | 281 | Parameters 282 | ---------- 283 | x : vulkpy.Array 284 | Input features 285 | 286 | Returns 287 | ------- 288 | y : vulkpy.Array 289 | Output 290 | 291 | Notes 292 | ----- 293 | Subclass must implement this method. 294 | """ 295 | raise NotImplementedError 296 | 297 | def backward(self, dy: Array) -> Array: 298 | """ 299 | Backward Calculation 300 | 301 | Parameters 302 | ---------- 303 | dy : vulkpy.Array 304 | dL/dy propagated from following layer 305 | 306 | Returns 307 | ------- 308 | dx : vulkpy.Array 309 | dL/dx propagated to previous layer 310 | 311 | Notes 312 | ----- 313 | Subclass must implement this method. 314 | """ 315 | raise NotImplementedError 316 | 317 | def zero_grad(self): 318 | """ 319 | Reset accumulated gradients to 0. 320 | 321 | Notes 322 | ----- 323 | Base class implement no-operation. 324 | Subclass can customize this method. 325 | """ 326 | pass 327 | 328 | def update(self): 329 | """ 330 | Update parameters based on accumulated gradients 331 | 332 | Notes 333 | ----- 334 | Base class implement no-operation. 335 | Subclass can customize this method. 
336 | """ 337 | pass 338 | -------------------------------------------------------------------------------- /vulkpy/nn/initializers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Initializer Module (:mod:`vulkpy.nn.initializers`) 3 | ================================================================= 4 | """ 5 | from __future__ import annotations 6 | from typing import Iterable, Optional 7 | 8 | import numpy as np 9 | 10 | from vulkpy.vkarray import GPU, Array 11 | from vulkpy.random import Xoshiro128pp 12 | 13 | 14 | __all__ = ["Constant", "HeNormal"] 15 | 16 | class Initializer: 17 | def __call__(self, gpu: GPU, shape: Iterable[int]) -> Array: 18 | raise NotImplementedError 19 | 20 | 21 | class Constant(Initializer): 22 | """ 23 | Constant Initializer 24 | """ 25 | def __init__(self, value: float): 26 | """ 27 | Initialize Constant Initializer 28 | 29 | Parameters 30 | ---------- 31 | value : float 32 | Constant value 33 | """ 34 | self.value = value 35 | 36 | def __call__(self, gpu: GPU, shape: Iterable[int]) -> Array: 37 | """ 38 | Initialize new parameters 39 | 40 | Parameters 41 | ---------- 42 | gpu : vulkpy.GPU 43 | GPU 44 | shape : iterable of ints 45 | Parameter shape 46 | """ 47 | p = Array(gpu, shape=shape) 48 | p[:] = self.value 49 | return p 50 | 51 | 52 | class HeNormal(Initializer): 53 | r""" 54 | He Normal Initializer 55 | 56 | Note 57 | ---- 58 | Standard deviation :math:`\sigma` is following; 59 | 60 | .. 
math:: \sigma = \sqrt{2/d_{\text{in}}} 61 | """ 62 | def __init__(self, gpu: GPU, input_dim: int, *, seed: Optional[int] = None): 63 | """ 64 | Initialize He Normal Initializer 65 | 66 | Parameters 67 | ---------- 68 | gpu : vulkpy.GPU 69 | GPU 70 | input_dim : int 71 | Input dimension 72 | seed : int, optional 73 | Initial seed for PRNG 74 | """ 75 | self.rng = Xoshiro128pp(gpu, seed=seed) 76 | self.stddev = np.sqrt(2 / input_dim) 77 | 78 | def __call__(self, gpu: GPU, shape: Iterable[int]): 79 | """ 80 | Initialize new parameters 81 | 82 | Parameters 83 | ---------- 84 | gpu : vulkpy.GPU 85 | GPU 86 | shape : iterable of ints 87 | Parameter shape 88 | """ 89 | return self.rng.normal(shape=shape, stddev=self.stddev) 90 | -------------------------------------------------------------------------------- /vulkpy/nn/layers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Layer Module (:mod:`vulkpy.nn.layers`) 3 | ===================================================== 4 | """ 5 | from __future__ import annotations 6 | from typing import Callable, Iterable, Optional 7 | 8 | from vulkpy.util import getShader 9 | from vulkpy.vkarray import GPU, Array, DataShape, BatchAffineParams 10 | from .core import Module, Optimizer, Regularizer 11 | from .parameters import Parameter 12 | from .initializers import HeNormal 13 | 14 | 15 | __all__ = ["Dense", "ReLU", "Sigmoid", "Softmax"] 16 | 17 | 18 | class Dense(Module): 19 | """ 20 | Fully connected Dense Layer 21 | """ 22 | 23 | _batch_affine = getShader("batch_affine.spv") 24 | 25 | def __init__(self, gpu: GPU, input_dim: int, output_dim: int, *, 26 | w_init: Optional[Callable[[GPU, Iterable[int]], Array]] = None, 27 | b_init: Optional[Callable[[GPU, Iterable[int]], Array]] = None, 28 | w_opt: Optional[Optimizer] = None, 29 | b_opt: Optional[Optimizer] = None, 30 | w_reg: Optional[Regularizer] = None, 31 | b_reg: Optional[Regularizer] = None): 32 | """ 33 | Initialize 
Dense 34 | 35 | Parameters 36 | ---------- 37 | gpu : vulkpy.GPU 38 | GPU 39 | input_dim : int 40 | Input dimension 41 | output_dim : int 42 | Output dimension 43 | w_init : Callable, optional 44 | Weight initializer. If ``None`` (default), 45 | ``vulkpy.nn.HeNormal`` is used. 46 | b_init : Callable, optional 47 | Bias initializer. If ``None`` (default), 48 | bias is initialized with ``0``. 49 | w_opt : vulkpy.nn.Optimizer, optional 50 | Weight Optimizer. If ``None`` (default), 51 | ``vulkpy.nn.Adam`` is used. 52 | b_opt : vulkpy.nn.Optimizer, optional 53 | Bias Optimizer. If ``None`` (default), 54 | ``vulkpy.nn.Adam`` is used. 55 | w_reg : vulkpy.nn.Regularizer, optional 56 | Weight Regularizer. 57 | b_reg : vulkpy.nn.Regularizer, optional 58 | Bias Regularizer 59 | """ 60 | self.input_dim = int(input_dim) 61 | self.output_dim = int(output_dim) 62 | 63 | if w_init is None: 64 | w_init = HeNormal(gpu, self.input_dim) 65 | 66 | self.w = Parameter(gpu, shape=(self.output_dim, self.input_dim), 67 | initializer=w_init, opt=w_opt, regularizer=w_reg) 68 | self.b = Parameter(gpu, shape=(self.output_dim,), 69 | initializer=b_init, opt=b_opt, regularizer=b_reg) 70 | 71 | def forward(self, x: Array) -> Array: 72 | r""" 73 | Forward 74 | 75 | Parameters 76 | ---------- 77 | x : vulkpy.Array 78 | Batch input 79 | 80 | Returns 81 | ------- 82 | vulkpy.Array 83 | Batch output 84 | 85 | Notes 86 | ----- 87 | .. math:: y = Wx + b 88 | 89 | .. warning:: 90 | 91 | Generally, users should not call this method directly. 92 | Use ``__call__`` instead, where input / output are stored for training.
93 | """ 94 | y = Array(x._gpu, shape=(x.shape[0], self.output_dim)) 95 | y.job = x._gpu._submit(self._batch_affine, 1, 64, 1, 96 | [self.w.value, self.b.value, x, y], 97 | DataShape(x.shape[0], self.output_dim, 1), 98 | BatchAffineParams(x.shape[0], 99 | x.shape[1], 100 | self.output_dim)) 101 | y._keep.extend([self.w.value, self.b.value, x]) 102 | return y 103 | 104 | def backward(self, dy: Array) -> Array: 105 | r""" 106 | Backward 107 | 108 | Parameters 109 | ---------- 110 | dy : vulkpy.Array 111 | Batch grad 112 | 113 | Returns 114 | ------- 115 | vulkpy.Array 116 | Batch grad 117 | 118 | Notes 119 | ----- 120 | .. math:: 121 | 122 | dx = dy W\\ 123 | dW = dy ^T \cdot x\\ 124 | db = dy 125 | """ 126 | db = dy.sum(axis=0) # Allocate 127 | self.b.add_grad(db) 128 | 129 | x_shape = self._x.shape 130 | dy_shape = dy.shape 131 | dy.reshape((dy.shape[0], dy.shape[1], 1)) 132 | self._x.reshape((self._x.shape[0], 1, self._x.shape[1])) 133 | 134 | dW = dy * self._x # Allocate 135 | dW = dW.sum(axis=0) # Allocate 136 | self.w.add_grad(dW) 137 | 138 | self._x.reshape(x_shape) 139 | dy.reshape(dy_shape) 140 | 141 | return dy @ self.w.value # Allocate 142 | 143 | def zero_grad(self): 144 | """ 145 | Clear accumulated gradients 146 | """ 147 | self.w.zero_grad() 148 | self.b.zero_grad() 149 | 150 | def update(self): 151 | """ 152 | Update values with accumulated gradients 153 | """ 154 | self.w.update() 155 | self.b.update() 156 | 157 | 158 | class ReLU(Module): 159 | """ 160 | Rectified Linear Unit (ReLU) 161 | """ 162 | def forward(self, x: Array) -> Array: 163 | r""" 164 | Forward 165 | 166 | Parameters 167 | ---------- 168 | x : vulkpy.Array 169 | Batch input 170 | 171 | Returns 172 | ------- 173 | vulkpy.Array 174 | Batch output 175 | 176 | Notes 177 | ----- 178 | .. math:: y = \max(x, 0) 179 | 180 | .. warning:: 181 | 182 | Generally, users should not call this method directly. 183 | Use ``__call__`` instead, where input / output are stored for training. 
184 | """ 185 | return x.max(0.0) # Allocate 186 | 187 | def backward(self, dy: Array) -> Array: 188 | r""" 189 | Backward 190 | 191 | Parameters 192 | ---------- 193 | dy : vulkpy.Array 194 | Batch grad 195 | 196 | Returns 197 | ------- 198 | vulkpy.Array 199 | Batch grad 200 | 201 | Notes 202 | ----- 203 | .. math:: dx = dy \cdot \max(\rm{sign}(y), 0) 204 | 205 | if x == 0, dy/dx => 0 206 | """ 207 | dx = self._y.sign() # Allocate 208 | dx.max(0.0, inplace=True) 209 | dx *= dy 210 | return dx 211 | 212 | 213 | class Sigmoid(Module): 214 | """ 215 | Sigmoid 216 | """ 217 | def forward(self, x: Array) -> Array: 218 | r""" 219 | Forward 220 | 221 | Parameters 222 | ---------- 223 | x : vulkpy.Array 224 | Batch input 225 | 226 | Returns 227 | ------- 228 | vulkpy.Array 229 | Batch output 230 | 231 | Notes 232 | ----- 233 | .. math:: y = 1/(1 + \exp (-x)) 234 | 235 | .. warning:: 236 | 237 | Generally, users should not call this method directly. 238 | Use ``__call__`` instead, where input / output are stored for training. 239 | """ 240 | y = 0.0 - x # Allocate 241 | y.exp(inplace=True) 242 | y += 1.0 243 | y = 1.0 / y # Allocate 244 | return y 245 | 246 | def backward(self, dy: Array) -> Array: 247 | r""" 248 | Backward 249 | 250 | Parameters 251 | ---------- 252 | dy : vulkpy.Array 253 | Batch grad 254 | 255 | Returns 256 | ------- 257 | vulkpy.Array 258 | Batch grad 259 | 260 | Notes 261 | ----- 262 | .. math:: dx = dy \cdot y(1 - y) 263 | """ 264 | dx = 1.0 - self._y 265 | dx *= self._y 266 | dx *= dy 267 | return dx 268 | 269 | 270 | class Softmax(Module): 271 | """ 272 | SoftMax 273 | """ 274 | def forward(self, x: Array) -> Array: 275 | r""" 276 | Forward 277 | 278 | Parameters 279 | ---------- 280 | x : vulkpy.Array 281 | Batch input 282 | 283 | Returns 284 | ------- 285 | vulkpy.Array 286 | Batch output 287 | 288 | Notes 289 | ----- 290 | .. math:: y = \exp (x) / \sum _i \exp(x_i) 291 | 292 | .. 
warning:: 293 | 294 | Generally, users should not call this method directly. 295 | Use ``__call__`` instead, where input / output are stored for training. 296 | """ 297 | X = x - x.maximum(axis=1, rebroadcast=True) 298 | X.exp(inplace=True) 299 | X /= X.sum(axis=1, rebroadcast=True) 300 | return X 301 | 302 | def backward(self, dy: Array) -> Array: 303 | r""" 304 | Backward 305 | 306 | Parameters 307 | ---------- 308 | dy : vulkpy.Array 309 | Batch grad 310 | 311 | Returns 312 | ------- 313 | vulkpy.Array 314 | Batch grad 315 | 316 | Notes 317 | ----- 318 | .. math:: dx = dy \cdot y(1 - y) 319 | """ 320 | dx = 1.0 - self._y 321 | dx *= self._y 322 | dx *= dy 323 | return dx 324 | -------------------------------------------------------------------------------- /vulkpy/nn/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Loss Module (:mod:`vulkpy.nn.losses`) 3 | ==================================================== 4 | 5 | Examples 6 | -------- 7 | >>> import vulkpy as vk 8 | >>> from vulkpy import nn 9 | >>> gpu = vk.GPU() 10 | >>> x = vk.Array(gpu, data=[[ ... ]]) # Predicted 11 | >>> y = vk.Array(gpu, data=[[ ... ]]) # True 12 | 13 | Loss class takes predicted values and true labels/targets, then returns scalar loss. 
14 | 15 | >>> L = nn.CrossEntropyLoss() 16 | >>> loss = L(x, y) 17 | 18 | Gradients can be computed with `grad()` method 19 | 20 | >>> dx = L.grad() 21 | """ 22 | from __future__ import annotations 23 | from typing import cast, Callable, Iterable, Literal, Optional, Tuple 24 | 25 | from vulkpy.util import getShader 26 | from vulkpy.vkarray import Array, DataShape, VectorParams 27 | from .core import Loss 28 | from .layers import Softmax 29 | 30 | __all__ = [ 31 | "CrossEntropyLoss", 32 | "SoftmaxCrossEntropyLoss", 33 | "MSELoss", 34 | "HuberLoss", 35 | "MixLoss", 36 | ] 37 | 38 | 39 | F = Callable[[Array], Array] 40 | class ReduceLoss(Loss): 41 | def __init__(self, reduce: Literal["mean", "sum"] = "mean"): 42 | tmp: Tuple[F, Optional[F]] = { 43 | "mean": (lambda _L: _L.mean(axis=0), lambda _dx: 1/_dx.shape[0]), 44 | "sum": (lambda _L: _L.sum(axis=0), None), 45 | }[reduce] 46 | self.reduce, self.scale_backward = tmp 47 | 48 | 49 | def __call__(self, x: Array, y: Array) -> Array: 50 | r""" 51 | Compute Loss 52 | 53 | Parameters 54 | ---------- 55 | x : vulkpy.Array 56 | Batch input features 57 | y : vulkpy.Array 58 | Batch labels/targets 59 | 60 | Returns 61 | ------- 62 | loss : vulkpy.Array 63 | Loss 64 | """ 65 | self._x = x 66 | self._y = y 67 | L = self.forward(x, y) 68 | return self.reduce(L) 69 | 70 | def grad(self) -> Array: 71 | r""" 72 | Compute Gradients 73 | 74 | Returns 75 | ------- 76 | dx : vulkpy.Array 77 | Batch gradients of dL/dx 78 | 79 | Notes 80 | ----- 81 | This method calculates gradients for the last ``__call__(x, y)``.
82 | """ 83 | dx = self.backward() 84 | if self.scale_backward is not None: 85 | dx *= self.scale_backward(dx) 86 | return dx 87 | 88 | def forward(self, x: Array, y: Array) -> Array: 89 | raise NotImplementedError 90 | 91 | def backward(self) -> Array: 92 | raise NotImplementedError 93 | 94 | 95 | class CrossEntropyLoss(ReduceLoss): 96 | """ 97 | Cross Entropy Loss 98 | """ 99 | _forward = getShader("nn_cross_entropy.spv") 100 | _backward = getShader("nn_cross_entropy_backward.spv") 101 | 102 | def __init__(self, *args, **kwargs): 103 | """ 104 | Initialize Cross Entropy Loss 105 | 106 | Parameters 107 | ---------- 108 | reduce : {"mean", "sum"}, optional 109 | Reduction method over batch. The default is ``"mean"``. 110 | """ 111 | super().__init__(*args, **kwargs) 112 | 113 | def forward(self, x: Array, y: Array) -> Array: 114 | r""" 115 | Forward 116 | 117 | Parameters 118 | ---------- 119 | x : vulkpy.Array 120 | Batch input features 121 | y : vulkpy.Array 122 | Batch input labels as One hot vector 123 | 124 | Returns 125 | ------- 126 | loss : vulkpy.Array 127 | Cross Entropy Loss 128 | 129 | Notes 130 | ----- 131 | .. math:: 132 | 133 | L = - f _{\text{reduce}} ( y_i \log (x_i) ) 134 | 135 | .. warning:: 136 | 137 | Generally, users should not call this method directly. 138 | Use ``__call__`` instead, where input / output are stored for training. 139 | """ 140 | size = x.buffer.size() 141 | L = Array(x._gpu, shape=x.shape) 142 | L.job = x._gpu._submit(self._forward, 64, 1, 1, 143 | [x, y, L], 144 | DataShape(size, 1, 1), 145 | VectorParams(size)) 146 | L._keep.extend([x, y]) 147 | return L.sum(axis=1) 148 | 149 | def backward(self) -> Array: 150 | r""" 151 | Backward 152 | 153 | Returns 154 | ------- 155 | loss : vulkpy.Array 156 | Batch gradients 157 | 158 | Notes 159 | ----- 160 | .. math:: 161 | 162 | dx = \frac{-y}{x + \epsilon} 163 | 164 | .. warning:: 165 | 166 | Generally, users should not call this method directly. 
167 | Use ``grad()`` instead, where reduction scale is corrected. 168 | """ 169 | size = self._x.buffer.size() 170 | dx = Array(self._x._gpu, shape=self._x.shape) 171 | dx.job = self._x._gpu._submit(self._backward, 64, 1, 1, 172 | [self._x, self._y, dx], 173 | DataShape(size, 1, 1), 174 | VectorParams(size)) 175 | dx._keep.extend([self._x, self._y]) 176 | return dx 177 | 178 | 179 | class SoftmaxCrossEntropyLoss(CrossEntropyLoss): 180 | """ 181 | Softmax Cross Entropy Loss 182 | 183 | See Also 184 | -------- 185 | vulkpy.nn.Softmax : Softmax layer 186 | vulkpy.nn.CrossEntropyLoss : Cross Entropy loss without Softmax 187 | """ 188 | def __init__(self, *args, **kwargs): 189 | """ 190 | Initialize Softmax Cross Entropy Loss 191 | 192 | Parameters 193 | ---------- 194 | reduce : {"mean", "sum"} 195 | Reduction method over batch. The default is ``"mean"``. 196 | """ 197 | super().__init__(*args, **kwargs) 198 | self._sm = Softmax() 199 | 200 | def forward(self, x: Array, y: Array) -> Array: 201 | r""" 202 | Forward 203 | 204 | Parameters 205 | ---------- 206 | x : vulkpy.Array 207 | Batch input features 208 | y : vulkpy.Array 209 | Batch labels 210 | 211 | Returns 212 | ------- 213 | loss : vulkpy.Array 214 | Loss 215 | 216 | Notes 217 | ----- 218 | .. math:: 219 | 220 | L = - f _{\text{reduce}} (y_i \log (\rm{softmax}(x) _i)) 221 | 222 | .. warning:: 223 | 224 | Generally, users should not call this method directly. 225 | Use ``__call__`` instead, where input / output are stored for training. 226 | """ 227 | return super().forward(self._sm(x), y) 228 | 229 | def backward(self) -> Array: 230 | r""" 231 | Backward 232 | 233 | Returns 234 | ------- 235 | loss : vulkpy.Array 236 | Batch gradients 237 | 238 | Notes 239 | ----- 240 | .. math:: 241 | 242 | dx = \rm{softmax}(x) - y 243 | 244 | .. warning:: 245 | 246 | Generally, users should not call this method directly. 247 | Use ``grad()`` instead, where reduction scale is corrected. 
248 | """ 249 | return cast(Array, self._sm._y) - self._y 250 | 251 | 252 | class MSELoss(ReduceLoss): 253 | """ 254 | Mean Squared Loss 255 | """ 256 | def __init__(self, *args, **kwargs): 257 | """ 258 | Initialize MSE Loss 259 | 260 | Parameters 261 | ---------- 262 | reduce : {"mean", "sum"} 263 | Reduction method over batch. The default is ``"mean"``. 264 | """ 265 | super().__init__(*args, **kwargs) 266 | 267 | def forward(self, x: Array, y: Array) -> Array: 268 | r""" 269 | Forward 270 | 271 | Parameters 272 | ---------- 273 | x : vulkpy.Array 274 | Batch input features 275 | y : vulkpy.Array 276 | Batch labels 277 | 278 | Returns 279 | ------- 280 | loss : vulkpy.Array 281 | Loss 282 | 283 | Notes 284 | ----- 285 | .. math:: 286 | 287 | L = f _{\text{reduce}} |x - y|^2 288 | 289 | .. warning:: 290 | 291 | Generally, users should not call this method directly. 292 | Use ``__call__`` instead, where input / output are stored for training. 293 | """ 294 | L = (y - x) # Allocate 295 | L **= 2.0 296 | return L.sum(axis=1) # Allocate 297 | 298 | def backward(self) -> Array: 299 | r""" 300 | Backward 301 | 302 | Returns 303 | ------- 304 | loss : vulkpy.Array 305 | Batch gradients 306 | 307 | Notes 308 | ----- 309 | .. math:: 310 | 311 | dx = 2 (x - y) 312 | 313 | .. warning:: 314 | 315 | Generally, users should not call this method directly. 316 | Use ``grad()`` instead, where reduction scale is corrected. 317 | """ 318 | dx = self._x - self._y # Allocate 319 | dx *= 2 320 | return dx 321 | 322 | 323 | class HuberLoss(ReduceLoss): 324 | """ 325 | Huber Loss 326 | """ 327 | def __init__(self, *args, **kwargs): 328 | """ 329 | Initialize Huber Loss 330 | 331 | Parameters 332 | ---------- 333 | reduce : {"mean", "sum"} 334 | Reduction method over batch. The default is ``"mean"``. 
335 | """ 336 | super().__init__(*args, **kwargs) 337 | 338 | def forward(self, x: Array, y: Array) -> Array: 339 | r""" 340 | Forward 341 | 342 | Parameters 343 | ---------- 344 | x : vulkpy.Array 345 | Batch input features 346 | y : vulkpy.Array 347 | Batch labels 348 | 349 | Returns 350 | ------- 351 | loss : vulkpy.Array 352 | Loss 353 | 354 | Notes 355 | ----- 356 | .. math:: 357 | 358 | L = 0.5 f _{\text{reduce}} \min(|x - y|^2, |x - y|) 359 | 360 | .. warning:: 361 | 362 | Generally, users should not call this method directly. 363 | Use ``__call__`` instead, where input / output are stored for training. 364 | """ 365 | delta = y - x # Allocate 366 | delta.abs(inplace=True) # |y-x| 367 | delta.min(delta ** 2.0, inplace=True) # min(|y-x|^2, |y-x|) 368 | delta *= 0.5 # min(|y-x|^2, |y-x|) * 0.5 369 | return delta.sum(axis=1) # Allocate 370 | 371 | def backward(self) -> Array: 372 | r""" 373 | Backward 374 | 375 | Returns 376 | ------- 377 | loss : vulkpy.Array 378 | Batch gradients 379 | 380 | Notes 381 | ----- 382 | .. math:: 383 | 384 | dx = \text{clamp}(x - y, -1.0, 1.0) 385 | 386 | .. warning:: 387 | 388 | Generally, users should not call this method directly. 389 | Use ``grad()`` instead, where reduction scale is corrected. 390 | """ 391 | delta = self._x - self._y 392 | delta.clamp(-1.0, 1.0, inplace=True) 393 | return delta 394 | 395 | 396 | class MixLoss(Loss): 397 | """ 398 | Mixing Loss class 399 | """ 400 | def __init__(self, losses: Iterable[Tuple[float, Loss]]): 401 | """ 402 | Initializer MixLoss 403 | 404 | Parameters 405 | ---------- 406 | losses : iterable of tuple of float and vulkpy.Loss 407 | Sets of coefficient and loss. 408 | 409 | Raises 410 | ------ 411 | ValueError 412 | When losses is empty 413 | """ 414 | self.L: Tuple[Tuple[float, Loss], ...] 
= tuple(losses) 415 | if len(self.L) < 1: 416 | raise ValueError(f"losses should not empty") 417 | 418 | def __call__(self, x: Array, y: Array) -> Array: 419 | r""" 420 | Compute Loss 421 | 422 | Parameters 423 | ---------- 424 | x : vulkpy.Array 425 | Batch input features 426 | y : vulkpy.Array 427 | Batch labels/targets 428 | 429 | Returns 430 | ------- 431 | loss : vulkpy.Array 432 | Loss 433 | """ 434 | return self._sum(lambda _L: _L(x, y)) 435 | 436 | def grad(self) -> Array: 437 | r""" 438 | Compute Gradients 439 | 440 | Returns 441 | ------- 442 | dx : vulkpy.Array 443 | Batch gradients of dL/dx 444 | 445 | Notes 446 | ----- 447 | This method calculates gradients for the last ``__call__(x, y)``. 448 | """ 449 | return self._sum(lambda _L: _L.grad()) 450 | 451 | def _sum(self, F: Callable[[Loss], Array]) -> Array: 452 | coeff, _L = self.L[0] 453 | s = coeff * F(_L) 454 | 455 | for coeff, _L in self.L[1:]: 456 | s += coeff * F(_L) 457 | 458 | return s 459 | -------------------------------------------------------------------------------- /vulkpy/nn/models.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Model Module (:mod:`vulkpy.nn.models`) 3 | ===================================================== 4 | """ 5 | from __future__ import annotations 6 | from typing import Iterable, Optional, Tuple, Union 7 | 8 | from vulkpy import Array 9 | from .core import Module, Loss, Regularizer 10 | 11 | 12 | __all__ = ["Sequence"] 13 | 14 | 15 | class Sequence: 16 | """ 17 | Sequential Model 18 | 19 | All layers are sequentially connected. 20 | """ 21 | def __init__(self, 22 | layers: Iterable[Module], 23 | loss: Loss): 24 | """ 25 | Initialize Sequence 26 | 27 | Parameters 28 | ---------- 29 | layers : iterable of vulkpy.nn.Module 30 | Layers to be called sequentially 31 | loss : vulkpy.nn.Loss 32 | Loss layer 33 | """ 34 | self.L: Tuple[Module, ...]
= tuple(layers) 35 | self.loss: Loss = loss 36 | 37 | def _forward(self, x: Array) -> Array: 38 | for _L in self.L: 39 | x = _L(x) 40 | return x 41 | 42 | def _backward(self): 43 | dx = self.loss.grad() 44 | for _L in self.L[::-1]: 45 | dx = _L.backward(dx) 46 | 47 | def _zero_grad(self): 48 | for _L in self.L: 49 | _L.zero_grad() 50 | 51 | def _update(self): 52 | for _L in self.L: 53 | _L.update() 54 | 55 | def train(self, x: Array, y: Array) -> Tuple[Array, Array]: 56 | """ 57 | Train model 58 | 59 | Parameters 60 | ---------- 61 | x, y : vulkpy.Array 62 | Features and Labels/Targets 63 | 64 | Returns 65 | ------- 66 | y : vulkpy.Array 67 | Predicted Labels/Targets 68 | loss : vulkpy.Array 69 | Loss 70 | """ 71 | _y = self._forward(x) 72 | _loss = self.loss(_y, y) 73 | 74 | self._zero_grad() 75 | self._backward() 76 | self._update() 77 | 78 | return _y, _loss 79 | 80 | def predict(self, 81 | x: Array, 82 | y: Optional[Array] = None) -> Union[Array, Tuple[Array, Array]]: 83 | """ 84 | Predict Label/Target 85 | 86 | Parameters 87 | ---------- 88 | x : vulkpy.Array 89 | Features 90 | y : vulkpy.Array, optional 91 | Labels/Targets. 92 | 93 | Returns 94 | ------- 95 | pred_y : vulkpy.Array 96 | Predicted Labels/Targets 97 | loss : vulkpy.Array 98 | Loss. Return only if ``y`` is specified. 
99 | """ 100 | _y = self._forward(x) 101 | if y is None: 102 | return _y 103 | 104 | _loss = self.loss(_y, y) 105 | return _y, _loss 106 | -------------------------------------------------------------------------------- /vulkpy/nn/optimizers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Optimizer Module (:mod:`vulkpy.nn.optimizers`) 3 | ============================================================= 4 | """ 5 | from __future__ import annotations 6 | from dataclasses import dataclass 7 | from typing import Iterable, Union 8 | 9 | from wblog import getLogger 10 | 11 | from vulkpy.vkarray import GPU, Array, zeros 12 | from .core import Optimizer, OptimizerState 13 | 14 | __all__ = [ 15 | "SGD", "SGDState", 16 | "AdaGrad", "AdaGradState", 17 | "Adam", "AdamState", 18 | "Optimizer", "OptimizerState", 19 | ] 20 | 21 | logger = getLogger() 22 | 23 | 24 | class SGDState(OptimizerState): 25 | """ 26 | Optimizer State for SGD 27 | """ 28 | def __init__(self, opt: SGD): 29 | """ 30 | Initialize SGD state 31 | 32 | Parameters 33 | ---------- 34 | opt : vulkpy.SGD 35 | SGD Optimizer 36 | """ 37 | self.opt: SGD = opt 38 | 39 | def grad2diff(self, grad: Array) -> Array: 40 | """ 41 | Compute diff from gradient 42 | 43 | Parameters 44 | ---------- 45 | grad : vulkpy.Array 46 | Gradient 47 | 48 | Returns 49 | ------- 50 | diff : vulkpy.Array 51 | Update diff 52 | """ 53 | return (-self.opt.lr) * grad 54 | 55 | class SGD(Optimizer): 56 | """ 57 | SGD Optimizer 58 | 59 | Use constant learning rate 60 | 61 | See Also 62 | -------- 63 | vulkpy.nn.Adam : Adam optimizer 64 | """ 65 | def __init__(self, lr: float): 66 | """ 67 | Initialize Stochastic Gradient Descent (SGD) Optimizer 68 | 69 | Parameters 70 | ---------- 71 | lr : float 72 | Learning rate 73 | """ 74 | self.lr: float = lr 75 | logger.debug("SGD(lr=%f)", self.lr) 76 | 77 | def init_state(self, shape: Iterable[int]) -> SGDState: 78 | """ 79 | Initialize Optimizer
state 80 | 81 | Parameters 82 | ---------- 83 | shape : iterable of ints 84 | Shape of parameter 85 | 86 | Returns 87 | ------- 88 | SGDState 89 | Optimizer state 90 | 91 | Notes 92 | ----- 93 | Currently SGDState is empty, however, 94 | we might add some field like momentum in future. 95 | """ 96 | return SGDState(self) 97 | 98 | 99 | class AdaGradState(OptimizerState): 100 | """ 101 | Optimizer State for AdaGrad 102 | """ 103 | def __init__(self, opt: AdaGrad, shape: Iterable[int], tau: float): 104 | """ 105 | Initialize AdaGrad 106 | 107 | Parameters 108 | ---------- 109 | opt : vulkpy.AdaGrad 110 | AdaGrad Optimizer 111 | shape : iterable of ints 112 | Value shape 113 | tau : float 114 | Initial summation 115 | """ 116 | self.opt: AdaGrad = opt 117 | self.h: Array = zeros(self.opt.gpu, shape=shape) 118 | self.h[:] = tau 119 | 120 | def grad2diff(self, grad: Array) -> Array: 121 | """ 122 | Compute diff from gradient 123 | 124 | Parameters 125 | ---------- 126 | grad : vulkpy.Array 127 | Gradient 128 | 129 | Returns 130 | ------- 131 | diff : vulkpy.Array 132 | Update diff 133 | """ 134 | self.h += (grad ** 2) 135 | 136 | sqrt = self.h.sqrt() # sqrt(sum) 137 | sqrt += self.opt.eps # sqrt(sum) + eps 138 | ret = grad / sqrt # grad / (sqrt(sum) + eps) 139 | ret *= (-self.opt.lr) # -lr * grad / (sqrt(sum) + eps) 140 | return ret 141 | 142 | class AdaGrad(Optimizer): 143 | r""" 144 | AdaGrad Optimizer 145 | 146 | Notes 147 | ----- 148 | This class implements AdaGrad [adagrad1]_. 149 | 150 | References 151 | ---------- 152 | .. [adagrad1] 153 | """ 154 | def __init__(self, 155 | gpu: GPU, *, 156 | lr: float = 0.01, 157 | tau: float = 0.0, 158 | eps: float = 1e-8): 159 | """ 160 | Initialize AdaGrad 161 | 162 | Parameters 163 | ---------- 164 | gpu : vulkpy.GPU 165 | GPU 166 | lr : float, optional 167 | AdaGrad parameter (learning rate). The default is ``0.01``. 168 | tau : float, optional 169 | AdaGrad parameter (initial accumulator). 170 | The default is ``0``.
171 | eps : float, optional 172 | AdaGrad parameter (small positive). 173 | The default is ``1e-8`` 174 | """ 175 | self.gpu: GPU = gpu 176 | self.lr: float = lr 177 | self.tau: float = tau 178 | self.eps: float = eps 179 | 180 | logger.debug("AdaGrad(lr=%f, tau=%f, eps=%f)", 181 | self.lr, self.tau, self.eps) 182 | 183 | def init_state(self, shape: Iterable[int]) -> AdaGradState: 184 | """ 185 | Initialize Optimizer state 186 | 187 | Parameters 188 | ---------- 189 | shape : iterable of ints 190 | Shape of parameter 191 | 192 | Returns 193 | ------- 194 | AdaGradState 195 | Optimizer state 196 | """ 197 | return AdaGradState(opt=self, shape=shape, tau=self.tau) 198 | 199 | 200 | class AdamState(OptimizerState): 201 | """ 202 | Optimizer State for Adam 203 | """ 204 | def __init__(self, opt: Adam, shape: Iterable[int]): 205 | """ 206 | Initialize Adam state 207 | 208 | Parameters 209 | ---------- 210 | opt : vulkpy.Adam 211 | Adam Optimizer 212 | shape : iterable of ints 213 | Value shape 214 | """ 215 | self.opt: Adam = opt 216 | self.m: Array = zeros(self.opt.gpu, shape=shape) 217 | self.v: Array = zeros(self.opt.gpu, shape=shape) 218 | self.beta1t: float = 1.0 219 | self.beta2t: float = 1.0 220 | 221 | def grad2diff(self, grad: Array) -> Array: 222 | """ 223 | Compute diff from gradient 224 | 225 | Parameters 226 | ---------- 227 | grad : vulkpy.Array 228 | Gradient 229 | 230 | Returns 231 | ------- 232 | diff : vulkpy.Array 233 | Update diff 234 | """ 235 | self.m *= self.opt.beta1 236 | self.m += (1 - self.opt.beta1) * grad # Allocate 237 | 238 | self.v *= self.opt.beta2 239 | self.v += (1 - self.opt.beta2) * (grad ** 2) # Allocate 240 | 241 | self.beta1t *= self.opt.beta1 242 | self.beta2t *= self.opt.beta2 243 | 244 | mhat = self.m / (1 - self.beta1t) # Allocate 245 | vhat = self.v / (1 - self.beta2t) # Allocate 246 | 247 | vhat.sqrt(inplace=True) # sqrt(vhat) 248 | vhat += self.opt.eps # sqrt(vhat) + eps 249 | 250 | mhat *= (-self.opt.lr) # -lr * mhat 251 | 
mhat /= vhat # -lr * mhat / (sqrt(vhat) + eps) 252 | 253 | return mhat 254 | 255 | 256 | class Adam(Optimizer): 257 | r""" 258 | Adam Optimizer 259 | 260 | See Also 261 | -------- 262 | vulkpy.nn.SGD : SGD optimizer 263 | 264 | Notes 265 | ----- 266 | This class implement Adam [adam1]_. 267 | The algorithm utilizes moving averages of the 1st and 2nd order moment. 268 | The 1st (:math:`m_t`) and 2nd (:math:`v_t`) order moment are updated as follows; 269 | 270 | .. math:: 271 | 272 | m_t = \beta _1 m_{t-1} + (1 - \beta _1) g_t\\ 273 | v_t = \beta _2 v_{t-1} + (1 - \beta _2) g_t ^2 274 | 275 | where :math:`g_t` is gradient. 276 | 277 | To mitigate initial underestimation, 278 | corrected :math:`\hat{m_t}` and :math:`\hat{v_t}` are used for parameter update. 279 | 280 | .. math:: 281 | 282 | \hat{m}_t = m_t / (1 - \beta _1 ^t)\\ 283 | \hat{v}_t = v_t / (1 - \beta _2 ^t) 284 | 285 | Finally, parameter :math:`\theta _t` is updated by 286 | 287 | .. math:: 288 | 289 | \theta _t = \theta _{t-1} - \text{lr} \times 290 | \hat{m}_t/(\sqrt{\hat{v}_t} + \epsilon) 291 | 292 | 293 | References 294 | ---------- 295 | .. [adam1] D. Kingma and J. Ba, "Adam: A Method for Stochastic Optimization", 296 | ICLR (Poster) 2015, https://dblp.org/rec/journals/corr/KingmaB14.html 297 | 298 | Examples 299 | -------- 300 | >>> import vulkpy.vk 301 | >>> from vulkpy import nn 302 | >>> gpu = vk.GPU() 303 | >>> adam = nn.Adam(gpu, lr=0.001, beta1=0.9, beta2=0.999) 304 | """ 305 | def __init__(self, 306 | gpu: GPU, *, 307 | lr: float = 0.001, 308 | beta1: float = 0.9, 309 | beta2: float = 0.999, 310 | eps: float = 1e-8): 311 | """ 312 | Initialize Adam Optimizer 313 | 314 | Parameters 315 | ---------- 316 | gpu : vulkpy.GPU 317 | GPU 318 | lr : float, optional 319 | Adam parameter. The default is ``0.001``. 320 | beta1 : float, optional 321 | Adam parameter. The default is ``0.9``. 322 | beta2 : float, optional 323 | Adam parameter. The defeault is ``0.999``. 
324 | eps : float, optional 325 | Adam parameter. The default is ``1e-8``. 326 | """ 327 | self.gpu: GPU = gpu 328 | self.lr: float = lr 329 | self.beta1: float = beta1 330 | self.beta2: float = beta2 331 | self.eps: float = eps 332 | 333 | logger.debug("Adam(lr=%f, beta1=%f, beta2=%f, eps=%f)", 334 | self.lr, self.beta1, self.beta2, self.eps) 335 | 336 | def init_state(self, shape: Iterable[int]) -> AdamState: 337 | """ 338 | Initialize Optimizer state 339 | 340 | Parameters 341 | ---------- 342 | shape : iterable of ints 343 | Shape of parameter 344 | 345 | Returns 346 | ------- 347 | AdamState 348 | Optimizer state 349 | """ 350 | return AdamState(opt=self, shape=shape) 351 | -------------------------------------------------------------------------------- /vulkpy/nn/parameters.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import Callable, Iterable, Optional 3 | 4 | from vulkpy.vkarray import GPU, Array, zeros 5 | from .core import Optimizer, OptimizerState, Regularizer 6 | from .optimizers import Adam 7 | 8 | 9 | __all__ = [ 10 | "Parameter" 11 | ] 12 | 13 | 14 | class Parameter: 15 | """ 16 | Neural Network Parameter 17 | """ 18 | def __init__(self, 19 | gpu: GPU, 20 | shape: Iterable[int], 21 | trainable: bool = True, 22 | opt: Optional[Optimizer] = None, 23 | initializer: Optional[Callable[[GPU, Iterable[int]], Array]]=None, 24 | regularizer: Optional[Regularizer] = None): 25 | """ 26 | Initialize Parameter 27 | 28 | Parameters 29 | ---------- 30 | gpu : vulkpy.GPU 31 | GPU 32 | shape : iterable of ints 33 | Shape of parameter 34 | trainable : bool, optional 35 | If ``True`` (default), track gradient 36 | opt : vulkpy.nn.Optimizer, optional 37 | Optimizer. If ``None`` (default), ``vulkpy.nn.Adam`` is used. 38 | initializer : callable, optional 39 | Initializer function. If ``None`` (default), initialized with ``0.0``. 
40 | regularizer : vulkpy.nn.Regularizer, optional 41 | Regularizer. If ``None`` (default), no regularization is applied. 42 | """ 43 | if initializer is None: 44 | initializer = zeros 45 | self.value: Array = initializer(gpu, shape) 46 | 47 | self.grad: Optional[Array] = None 48 | self.opt_state: Optional[OptimizerState] = None 49 | if trainable: 50 | self.grad = zeros(gpu, shape=shape) 51 | 52 | if opt is None: 53 | opt = Adam(gpu) 54 | self.opt_state = opt.init_state(shape) 55 | 56 | self.R: Optional[Regularizer] = regularizer 57 | 58 | def is_trainable(self) -> bool: 59 | """ 60 | Whether this parameter is trainable 61 | 62 | Returns 63 | ------- 64 | bool 65 | Is trainable 66 | """ 67 | return self.grad is not None 68 | 69 | def add_grad(self, grad: Array): 70 | """ 71 | Add gradient 72 | 73 | Parameters 74 | ---------- 75 | grad : vulkpy.Array 76 | Gradient to be accumulated 77 | """ 78 | if self.grad is not None: 79 | self.grad += grad 80 | 81 | def zero_grad(self): 82 | """ 83 | Clear gradient to 0.0 84 | """ 85 | if self.grad is not None: 86 | self.grad[:] = 0.0 87 | 88 | def update(self): 89 | """ 90 | Update value 91 | 92 | Update value with accumulated gradients only if this value is trainable. 
93 | """ 94 | if self.grad is not None: 95 | self.value += self.opt_state.grad2diff(self.grad) 96 | 97 | def regular_loss(self) -> Array: 98 | """ 99 | Regularization Loss 100 | 101 | Returns 102 | ------- 103 | vulkpy.nn.Array 104 | Loss 105 | """ 106 | if self.R is not None: 107 | return self.R.loss(self.value) 108 | 109 | return zeros(self.value._gpu, shape=(1,)) 110 | 111 | def regular_grad(self): 112 | """ 113 | Add Regularization Gradients 114 | """ 115 | if self.R is not None: 116 | self.add_grad(self.R.grad(self.value)) 117 | -------------------------------------------------------------------------------- /vulkpy/nn/regularizers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural Network Regularizer Module (:mod:`vulkpy.nn.regularizers`) 3 | ================================================================= 4 | """ 5 | from __future__ import annotations 6 | from typing import Iterable, Tuple 7 | from typing_extensions import Protocol 8 | 9 | import wblog 10 | 11 | from vulkpy import Array 12 | from .core import Regularizer 13 | 14 | __all__ = [ 15 | "Lasso", 16 | "Ridge", 17 | "Elastic", 18 | ] 19 | 20 | logger = wblog.getLogger() 21 | 22 | 23 | class Lasso(Regularizer): 24 | r""" 25 | Lasso (L1) Regularization 26 | 27 | Notes 28 | ----- 29 | .. 
math:: 30 | 31 | L = \text{coeff} \times \sum_i |W_i|\\ 32 | dL/dW_i = \text{coeff} \times \rm{sign}(W_i) 33 | """ 34 | def __init__(self, coeff: float = 1.0): 35 | """ 36 | Initialize Lasso Regularizer 37 | 38 | Parameters 39 | ---------- 40 | coeff : float, optional 41 | L1 Coefficient 42 | """ 43 | logger.debug(f"Lasso(L1={coeff})") 44 | self.coeff: float = coeff 45 | 46 | def loss(self, param: Array) -> Array: 47 | """ 48 | L1 Regularization Loss 49 | 50 | Parameters 51 | ---------- 52 | param : vulkpy.Array 53 | Parameter 54 | 55 | Returns 56 | ------- 57 | loss : vulkpy.Array 58 | L1 Regularization Loss 59 | """ 60 | L = param.abs().sum() 61 | L *= self.coeff 62 | return L 63 | 64 | def grad(self, param: Array) -> Array: 65 | """ 66 | Gradient of L1 Regularization Loss 67 | 68 | Parameters 69 | ---------- 70 | param : vulkpy.Array 71 | Parameter 72 | 73 | Returns 74 | ------- 75 | dW : vulkpy.Array 76 | Gradient for L1 Regularization Loss 77 | """ 78 | return self.coeff * param.sign() 79 | 80 | class Ridge(Regularizer): 81 | r""" 82 | Ridge (L2) Regularization 83 | 84 | Notes 85 | ----- 86 | .. 
math:: 87 | 88 | L = \text{coeff} \times \sum_i |W_i|^2\\ 89 | dL/dW_i = 2 \cdot \text{coeff} \times W_i 90 | """ 91 | def __init__(self, coeff: float = 1.0): 92 | """ 93 | Initialize Ridge Regularizer 94 | 95 | Parameters 96 | ---------- 97 | coef : float, optional 98 | L2 Coefficient 99 | """ 100 | logger.debug(f"Ridge(L2={coeff})") 101 | self.coeff: float = coeff 102 | 103 | def loss(self, param: Array) -> Array: 104 | """ 105 | L2 Regularization Loss 106 | 107 | Parameters 108 | ---------- 109 | param : vulkpy.Array 110 | Parameter 111 | 112 | Returns 113 | ------- 114 | loss : vulkpy.Array 115 | L2 Regularization Loss 116 | """ 117 | L = (param ** 2).sum() 118 | L *= self.coeff 119 | return L 120 | 121 | def grad(self, param: Array) -> Array: 122 | """ 123 | Gradient of L2 Regularization Loss 124 | 125 | Parameters 126 | ---------- 127 | param : vulkpy.Array 128 | Parameter 129 | 130 | Returns 131 | ------- 132 | dW : vulkpy.Array 133 | Gradient for L2 Regularization Loss 134 | """ 135 | return (2 * self.coeff) * param 136 | 137 | class Elastic(Regularizer): 138 | r""" 139 | Elastic (L1 + L2) Regularization 140 | 141 | Notes 142 | ----- 143 | .. 
math:: 144 | 145 | L = \alpha \sum _i |W_i| + \beta \sum _i |W_i|^2\\ 146 | dL/dW_i = \alpha \rm{sign}(W_i) + 2 \beta W_i 147 | """ 148 | def __init__(self, L1: float = 1.0, L2: float = 1.0): 149 | """ 150 | Initialize Elastic Regularizer 151 | 152 | Parameters 153 | ---------- 154 | L1 : float, optional 155 | L1 Coefficient 156 | L2 : float, optional 157 | L2 Coefficient 158 | """ 159 | self.L1 = Lasso(L1) 160 | self.L2 = Ridge(L2) 161 | 162 | def loss(self, param: Array) -> Array: 163 | """ 164 | L1 + L2 Regularization Loss 165 | 166 | Parameters 167 | ---------- 168 | param : vulkpy.Array 169 | Parameter 170 | 171 | Returns 172 | ------- 173 | loss : vulkpy.Array 174 | L1 + L2 Regularization Loss 175 | """ 176 | return self.L1.loss(param) + self.L2.loss(param) 177 | 178 | def grad(self, param: Array) -> Array: 179 | """ 180 | Gradient of L1 + L2 Regularization Loss 181 | 182 | Parameters 183 | ---------- 184 | param : vulkpy.Array 185 | Parameter 186 | 187 | Returns 188 | ------- 189 | dW : vulkpy.Array 190 | Gradient for L1 + L2 Regularization Loss 191 | """ 192 | return self.L1.grad(param) + self.L2.grad(param) 193 | -------------------------------------------------------------------------------- /vulkpy/random.py: -------------------------------------------------------------------------------- 1 | """ 2 | Random Module (:mod:`vulkpy.random`) 3 | ==================================== 4 | 5 | GPU-based Pseudo Random Number Generator (PRNG) 6 | 7 | 8 | Examples 9 | -------- 10 | >>> import vulkpy as vk 11 | >>> gpu = vk.GPU() 12 | >>> r = vk.random.Xoshiro128pp(gpu, seed=0) 13 | 14 | [0, 1) uniform random numbers can be generated by 15 | ``random(shape=None, buffer=None)``. 16 | 17 | >>> print(r.random(shape=(3,))) 18 | [0.42977667 0.8235899 0.90622926] 19 | 20 | Gaussian random numbers can be generated by 21 | ``normal(shape=None, buffer=None, mean=0.0, stddev=1.0)``. 
class PRNG(vk.Resource):
    """
    Base class of GPU-based Pseudo Random Number Generators

    Subclasses provide ``random()`` and ``randint()``; ``normal()`` and
    ``randrange()`` are built on top of ``random()`` here.
    """
    # Shaders used by ``normal()`` (odd / even sizes) and ``randrange()``.
    _box_muller = getShader("prng_box_muller.spv")
    _ibox_muller = getShader("prng_ibox_muller.spv")
    _randrange = getShader("prng_randrange.spv")

    # Exclusive upper limit accepted by ``randrange()``.
    _2p32 = int(2 ** 32)

    def __init__(self, gpu: vk.GPU):
        """
        Initialize PRNG

        Parameters
        ----------
        gpu : vulkpy.GPU
            GPU used for buffer allocation and job submission
        """
        self._gpu = gpu

    def random(self, *,
               shape: Optional[Iterable[int]] = None,
               buffer: Optional[vk.Array] = None) -> vk.Array:
        """
        Generate [0, 1) floating numbers (must be implemented by subclass)

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def randint(self, *,
                shape: Optional[Iterable[int]] = None,
                buffer: Optional[vk.U32Array] = None) -> vk.U32Array:
        """
        Generate unsigned integer numbers (must be implemented by subclass)

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def normal(self, *,
               shape: Optional[Iterable[int]] = None,
               buffer: Optional[vk.Array] = None,
               mean: float = 0.0,
               stddev: float = 1.0) -> vk.Array:
        """
        Generate Normal Distributing numbers

        Parameters
        ----------
        shape : iterable of ints, optional
            If specified, new ``vulkpy.Array`` with ``shape`` will be returned.
        buffer : vulkpy.Array
            If specified, generated numbers will be stored.
        mean : float, optional
            Mean of the distribution. The default is ``0.0``.
        stddev : float, optional
            Standard deviation of the distribution. The default is ``1.0``.

        Returns
        -------
        vulkpy.Array
            Array which will get random numbers.

        Raises
        ------
        ValueError
            If neither ``shape`` or ``buffer`` are specified

        Notes
        -----
        This method first generates [0, 1) uniform random numbers,
        then transforms them to normal distribution with Box-Muller method.
        Box-Muller might have problem in terms of random number quality,
        however, it is quite GPU friendly.
        """
        _local_size = 64
        if buffer is None:
            if shape is None:
                raise ValueError("One of `shape` and `buffer` must be specified.")

            buffer = vk.Array(self._gpu, shape=shape)
        else:
            # For safety, we wait output buffer job.
            buffer.wait()

        # After checking, narrow the type to Array for the type checker
        buffer = cast(vk.Array, buffer)

        n = int(np.prod(buffer.shape))
        floor_n = n // 2
        # The shader is dispatched over ``floor_n`` items; ``n`` itself is
        # passed to the shader together with ``mean`` and ``stddev``.
        dshape = _vkarray.DataShape(floor_n, 1, 1)
        p = _vkarray.VectorScalar2Params(n, mean, stddev)
        if n % 2 == 0:
            # Even: Reuse `buffer`
            buffer = self.random(buffer=buffer)
            buffer.job = self._gpu._submit(self._ibox_muller,
                                           _local_size, 1, 1,
                                           [buffer], dshape, p)
            # NOTE(review): this replaces whatever ``random()`` stored in
            # ``_keep`` (``Xoshiro128pp.random`` stores the generator) -
            # confirm nothing needs to be kept alive for the in-place job.
            buffer._keep = []
        else:
            # Odd: Require additional space for intermediate [0, 1)
            rng = self.random(shape=(2*(floor_n + 1),))
            buffer.job = self._gpu._submit(self._box_muller,
                                           _local_size, 1, 1,
                                           [rng, buffer], dshape, p)

            # Keep the intermediate array referenced from the output array.
            buffer._keep = [rng]
        return buffer

    def randrange(self, *,
                  shape: Optional[Iterable[int]] = None,
                  buffer: Optional[vk.U32Array] = None,
                  low: int = 0,
                  high: int = int(2 ** 32)) -> vk.U32Array:
        """
        Generate [low, high) random numbers

        Parameters
        ----------
        shape : iterable of ints, optional
            If specified, new ``vulkpy.U32Array`` with ``shape`` will be returned.
        buffer : vulkpy.U32Array
            If specified, generated numbers will be stored.
        low : int, optional
            Inclusive lowest value. The default is ``0``.
        high : int, optional
            Exclusive highest value. The default is ``2^32``.

        Returns
        -------
        vulkpy.U32Array
            Array which will get random numbers.

        Raises
        ------
        ValueError
            If neither ``shape`` or ``buffer`` are specified.
        ValueError
            If not 0 <= low < high <= 2^32.
        """
        if low < 0:
            raise ValueError(f"`low` must be non negative integer, but {low}")
        if high > self._2p32:
            raise ValueError(f"`high` must not be greater than 2^32, but {high}")
        if low >= high:
            raise ValueError(f"`low` must be smaller than `high`, but {low}, {high}")

        # Full range: delegate to ``randint()``.
        if (low == 0) and (high == self._2p32):
            return self.randint(shape=shape, buffer=buffer)

        if buffer is None:
            if shape is None:
                raise ValueError("One of `shape` and `buffer` must be specified.")

            buffer = vk.U32Array(self._gpu, shape=shape)
        else:
            # For safety, we wait output buffer job.
            buffer.wait()

        # After checking, narrow the type to U32Array for the type checker
        buffer = cast(vk.U32Array, buffer)

        size = buffer.buffer.size()
        # Intermediate [0, 1) numbers, one per output element.
        rng = self.random(shape=buffer.shape)
        # The shader receives the inclusive bounds ``low`` and ``high-1``.
        buffer.job = self._gpu._submit(self._randrange, 64, 1, 1,
                                       [rng, buffer],
                                       _vkarray.DataShape(size, 1, 1),
                                       _vkarray.VectorRangeParams(size, low, high-1))
        # Keep the intermediate array referenced from the output array.
        buffer._keep = [rng]
        return buffer

    def wait(self):
        """
        Do nothing
        """
        pass


class Xoshiro128pp(PRNG):
    """
    xoshiro128++: Pseudo Random Number Generator

    Notes
    -----
    This class implements xoshiro128++ [1]_. Initial internal states are
    sequentially generated during construction on CPU and are spaced 2^64 steps.
    Generating (pseudo-)random numbers is executed in parallel on GPU.

    References
    ----------
    .. [1] S. Vigna "xoshiro / xoroshiro generators and the PRNG shootout",
       https://prng.di.unimi.it/
    """
    _spv_uint32 = getShader("prng_xoshiro128pp_uint32.spv")
    _spv_float = getShader("prng_xoshiro128pp_float.spv")

    def __init__(self, gpu: vk.GPU, size: int = 64, *, seed: Optional[int] = None):
        """
        Initialize Xoshiro128pp

        Parameters
        ----------
        gpu : vulkpy.GPU
            GPU where PRNG allocates
        size : int
            Number of internal states. These states generate random numbers
            in parallel. The default is ``64``.
        seed : int, optional
            Random seed. If ``None`` (default), use hardware random instead.
        """
        super().__init__(gpu)

        # The native constructor is called without a seed argument when
        # ``seed`` is None.
        if seed is None:
            self.rng = _vkarray.Xoshiro128pp(self._gpu.gpu,
                                             self._spv_uint32, self._spv_float,
                                             size)
        else:
            self.rng = _vkarray.Xoshiro128pp(self._gpu.gpu,
                                             self._spv_uint32, self._spv_float,
                                             size, seed)

    def random(self, *,
               shape: Optional[Iterable[int]] = None,
               buffer: Optional[vk.Array] = None) -> vk.Array:
        """
        Generate [0, 1) floating numbers

        Parameters
        ----------
        shape : iterable of ints, optional
            If specified, new ``vulkpy.Array`` with ``shape`` will be returned.
        buffer : vulkpy.Array
            If specified, generated numbers will be stored.

        Returns
        -------
        vulkpy.Array
            Array which will get random numbers.

        Raises
        ------
        ValueError
            If neither ``shape`` or ``buffer`` are specified.
        """
        if buffer is None:
            if shape is None:
                raise ValueError("One of `shape` and `buffer` must be specified.")

            buffer = vk.Array(self._gpu, shape=shape)
        else:
            # For safety, we wait output buffer job.
            buffer.wait()

        # After checking, narrow the type to Array for the type checker
        buffer = cast(vk.Array, buffer)

        n = int(np.prod(buffer.shape))
        buffer.job = self.rng.random_float(n, buffer.buffer.info())
        # Keep this generator referenced from the output array.
        buffer._keep = [self]
        return buffer

    def randint(self, *,
                shape: Optional[Iterable[int]] = None,
                buffer: Optional[vk.U32Array] = None) -> vk.U32Array:
        """
        Generate [0, 2^32) unsigned integer numbers

        Parameters
        ----------
        shape : iterable of ints, optional
            If specified, new ``vulkpy.U32Array`` with ``shape`` will be returned.
        buffer : vulkpy.U32Array
            If specified, generated numbers will be stored.

        Returns
        -------
        vulkpy.U32Array
            Array which will get random numbers.

        Raises
        ------
        ValueError
            If neither ``shape`` or ``buffer`` are specified.
        """
        if buffer is None:
            if shape is None:
                raise ValueError("One of `shape` and `buffer` must be specified.")

            buffer = vk.U32Array(self._gpu, shape=shape)
        else:
            # For safety, we wait output buffer job
            buffer.wait()

        # After checking, narrow the type to U32Array for the type checker
        buffer = cast(vk.U32Array, buffer)

        n = int(np.prod(buffer.shape))
        buffer.job = self.rng.random_uint32(n, buffer.buffer.info())
        # Keep this generator referenced from the output array.
        buffer._keep = [self]
        return buffer
// ---- /vulkpy/shader/abs.comp ----
// Element-wise absolute value: b[i] = abs(a[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = abs(a[i]);
}

// ---- /vulkpy/shader/acos.comp ----
// Element-wise arccosine: b[i] = acos(a[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = acos(a[i]);
}

// ---- /vulkpy/shader/acosh.comp ----
// Element-wise inverse hyperbolic cosine: b[i] = acosh(a[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = acosh(a[i]);
}

// ---- /vulkpy/shader/add.comp ----
// Element-wise addition of two buffers: c[i] = a[i] + b[i] for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) readonly buffer B {
  float b[];
};
layout(std430, binding = 2) writeonly buffer C {
  float c[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  c[i] = a[i] + b[i];
}

// ---- /vulkpy/shader/add_broadcast.comp ----
// Addition with broadcasting: each output element c[ci] is the sum of the
// elements of a and b whose per-dimension coordinates match those of ci,
// clamped to the last valid coordinate of a / b in that dimension.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size[3]; // total element counts of a, b and c
  uint ndim;    // number of dimensions described in shapeABC
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) readonly buffer B {
  float b[];
};
layout(std430, binding = 2) writeonly buffer C {
  float c[];
};
layout(std430, binding = 3) readonly buffer D {
  uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
};


void main(){
  const uint ci = gl_GlobalInvocationID.x;
  if(ci >= params.size[2]){ return; } // one invocation per element of c
  uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]);

  uvec2 abi = uvec2(0, 0); // flat indices into a (x) and b (y)
  uint ci_tmp = ci;        // part of ci not yet decomposed
  for(uint dim = 0; dim < params.ndim; dim++){
    uvec3 sABC = uvec3(shapeABC[dim],
                       shapeABC[dim + params.ndim],
                       shapeABC[dim + params.ndim * 2]);
    // After the division, `size` holds the element count spanned by one
    // step along this dimension for a, b and c.
    size = size / sABC;

    // Coordinate of ci along this dimension.
    uint d = ci_tmp / size.z;
    // Clamp the coordinate to the extent of a / b, so a dimension of
    // extent 1 always contributes coordinate 0.
    abi += size.xy * min(uvec2(d, d), sABC.xy - 1);

    ci_tmp = ci_tmp % size.z;
  }

  c[ci] = a[abi.x] + b[abi.y];
}

// ---- /vulkpy/shader/add_scalar.comp ----
// Add a scalar to every element: b[i] = a[i] + params.scalar for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size;    // number of elements to process
  float scalar; // value added to each element
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = a[i] + params.scalar;
}

// ---- /vulkpy/shader/asin.comp ----
// Element-wise arcsine: b[i] = asin(a[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = asin(a[i]);
}

// ---- /vulkpy/shader/asinh.comp ----
// Element-wise inverse hyperbolic sine: b[i] = asinh(a[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = asinh(a[i]);
}

// ---- /vulkpy/shader/atan.comp ----
// Element-wise arctangent: b[i] = atan(a[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = atan(a[i]);
}
// ---- /vulkpy/shader/atanh.comp ----
// Element-wise inverse hyperbolic tangent: b[i] = atanh(a[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  b[i] = atanh(a[i]);
}

// ---- /vulkpy/shader/batch_affine.comp ----
// Batched affine transform: for every batch index and output index,
//   y[batch, out] = sum_i w[out, i] * x[batch, i] + b[out]
// Invocation x selects the batch entry and invocation y the output element.
#version 460
layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;


layout(push_constant) uniform constants {
  uint batch_size;
  uint input_size;
  uint output_size;
} params;


layout(std430, binding = 0) readonly buffer W {
  float w[]; // [output_size, input_size]
};
layout(std430, binding = 1) readonly buffer B {
  float b[]; // [output_size]
};
layout(std430, binding = 2) readonly buffer X {
  float x[]; // [batch_size, input_size]
};
layout(std430, binding = 3) writeonly buffer Y {
  float y[]; // [batch_size, output_size]
};

void main(){
  const uint b_idx = gl_GlobalInvocationID.x;
  const uint o_idx = gl_GlobalInvocationID.y;
  // Invocations outside the [batch_size, output_size] range write nothing.
  if((b_idx >= params.batch_size) || (o_idx >= params.output_size)){ return; }

  const uint batch = b_idx * params.input_size; // start of this row of x
  const uint elem = o_idx * params.input_size;  // start of this row of w

  // Dot product of one row of w with one row of x.
  float sum = 0.0;
  for(uint i = 0; i < params.input_size; i++){
    sum += w[elem + i] * x[batch + i];
  }

  y[b_idx * params.output_size + o_idx] = sum + b[o_idx];
}
// ---- /vulkpy/shader/broadcast.comp ----
// Broadcast copy: every element b[i] receives the element of a whose
// per-dimension coordinates match those of i, clamped to the last valid
// coordinate of a in that dimension.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size[2]; // total element counts of a and b
  uint ndim;    // number of dimensions in a_shape / b_shape
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) writeonly buffer B {
  float b[];
};
layout(std430, binding = 2) readonly buffer C {
  uint a_shape[];
};
layout(std430, binding = 3) readonly buffer D {
  uint b_shape[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size[1]){ return; } // one invocation per element of b

  uint i_tmp = i; // part of i not yet decomposed
  uint j = 0;     // flat index into a
  uint sizeA = params.size[0];
  uint sizeB = params.size[1];
  for(uint dim = 0; dim < params.ndim; dim++){
    // After the division, sizeA / sizeB hold the element count spanned by
    // one step along this dimension.
    sizeA = sizeA / a_shape[dim];
    sizeB = sizeB / b_shape[dim];

    // Coordinate of i along this dimension, clamped to the extent of a so
    // a dimension of extent 1 always contributes coordinate 0.
    uint d = min(i_tmp / sizeB, a_shape[dim]-1);
    j += d * sizeA;

    i_tmp = i_tmp % sizeB;
  }

  b[i] = a[j];
}

// ---- /vulkpy/shader/clamp.comp ----
// Element-wise clamp with per-element bounds:
// d[i] = clamp(a[i], b[i], c[i]) for every i < params.size.
#version 460
layout(local_size_x = 64) in;


layout(push_constant) uniform constants {
  uint size; // number of elements to process
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[];
};
layout(std430, binding = 1) readonly buffer B {
  float b[]; // min
};
layout(std430, binding = 2) readonly buffer C {
  float c[]; // max
};
layout(std430, binding = 3) writeonly buffer D {
  float d[];
};


void main(){
  uint i = gl_GlobalInvocationID.x;
  if(i >= params.size){ return; } // invocations at or past `size` write nothing

  d[i] = clamp(a[i], b[i], c[i]);
}
-------------------------------------------------------------------------------- /vulkpy/shader/clamp_ss.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar[2]; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = clamp(a[i], params.scalar[0], params.scalar[1]); 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/clamp_sv.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; // max 16 | }; 17 | layout(std430, binding = 2) writeonly buffer C { 18 | float c[]; 19 | }; 20 | 21 | 22 | void main(){ 23 | uint i = gl_GlobalInvocationID.x; 24 | if(i >= params.size){ return; } 25 | 26 | c[i] = clamp(a[i], params.scalar, b[i]); 27 | } 28 | -------------------------------------------------------------------------------- /vulkpy/shader/clamp_vs.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; // min 16 | }; 17 | layout(std430, binding = 2) 
writeonly buffer C { 18 | float c[]; 19 | }; 20 | 21 | 22 | void main(){ 23 | uint i = gl_GlobalInvocationID.x; 24 | if(i >= params.size){ return; } 25 | 26 | c[i] = clamp(a[i], b[i], params.scalar); 27 | } 28 | -------------------------------------------------------------------------------- /vulkpy/shader/cos.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | b[i] = cos(a[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/cosh.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | b[i] = cosh(a[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/div.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | layout(std430, binding = 2) writeonly 
buffer C { 17 | float c[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | c[i] = a[i] / b[i]; 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/div_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[3]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) writeonly buffer C { 18 | float c[]; 19 | }; 20 | layout(std430, binding = 3) readonly buffer D { 21 | uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1 22 | }; 23 | // Broadcasting division: c[ci] = a[ai] / b[bi], where ai/bi are the flat offsets in a/b that correspond to c's element ci. 24 | 25 | void main(){ 26 | const uint ci = gl_GlobalInvocationID.x; 27 | if(ci >= params.size[2]){ return; } 28 | uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); // presumably the total element counts of a, b, c - TODO confirm against caller 29 | 30 | uvec2 abi = uvec2(0, 0); // flat offsets into a (x) and b (y) 31 | uint ci_tmp = ci; 32 | for(uint dim = 0; dim < params.ndim; dim++){ 33 | uvec3 sABC = uvec3(shapeABC[dim], 34 | shapeABC[dim + params.ndim], 35 | shapeABC[dim + params.ndim * 2]); 36 | size = size / sABC; // flat step per unit of this dim, assuming size started as the product of each shape 37 | 38 | uint d = ci_tmp / size.z; // c's coordinate along this dim 39 | abi += size.xy * min(uvec2(d, d), sABC.xy - 1); // clamp to a's/b's extent, so an extent-1 (broadcast) dim always contributes coordinate 0 40 | 41 | ci_tmp = ci_tmp % size.z; // remainder is the offset inside this dim's slice 42 | } 43 | 44 | c[ci] = a[abi.x] / b[abi.y]; 45 | } 46 | -------------------------------------------------------------------------------- /vulkpy/shader/div_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) 
writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = a[i] / params.scalar; 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/exp.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | b[i] = exp(a[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/exp2.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | b[i] = exp2(a[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/gather.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | uint b[]; 15 | }; 16 | layout(std430, binding = 2) writeonly buffer C { 17 
| float c[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | c[i] = a[b[i]]; 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/gather_axis.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in; 3 | // Gather along one axis: c[k, i, j] = a[i, b[k], j], with i over prev, j over post and k over the index list. 4 | 5 | layout(push_constant) uniform constants { 6 | uint prev_prod; // Global x 7 | uint post_prod; // Global y 8 | uint axis_size; 9 | uint index_size;// Global z 10 | } params; 11 | 12 | 13 | layout(std430, binding = 0) readonly buffer A { 14 | float a[]; // [prev..., axis_size, post...] 15 | }; 16 | layout(std430, binding = 1) readonly buffer B { 17 | uint b[]; // [index_size] positions along a's gathered axis 18 | }; 19 | layout(std430, binding = 2) writeonly buffer C { 20 | float c[]; // [index_size, prev..., post...] 21 | }; 22 | 23 | // One invocation per (i, j, k) = (prev offset, post offset, position in the index list). 24 | void main(){ 25 | const uint i = gl_GlobalInvocationID.x; 26 | const uint j = gl_GlobalInvocationID.y; 27 | const uint k = gl_GlobalInvocationID.z; 28 | if((i >= params.prev_prod) || (j >= params.post_prod) || (k >= params.index_size)){ 29 | return; 30 | } 31 | // Clamp to the last valid position (axis_size - 1); max(.., 1u) avoids uint wrap-around when axis_size is 0. 32 | const uint bk = min(b[k], max(params.axis_size, 1u) - 1u); 33 | const uint a_idx = 34 | i * params.axis_size * params.post_prod + 35 | bk * params.post_prod + 36 | j; 37 | const uint c_idx = 38 | k * params.prev_prod * params.post_prod + 39 | i * params.post_prod + 40 | j; 41 | 42 | c[c_idx] = a[a_idx]; 43 | } 44 | -------------------------------------------------------------------------------- /vulkpy/shader/iabs.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = 
gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = abs(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iacos.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = acos(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iacosh.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = acosh(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iadd.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | a[i] = a[i] + b[i]; 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/iadd_broadcast.comp: 
-------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) readonly buffer D { 18 | uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1 19 | }; 20 | 21 | // In-place broadcasting add: a[ai] += b[bi], where bi is the flat offset in b that corresponds to a's element ai. 22 | void main(){ 23 | const uint ai = gl_GlobalInvocationID.x; 24 | if(ai >= params.size[0]){ return; } 25 | uvec2 size = uvec2(params.size[0], params.size[1]); // presumably the total element counts of a and b - TODO confirm against caller 26 | 27 | uint bi = 0; // flat offset into b 28 | uint ai_tmp = ai; 29 | for(uint dim = 0; dim < params.ndim; dim++){ 30 | uvec2 sAB = uvec2(shapeAB[dim], 31 | shapeAB[dim + params.ndim]); 32 | size = size / sAB; // flat step per unit of this dim, assuming size started as the product of each shape 33 | 34 | uint d = ai_tmp / size.x; // a's coordinate along this dim 35 | bi += size.y * min(d, sAB.y - 1); // clamp to b's extent, so an extent-1 (broadcast) dim always contributes coordinate 0 36 | 37 | ai_tmp = ai_tmp % size.x; // remainder is the offset inside this dim's slice 38 | } 39 | 40 | a[ai] += b[bi]; 41 | } 42 | -------------------------------------------------------------------------------- /vulkpy/shader/iadd_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = a[i] + params.scalar; 21 | } 22 | -------------------------------------------------------------------------------- /vulkpy/shader/iasin.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float 
a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = asin(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iasinh.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = asinh(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iatan.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = atan(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iatanh.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = atanh(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iclamp.comp: -------------------------------------------------------------------------------- 1 | #version 
460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; // min 15 | }; 16 | layout(std430, binding = 2) readonly buffer C { 17 | float c[]; // max 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | a[i] = clamp(a[i], b[i], c[i]); 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/iclamp_ss.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar[2]; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = clamp(a[i], params.scalar[0], params.scalar[1]); 21 | } 22 | -------------------------------------------------------------------------------- /vulkpy/shader/iclamp_sv.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; // max 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | a[i] = clamp(a[i], params.scalar, b[i]); 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/iclamp_vs.comp: -------------------------------------------------------------------------------- 1 | 
#version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; // min 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | a[i] = clamp(a[i], b[i], params.scalar); 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/icos.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = cos(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/icosh.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = cosh(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/idiv.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) 
readonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | a[i] = a[i] / b[i]; 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/idiv_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) readonly buffer D { 18 | uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1 19 | }; 20 | 21 | // In-place broadcasting division: a[ai] /= b[bi], where bi is the flat offset in b that corresponds to a's element ai. 22 | void main(){ 23 | const uint ai = gl_GlobalInvocationID.x; 24 | if(ai >= params.size[0]){ return; } 25 | uvec2 size = uvec2(params.size[0], params.size[1]); // presumably the total element counts of a and b - TODO confirm against caller 26 | 27 | uint bi = 0; // flat offset into b 28 | uint ai_tmp = ai; 29 | for(uint dim = 0; dim < params.ndim; dim++){ 30 | uvec2 sAB = uvec2(shapeAB[dim], 31 | shapeAB[dim + params.ndim]); 32 | size = size / sAB; // flat step per unit of this dim, assuming size started as the product of each shape 33 | 34 | uint d = ai_tmp / size.x; // a's coordinate along this dim 35 | bi += size.y * min(d, sAB.y - 1); // clamp to b's extent, so an extent-1 (broadcast) dim always contributes coordinate 0 36 | 37 | ai_tmp = ai_tmp % size.x; // remainder is the offset inside this dim's slice 38 | } 39 | 40 | a[ai] /= b[bi]; 41 | } 42 | -------------------------------------------------------------------------------- /vulkpy/shader/idiv_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = a[i] / params.scalar; 21 | } 22 | -------------------------------------------------------------------------------- 
/vulkpy/shader/iexp.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = exp(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iexp2.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = exp2(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/iinvsqrt.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = inversesqrt(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/ilog.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = 
gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = log(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/ilog2.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = log2(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/imax.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | a[i] = max(a[i], b[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/imax_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) readonly buffer D { 18 | uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1 19 | }; 20 | 21 | 22 | void main(){ 23 | const uint ai = gl_GlobalInvocationID.x; 24 | if(ai >= 
params.size[0]){ return; } 25 | uvec2 size = uvec2(params.size[0], params.size[1]); 26 | 27 | uint bi = 0; 28 | uint ai_tmp = ai; 29 | for(uint dim = 0; dim < params.ndim; dim++){ 30 | uvec2 sAB = uvec2(shapeAB[dim], 31 | shapeAB[dim + params.ndim]); 32 | size = size / sAB; 33 | 34 | uint d = ai_tmp / size.x; 35 | bi += size.y * min(d, sAB.y - 1); 36 | 37 | ai_tmp = ai_tmp % size.x; 38 | } 39 | 40 | a[ai] = max(a[ai], b[bi]); 41 | } 42 | -------------------------------------------------------------------------------- /vulkpy/shader/imax_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = max(a[i], params.scalar); 21 | } 22 | -------------------------------------------------------------------------------- /vulkpy/shader/imin.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | a[i] = min(a[i], b[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/imin_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | uint ndim; 8 | } params; 
9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) readonly buffer D { 18 | uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1 19 | }; 20 | 21 | 22 | void main(){ 23 | const uint ai = gl_GlobalInvocationID.x; 24 | if(ai >= params.size[0]){ return; } 25 | uvec2 size = uvec2(params.size[0], params.size[1]); 26 | 27 | uint bi = 0; 28 | uint ai_tmp = ai; 29 | for(uint dim = 0; dim < params.ndim; dim++){ 30 | uvec2 sAB = uvec2(shapeAB[dim], 31 | shapeAB[dim + params.ndim]); 32 | size = size / sAB; 33 | 34 | uint d = ai_tmp / size.x; 35 | bi += size.y * min(d, sAB.y - 1); 36 | 37 | ai_tmp = ai_tmp % size.x; 38 | } 39 | 40 | a[ai] = min(a[ai], b[bi]); 41 | } 42 | -------------------------------------------------------------------------------- /vulkpy/shader/imin_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = min(a[i], params.scalar); 21 | } 22 | -------------------------------------------------------------------------------- /vulkpy/shader/imul.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | a[i] = a[i] * 
b[i]; 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/imul_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) readonly buffer D { 18 | uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1 19 | }; 20 | 21 | // In-place broadcasting multiply: a[ai] *= b[bi], where bi is the flat offset in b that corresponds to a's element ai. 22 | void main(){ 23 | const uint ai = gl_GlobalInvocationID.x; 24 | if(ai >= params.size[0]){ return; } 25 | uvec2 size = uvec2(params.size[0], params.size[1]); // presumably the total element counts of a and b - TODO confirm against caller 26 | 27 | uint bi = 0; // flat offset into b 28 | uint ai_tmp = ai; 29 | for(uint dim = 0; dim < params.ndim; dim++){ 30 | uvec2 sAB = uvec2(shapeAB[dim], 31 | shapeAB[dim + params.ndim]); 32 | size = size / sAB; // flat step per unit of this dim, assuming size started as the product of each shape 33 | 34 | uint d = ai_tmp / size.x; // a's coordinate along this dim 35 | bi += size.y * min(d, sAB.y - 1); // clamp to b's extent, so an extent-1 (broadcast) dim always contributes coordinate 0 36 | 37 | ai_tmp = ai_tmp % size.x; // remainder is the offset inside this dim's slice 38 | } 39 | 40 | a[ai] *= b[bi]; 41 | } 42 | -------------------------------------------------------------------------------- /vulkpy/shader/imul_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = a[i] * params.scalar; 21 | } 22 | -------------------------------------------------------------------------------- /vulkpy/shader/invsqrt.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 
| layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | b[i] = inversesqrt(a[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/ipow.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | a[i] = pow(a[i], b[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/ipow_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) readonly buffer D { 18 | uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1 19 | }; 20 | 21 | 22 | void main(){ 23 | const uint ai = gl_GlobalInvocationID.x; 24 | if(ai >= params.size[0]){ return; } 25 | uvec2 size = uvec2(params.size[0], params.size[1]); 26 | 27 | uint bi = 0; 28 | uint ai_tmp = ai; 29 | for(uint dim = 0; dim < params.ndim; dim++){ 30 | uvec2 sAB = uvec2(shapeAB[dim], 31 | shapeAB[dim + params.ndim]); 32 | size = size 
/ sAB; 33 | 34 | uint d = ai_tmp / size.x; 35 | bi += size.y * min(d, sAB.y - 1); 36 | 37 | ai_tmp = ai_tmp % size.x; 38 | } 39 | 40 | a[ai] = pow(a[ai], b[bi]); 41 | } 42 | -------------------------------------------------------------------------------- /vulkpy/shader/ipow_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = pow(a[i], params.scalar); 21 | } 22 | -------------------------------------------------------------------------------- /vulkpy/shader/isign.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = sign(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/isin.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = sin(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/isinh.comp: 
-------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = sinh(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/isqrt.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = sqrt(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/isub.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | a[i] = a[i] - b[i]; 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/isub_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer 
A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) readonly buffer D { 18 | uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1 19 | }; 20 | 21 | 22 | void main(){ 23 | const uint ai = gl_GlobalInvocationID.x; 24 | if(ai >= params.size[0]){ return; } 25 | uvec2 size = uvec2(params.size[0], params.size[1]); 26 | 27 | uint bi = 0; 28 | uint ai_tmp = ai; 29 | for(uint dim = 0; dim < params.ndim; dim++){ 30 | uvec2 sAB = uvec2(shapeAB[dim], 31 | shapeAB[dim + params.ndim]); 32 | size = size / sAB; 33 | 34 | uint d = ai_tmp / size.x; 35 | bi += size.y * min(d, sAB.y - 1); 36 | 37 | ai_tmp = ai_tmp % size.x; 38 | } 39 | 40 | a[ai] -= b[bi]; 41 | } 42 | -------------------------------------------------------------------------------- /vulkpy/shader/isub_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; 13 | }; 14 | 15 | 16 | void main(){ 17 | uint i = gl_GlobalInvocationID.x; 18 | if(i >= params.size){ return; } 19 | 20 | a[i] = a[i] - params.scalar; 21 | } 22 | -------------------------------------------------------------------------------- /vulkpy/shader/itan.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = tan(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/itanh.comp: 
-------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) buffer A { 11 | float a[]; 12 | }; 13 | 14 | 15 | void main(){ 16 | uint i = gl_GlobalInvocationID.x; 17 | if(i >= params.size){ return; } 18 | 19 | a[i] = tanh(a[i]); 20 | } 21 | -------------------------------------------------------------------------------- /vulkpy/shader/log.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | b[i] = log(a[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/log2.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | if(i >= params.size){ return; } 21 | 22 | b[i] = log2(a[i]); 23 | } 24 | -------------------------------------------------------------------------------- /vulkpy/shader/matmul.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in; 3 | 4 | 5 | 
layout(push_constant) uniform constants { 6 | uint rowA; 7 | uint contractSize; 8 | uint columnB; 9 | } params; 10 | 11 | 12 | layout(std430, binding = 0) readonly buffer A { 13 | float a[]; // [rowA, contractSize] 14 | }; 15 | layout(std430, binding = 1) readonly buffer B { 16 | float b[]; // [contractSize, columnB] 17 | }; 18 | layout(std430, binding = 2) writeonly buffer C { 19 | float c[]; // [rowA, columnB] 20 | }; 21 | 22 | 23 | void main(){ 24 | uint row = gl_GlobalInvocationID.x; 25 | uint col = gl_GlobalInvocationID.y; 26 | if((row >= params.rowA) || (col >= params.columnB)){ return; } 27 | 28 | float sum = 0.0; 29 | for(uint s = 0; s < params.contractSize; s++){ 30 | sum += a[row * params.contractSize + s] * b[s * params.columnB + col]; 31 | } 32 | c[row * params.columnB + col] = sum; 33 | } 34 | -------------------------------------------------------------------------------- /vulkpy/shader/max.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | layout(std430, binding = 2) writeonly buffer C { 17 | float c[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | c[i] = max(a[i], b[i]); 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/max_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[3]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) 
readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) writeonly buffer C { 18 | float c[]; 19 | }; 20 | layout(std430, binding = 3) readonly buffer D { 21 | uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1 22 | }; 23 | 24 | 25 | void main(){ 26 | const uint ci = gl_GlobalInvocationID.x; 27 | if(ci >= params.size[2]){ return; } 28 | uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); 29 | 30 | uvec2 abi = uvec2(0, 0); 31 | uint ci_tmp = ci; 32 | for(uint dim = 0; dim < params.ndim; dim++){ 33 | uvec3 sABC = uvec3(shapeABC[dim], 34 | shapeABC[dim + params.ndim], 35 | shapeABC[dim + params.ndim * 2]); 36 | size = size / sABC; 37 | 38 | uint d = ci_tmp / size.z; 39 | abi += size.xy * min(uvec2(d, d), sABC.xy - 1); 40 | 41 | ci_tmp = ci_tmp % size.z; 42 | } 43 | 44 | c[ci] = max(a[abi.x], b[abi.y]); 45 | } 46 | -------------------------------------------------------------------------------- /vulkpy/shader/max_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = max(a[i], params.scalar); 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/maximum.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) 
writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | uint sizeA = params.size[0]; 21 | uint sizeB = params.size[1]; 22 | if(i >= sizeB){ return; } 23 | 24 | float partial_max = a[i]; 25 | for(uint j = i; j < sizeA; j += sizeB){ 26 | partial_max = max(partial_max, a[j]); 27 | } 28 | 29 | b[i] = partial_max; 30 | } 31 | -------------------------------------------------------------------------------- /vulkpy/shader/maximum_axis.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint prev_prod; // Global x 7 | uint axis_size; 8 | uint post_prod; // Global y 9 | } params; 10 | 11 | 12 | layout(std430, binding = 0) readonly buffer A { 13 | float a[]; // [prev..., axis, post...] 14 | }; 15 | layout(std430, binding = 1) writeonly buffer B { 16 | float b[]; // [prev..., post...] 
17 | }; 18 | 19 | 20 | void main(){ 21 | uint i = gl_GlobalInvocationID.x; 22 | uint j = gl_GlobalInvocationID.y; 23 | if((i >= params.prev_prod) || (j >= params.post_prod)){ return; } 24 | 25 | const uint ij = (i * params.axis_size * params.post_prod) + j; 26 | 27 | float partial_max = a[ij]; 28 | for(uint k=0; k= params.prev_prod) || (j >= params.post_prod)){ return; } 24 | 25 | const uint ij = (i * params.axis_size * params.post_prod) + j; 26 | 27 | float partial_max = a[ij]; 28 | for(uint k=0; k= params.size){ return; } 23 | 24 | float partial_max = subgroupMax(a[i]); 25 | 26 | if(subgroupElect()){ 27 | b[gl_SubgroupID] = partial_max; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /vulkpy/shader/min.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | layout(std430, binding = 2) writeonly buffer C { 17 | float c[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | c[i] = min(a[i], b[i]); 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/min_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[3]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) writeonly buffer C { 18 | float c[]; 19 | }; 20 | layout(std430, binding = 3) 
readonly buffer D { 21 | uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1 22 | }; 23 | 24 | 25 | void main(){ 26 | const uint ci = gl_GlobalInvocationID.x; 27 | if(ci >= params.size[2]){ return; } 28 | uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); 29 | 30 | uvec2 abi = uvec2(0, 0); 31 | uint ci_tmp = ci; 32 | for(uint dim = 0; dim < params.ndim; dim++){ 33 | uvec3 sABC = uvec3(shapeABC[dim], 34 | shapeABC[dim + params.ndim], 35 | shapeABC[dim + params.ndim * 2]); 36 | size = size / sABC; 37 | 38 | uint d = ci_tmp / size.z; 39 | abi += size.xy * min(uvec2(d, d), sABC.xy - 1); 40 | 41 | ci_tmp = ci_tmp % size.z; 42 | } 43 | 44 | c[ci] = min(a[abi.x], b[abi.y]); 45 | } 46 | -------------------------------------------------------------------------------- /vulkpy/shader/min_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = min(a[i], params.scalar); 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/minimum.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | uint sizeA = params.size[0]; 21 | uint 
sizeB = params.size[1]; 22 | if(i >= sizeB){ return; } 23 | 24 | float partial_min = a[i]; 25 | for(uint j = i; j < sizeA; j += sizeB){ 26 | partial_min = min(partial_min, a[j]); 27 | } 28 | 29 | b[i] = partial_min; 30 | } 31 | -------------------------------------------------------------------------------- /vulkpy/shader/minimum_axis.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint prev_prod; // Global x 7 | uint axis_size; 8 | uint post_prod; // Global y 9 | } params; 10 | 11 | 12 | layout(std430, binding = 0) readonly buffer A { 13 | float a[]; // [prev..., axis, post...] 14 | }; 15 | layout(std430, binding = 1) writeonly buffer B { 16 | float b[]; // [prev..., post...] 17 | }; 18 | 19 | 20 | void main(){ 21 | uint i = gl_GlobalInvocationID.x; 22 | uint j = gl_GlobalInvocationID.y; 23 | if((i >= params.prev_prod) || (j >= params.post_prod)){ return; } 24 | 25 | const uint ij = (i * params.axis_size * params.post_prod) + j; 26 | 27 | float partial_min = a[ij]; 28 | for(uint k=0; k= params.prev_prod) || (j >= params.post_prod)){ return; } 24 | 25 | const uint ij = (i * params.axis_size * params.post_prod) + j; 26 | 27 | float partial_min = a[ij]; 28 | for(uint k=0; k= params.size){ return; } 23 | 24 | float partial_min = subgroupMin(a[i]); 25 | 26 | if(subgroupElect()){ 27 | b[gl_SubgroupID] = partial_min; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /vulkpy/shader/mul.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | 
float b[]; 15 | }; 16 | layout(std430, binding = 2) writeonly buffer C { 17 | float c[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | c[i] = a[i] * b[i]; 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/mul_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[3]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) writeonly buffer C { 18 | float c[]; 19 | }; 20 | layout(std430, binding = 3) readonly buffer D { 21 | uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1 22 | }; 23 | 24 | 25 | void main(){ 26 | const uint ci = gl_GlobalInvocationID.x; 27 | if(ci >= params.size[2]){ return; } 28 | uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); 29 | 30 | uvec2 abi = uvec2(0, 0); 31 | uint ci_tmp = ci; 32 | for(uint dim = 0; dim < params.ndim; dim++){ 33 | uvec3 sABC = uvec3(shapeABC[dim], 34 | shapeABC[dim + params.ndim], 35 | shapeABC[dim + params.ndim * 2]); 36 | size = size / sABC; 37 | 38 | uint d = ci_tmp / size.z; 39 | abi += size.xy * min(uvec2(d, d), sABC.xy - 1); 40 | 41 | ci_tmp = ci_tmp % size.z; 42 | } 43 | 44 | c[ci] = a[abi.x] * b[abi.y]; 45 | } 46 | -------------------------------------------------------------------------------- /vulkpy/shader/mul_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 
12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = a[i] * params.scalar; 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/nn_cross_entropy.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer X { 11 | float x[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer Y { 14 | float y[]; 15 | }; 16 | layout(std430, binding = 2) writeonly buffer Loss { 17 | float L[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | L[i] = - y[i] * log(x[i] + 1e-8); 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/nn_cross_entropy_backward.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer X { 11 | float x[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer Y { 14 | float y[]; 15 | }; 16 | layout(std430, binding = 2) writeonly buffer dX { 17 | float dx[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | dx[i] = - y[i] / (x[i] + 1e-8); 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/pow.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | 
layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) readonly buffer B { 14 | float b[]; 15 | }; 16 | layout(std430, binding = 2) writeonly buffer C { 17 | float c[]; 18 | }; 19 | 20 | 21 | void main(){ 22 | uint i = gl_GlobalInvocationID.x; 23 | if(i >= params.size){ return; } 24 | 25 | c[i] = pow(a[i], b[i]); 26 | } 27 | -------------------------------------------------------------------------------- /vulkpy/shader/pow_broadcast.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[3]; 7 | uint ndim; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) readonly buffer B { 15 | float b[]; 16 | }; 17 | layout(std430, binding = 2) writeonly buffer C { 18 | float c[]; 19 | }; 20 | layout(std430, binding = 3) readonly buffer D { 21 | uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1 22 | }; 23 | 24 | 25 | void main(){ 26 | const uint ci = gl_GlobalInvocationID.x; 27 | if(ci >= params.size[2]){ return; } 28 | uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); 29 | 30 | uvec2 abi = uvec2(0, 0); 31 | uint ci_tmp = ci; 32 | for(uint dim = 0; dim < params.ndim; dim++){ 33 | uvec3 sABC = uvec3(shapeABC[dim], 34 | shapeABC[dim + params.ndim], 35 | shapeABC[dim + params.ndim * 2]); 36 | size = size / sABC; 37 | 38 | uint d = ci_tmp / size.z; 39 | abi += size.xy * min(uvec2(d, d), sABC.xy - 1); 40 | 41 | ci_tmp = ci_tmp % size.z; 42 | } 43 | 44 | c[ci] = pow(a[abi.x], b[abi.y]); 45 | } 46 | -------------------------------------------------------------------------------- /vulkpy/shader/pow_scalar.comp: 
-------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = pow(a[i], params.scalar); 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/prng_box_muller.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar[2]; // [mean, stddev] 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; // Uniform Distribution between [0, 1). 
13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | const uint i = gl_GlobalInvocationID.x; 21 | const uint j = 2*i; 22 | const uint k = j + 1; 23 | if(j >= params.size){ return; } 24 | 25 | const float r = sqrt(-2 * log(1.0 - a[j])) * params.scalar[1]; 26 | const float angle = 6.28318530718f * a[k]; 27 | 28 | b[j] = params.scalar[0] + r * sin(angle); 29 | 30 | if(k >= params.size){ return; } 31 | b[k] = params.scalar[0] + r * cos(angle); 32 | } 33 | -------------------------------------------------------------------------------- /vulkpy/shader/prng_ibox_muller.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar[2]; // [mean, stddev] 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | float a[]; // Uniform Distribution between [0, 1). 
13 | }; 14 | 15 | 16 | void main(){ 17 | const uint i = gl_GlobalInvocationID.x; 18 | const uint j = 2*i; 19 | const uint k = j + 1; 20 | if(j >= params.size){ return; } 21 | 22 | const float r = sqrt(-2 * log(1.0 - a[j])) * params.scalar[1]; 23 | const float angle = 6.28318530718f * a[k]; 24 | 25 | a[j] = params.scalar[0] + r * sin(angle); 26 | a[k] = params.scalar[0] + r * cos(angle); 27 | } 28 | -------------------------------------------------------------------------------- /vulkpy/shader/prng_randrange.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | uint low; 8 | uint high; 9 | } params; 10 | 11 | 12 | layout(std430, binding = 0) readonly buffer A { 13 | float a[]; // [0, 1) 14 | }; 15 | layout(std430, binding = 1) writeonly buffer B { 16 | uint b[]; // [low, high] 17 | }; 18 | 19 | 20 | void main(){ 21 | uint i = gl_GlobalInvocationID.x; 22 | if(i >= params.size){ return; } 23 | 24 | const uint range = params.high - params.low + 1; 25 | 26 | b[i] = params.low + uint(range * a[i]); 27 | } 28 | -------------------------------------------------------------------------------- /vulkpy/shader/prng_xoshiro128pp_float.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint shift; 7 | uint size; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | uint a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | uint rotl(uint x, int k){ 20 | return (x << k) | (x >> (32 - k)); 21 | } 22 | 23 | 24 | // xoshiro128++ 25 | // https://prng.di.unimi.it/xoshiro128plusplus.c 26 | void main(){ 27 | uint i = gl_GlobalInvocationID.x; 28 | uint shifted_i = i+params.shift; 29 | 
if(i >= params.size){ return; } 30 | uint j = 4 * i; 31 | 32 | uint result = rotl(a[j] + a[j+3], 7) + a[j]; 33 | b[shifted_i] = uintBitsToFloat((result >> 9) | 0x3f800000) - 1.0; 34 | 35 | uint t = (a[j+1] << 9); 36 | 37 | a[j+2] ^= a[j ]; 38 | a[j+3] ^= a[j+1]; 39 | a[j+1] ^= a[j+2]; 40 | a[j ] ^= a[j+3]; 41 | 42 | a[j+2] ^= t; 43 | a[j+3] = rotl(a[j+3], 11); 44 | } 45 | -------------------------------------------------------------------------------- /vulkpy/shader/prng_xoshiro128pp_uint32.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint shift; 7 | uint size; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) buffer A { 12 | uint a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | uint b[]; 16 | }; 17 | 18 | 19 | uint rotl(uint x, int k){ 20 | return (x << k) | (x >> (32 - k)); 21 | } 22 | 23 | 24 | // xoshiro128++ 25 | // https://prng.di.unimi.it/xoshiro128plusplus.c 26 | void main(){ 27 | uint i = gl_GlobalInvocationID.x; 28 | uint shifted_i = i+params.shift; 29 | if(i >= params.size){ return; } 30 | uint j = 4 * i; 31 | 32 | b[shifted_i] = rotl(a[j] + a[j+3], 7) + a[j]; 33 | 34 | uint t = (a[j+1] << 9); 35 | 36 | a[j+2] ^= a[j ]; 37 | a[j+3] ^= a[j+1]; 38 | a[j+1] ^= a[j+2]; 39 | a[j ] ^= a[j+3]; 40 | 41 | a[j+2] ^= t; 42 | a[j+3] = rotl(a[j+3], 11); 43 | } 44 | -------------------------------------------------------------------------------- /vulkpy/shader/prod.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size[2]; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly buffer A { 11 | float a[]; 12 | }; 13 | layout(std430, binding = 1) writeonly buffer B { 14 | float b[]; 15 | }; 16 | 17 | 
18 | void main(){ 19 | uint i = gl_GlobalInvocationID.x; 20 | uint sizeA = params.size[0]; 21 | uint sizeB = params.size[1]; 22 | if(i >= sizeB){ return; } 23 | 24 | float partial_prod = 1.0f; 25 | for(uint j = i; j < sizeA; j += sizeB){ 26 | partial_prod *= a[j]; 27 | } 28 | 29 | b[i] = partial_prod; 30 | } 31 | -------------------------------------------------------------------------------- /vulkpy/shader/prod_axis.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint prev_prod; // Global x 7 | uint axis_size; 8 | uint post_prod; // Global y 9 | } params; 10 | 11 | 12 | layout(std430, binding = 0) readonly buffer A { 13 | float a[]; // [prev..., axis, post...] 14 | }; 15 | layout(std430, binding = 1) writeonly buffer B { 16 | float b[]; // [prev..., post...] 17 | }; 18 | 19 | 20 | void main(){ 21 | uint i = gl_GlobalInvocationID.x; 22 | uint j = gl_GlobalInvocationID.y; 23 | if((i >= params.prev_prod) || (j >= params.post_prod)){ return; } 24 | 25 | const uint ij = (i * params.axis_size * params.post_prod) + j; 26 | 27 | float partial_prod = 1.0f; 28 | for(uint k=0; k= params.prev_prod) || (j >= params.post_prod)){ return; } 24 | 25 | const uint ij = (i * params.axis_size * params.post_prod) + j; 26 | 27 | float partial_prod = 1.0f; 28 | for(uint k=0; k= params.size){ return; } 23 | 24 | float partial_prod = subgroupMul(a[i]); 25 | 26 | if(subgroupElect()){ 27 | b[gl_SubgroupID] = partial_prod; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /vulkpy/shader/rdiv_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, 
binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = params.scalar / a[i]; 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/rpow_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = pow(params.scalar, a[i]); 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/rsub_scalar.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | float scalar; 8 | } params; 9 | 10 | 11 | layout(std430, binding = 0) readonly buffer A { 12 | float a[]; 13 | }; 14 | layout(std430, binding = 1) writeonly buffer B { 15 | float b[]; 16 | }; 17 | 18 | 19 | void main(){ 20 | uint i = gl_GlobalInvocationID.x; 21 | if(i >= params.size){ return; } 22 | 23 | b[i] = params.scalar - a[i]; 24 | } 25 | -------------------------------------------------------------------------------- /vulkpy/shader/sign.comp: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout(local_size_x = 64) in; 3 | 4 | 5 | layout(push_constant) uniform constants { 6 | uint size; 7 | } params; 8 | 9 | 10 | layout(std430, binding = 0) readonly 
#version 460
layout(local_size_x = 64) in;


// Push constant: number of elements to process.
layout(push_constant) uniform constants {
  uint size;
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[]; // input
};
layout(std430, binding = 1) writeonly buffer B {
  float b[]; // output
};


// Element-wise sine: b[i] = sin(a[i]) for every i < size.
void main(){
  const uint idx = gl_GlobalInvocationID.x;

  // Skip the padding invocations of the last workgroup.
  if(idx < params.size){
    b[idx] = sin(a[idx]);
  }
}
#version 460
layout(local_size_x = 64) in;


// Push constant: number of elements to process.
layout(push_constant) uniform constants {
  uint size;
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[]; // left operand
};
layout(std430, binding = 1) readonly buffer B {
  float b[]; // right operand
};
layout(std430, binding = 2) writeonly buffer C {
  float c[]; // output
};


// Element-wise subtraction: c[i] = a[i] - b[i] for every i < size.
void main(){
  const uint idx = gl_GlobalInvocationID.x;

  // Skip the padding invocations of the last workgroup.
  if(idx < params.size){
    const float lhs = a[idx];
    const float rhs = b[idx];
    c[idx] = lhs - rhs;
  }
}
#version 460
layout(local_size_x = 64) in;


// Push constants: size[0] = number of inputs, size[1] = number of outputs.
layout(push_constant) uniform constants {
  uint size[2];
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[]; // input
};
layout(std430, binding = 1) writeonly buffer B {
  float b[]; // partial sums
};


// Strided partial sum.
// Invocation `idx` adds a[idx], a[idx + sizeB], a[idx + 2*sizeB], ...
// (every index below sizeA) and stores the result in b[idx].
void main(){
  const uint idx = gl_GlobalInvocationID.x;
  const uint n_in = params.size[0];
  const uint n_out = params.size[1];

  // Invocations past the output size have nothing to do.
  if(idx < n_out){
    float acc = 0.0f;  // additive identity
    uint src = idx;
    while(src < n_in){
      acc += a[src];
      src += n_out;
    }

    b[idx] = acc;
  }
}
#version 460
layout(local_size_x = 64) in;


// Push constant: number of elements to process.
layout(push_constant) uniform constants {
  uint size;
} params;


layout(std430, binding = 0) readonly buffer A {
  float a[]; // input
};
layout(std430, binding = 1) writeonly buffer B {
  float b[]; // output
};


// Element-wise tangent: b[i] = tan(a[i]) for every i < size.
void main(){
  const uint idx = gl_GlobalInvocationID.x;

  // Skip the padding invocations of the last workgroup.
  if(idx < params.size){
    b[idx] = tan(a[idx]);
  }
}
def enable_debug(*, validation: bool = True, api_dump: bool = True) -> None:
    """
    Enable debug message

    Starts DEBUG-level logging for ``"vulkpy"`` and requests the selected
    Vulkan layers through the ``VK_INSTANCE_LAYERS`` environment variable.

    Parameters
    ----------
    validation : bool, optional
        If ``True`` (default), enable Vulkan validation.
    api_dump : bool, optional
        If ``True`` (default), enable Vulkan API dump.

    Notes
    -----
    ``validation`` requires validation layer [1]_.
    ``api_dump`` requires LunarG API dump layer [2]_.
    If required layers are not installed, the options are ignored.

    When at least one layer is selected, any value already stored in
    ``VK_INSTANCE_LAYERS`` is overwritten. When both options are ``False``
    the variable is left untouched.

    References
    ----------
    .. [1] VK_LAYER_KHRONOS_validation
       https://github.com/KhronosGroup/Vulkan-ValidationLayers
    .. [2] VK_LAYER_LUNARG_api_dump
       https://github.com/LunarG/VulkanTools/blob/main/layersvt/api_dump_layer.md
    """
    wblog.start_logging("vulkpy", level=logging.DEBUG)
    logger.debug("Enable debug mode")

    layers = []
    if validation:
        layers.append("VK_LAYER_KHRONOS_validation")
        logger.debug("Enable Vulkan Validation")
    if api_dump:
        layers.append("VK_LAYER_LUNARG_api_dump")
        logger.debug("Enable Vulkan API dump")

    if layers:
        # The Vulkan loader splits this list on ':' (Linux / macOS) but on
        # ';' on Windows. ``os.pathsep`` is exactly that platform separator,
        # whereas a hard-coded ':' yields one bogus layer name on Windows.
        os.environ["VK_INSTANCE_LAYERS"] = os.pathsep.join(layers)


def getShader(name: str) -> str:
    """
    Get Shader Path

    Parameters
    ----------
    name : str
        SPIR-V (.spv) name

    Returns
    -------
    str
        Shader file path, i.e. ``name`` inside the ``shader`` directory
        located next to this module. The path is not checked for existence.
    """
    shader_dir = os.path.join(os.path.dirname(__file__), "shader")
    return os.path.join(shader_dir, name)