├── .coveragerc
├── .dockerignore
├── .github
    └── workflows
    │   └── vulkpy.yaml
├── .gitignore
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── debug
    └── 01-nn-dense.py
├── doc
    ├── api.md
    ├── broadcasting.md
    ├── conf.py
    ├── development.md
    ├── example.md
    ├── index.md
    └── synchronization.md
├── example
    ├── 00-arithmetic.py
    ├── 01-random.py
    └── 02-nn.py
├── mypy.ini
├── pyproject.toml
├── setup.py
├── test
    ├── test_nn.py
    ├── test_random.py
    └── test_vulkpy.py
└── vulkpy
    ├── __init__.py
    ├── _vkarray.cc
    ├── _vkutil.hh
    ├── nn
        ├── __init__.py
        ├── core.py
        ├── initializers.py
        ├── layers.py
        ├── losses.py
        ├── models.py
        ├── optimizers.py
        ├── parameters.py
        └── regularizers.py
    ├── random.py
    ├── shader
        ├── abs.comp
        ├── acos.comp
        ├── acosh.comp
        ├── add.comp
        ├── add_broadcast.comp
        ├── add_scalar.comp
        ├── asin.comp
        ├── asinh.comp
        ├── atan.comp
        ├── atanh.comp
        ├── batch_affine.comp
        ├── broadcast.comp
        ├── clamp.comp
        ├── clamp_ss.comp
        ├── clamp_sv.comp
        ├── clamp_vs.comp
        ├── cos.comp
        ├── cosh.comp
        ├── div.comp
        ├── div_broadcast.comp
        ├── div_scalar.comp
        ├── exp.comp
        ├── exp2.comp
        ├── gather.comp
        ├── gather_axis.comp
        ├── iabs.comp
        ├── iacos.comp
        ├── iacosh.comp
        ├── iadd.comp
        ├── iadd_broadcast.comp
        ├── iadd_scalar.comp
        ├── iasin.comp
        ├── iasinh.comp
        ├── iatan.comp
        ├── iatanh.comp
        ├── iclamp.comp
        ├── iclamp_ss.comp
        ├── iclamp_sv.comp
        ├── iclamp_vs.comp
        ├── icos.comp
        ├── icosh.comp
        ├── idiv.comp
        ├── idiv_broadcast.comp
        ├── idiv_scalar.comp
        ├── iexp.comp
        ├── iexp2.comp
        ├── iinvsqrt.comp
        ├── ilog.comp
        ├── ilog2.comp
        ├── imax.comp
        ├── imax_broadcast.comp
        ├── imax_scalar.comp
        ├── imin.comp
        ├── imin_broadcast.comp
        ├── imin_scalar.comp
        ├── imul.comp
        ├── imul_broadcast.comp
        ├── imul_scalar.comp
        ├── invsqrt.comp
        ├── ipow.comp
        ├── ipow_broadcast.comp
        ├── ipow_scalar.comp
        ├── isign.comp
        ├── isin.comp
        ├── isinh.comp
        ├── isqrt.comp
        ├── isub.comp
        ├── isub_broadcast.comp
        ├── isub_scalar.comp
        ├── itan.comp
        ├── itanh.comp
        ├── log.comp
        ├── log2.comp
        ├── matmul.comp
        ├── max.comp
        ├── max_broadcast.comp
        ├── max_scalar.comp
        ├── maximum.comp
        ├── maximum_axis.comp
        ├── maximum_axis_rebroadcast.comp
        ├── maximum_v1.3.comp
        ├── min.comp
        ├── min_broadcast.comp
        ├── min_scalar.comp
        ├── minimum.comp
        ├── minimum_axis.comp
        ├── minimum_axis_rebroadcast.comp
        ├── minimum_v1.3.comp
        ├── mul.comp
        ├── mul_broadcast.comp
        ├── mul_scalar.comp
        ├── nn_cross_entropy.comp
        ├── nn_cross_entropy_backward.comp
        ├── pow.comp
        ├── pow_broadcast.comp
        ├── pow_scalar.comp
        ├── prng_box_muller.comp
        ├── prng_ibox_muller.comp
        ├── prng_randrange.comp
        ├── prng_xoshiro128pp_float.comp
        ├── prng_xoshiro128pp_uint32.comp
        ├── prod.comp
        ├── prod_axis.comp
        ├── prod_axis_rebroadcast.comp
        ├── prod_v1.3.comp
        ├── rdiv_scalar.comp
        ├── rpow_scalar.comp
        ├── rsub_scalar.comp
        ├── sign.comp
        ├── sin.comp
        ├── sinh.comp
        ├── sqrt.comp
        ├── sub.comp
        ├── sub_broadcast.comp
        ├── sub_scalar.comp
        ├── sum.comp
        ├── sum_axis.comp
        ├── sum_axis_rebroadcast.comp
        ├── sum_v1.3.comp
        ├── tan.comp
        └── tanh.comp
    ├── util.py
    ├── vkarray.py
    └── vktyping.py


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | parallel = True
4 | relative_files = True
5 | 
6 | [report]
7 | show_missing = True
8 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | *#
 2 | #*
 3 | *~
 4 | ~*
 5 | *.out
 6 | *.o
 7 | *.spv
 8 | *.egg-info
 9 | *.so
10 | .DS_Store
11 | __pycache__
12 | 


--------------------------------------------------------------------------------
/.github/workflows/vulkpy.yaml:
--------------------------------------------------------------------------------
 1 | name: vulkpy
 2 | on: [push, pull_request]
 3 | jobs:
 4 |   ci:
 5 |     runs-on: ubuntu-latest
 6 |     steps:
 7 |       - uses: actions/checkout@v3
 8 |       - uses: docker/setup-buildx-action@v2
 9 |       - uses: docker/build-push-action@v3
10 |         with:
11 |           context: .
12 |           push: false
13 |           tags: vulkpy/results:latest
14 |           cache-to: type=gha,mode=max,scope=${{github.ref_name}}
15 |           cache-from: type=gha,scope=${{github.ref_name}}
16 |           load: true
17 |           file: Dockerfile
18 |       - run: |
19 |           docker create --name results vulkpy/results:latest
20 |           docker cp results:/coverage/. coverage/
21 |           docker cp results:/unittest/. unittest/
22 |           docker cp results:/dist/. dist/
23 |           docker cp results:/html/. html/
24 |         name: Extract Results
25 |       - uses: actions/upload-artifact@v3
26 |         with:
27 |           name: coverage
28 |           path: coverage
29 |       - uses: actions/upload-artifact@v3
30 |         with:
31 |           name: unittest
32 |           path: unittest
33 |       - uses: actions/upload-artifact@v3
34 |         with:
35 |           name: dist
36 |           path: dist
37 |       - uses: actions/upload-artifact@v3
38 |         with:
39 |           name: html
40 |           path: html
41 |       - uses: EnricoMi/publish-unit-test-result-action@v2
42 |         with:
43 |           junit_files: "unittest/**/*.xml"
44 |       - name: Add Coverage PR Comment
45 |         uses: marocchino/sticky-pull-request-comment@v2
46 |         if: github.event_name == 'pull_request'
47 |         with:
48 |           recreate: true
49 |           path: coverage/summary.md
50 |       - name: Write to Job Summary
51 |         run: cat coverage/summary.md >> $GITHUB_STEP_SUMMARY
52 |       - name: Publish package
53 |         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
54 |         uses: pypa/gh-action-pypi-publish@release/v1
55 |         with:
56 |           password: ${{ secrets.PYPI_PASS }}
57 |       - uses: actions/configure-pages@v2
58 |       - uses: actions/upload-pages-artifact@v1
59 |         with:
60 |           path: html
61 |   deploy:
62 |     concurrency:
63 |       group: vulkpy-deploy
64 |       cancel-in-progress: true
65 |     if: github.ref_name == 'master'
66 |     needs: ci
67 |     permissions:
68 |       contents: read
69 |       pages: write
70 |       id-token: write
71 |     environment:
72 |       name: github-pages
73 |       url: ${{ steps.deployment.outputs.page_url }}
74 |     runs-on: ubuntu-latest
75 |     steps:
76 |       - id: deployment
77 |         uses: actions/deploy-pages@v1
78 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | a.out
2 | *.spv
3 | *.egg-info
4 | *.so
5 | .DS_Store
6 | __pycache__
7 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:bullseye AS vulkpy-env
 2 | RUN --mount=type=cache,target=/var/lib/apt/lists \
 3 |     wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | \
 4 |     apt-key add - && \
 5 |     wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list \
 6 |     http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list && \
 7 |     apt update && \
 8 |     apt install -y --no-install-recommends \
 9 |     libvulkan1 libvulkan-dev vulkan-headers shaderc \
10 |     vulkan-validationlayers lunarg-vulkan-layers mesa-vulkan-drivers
11 | RUN --mount=type=cache,target=/root/.cache/pip \
12 |     pip install numpy pybind11 well-behaved-logging
13 | 
14 | 
15 | FROM vulkpy-env AS vulkpy-install
16 | WORKDIR /vulkpy-ci
17 | RUN --mount=type=cache,target=/root/.cache/pip \
18 |     pip install coverage unittest-xml-reporting
19 | COPY setup.py pyproject.toml MANIFEST.in mypy.ini .
20 | COPY vulkpy vulkpy
21 | RUN --mount=type=cache,target=/root/.cache/pip pip install .[test] && \
22 |     mypy -p vulkpy && \
23 |     rm -rf vulkpy && \
24 |     rm setup.py pyproject.toml MANIFEST.in mypy.ini
25 | 
26 | 
27 | FROM vulkpy-install AS vulkpy-test
28 | COPY test test
29 | WORKDIR /vulkpy-ci/test
30 | COPY .coveragerc .
31 | RUN coverage run --source vulkpy -m xmlrunner discover || true
32 | RUN mkdir -p /coverage && cp -v .coverage.* /coverage && \
33 |     mkdir -p /unittest && cp *.xml /unittest
34 | 
35 | 
36 | FROM vulkpy-install AS vulkpy-combine
37 | WORKDIR /coverage
38 | RUN --mount=type=cache,target=/root/.cache/pip pip install coverage
39 | COPY vulkpy /vulkpy-ci/vulkpy
40 | COPY .coveragerc .coveragerc
41 | COPY --from=vulkpy-test /coverage /coverage
42 | RUN coverage combine && \
43 |     echo "## Test Coverage\n\`\`\`\n" >> summary.md && \
44 |     coverage report | tee -a summary.md && \
45 |     echo "\n\`\`\`" >> summary.md && \
46 |     mkdir -p /coverage/html && coverage html -d /coverage/html
47 | 
48 | 
49 | FROM vulkpy-install AS vulkpy-example
50 | WORKDIR /vulkpy-ci/example
51 | RUN --mount=type=cache,target=/root/.cache/pip pip install scikit-learn
52 | COPY example .
53 | RUN python 00-arithmetic.py && \
54 |     python 01-random.py && \
55 |     python 02-nn.py --debug --optimizer sgd --nepoch 1 && \
56 |     python 02-nn.py --debug --optimizer adam  --nepoch 1 && \
57 |     touch /vulkpy-ci/example/example-ok
58 | 
59 | 
60 | FROM vulkpy-env AS vulkpy-build
61 | WORKDIR /build
62 | RUN --mount=type=cache,target=/root/.cache/pip pip install wheel
63 | COPY LICENSE setup.py README.md MANIFEST.in pyproject.toml .
64 | COPY vulkpy vulkpy
65 | RUN python setup.py sdist -d /dist
66 | 
67 | 
68 | FROM vulkpy-env AS vulkpy-doc
69 | WORKDIR /ci
70 | RUN --mount=type=cache,target=/var/lib/apt/lists \
71 |     apt update && apt -y --no-install-recommends install graphviz
72 | RUN --mount=type=cache,target=/root/.cache/pip pip install \
73 |     sphinx \
74 |     furo \
75 |     sphinx-automodapi \
76 |     myst-parser
77 | COPY LICENSE LICENSE
78 | COPY setup.py setup.py
79 | COPY README.md README.md
80 | COPY vulkpy vulkpy
81 | RUN --mount=type=cache,target=/root/.cache/pip pip install .[doc]
82 | COPY doc doc
83 | COPY example example
84 | RUN sphinx-build -W -b html doc /html
85 | 
86 | 
87 | FROM scratch AS results
88 | COPY --from=vulkpy-test /unittest /unittest/3.11
89 | COPY --from=vulkpy-combine /coverage/html /coverage/html
90 | COPY --from=vulkpy-combine /coverage/summary.md /coverage/summary.md
91 | COPY --from=vulkpy-build /dist /dist
92 | COPY --from=vulkpy-doc /html /html
93 | COPY --from=vulkpy-example /vulkpy-ci/example/example-ok /example/example-ok
94 | CMD [""]
95 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 H.Yamada
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include vulkpy/shader/*.spv
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # vulkpy: GPGPU array on Vulkan
  2 | 
  3 | ![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/ymd-h/vulkpy/vulkpy.yaml)
  4 | ![PyPI](https://img.shields.io/pypi/v/vulkpy)
  5 | ![PyPI - License](https://img.shields.io/pypi/l/vulkpy)
  6 | 
  7 | vulkpy is a Python package providing GPGPU computation based on Vulkan.
  8 | 
  9 | 
 10 | ## Requirements
 11 | 
 12 | * C++20 compatible compiler
 13 | * `libvulkan`
 14 | * Vulkan SDK
 15 |   * Headers (`vulkan/vulkan.hpp` and so on)
 16 |   * Shaderc (`glslc`)
 17 | 
 18 | 
 19 | On Ubuntu 22.04,
 20 | ```shell
 21 | wget -qO - http://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add -
 22 | wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list http://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
 23 | apt update
 24 | apt install -y libvulkan1 libvulkan-dev vulkan-headers shaderc vulkan-validationlayers
 25 | ```
 26 | 
 27 | > **Note**  
 28 | > `vulkan-sdk` cannot be installed because it requires obsolete package `qt5-default`.
 29 | 
 30 | 
 31 | ## Example
 32 | 
 33 | ```python
 34 | import vulkpy as vk
 35 | 
 36 | gpu = vk.GPU()
 37 | 
 38 | a = vk.Array(gpu, data=[10, 10, 10])
 39 | b = vk.Array(gpu, data=[5, 5, 5])
 40 | 
 41 | c = a + b
 42 | c.wait()
 43 | 
 44 | print(c)
 45 | # [15, 15, 15]
 46 | ```
 47 | 
 48 | ## Features
 49 | 
 50 | * Element-wise Arithmetic Operators between 2 `Array`s.
 51 |   * [x] `+`, `-`, `*`, `/`, `**`, `+=`, `-=`, `*=`, `/=`, `**=`
 52 | * Arithmetic Operators between `Array` and `float`.
 53 |   * [x] `+`, `-`, `*`, `/`, `**`, `+=`, `-=`, `*=`, `/=`, `**=`
 54 | * Arithmetic Operators between `float` and `Array`.
 55 |   * [x] `+`, `-`, `*`, `/`, `**`
 56 | * Matrix Multiplication Operator between 1d/2d `Array`s.
 57 |   * [x] `@`
 58 | * Element-wise math functions as `Array`'s member function
 59 |   * [x] `max(other, inplace=False)`, `min(other, inplace=False)`
 60 |   * [x] `abs(inplace=False)`, `sign(inplace=False)`
 61 |   * [x] `sin(inplace=False)`, `cos(inplace=False)`, `tan(inplace=False)`
 62 |   * [x] `asin(inplace=False)`, `acos(inplace=False)`, `atan(inplace=False)`
 63 |   * [x] `sinh(inplace=False)`, `cosh(inplace=False)`, `tanh(inplace=False)`
 64 |   * [x] `asinh(inplace=False)`, `acosh(inplace=False)`, `atanh(inplace=False)`
 65 |   * [x] `exp(inplace=False)`, `log(inplace=False)`
 66 |   * [x] `exp2(inplace=False)`, `log2(inplace=False)`
 67 |   * [x] `sqrt(inplace=False)`, `invsqrt(inplace=False)`
 68 |   * [x] `clamp(min, max, inplace=False)`
 69 | * Reduction as `Array`'s member function
 70 |   * [x] `sum(axis=None)`, `prod(axis=None)`
 71 |   * [x] `maximum(axis=None)`, `minimum(axis=None)`
 72 |   * [x] `mean(axis=None)`
 73 |   * [ ] argmax, argmin
 74 |   * [ ] ...
 75 | * Other `Array` method
 76 |   * [x] `gather(idx: U32Array) -> Array`
 77 |   * [ ] tensordot, shuffle
 78 |   * [ ] ...
 79 | * Broadcast
 80 |   * [x] Explicit broadcast copy (memory inefficient, fallback option)
 81 |     * `broadcast_to(shape)` (used at `clamp`)
 82 |   * [x] Special implementations for element-wise arithmetic operators
 83 |     * `+`, `-`, `*`, `/`, `**`, `+=`, `-=`, `*=`, `/=`, `**=`
 84 |   * [x] Reduction with re-broadcast
 85 |     * `sum`, `prod`, `maximum`, `minimum`, `mean`
 86 | * Pseudo Random Number Generator (PRNG)
 87 |   * [x] xoshiro128++ (`vulkpy.random.Xoshiro128pp(gpu, *, size=None, data=None)`)
 88 |     * `[0, 1)` uniform (`.random(shape=None, buffer=None)`)
 89 |     * Gaussian with Box-Muller (`.normal(shape=None, buffer=None, mean=0.0, stddev=1.0)`)
 90 |   * [ ] pcg32
 91 | * Neural Network
 92 |   * Layers
 93 |     * [x] `Dense`, `ReLU`, `Sigmoid`, `Softmax`
 94 |     * [ ] conv, batch norm, layer norm, ...
 95 |   * Optimizers
 96 |     * [x] `SGD`, `Adam`, `AdaGrad`
 97 |     * [ ] rmsprop, ...
 98 |   * Losses
 99 |     * [x] `CrossEntropyLoss`, `SoftmaxCrossEntropyLoss`, `MSELoss`, `HuberLoss`
100 |     * [ ] ...
101 |   * Initializers
102 |     * [x] `Constant`, `HeNormal`
103 |     * [ ] ...
104 |   * Models
105 |     * [x] `Sequance`
106 |     * [ ] ...
107 |   * [x] Regularization
108 |     * `Lasso(coeff=1.0)`, `Ridge(coeff=1.0)`, `Elastic(L1=1.0, L2=1.0)`
109 |   * [ ] ONNX support
110 |   * [ ] Custom user layer with automatic `backward()` definition.
111 |   * [ ] Define by Run API
112 | 


--------------------------------------------------------------------------------
/debug/01-nn-dense.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import numpy as np
 4 | 
 5 | import vulkpy as vk
 6 | from vulkpy.util import enable_debug
 7 | 
 8 | 
 9 | def debug01(epoch):
10 |     gpu = vk.GPU()
11 |     dense = vk.nn.Dense(gpu, 1, 1)
12 |     mse = vk.nn.MSELoss()
13 | 
14 |     _x = np.arange(100).reshape((-1, 1)) / 50 - 1.0
15 |     _y = _x ** 2
16 | 
17 |     x = vk.Array(gpu, data=_x)
18 |     y = vk.Array(gpu, data=_y)
19 | 
20 |     for e in range(epoch):
21 |         L = mse(dense(x), y)
22 | 
23 |         dense.zero_grad()
24 |         dx = dense.backward(mse.grad())
25 |         dense.update()
26 | 
27 |         print(f"Epoch: {e:4d}, Loss: {L:.6f}")
28 | 
29 | 
30 | if __name__ == "__main__":
31 |     p = argparse.ArgumentParser("debug-01")
32 |     p.add_argument("--api-dump", action="store_true")
33 |     p.add_argument("--epoch", type=int, default=100)
34 |     p = p.parse_args()
35 | 
36 |     enable_debug(api_dump=p.api_dump)
37 |     debug01(p.epoch)
38 | 


--------------------------------------------------------------------------------
/doc/api.md:
--------------------------------------------------------------------------------
 1 | # API Reference
 2 | 
 3 | ```{eval-rst}
 4 | .. automodapi:: vulkpy
 5 | 
 6 | .. automodapi:: vulkpy.random
 7 | 
 8 | .. automodapi:: vulkpy.nn
 9 | 
10 | .. automodapi:: vulkpy.util
11 | ```
12 | 


--------------------------------------------------------------------------------
/doc/broadcasting.md:
--------------------------------------------------------------------------------
  1 | # Broadcasting
  2 | 
  3 | vulkpy obeys [NumPy broadcasting rule](https://numpy.org/doc/stable/user/basics.broadcasting.html).
  4 | 
  5 | 
  6 | We implement 3 patterns of broadcasting implementations.
  7 | 
  8 | 
  9 | ## 1. Simple Copy
 10 | The simplest broadcasting is to create a new broadcasted array.
 11 | Usually broadcasting is executed just before other operations,
 12 | so this implementation might allocate an unnecessary temporary array.
 13 | This is memory- and computationally-inefficient,
 14 | but it works fine in most cases.
 15 | Actually, we still use this in the `clamp()` method.
 16 | 
 17 | Users can execute this broadcasting by `broadcast_to(shape)` method.
 18 | 
 19 | ```python
 20 | import vulkpy as vk
 21 | 
 22 | gpu = vk.GPU()
 23 | a = vk.Array(gpu, data=[1, 2])
 24 | 
 25 | b = a.broadcast_to((2, 2))
 26 | # => [[1, 2], [1, 2]]
 27 | ```
 28 | 
 29 | ````{note}
 30 | In Vulkan compute shader, we can use only 3 global indices at most.
 31 | They are not sufficient to point to elements of an
 32 | `N`-dimensional array directly.
 33 | Instead, we utilize linearly flattened index
 34 | and calculate the position from it on GPU.
 35 | We assume this index calculation is computationally inefficient.
 36 | 
 37 | The following is a partial code of `broadcast.comp`.
 38 | 
 39 | ```glsl
 40 | void main(){
 41 |   uint i = gl_GlobalInvocationID.x;
 42 |   if(i >= params.size[1]){ return; }
 43 | 
 44 |   uint i_tmp = i;
 45 |   uint j = 0;
 46 |   uint sizeA = params.size[0];
 47 |   uint sizeB = params.size[1];
 48 |   for(uint dim = 0; dim < params.ndim; dim++){
 49 |     sizeA = sizeA / a_shape[dim];
 50 |     sizeB = sizeB / b_shape[dim];
 51 | 
 52 |     uint d = min(i_tmp / sizeB, a_shape[dim]-1);
 53 |     j += d * sizeA;
 54 | 
 55 |     i_tmp = i_tmp % sizeB;
 56 |   }
 57 | 
 58 |   b[i] = a[j];
 59 | }
 60 | ```
 61 | ````
 62 | 
 63 | ## 2. Special Implementation
 64 | We also provide special implementations for some operations.
 65 | For example, a compute shader `add_broadcast.comp` implements
 66 | a fused operation of broadcasting and addition.
 67 | Although we still need index calculation, we can omit temporary memory allocation.
 68 | 
 69 | Users don't need to call these special implementations explicitly;
 70 | if an operation is supported, its special implementation is used automatically.
 71 | 
 72 | ```python
 73 | import vulkpy as vk
 74 | 
 75 | gpu = vk.GPU()
 76 | a = vk.Array(gpu, data=[1, 2])
 77 | b = vk.Array(gpu, data=[[1, 2], [3, 4]])
 78 | 
 79 | c = a + b
 80 | # => [[2, 4], [4, 6]]
 81 | ```
 82 | 
 83 | ```{note}
 84 | For inplace operations, only `other` (non-inplaced) array
 85 | can be broadcasted because we cannot grow already allocated memory.
 86 | 
 87 | Since we can skip index computation for inplaced array,
 88 | inplace broadcasting is more efficient in terms of not only memory
 89 | but also computation.
 90 | ```
 91 | 
 92 | 
 93 | ## 3. Re-broadcasting of Reduction
 94 | 
 95 | For specific use cases like softmax, broadcasting is executed just after reduction.
 96 | We call such a use case re-broadcasting.
 97 | 
 98 | In re-broadcasting, inefficient index calculation is not necessary, so
 99 | that it is more efficient in terms of computation.
100 | 
101 | Users can pass `rebroadcast=True` to reduction methods;
102 | 
103 | ```python
104 | import vulkpy as vk
105 | 
106 | gpu = vk.GPU()
107 | a = vk.Array(gpu, data=[[1, 2, 3], [4, 5, 6]])
108 | 
109 | b = a.mean(axis=0, rebroadcast=True)
110 | # => [[2.5, 3.5, 4.5], [2.5, 3.5, 4.5]]
111 | ```
112 | 


--------------------------------------------------------------------------------
/doc/conf.py:
--------------------------------------------------------------------------------
 1 | project = "vulkpy"
 2 | author = "Hiroyuki Yamada"
 3 | copyright = "2023, Hiroyuki Yamada"
 4 | 
 5 | extensions = [
 6 |     'sphinx.ext.napoleon',
 7 |     "sphinx_automodapi.automodapi",
 8 |     'sphinx_automodapi.smart_resolver',
 9 |     'myst_parser'
10 | ]
11 | 
12 | html_title = "vulkpy"
13 | html_theme = "furo"
14 | html_logo = ""
15 | html_favicon = ""
16 | html_show_sourcelink = False
17 | 
18 | html_css_files = [
19 |       "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/fontawesome.min.css",
20 |       "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/solid.min.css",
21 |       "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/brands.min.css",
22 | ]
23 | 
24 | html_theme_options = {
25 |     "footer_icons": [
26 |         {
27 |             "name": "GitHub",
28 |             "url": "https://github.com/ymd-h/vulkpy",
29 |             "html": "",
30 |             "class": "fa-brands fa-github fa-2x",
31 |         },
32 |     ],
33 | }
34 | 
35 | napoleon_include_init_with_doc = True
36 | napoleon_use_admonition_for_examples = True
37 | napoleon_use_admonition_for_notes = True
38 | napoleon_use_admonition_for_references = True
39 | 
40 | numpydoc_show_class_members=False
41 | 
42 | autodoc_class_signature = "separated"
43 | autodoc_default_options = {
44 |     'member-order': 'bysource',
45 |     'class-doc-from':'class',
46 |     'exclude-members': '__dict__, __weakref__, __module__, __new__, __reduce__, __setstate__',
47 | }
48 | 
49 | automodsumm_inherited_members = True
50 | 


--------------------------------------------------------------------------------
/doc/development.md:
--------------------------------------------------------------------------------
  1 | # Contributing and Developer's Guide
  2 | 
  3 | ## Contributing
  4 | Any contributions are welcomed.
  5 | 
  6 | ### Run on Actual GPUs
  7 | One of the most lacking parts is running on various GPUs.
  8 | Any feedback is appreciated.
  9 | 
 10 | 
 11 | ### When You Find Any Problems / Bugs
 12 | Check [issues](https://github.com/ymd-h/vulkpy/issues) first,
 13 | and open new one unless the same problem has been reported.
 14 | 
 15 | 
 16 | 
 17 | ## Developer's Guide
 18 | 
 19 | ### Code Layout
 20 | 
 21 | - `vulkpy/` (Main Package)
 22 |   - `vkarray.py`
 23 |     - Python core implementation
 24 |   - `_vkarray.cc`, `_vkutil.hh`
 25 |     - C++ internal implementation
 26 |   - `shader/`
 27 |     - GPU shaders
 28 |   - `nn/`
 29 |     - Neural Network implementation
 30 |   - `random.py`
 31 |     - Pseudo Random Number Generator (PRNG) implementation
 32 |   - `util.py`
 33 |     - Utility Function implementation
 34 |   - `vktyping.py`
 35 |     - Type implementation
 36 | - `doc/`
 37 |   - Document Site
 38 | - `example/`
 39 |   - Example Codes
 40 | - `test/`
 41 |   - Test Codes
 42 | - `.github/`
 43 |   - CI configuration
 44 | - `README.md`, `LICENSE`
 45 |   - Project-wide information.
 46 | - `setup.py`, `pyproject.toml`, `MANIFEST.in`
 47 |   - Configuration for Package Build
 48 | - `.coveragerc`
 49 |   - Configuration for Coverage
 50 | - `Dockerfile`, `.dockerignore`
 51 |   - Configuration for CI tasks (See below)
 52 | - `mypy.ini`
 53 |   - Configuration for Type Check
 54 | - `.gitignore`
 55 | 
 56 | ### Continuous Integration (CI) & Continuous Delivery (CD)
 57 | We use GitHub Actions for CI/CD, and its configuration is defined at
 58 | `.github/workflows/vulkpy.yaml`
 59 | 
 60 | To make CI independent from the platform as much as possible,
 61 | we define actual CI tasks inside `Dockerfile`.
 62 | 
 63 | - Type Check with [Mypy](https://mypy.readthedocs.io/)
 64 | - Build wheel
 65 | - Run Unit Test & Regression Test
 66 |   - Report Coverage with [unittest-xml-reporting](https://github.com/xmlrunner/unittest-xml-reporting) & [coverage.py](https://coverage.readthedocs.io/)
 67 | - Build Document Site with [Sphinx](https://www.sphinx-doc.org/)
 68 | 
 69 | 
 70 | ### Document Site
 71 | Document site is generated by [Sphinx](https://www.sphinx-doc.org/) during CI/CD.
 72 | We adopt [furo](https://github.com/pradyunsg/furo) theme.
 73 | 
 74 | Most documents are written in markdown (`.md`) and parsed by [MyST](https://myst-parser.readthedocs.io/).
 75 | 
 76 | All markdown files are located flat in the `doc/` directory. Even if we
 77 | restructure the document site in the future, the flat layout prevents broken links.
 78 | 
 79 | 
 80 | API reference is automatically generated from docstring with
 81 | [sphinx-automodapi](https://sphinx-automodapi.readthedocs.io/).
 82 | 
 83 | 
 84 | ### docstring
 85 | To make the usage understandable, all public classes and methods
 86 | should have docstring.
 87 | 
 88 | [Sphinx](https://www.sphinx-doc.org/) generates
 89 | [API reference](https://ymd-h.github.io/vulkpy/api.html)
 90 | from these docstring.
 91 | 
 92 | 
 93 | Basically we obey
 94 | [Numpy's style guide](https://numpydoc.readthedocs.io/en/latest/format.html),
 95 | however, we adopt following [PEP-257](https://peps.python.org/pep-0257/)
 96 | statement for class docstring;
 97 | 
 98 | > The docstring for a class should summarize its behavior and list the
 99 | > public methods and instance variables. If the class is intended to
100 | > be subclassed, and has an additional interface for subclasses, this
101 | > interface should be listed separately (in the docstring). The class
102 | > constructor should be documented in the docstring for its __init__
103 | > method. Individual methods should be documented by their own
104 | > docstring.
105 | 
106 | 
107 | To separate class docstring and `__init__()` docstring,
108 | we configure Sphinx as follows;
109 | 
110 | ```python
111 | autodoc_class_signature = "separated"
112 | autodoc_default_options = {
113 |     "class-doc-from": "class"
114 | }
115 | ```
116 | 


--------------------------------------------------------------------------------
/doc/example.md:
--------------------------------------------------------------------------------
 1 | # Example
 2 | 
 3 | ## 00. Arithmetic
 4 | ```{literalinclude} ../example/00-arithmetic.py
 5 | :language: python
 6 | ```
 7 | 
 8 | 
 9 | ## 01. Random
10 | ```{literalinclude} ../example/01-random.py
11 | :language: python
12 | ```
13 | 
14 | ## 02. Neural Network
15 | ```{literalinclude} ../example/02-nn.py
16 | :language: python
17 | ```
18 | 


--------------------------------------------------------------------------------
/doc/index.md:
--------------------------------------------------------------------------------
 1 | # vulkpy: GPGPU array on Vulkan
 2 | 
 3 | vulkpy is a Python package providing GPGPU computation based on Vulkan.
 4 | 
 5 | ```{warning}
 6 | vulkpy is still under development, so the API might still break without notice.
 7 | ```
 8 | 
 9 | ```{toctree}
10 | :caption: Contents
11 | :maxdepth: 1
12 | 
13 | ./broadcasting.md
14 | ./synchronization.md
15 | ./example.md
16 | ./development.md
17 | ./api.md
18 | ```
19 | 


--------------------------------------------------------------------------------
/doc/synchronization.md:
--------------------------------------------------------------------------------
 1 | # Synchronization
 2 | 
 3 | With Vulkan, GPU operations are executed asynchronously.
 4 | 
 5 | In principle, vulkpy automatically calls `wait()` on the dependent `Job`
 6 | before reading or destructing, so users don't need to call `wait()` explicitly.
 7 | 
 8 | In order to keep necessary resources during GPU execution,
 9 | the result `Array` holds them, too.
10 | 
11 | In case some arrays form a circular reference and memory isn't released,
12 | users can call `wait()` explicitly to clear the references to dependent resources.
13 | 


--------------------------------------------------------------------------------
/example/00-arithmetic.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import numpy as np
 4 | import vulkpy as vk
 5 | from vulkpy.util import enable_debug
 6 | 
 7 | def main():
 8 |     gpu = vk.GPU()
 9 | 
10 |     shape = (100,)
11 |     a = vk.Array(gpu, data=np.full(shape, 3))
12 |     b = vk.Array(gpu, data=np.full(shape, 5))
13 | 
14 |     c = a + b
15 |     c.wait()
16 |     print(c)
17 | 
18 |     d = c - a
19 |     e = d - b
20 |     e.wait()
21 |     print(e)
22 | 
23 |     e += a
24 |     e.wait()
25 |     print(e)
26 | 
27 |     f = e + 5
28 |     f.wait()
29 |     print(f)
30 | 
31 |     f /= 4
32 |     f.wait()
33 |     print(f)
34 | 
35 | if __name__ == "__main__":
36 |     p = argparse.ArgumentParser("00-arithmetic.py")
37 |     p.add_argument("--debug", action="store_true")
38 | 
39 |     args = p.parse_args()
40 |     if args.debug:
41 |         enable_debug()
42 | 
43 |     main()
44 | 


--------------------------------------------------------------------------------
/example/01-random.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import numpy as np
 4 | import vulkpy as vk
 5 | from vulkpy.util import enable_debug
 6 | 
 7 | def main():
 8 |     gpu = vk.GPU()
 9 | 
10 |     r = vk.random.Xoshiro128pp(gpu)
11 | 
12 |     # Sample from [0, 1) uniform distribution
13 |     a = r.random(shape=(10,))
14 |     print(a)
15 | 
16 |     # Sample from normal distribution
17 |     b = r.normal(shape=(10,))
18 |     print(b)
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     p = argparse.ArgumentParser("01-random.py")
23 |     p.add_argument("--debug", action="store_true")
24 | 
25 |     args = p.parse_args()
26 |     if args.debug:
27 |         enable_debug()
28 | 
29 |     main()
30 | 


--------------------------------------------------------------------------------
/example/02-nn.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Example 02: Neural Network for Iris Classification
  3 | ==================================================
  4 | 
  5 | Classify 3-class Iris with Sequential Neural Network.
  6 | The two hidden layers have 128 units each.
  7 | 
  8 | For options, see `python 02-nn.py -h`.
  9 | 
 10 | Notes
 11 | -----
 12 | This example requires scikit-learn (`pip install scikit-learn`)
 13 | """
 14 | import argparse
 15 | import time
 16 | 
 17 | import numpy as np
 18 | from sklearn.datasets import load_iris
 19 | from sklearn.model_selection import train_test_split
 20 | 
 21 | import vulkpy as vk
 22 | from vulkpy.util import enable_debug
 23 | from vulkpy import nn
 24 | 
 25 | 
 26 | def example02(nepoch, batch_size, opt, lr, l1, l2, *, debug = False):
 27 |     if debug:
 28 |         enable_debug(api_dump=False)
 29 | 
 30 |     gpu = vk.GPU()
 31 |     rng = np.random.default_rng()
 32 | 
 33 |     train_x, test_x, train_y, test_y = train_test_split(*load_iris(return_X_y=True),
 34 |                                                         random_state = 777,
 35 |                                                         test_size = 0.2)
 36 | 
 37 |     # Convert to one_hot vector
 38 |     train_y = np.identity(3)[train_y]
 39 |     test_y = np.identity(3)[test_y]
 40 | 
 41 |     print(f"train_x.shape: {train_x.shape}, test_x.shape: {test_x.shape}")
 42 |     print(f"train_y.shape: {train_y.shape}, test_y.shape: {test_y.shape}")
 43 |     assert ((train_x.shape[0] == train_y.shape[0]) and
 44 |             ( test_x.shape[0] ==  test_y.shape[0]))
 45 |     assert train_x.shape[1] == test_x.shape[1] == 4
 46 |     assert train_y.shape[1] == test_y.shape[1] == 3
 47 | 
 48 |     opt = {
 49 |         "adam": lambda lr: nn.Adam(gpu, lr=lr),
 50 |         "sgd": lambda lr: nn.SGD(lr)
 51 |     }[opt](lr)
 52 | 
 53 |     R = None
 54 |     if (l1 is not None) and (l2 is not None):
 55 |         R = nn.Elastic(l1, l2)
 56 |     elif (l1 is not None):
 57 |         R = nn.Lasso(l1)
 58 |     elif (l2 is not None):
 59 |         R = nn.Ridge(l2)
 60 | 
 61 |     # Sequential Model: 4 -> 128 -> 128 -> 3
 62 |     net = nn.Sequence(
 63 |         [
 64 |             nn.Dense(gpu, 4, 128, w_opt=opt, b_opt=opt, w_reg=R, b_reg=R),
 65 |             nn.ReLU(),
 66 |             nn.Dense(gpu, 128, 128, w_opt=opt, b_opt=opt, w_reg=R, b_reg=R),
 67 |             nn.ReLU(),
 68 |             nn.Dense(gpu, 128, 3, w_opt=opt, b_opt=opt, w_reg=R, b_reg=R),
 69 |             nn.Softmax(),
 70 |          ],
 71 |         nn.CrossEntropyLoss(reduce="sum")
 72 |     )
 73 |     idx = np.arange(train_x.shape[0])
 74 | 
 75 |     X = vk.Array(gpu, data=test_x)
 76 |     Y = vk.Array(gpu, data=test_y)
 77 | 
 78 |     train_loss = vk.Array(gpu, shape=(1,))
 79 |     for e in range(nepoch):
 80 |         t = time.perf_counter()
 81 | 
 82 |         rng.shuffle(idx) # TODO: Implement GPU shuffle()
 83 |         train_loss[:] = 0
 84 |         for _idx in idx[::batch_size]:
 85 |             bidx = idx[_idx:_idx+batch_size]
 86 | 
 87 |             x = vk.Array(gpu, data=train_x[bidx])
 88 |             y = vk.Array(gpu, data=train_y[bidx])
 89 | 
 90 |             _, loss = net.train(x, y)
 91 |             train_loss += loss
 92 | 
 93 |         train_loss /= idx.shape[0]
 94 | 
 95 |         pred_y, eval_loss = net.predict(X, Y)
 96 |         pred_class = np.argmax(pred_y, axis=1) # TODO: Implement GPU argmax()
 97 |         accuracy = (np.identity(3)[pred_class] * test_y).sum(axis=1).mean()
 98 | 
 99 |         eval_loss /= idx.shape[0]
100 | 
101 |         dt = time.perf_counter() - t
102 |         print(f"Epoch: {e:3d}, " +
103 |               f"Train Loss: {train_loss[0]:.6f}, " +
104 |               f"Eval Loss: {float(eval_loss.array):.6f}, " +
105 |               f"Eval Acc: {accuracy:.6f} " +
106 |               f"Elapsed: {dt:.6f}s")
107 | 
108 | 
109 | if __name__ == "__main__":
110 |     p = argparse.ArgumentParser("example02")
111 |     p.add_argument("--nepoch", type=int, default=100, help="# of epoch")
112 |     p.add_argument("--batch-size", type=int, default=32, help="size of batch")
113 |     p.add_argument("--debug", action="store_true")
114 |     p.add_argument("--optimizer", choices=["adam", "sgd"], default="adam")
115 |     p.add_argument("--learning-rate", type=float, default=0.0001)
116 |     p.add_argument("--l1", type=float, help="L1 regularization", default=None)
117 |     p.add_argument("--l2", type=float, help="L2 regularization", default=None)
118 |     p = p.parse_args()
119 | 
120 |     example02(nepoch=p.nepoch,
121 |               batch_size=p.batch_size,
122 |               opt=p.optimizer,
123 |               lr=p.learning_rate,
124 |               l1=p.l1,
125 |               l2=p.l2,
126 |               debug=p.debug)
127 | 


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | warn_return_any = True
 3 | warn_unused_configs = True
 4 | 
 5 | [mypy-wblog]
 6 | ignore_missing_imports = True
 7 | 
 8 | [mypy-vulkpy._vkarray]
 9 | ignore_missing_imports = True
10 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel", "pybind11"]
3 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import platform
  3 | from setuptools import setup, find_packages, Extension
  4 | import subprocess
  5 | 
  6 | import pybind11
  7 | 
  8 | pkg = "vulkpy"
  9 | 
 10 | # Compile Compute Shader
 11 | for shader in [
 12 |         "add", "sub", "mul", "div",
 13 |         "iadd", "isub", "imul", "idiv",
 14 |         "add_scalar", "sub_scalar", "mul_scalar", "div_scalar",
 15 |         "iadd_scalar", "isub_scalar", "imul_scalar", "idiv_scalar",
 16 |         "rsub_scalar", "rdiv_scalar",
 17 |         "add_broadcast", "sub_broadcast", "mul_broadcast", "div_broadcast",
 18 |         "iadd_broadcast", "isub_broadcast", "imul_broadcast", "idiv_broadcast",
 19 |         "matmul",
 20 |         "max", "min", "imax", "imin",
 21 |         "max_scalar", "min_scalar", "imax_scalar", "imin_scalar",
 22 |         "max_broadcast", "min_broadcast", "imax_broadcast", "imin_broadcast",
 23 |         "abs", "sign", "iabs", "isign",
 24 |         "sin", "cos", "tan", "asin", "acos", "atan",
 25 |         "isin", "icos", "itan", "iasin", "iacos", "iatan",
 26 |         "sinh", "cosh", "tanh", "asinh", "acosh", "atanh",
 27 |         "isinh", "icosh", "itanh", "iasinh", "iacosh", "iatanh",
 28 |         "exp", "log", "exp2", "log2",
 29 |         "iexp", "ilog", "iexp2", "ilog2",
 30 |         "sqrt", "invsqrt", "isqrt", "iinvsqrt",
 31 |         "pow", "ipow", "pow_scalar", "ipow_scalar", "rpow_scalar",
 32 |         "pow_broadcast", "ipow_broadcast",
 33 |         "clamp", "clamp_sv", "clamp_vs", "clamp_ss",
 34 |         "iclamp", "iclamp_sv", "iclamp_vs", "iclamp_ss",
 35 |         "prng_xoshiro128pp_uint32", "prng_xoshiro128pp_float",
 36 |         "prng_box_muller", "prng_ibox_muller",
 37 |         "prng_randrange",
 38 |         "sum", ("sum_v1.3", "--target-env=vulkan1.1"), "sum_axis",
 39 |         "prod", ("prod_v1.3", "--target-env=vulkan1.1"), "prod_axis",
 40 |         "sum_axis_rebroadcast", "prod_axis_rebroadcast",
 41 |         "maximum", ("maximum_v1.3", "--target-env=vulkan1.1"), "maximum_axis",
 42 |         "minimum", ("minimum_v1.3", "--target-env=vulkan1.1"), "minimum_axis",
 43 |         "maximum_axis_rebroadcast", "minimum_axis_rebroadcast",
 44 |         "broadcast",
 45 |         "batch_affine",
 46 |         "gather", "gather_axis",
 47 |         "nn_cross_entropy", "nn_cross_entropy_backward",
 48 | ]:
 49 |     if isinstance(shader, tuple):
 50 |         shader, flag = shader
 51 |         flag = (flag,)
 52 |     else:
 53 |         shader = shader
 54 |         flag = tuple()
 55 |     s = os.path.join(pkg, "shader", shader)
 56 |     spv = s+".spv"
 57 |     comp = s+".comp"
 58 | 
 59 |     if ((not os.path.exists(spv)) or
 60 |         (os.path.exists(comp) and (os.stat(comp).st_mtime > os.stat(spv).st_mtime))):
 61 |         cmd = subprocess.run(["glslc", *flag, "-o", spv, comp],
 62 |                              capture_output=True, text=True)
 63 |         if cmd.stdout:
 64 |             print(cmd.stdout)
 65 |         if cmd.stderr:
 66 |             print(cmd.stderr)
 67 |         cmd.check_returncode()
 68 | 
 69 | 
 70 | if platform.system() != "Windows":
 71 |     extra_args = {
 72 |         "extra_compile_args": ["-std=c++2a", "-O3", "-march=native", "-Wall"],
 73 |         "extra_link_args": ["-std=c++2a"],
 74 |     }
 75 | else:
 76 |     extra_args = {
 77 |         "extra_compile_args": ["/std:c++20", "/O2", "/Wall"],
 78 |         "extra_link_args": None,
 79 |     }
 80 | 
 81 | ext = [Extension(f"{pkg}._vkarray",
 82 |                  [os.path.join(f"{pkg}", "_vkarray.cc")],
 83 |                  include_dirs=[pybind11.get_include()],
 84 |                  libraries=["vulkan"],
 85 |                  **extra_args)]
 86 | 
 87 | desc = {}
 88 | README = "README.md"
 89 | if os.path.exists(README):
 90 |     with open(README) as f:
 91 |         desc["long_description"] = f.read()
 92 |         desc["long_description_content_type"] = "text/markdown"
 93 | 
 94 | setup(name="vulkpy",
 95 |       version="0.0.8",
 96 |       author="H. Yamada",
 97 |       description="GPGPU array on Vulkan",
 98 |       **desc,
 99 |       url="https://github.com/ymd-h/vulkpy",
100 |       packages=find_packages(),
101 |       ext_modules=ext,
102 |       include_package_data=True,
103 |       install_requires=[
104 |           "typing_extensions",
105 |           "numpy",
106 |           "well-behaved-logging"
107 |       ],
108 |       extras_require={
109 |           "test": ["coverage", "unittest-xml-reporting", "mypy"],
110 |           "doc": ["sphinx", "sphinx-rtd-theme", "myst-parser"],
111 |       },
112 |       classifiers=[
113 |           "Development Status :: 4 - Beta",
114 |           "Environment :: GPU",
115 |           "License :: OSI Approved :: MIT License",
116 |           "Programming Language :: Python :: 3 :: Only",
117 |           "Programming Language :: Python :: Implementation :: CPython",
118 |           "Topic :: Scientific/Engineering :: Artificial Intelligence",
119 |       ])
120 | 


--------------------------------------------------------------------------------
/test/test_nn.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | import numpy as np
  4 | 
  5 | import vulkpy as vk
  6 | from vulkpy import nn, random
  7 | 
  8 | 
  9 | class TestInitializers(unittest.TestCase):
 10 |     @classmethod
 11 |     def setUpClass(cls):
 12 |         cls.gpu = vk.GPU()
 13 | 
 14 |     def test_constant(self):
 15 |         const = nn.Constant(0.0)
 16 |         np.testing.assert_allclose(const(self.gpu, (3,1)), [[0.0], [0.0], [0.0]])
 17 | 
 18 |     def test_he(self):
 19 |         seed = 645
 20 |         shape = (10,)
 21 | 
 22 |         he = nn.HeNormal(self.gpu, input_dim=2, seed=seed)
 23 |         self.assertEqual(he.stddev, 1.0)
 24 | 
 25 |         rng = random.Xoshiro128pp(self.gpu, seed=seed)
 26 | 
 27 |         np.testing.assert_allclose(he(self.gpu, shape), rng.normal(shape=shape))
 28 | 
 29 | 
 30 | class TestOptimizers(unittest.TestCase):
 31 |     @classmethod
 32 |     def setUpClass(cls):
 33 |         cls.gpu = vk.GPU()
 34 | 
 35 |     def test_sgd(self):
 36 |         sgd = nn.SGD(lr=0.01)
 37 | 
 38 |         grad = vk.Array(self.gpu, data=[1, 2, 3])
 39 |         state = sgd.init_state(grad.shape)
 40 | 
 41 |         diff = state.grad2diff(grad)
 42 |         np.testing.assert_allclose(diff, grad * (-0.01))
 43 | 
 44 |     def test_adam(self):
 45 |         adam = nn.Adam(self.gpu)
 46 | 
 47 |         grad = vk.Array(self.gpu, data=[1, 2, 3])
 48 |         state = adam.init_state(grad.shape)
 49 |         self.assertEqual(state.beta1t, 1.0)
 50 |         self.assertEqual(state.beta2t, 1.0)
 51 | 
 52 |         diff = state.grad2diff(grad)
 53 | 
 54 |         self.assertEqual(state.beta1t, adam.beta1)
 55 |         self.assertEqual(state.beta2t, adam.beta2)
 56 | 
 57 |     def test_adagrad(self):
 58 |         adagrad = nn.AdaGrad(self.gpu)
 59 | 
 60 |         grad = vk.Array(self.gpu, data=[1, 2, 3])
 61 |         state = adagrad.init_state(grad.shape)
 62 | 
 63 |         diff = state.grad2diff(grad)
 64 | 
 65 | class TestLayers(unittest.TestCase):
 66 |     @classmethod
 67 |     def setUpClass(cls):
 68 |         cls.gpu = vk.GPU()
 69 | 
 70 |     def test_relu_forward(self):
 71 |         relu = nn.ReLU()
 72 | 
 73 |         x = vk.Array(self.gpu, data=[[-0.2, 0.0, 0.2]])
 74 |         y = relu(x)
 75 | 
 76 |         np.testing.assert_allclose(y, [[0.0, 0.0, 0.2]])
 77 | 
 78 |     def test_relu_backward(self):
 79 |         relu = nn.ReLU()
 80 | 
 81 |         x = vk.Array(self.gpu, data=[[-0.2, 0.0, 0.2]])
 82 |         y = relu(x)
 83 | 
 84 |         dy = vk.Array(self.gpu, data=[[0.7, 0.8, 0.9]])
 85 |         dx = relu.backward(dy)
 86 | 
 87 |         np.testing.assert_allclose(dx, [[0.0, 0.0, 0.9]])
 88 | 
 89 |     def test_sigmoid_forward(self):
 90 |         sigmoid = nn.Sigmoid()
 91 | 
 92 |         d = np.asarray([[-100, -0.1, 0, 10,  100]])
 93 |         x = vk.Array(self.gpu, data=d)
 94 | 
 95 |         y = sigmoid(x)
 96 | 
 97 |         np.testing.assert_allclose(y, 1/(1+np.exp(-d)), rtol=1e-7, atol=1e-7)
 98 | 
 99 |     def test_sigmoid_backward(self):
100 |         sigmoid = nn.Sigmoid()
101 | 
102 |         _x = np.asarray([[-100, -0.1, 0, 10,  100]])
103 |         x = vk.Array(self.gpu, data=_x)
104 |         y = sigmoid(x)
105 | 
106 |         _dy = np.asarray([[0.1, 0.2, 0.3, 0.5, 0.7]])
107 |         dy = vk.Array(self.gpu, data=_dy)
108 | 
109 |         dx = sigmoid.backward(dy)
110 |         np.testing.assert_allclose(dx, dy * y * (1 - y))
111 | 
112 |     def test_softmax(self):
113 |         softmax = nn.Softmax()
114 | 
115 |         x = vk.Array(self.gpu, data=[[1.0, 1.0]])
116 |         y = softmax(x)
117 | 
118 |         np.testing.assert_allclose(y, [[0.5, 0.5]])
119 | 
120 |     def test_softmax_skew(self):
121 |         softmax = nn.Softmax()
122 | 
123 |         x = vk.Array(self.gpu, data=[[100.0, 0.0]])
124 |         y = softmax(x)
125 | 
126 |         np.testing.assert_allclose(y, [[1.0, 0]])
127 | 
128 |     def test_softmax_forward(self):
129 |         softmax = nn.Softmax()
130 | 
131 |         _x = np.asarray([[-100, -0.1, 0, 10, 100]])
132 |         x = vk.Array(self.gpu, data=_x)
133 | 
134 |         y = softmax(x)
135 | 
136 |         exp_x = np.exp(_x - _x.max(axis=1))
137 |         np.testing.assert_allclose(y, exp_x / exp_x.sum(axis=1, keepdims=True),
138 |                                    rtol=1e-7, atol=1e-7)
139 | 
140 |     def test_softmax_backward(self):
141 |         softmax = nn.Softmax()
142 | 
143 |         _x = np.asarray([[-100, -0.1, 0, 10, 100]])
144 |         x = vk.Array(self.gpu, data=_x)
145 | 
146 |         y = softmax(x)
147 | 
148 |         _dy = np.asarray([[0.1, 0.2, 0.3, 0.5, 0.7]])
149 |         dy = vk.Array(self.gpu, data=_dy)
150 | 
151 |         dx = softmax.backward(dy)
152 | 
153 |         np.testing.assert_allclose(dx, dy * y * (1 - y))
154 | 
155 |     def test_dense_zero(self):
156 |         dense = nn.Dense(self.gpu, 2, 2, w_init=nn.Constant(0.0))
157 | 
158 |         x = vk.Array(self.gpu, data=[[1, 2], [3, 4]])
159 |         y = dense(x)
160 | 
161 |         np.testing.assert_allclose(y, [[0, 0], [0, 0]])
162 | 
163 |     def test_dense_bias(self):
164 |         dense = nn.Dense(self.gpu, 2, 2,
165 |                          w_init=nn.Constant(0.0),
166 |                          b_init=nn.Constant(1.0))
167 | 
168 |         x = vk.Array(self.gpu, data=[[1,2], [3,4]])
169 |         y = dense(x)
170 | 
171 |         np.testing.assert_allclose(y, [[1, 1], [1, 1]])
172 | 
173 |     def test_dense(self):
174 |         dense = nn.Dense(self.gpu, 2, 2)
175 | 
176 |         x = vk.Array(self.gpu, data=[[2, 3], [2, 3]])
177 |         y = dense(x)
178 | 
179 |         np.testing.assert_allclose(y[0,:], y[1,:])
180 | 
181 |     def test_dense_backward(self):
182 |         dense = nn.Dense(self.gpu, 2, 2)
183 |         np.testing.assert_allclose(dense.w.grad, [[0, 0], [0, 0]])
184 |         np.testing.assert_allclose(dense.b.grad, [0, 0])
185 | 
186 |         x = vk.Array(self.gpu, data=[[1, 2], [3, 4]])
187 |         y = dense(x)
188 | 
189 |         dy = vk.Array(self.gpu, data=[[4, 2], [1, 3]])
190 |         dx = dense.backward(dy)
191 | 
192 |         np.testing.assert_allclose(dense.w.grad, [[7, 12], [11, 16]])
193 |         np.testing.assert_allclose(dense.b.grad, [5, 5])
194 | 
195 |         _w = dense.w.value
196 |         np.testing.assert_allclose(dx,
197 |                                    [[_w[0,0] * dy[0,0] + _w[1,0] * dy[0,1],
198 |                                      _w[0,1] * dy[0,0] + _w[1,1] * dy[0,1]],
199 |                                     [_w[0,0] * dy[1,0] + _w[1,0] * dy[1,1],
200 |                                      _w[0,1] * dy[1,0] + _w[1,1] * dy[1,1]]])
201 | 
202 | class TestLosses(unittest.TestCase):
203 |     @classmethod
204 |     def setUpClass(cls):
205 |         cls.gpu = vk.GPU()
206 | 
207 |     def test_cross_entropy(self):
208 |         loss = nn.CrossEntropyLoss()
209 | 
210 |         x = vk.Array(self.gpu, data=[[1.0, 0.0]])
211 |         y = vk.Array(self.gpu, data=[[1.0, 0.0]])
212 | 
213 |         L = loss(x, y)
214 |         np.testing.assert_allclose(L, [0.0])
215 | 
216 |     def test_cross_entropy_equal(self):
217 |         loss = nn.CrossEntropyLoss()
218 | 
219 |         x = vk.Array(self.gpu, data=[[0.5, 0.5]])
220 |         y = vk.Array(self.gpu, data=[[0.5, 0.5]])
221 | 
222 |         L = loss(x, y)
223 |         np.testing.assert_allclose(L, [0.6931472])
224 | 
225 |     def test_cross_entropy_default(self):
226 |         loss = nn.CrossEntropyLoss()
227 | 
228 |         _x = np.asarray([[0.7, 0.3], [0.2, 0.8], [1.0, 0.0]])
229 |         _y = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
230 | 
231 |         x = vk.Array(self.gpu, data=_x)
232 |         y = vk.Array(self.gpu, data=_y)
233 | 
234 |         _L = np.sum(-_y * np.log(_x + 1e-8), axis=1)
235 |         L = loss(x, y)
236 |         np.testing.assert_allclose(L, _L.mean(), atol=1e-7, rtol=1e-7)
237 | 
238 |         dx = loss.grad()
239 |         _dx = - _y / (_x + 1e-8)
240 |         np.testing.assert_allclose(dx, _dx / _dx.shape[0])
241 | 
242 |     def test_cross_entropy_mean(self):
243 |         loss = nn.CrossEntropyLoss(reduce="mean")
244 | 
245 |         _x = np.asarray([[0.7, 0.3], [0.2, 0.8], [1.0, 0.0]])
246 |         _y = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
247 | 
248 |         x = vk.Array(self.gpu, data=_x)
249 |         y = vk.Array(self.gpu, data=_y)
250 | 
251 |         _L = np.sum(-_y * np.log(_x + 1e-8), axis=1)
252 |         L = loss(x, y)
253 |         np.testing.assert_allclose(L, _L.mean(), atol=1e-7, rtol=1e-7)
254 | 
255 |         dx = loss.grad()
256 |         _dx = - _y / (_x + 1e-8)
257 |         np.testing.assert_allclose(dx, _dx / _dx.shape[0])
258 | 
259 |     def test_cross_entropy_sum(self):
260 |         loss = nn.CrossEntropyLoss(reduce="sum")
261 | 
262 |         _x = np.asarray([[0.7, 0.3], [0.2, 0.8], [1.0, 0.0]])
263 |         _y = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
264 | 
265 |         x = vk.Array(self.gpu, data=_x)
266 |         y = vk.Array(self.gpu, data=_y)
267 | 
268 |         _L = np.sum(-_y * np.log(_x + 1e-8), axis=1)
269 |         L = loss(x, y)
270 |         np.testing.assert_allclose(L, _L.sum(), atol=1e-7, rtol=1e-7)
271 | 
272 |         dx = loss.grad()
273 |         _dx = - _y / (_x + 1e-8)
274 |         np.testing.assert_allclose(dx, _dx)
275 | 
276 |     def test_softmax_crossentropy(self):
277 |         sce = nn.SoftmaxCrossEntropyLoss()
278 | 
279 |         _x = np.asarray([[100.0, 0.0]])
280 |         x = vk.Array(self.gpu, data=_x)
281 | 
282 |         _y = np.asarray([[1.0, 0.0]])
283 |         y = vk.Array(self.gpu, data=_y)
284 | 
285 |         L = sce(x, y)
286 |         np.testing.assert_allclose(L, [0.0])
287 | 
288 |     def test_softmax_crossentropy_forward_default(self):
289 |         sce = nn.SoftmaxCrossEntropyLoss()
290 | 
291 |         _x = np.asarray([[-1, 0], [10, 15]])
292 |         x = vk.Array(self.gpu, data=_x)
293 | 
294 |         _y = np.asarray([[1, 0], [0, 1]])
295 |         y = vk.Array(self.gpu, data=_y)
296 | 
297 |         L = sce(x, y)
298 | 
299 |         exp_x = np.exp(_x - _x.max(axis=1, keepdims=True))
300 |         _L = (-_y * np.log(exp_x / exp_x.sum(axis=1, keepdims=True))).sum(axis=1)
301 |         np.testing.assert_allclose(L, _L.mean(axis=0), atol=1e-7, rtol=1e-7)
302 | 
303 |     def test_softmax_crossentropy_forward_mean(self):
304 |         sce = nn.SoftmaxCrossEntropyLoss(reduce="mean")
305 | 
306 |         _x = np.asarray([[-1, 0], [10, 15]])
307 |         x = vk.Array(self.gpu, data=_x)
308 | 
309 |         _y = np.asarray([[1, 0], [0, 1]])
310 |         y = vk.Array(self.gpu, data=_y)
311 | 
312 |         L = sce(x, y)
313 | 
314 |         exp_x = np.exp(_x - _x.max(axis=1, keepdims=True))
315 |         _L = (-_y * np.log(exp_x / exp_x.sum(axis=1, keepdims=True))).sum(axis=1)
316 |         np.testing.assert_allclose(L, _L.mean(axis=0), atol=1e-7, rtol=1e-7)
317 | 
318 |     def test_softmax_crossentropy_forward_sum(self):
319 |         sce = nn.SoftmaxCrossEntropyLoss(reduce="sum")
320 | 
321 |         _x = np.asarray([[-1, 0], [10, 15]])
322 |         x = vk.Array(self.gpu, data=_x)
323 | 
324 |         _y = np.asarray([[1, 0], [0, 1]])
325 |         y = vk.Array(self.gpu, data=_y)
326 | 
327 |         L = sce(x, y)
328 | 
329 |         exp_x = np.exp(_x - _x.max(axis=1, keepdims=True))
330 |         _L = (-_y * np.log(exp_x / exp_x.sum(axis=1, keepdims=True))).sum(axis=1)
331 |         np.testing.assert_allclose(L, _L.sum(axis=0), atol=1e-7, rtol=1e-7)
332 | 
333 |     def test_softmax_crossentropy_backward_default(self):
334 |         sce = nn.SoftmaxCrossEntropyLoss()
335 | 
336 |         _x = np.asarray([[-1, 0], [10, 15]])
337 |         x = vk.Array(self.gpu, data=_x)
338 | 
339 |         _y = np.asarray([[1, 0], [0, 1]])
340 |         y = vk.Array(self.gpu, data=_y)
341 | 
342 |         L = sce(x, y)
343 | 
344 |         dx = sce.grad()
345 | 
346 |         exp_x = np.exp(_x - _x.max(axis=1, keepdims=True))
347 |         _L = exp_x / exp_x.sum(axis=1, keepdims=True)
348 |         np.testing.assert_allclose(dx, (_L - _y) / _y.shape[0], atol=1e-7, rtol=1e-7)
349 | 
350 |     def test_softmax_crossentropy_backward_mean(self):
351 |         sce = nn.SoftmaxCrossEntropyLoss(reduce="mean")
352 | 
353 |         _x = np.asarray([[-1, 0], [10, 15]])
354 |         x = vk.Array(self.gpu, data=_x)
355 | 
356 |         _y = np.asarray([[1, 0], [0, 1]])
357 |         y = vk.Array(self.gpu, data=_y)
358 | 
359 |         L = sce(x, y)
360 | 
361 |         dx = sce.grad()
362 | 
363 |         exp_x = np.exp(_x - _x.max(axis=1, keepdims=True))
364 |         _L = exp_x / exp_x.sum(axis=1, keepdims=True)
365 |         np.testing.assert_allclose(dx, (_L - _y) / _y.shape[0], atol=1e-7, rtol=1e-7)
366 | 
367 |     def test_softmax_crossentropy_backward_sum(self):
368 |         sce = nn.SoftmaxCrossEntropyLoss(reduce="sum")
369 | 
370 |         _x = np.asarray([[-1, 0], [10, 15]])
371 |         x = vk.Array(self.gpu, data=_x)
372 | 
373 |         _y = np.asarray([[1, 0], [0, 1]])
374 |         y = vk.Array(self.gpu, data=_y)
375 | 
376 |         L = sce(x, y)
377 | 
378 |         dx = sce.grad()
379 | 
380 |         exp_x = np.exp(_x - _x.max(axis=1, keepdims=True))
381 |         _L = exp_x / exp_x.sum(axis=1, keepdims=True)
382 |         np.testing.assert_allclose(dx, _L - _y, atol=1e-7, rtol=1e-7)
383 | 
384 |     def test_mse_loss_default(self):
385 |         mse = nn.MSELoss()
386 | 
387 |         _x = np.asarray([[4, 2], [1, 1.5]])
388 |         x = vk.Array(self.gpu, data=_x)
389 | 
390 |         _y = np.asarray([[3, 2.2], [0.7, 1.5]])
391 |         y = vk.Array(self.gpu, data=_y)
392 | 
393 |         L = mse(x, y)
394 |         dx = mse.grad()
395 | 
396 |         np.testing.assert_allclose(L, np.square(_y - _x).sum(axis=1).mean(axis=0),
397 |                                    atol=1e-7, rtol=1e-7)
398 |         np.testing.assert_allclose(dx, (_x - _y),
399 |                                    atol=1e-7, rtol=1e-7)
400 | 
401 |     def test_mse_loss_mean(self):
402 |         mse = nn.MSELoss(reduce="mean")
403 | 
404 |         _x = np.asarray([[4, 2], [1, 1.5]])
405 |         x = vk.Array(self.gpu, data=_x)
406 | 
407 |         _y = np.asarray([[3, 2.2], [0.7, 1.5]])
408 |         y = vk.Array(self.gpu, data=_y)
409 | 
410 |         L = mse(x, y)
411 |         dx = mse.grad()
412 | 
413 |         np.testing.assert_allclose(L, np.square(_y - _x).sum(axis=1).mean(axis=0),
414 |                                    atol=1e-7, rtol=1e-7)
415 |         np.testing.assert_allclose(dx, (_x - _y),
416 |                                    atol=1e-7, rtol=1e-7)
417 | 
418 |     def test_mse_loss_sum(self):
419 |         mse = nn.MSELoss(reduce="sum")
420 | 
421 |         _x = np.asarray([[4, 2], [1, 1.5]])
422 |         x = vk.Array(self.gpu, data=_x)
423 | 
424 |         _y = np.asarray([[3, 2.2], [0.7, 1.5]])
425 |         y = vk.Array(self.gpu, data=_y)
426 | 
427 |         L = mse(x, y)
428 |         dx = mse.grad()
429 | 
430 |         np.testing.assert_allclose(L, np.square(_y - _x).sum(axis=1).sum(axis=0),
431 |                                    atol=1e-7, rtol=1e-7)
432 |         np.testing.assert_allclose(dx, 2 * (_x - _y),
433 |                                    atol=1e-7, rtol=1e-7)
434 | 
435 |     def test_huber_loss_default(self):
436 |         huber = nn.HuberLoss()
437 | 
438 |         _x = np.asarray([[1.0, 2.2], [-3.0, 0.7]])
439 |         x = vk.Array(self.gpu, data=_x)
440 | 
441 |         _y = np.asarray([[10, 3.0], [-5, 0.5]])
442 |         y = vk.Array(self.gpu, data=_y)
443 | 
444 |         L = huber(x, y)
445 |         dx = huber.grad()
446 | 
447 |         np.testing.assert_allclose(L, [2.92])
448 |         np.testing.assert_allclose(dx, [[-0.5, -0.4], [0.5, 0.1]])
449 | 
450 |     def test_huber_loss_mean(self):
451 |         huber = nn.HuberLoss(reduce="mean")
452 | 
453 |         _x = np.asarray([[1.0, 2.2], [-3.0, 0.7]])
454 |         x = vk.Array(self.gpu, data=_x)
455 | 
456 |         _y = np.asarray([[10, 3.0], [-5, 0.5]])
457 |         y = vk.Array(self.gpu, data=_y)
458 | 
459 |         L = huber(x, y)
460 |         dx = huber.grad()
461 | 
462 |         np.testing.assert_allclose(L, [2.92])
463 |         np.testing.assert_allclose(dx, [[-0.5, -0.4], [0.5, 0.1]])
464 | 
465 |     def test_huber_loss_sum(self):
466 |         huber = nn.HuberLoss(reduce="sum")
467 | 
468 |         _x = np.asarray([[1.0, 2.2], [-3.0, 0.7]])
469 |         x = vk.Array(self.gpu, data=_x)
470 | 
471 |         _y = np.asarray([[10, 3.0], [-5, 0.5]])
472 |         y = vk.Array(self.gpu, data=_y)
473 | 
474 |         L = huber(x, y)
475 |         dx = huber.grad()
476 | 
477 |         np.testing.assert_allclose(L, [5.84])
478 |         np.testing.assert_allclose(dx, [[-1.0, -0.8], [1.0, 0.2]])
479 | 
480 | class TestRegularizer(unittest.TestCase):
481 |     @classmethod
482 |     def setUpClass(cls):
483 |         from vulkpy.nn.parameters import Parameter
484 |         cls.gpu = vk.GPU()
485 |         cls.P = Parameter
486 | 
487 |     def test_ridge_zero(self):
488 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(0.0))
489 |         R = nn.Ridge(1.0)
490 | 
491 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(0.0))
492 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((0.0, )))
493 | 
494 |     def test_ridge(self):
495 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(3.5))
496 |         R = nn.Ridge(1.0)
497 | 
498 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(3.5 ** 2))
499 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((2 * 3.5,)))
500 | 
501 |     def test_ridge_negative(self):
502 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(-3.5))
503 |         R = nn.Ridge(1.0)
504 | 
505 |         np.testing.assert_allclose(R.loss(p.value), np.asarray((-3.5) ** 2))
506 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((2 * -3.5,)))
507 | 
508 |     def test_lasso_zero(self):
509 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(0.0))
510 |         R = nn.Lasso(1.0)
511 | 
512 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(0.0))
513 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((0.0, )))
514 | 
515 |     def test_lasso(self):
516 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(3.5))
517 |         R = nn.Lasso(1.0)
518 | 
519 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(3.5))
520 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((1.0,)))
521 | 
522 |     def test_lasso_negative(self):
523 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(-3.5))
524 |         R = nn.Lasso(1.0)
525 | 
526 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(3.5))
527 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((-1.0,)))
528 | 
529 |     def test_elastic_zero(self):
530 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(0.0))
531 |         R = nn.Elastic(1.0, 1.0)
532 | 
533 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(0.0))
534 |         np.testing.assert_allclose(R.grad(p.value), np.asarray(0.0,))
535 | 
536 |     def test_elastic(self):
537 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(3.5))
538 |         R = nn.Elastic(1.0, 1.0)
539 | 
540 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(3.5 ** 2 + 3.5))
541 | 
542 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((2 * 3.5 + 1.0,)))
543 | 
544 |     def test_elastic_negative(self):
545 |         p = self.P(self.gpu, (1,), initializer=nn.Constant(-3.5))
546 |         R = nn.Elastic(1.0, 1.0)
547 | 
548 |         np.testing.assert_allclose(R.loss(p.value), np.asarray(3.5 ** 2 + 3.5))
549 |         np.testing.assert_allclose(R.grad(p.value), np.asarray((2 * -3.5 - 1.0,)))
550 | 
# Run this module's tests through the unittest runner when the file is
# executed directly; importing the module does not trigger a run.
if __name__ == "__main__":
    unittest.main()
553 | 


--------------------------------------------------------------------------------
/test/test_random.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | 
  3 | import numpy as np
  4 | import vulkpy as vk
  5 | from vulkpy.util import enable_debug
  6 | 
  7 | 
  8 | class TestRandom(unittest.TestCase):
  9 |     @classmethod
 10 |     def setUpClass(cls):
 11 |         cls.gpu = vk.GPU()
 12 | 
 13 |     def test_random(self):
 14 |         rng = vk.random.Xoshiro128pp(self.gpu)
 15 |         a = rng.random(shape=(3,))
 16 | 
 17 |         a.wait()
 18 |         np.testing.assert_allclose(np.asarray(a).shape, (3,))
 19 |         self.assertTrue((0 <= np.asarray(a)).all())
 20 |         self.assertTrue((np.asarray(a) < 1.0).all())
 21 | 
 22 |     def test_random_seed(self):
 23 |         rng1 = vk.random.Xoshiro128pp(self.gpu, seed=0)
 24 |         a = rng1.random(shape=(5,))
 25 | 
 26 |         rng2 = vk.random.Xoshiro128pp(self.gpu, seed=0)
 27 |         b = rng2.random(shape=(5,))
 28 | 
 29 |         a.wait()
 30 |         b.wait()
 31 |         np.testing.assert_allclose(a, b)
 32 |         self.assertTrue((0 <= np.asarray(a)).all())
 33 |         self.assertTrue((np.asarray(a) < 1.0).all())
 34 |         self.assertTrue((0 <= np.asarray(b)).all())
 35 |         self.assertTrue((np.asarray(b) < 1.0).all())
 36 | 
 37 |     def test_middle(self):
 38 |         rng = vk.random.Xoshiro128pp(self.gpu)
 39 |         a = rng.random(shape=(17,))
 40 | 
 41 |         a.wait()
 42 |         np.testing.assert_allclose(np.asarray(a).shape, (17,))
 43 |         self.assertTrue((0 <= np.asarray(a)).all())
 44 |         self.assertTrue((np.asarray(a) < 1.0).all())
 45 | 
 46 |     def test_larger(self):
 47 |         rng = vk.random.Xoshiro128pp(self.gpu)
 48 |         a = rng.random(shape=(65,))
 49 | 
 50 |         a.wait()
 51 |         np.testing.assert_allclose(np.asarray(a).shape, (65,))
 52 |         self.assertTrue((0 <= np.asarray(a)).all())
 53 |         self.assertTrue((np.asarray(a) < 1.0).all())
 54 | 
 55 |     def test_higher_dimension(self):
 56 |         rng = vk.random.Xoshiro128pp(self.gpu)
 57 |         a = rng.random(shape=(5, 5, 5))
 58 | 
 59 |         a.wait()
 60 |         np.testing.assert_allclose(np.asarray(a).shape, (5, 5, 5))
 61 |         self.assertTrue((0 <= np.asarray(a)).all())
 62 |         self.assertTrue((np.asarray(a) < 1.0).all())
 63 | 
 64 |     def test_buffer(self):
 65 |         rng = vk.random.Xoshiro128pp(self.gpu)
 66 |         a = vk.Array(self.gpu, shape=(5,))
 67 |         a = rng.random(buffer=a)
 68 |         a.wait()
 69 |         np.testing.assert_allclose(np.asarray(a).shape, (5,))
 70 |         self.assertTrue((0 <= np.asarray(a)).all())
 71 |         self.assertTrue((np.asarray(a) < 1.0).all())
 72 | 
 73 |     def test_normal_even(self):
 74 |         rng1 = vk.random.Xoshiro128pp(self.gpu, seed=0)
 75 |         rng2 = vk.random.Xoshiro128pp(self.gpu, seed=0)
 76 | 
 77 |         a1 = rng1.normal(shape=(10,))
 78 |         a2 = rng2.normal(shape=(10,), mean=5, stddev=3)
 79 | 
 80 |         np.testing.assert_allclose((a2 - 5) / a1, np.full((10,), 3), rtol=1e-5)
 81 | 
 82 |     def test_normal_odd(self):
 83 |         rng1 = vk.random.Xoshiro128pp(self.gpu, seed=0)
 84 |         rng2 = vk.random.Xoshiro128pp(self.gpu, seed=0)
 85 | 
 86 |         a1 = rng1.normal(shape=(11,))
 87 |         a2 = rng2.normal(shape=(11,), mean=5, stddev=3)
 88 | 
 89 |         np.testing.assert_allclose((a2 - 5) / a1, np.full((11,), 3), rtol=1e-5)
 90 | 
 91 |     def test_randint(self):
 92 |         rng = vk.random.Xoshiro128pp(self.gpu)
 93 |         a = rng.randint(shape=(5,))
 94 | 
 95 |         np.testing.assert_allclose(a.shape, (5,))
 96 | 
 97 |         self.assertTrue(np.all((0 <= np.asarray(a)) & (np.asarray(a) < (2 ** 32))))
 98 | 
 99 |     def test_randrange(self):
100 |         rng = vk.random.Xoshiro128pp(self.gpu)
101 |         a = rng.randrange(shape=(5,), low=3, high=4)
102 | 
103 |         np.testing.assert_allclose(a, [3, 3, 3, 3, 3])
104 | 
# When the file is executed directly, call ``enable_debug(api_dump=False)``
# before handing control to the unittest runner; importing the module skips
# both calls.
# NOTE(review): presumably this enables vulkpy debugging output without the
# API dump -- confirm against ``vulkpy.util.enable_debug``.
if __name__ == "__main__":
    enable_debug(api_dump=False)
    unittest.main()
108 | 


--------------------------------------------------------------------------------
/vulkpy/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | vulkpy: GPGPU array on Vulkan
 3 | =============================
 4 | 
 5 | vulkpy provides GPGPU computations.
 6 | 
 7 | See Also
 8 | --------
 9 | vulkpy.random : Random Module
10 | vulkpy.nn : Neural Network Module
11 | vulkpy.util : Utility Module
12 | 
13 | 
14 | Examples
15 | --------
16 | >>> import vulkpy as vk
17 | 
18 | >>> gpu = vk.GPU()
19 | >>> a = vk.Array(gpu, data=[1, 2, 3])
20 | >>> b = vk.Array(gpu, data=[3, 3, 3])
21 | 
22 | >>> c = a + b
23 | >>> print(c)
24 | [4., 5., 6.]
25 | """
26 | from .vkarray import GPU, U32Array, Shape, Array, zeros
27 | from . import random
28 | from . import nn
29 | 


--------------------------------------------------------------------------------
/vulkpy/_vkutil.hh:
--------------------------------------------------------------------------------
 1 | #ifndef VKUTIL_HH
 2 | #define VKUTIL_HH
 3 | 
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <ios>
#include <iterator>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
 8 | 
 9 | namespace util {
10 |   constexpr inline std::uint32_t VK_API_VERSION(std::uint32_t major,
11 |                                                 std::uint32_t minor,
12 |                                                 std::uint32_t patch) noexcept {
13 |     return (major << 22) | (minor << 12) | (patch);
14 |   }
15 | 
16 |   template<typename F>
17 |   auto generate_from_range(F&& f, std::uint32_t n) {
18 |     auto v = std::vector<std::invoke_result_t<F, std::uint32_t>>{};
19 |     v.reserve(n);
20 | 
21 |     auto g = [&f, i=std::uint32_t(0)]() mutable { return f(i++); };
22 |     std::generate_n(std::back_inserter(v), n, g);
23 | 
24 |     return v;
25 |   }
26 | 
27 |   std::vector<char> readCode(std::string_view name){
28 |     auto f = std::ifstream(name.data(), std::ios::ate | std::ios::binary);
29 |     if(!f.is_open()){
30 |       throw std::runtime_error("failed to open file");
31 |     }
32 |     auto size = f.tellg();
33 |     f.seekg(0);
34 | 
35 |     auto v = std::vector<char>(size);
36 |     f.read(v.data(), size);
37 | 
38 |     f.close();
39 |     return v;
40 |   }
41 | 
42 | 
  // Cast selected items of a Python list to T, collect them in a local C
  // array, and return the result of calling f with that array.
  //
  // T      : element type each list item is cast to
  // f      : callable invoked with the local T[] array (as an lvalue)
  // pylist : source list; it is indexed at every value of the pack I...
  // The std::integer_sequence argument is unnamed: only its index pack
  // I... is used, to pick which list positions are read.
  //
  // NOTE(review): an empty index pack would declare a zero-length array,
  // which standard C++ does not allow -- callers presumably always pass at
  // least one index; confirm at the call sites.
  template<typename T, typename F, std::size_t ...I>
  auto pylist2array(F&& f, const pybind11::list& pylist,
                    std::integer_sequence<std::size_t, I...>){
    // One element per index in the pack, in pack order.
    T array[]{
      pylist[pybind11::size_t(I)].cast<T>()...
    };
    return f(array);
  }
51 | }
52 | 
53 | #endif
54 | 


--------------------------------------------------------------------------------
/vulkpy/nn/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Neural Network Module (:mod:`vulkpy.nn`)
 3 | ========================================
 4 | 
 5 | Examples
 6 | --------
 7 | >>> import vulkpy as vk
 8 | >>> from vulkpy import nn
 9 | >>> gpu = vk.GPU()
10 | >>> x = vk.Array(gpu, data=[ ... ]) # Features
11 | >>> y = vk.Array(gpu, data=[ ... ]) # Labels
12 | 
13 | Create Optimizer and Model
14 | 
15 | >>> opt = nn.Adam(gpu, lr=1e-4)
16 | >>> net = nn.Sequence(
17 | ...   [
18 | ...     nn.Dense(gpu, 3, 32, w_opt=opt, b_opt=opt),
19 | ...     nn.ReLU(),
20 | ...     nn.Dense(gpu, 32, 4, w_opt=opt, b_opt=opt),
21 | ...     nn.Softmax(),
22 | ...   ],
...   nn.CrossEntropyLoss()
24 | ... )
25 | 
26 | Training Model
27 | 
28 | >>> pred_y, loss = net.train(x, y)
29 | 
30 | Predict with Model
31 | 
32 | >>> pred_y = net.predict(x)
33 | """
34 | 
35 | from .core import (
36 |     Optimizer,
37 |     OptimizerState,
38 |     Loss,
39 |     Regularizer,
40 |     Module,
41 | )
42 | from .initializers import Constant, HeNormal
43 | from .optimizers import (
44 |     SGD, SGDState,
45 |     Adam, AdamState,
46 |     AdaGrad, AdaGradState,
47 | )
48 | from .layers import Dense, ReLU, Sigmoid, Softmax
49 | from .losses import (
50 |     CrossEntropyLoss,
51 |     SoftmaxCrossEntropyLoss,
52 |     MSELoss,
53 |     HuberLoss,
54 | )
55 | from .regularizers import (
56 |     Lasso,
57 |     Ridge,
58 |     Elastic,
59 | )
60 | from .models import Sequence
61 | 


--------------------------------------------------------------------------------
/vulkpy/nn/core.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Neural Network Core Module (:mod:`vulkpy.nn.core`)
  3 | ==================================================
  4 | 
  5 | This module provides abstract base classes for Neural Network.
  6 | """
  7 | from __future__ import annotations
  8 | from typing import Iterable
  9 | 
 10 | from vulkpy.vkarray import GPU, Array
 11 | 
 12 | 
 13 | __all__ = [
 14 |     "OptimizerState",
 15 |     "Optimizer",
 16 |     "Regularizer",
 17 |     "Loss",
 18 |     "Module",
 19 | ]
 20 | 
 21 | 
class OptimizerState:
    """
    Abstract base class for Optimizer State

    See Also
    --------
    vulkpy.nn.Optimizer : Optimizer
    vulkpy.nn.SGDState : OptimizerState subclass for SGD
    vulkpy.nn.AdamState : OptimizerState subclass for Adam
    vulkpy.nn.AdaGradState : OptimizerState subclass for AdaGrad

    Notes
    -----
    Mutable per-parameter values are stored in an instance of this class,
    whereas static global parameters (e.g. learning rate) are
    stored in the ``Optimizer`` class.

    Subclasses of ``OptimizerState`` should implement
    ``OptimizerState.grad2diff()``, which takes accumulated gradients and
    returns the update difference.

    In the standard design, ``OptimizerState`` holds a reference to
    its parent ``Optimizer`` in order to access global parameters.
    """
    def grad2diff(self, grad: Array) -> Array:
        """
        Compute update diff from gradient

        Parameters
        ----------
        grad : vulkpy.Array
            Accumulated gradient

        Returns
        -------
        diff : vulkpy.Array
            Update diff. (``v += opt_state.grad2diff(grad)``)

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        Notes
        -----
        Subclass must implement this method.
        """
        raise NotImplementedError
 63 | 
class Optimizer:
    """
    Abstract base class for Optimizer

    See Also
    --------
    vulkpy.nn.OptimizerState : Optimizer State
    vulkpy.nn.SGD : Optimizer subclass for SGD
    vulkpy.nn.Adam : Optimizer subclass for Adam
    vulkpy.nn.AdaGrad : Optimizer subclass for AdaGrad

    Notes
    -----
    ``Optimizer`` class is designed to be passed to the ``Parameter``
    constructor through the ``Module`` constructor.
    Inside the ``Parameter`` constructor, ``Optimizer.init_state()`` is called
    and the corresponding ``OptimizerState`` is stored in the ``Parameter``
    instance.

    Mutable per-parameter values are stored in the ``OptimizerState`` instance,
    whereas static global parameters (e.g. learning rate) are
    stored in this class.

    To implement a specific optimizer, a subclass of ``Optimizer`` should
    implement the ``Optimizer.init_state()`` method, which returns the
    corresponding subclass of ``OptimizerState``.

    Examples
    --------
    >>> import vulkpy as vk
    >>> gpu = vk.GPU()
    >>>
    >>> adam = vk.nn.Adam(gpu) # Optimizer
    >>> dense = vk.nn.Dense(gpu, 1, 1, w_opt=adam, b_opt=adam) # Module
    """
    def init_state(self, shape: Iterable[int]) -> OptimizerState:
        """
        Create OptimizerState

        Parameters
        ----------
        shape : iterable of ints
            Parameter Shape

        Returns
        -------
        opt_state : vulkpy.nn.OptimizerState
            Optimizer State

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        Notes
        -----
        Subclass must implement this method.
        """
        raise NotImplementedError
116 | 
class Loss:
    """
    Abstract base class for Loss

    See Also
    --------
    vulkpy.nn.CrossEntropyLoss : Cross Entropy Loss
    vulkpy.nn.SoftmaxCrossEntropyLoss : Softmax Cross Entropy Loss
    vulkpy.nn.HuberLoss : Huber Loss
    vulkpy.nn.MSELoss : MSE Loss
    vulkpy.nn.MixLoss : Mixing Loss

    Notes
    -----
    ``Loss`` is designed to be called with two arrays (``loss(x, y)``);
    ``grad()`` takes no arguments, so a subclass has to remember whatever
    it needs from the call.

    Subclass of ``Loss`` must implement ``__call__()`` and ``grad()``.
    """
    def __call__(self, x: Array, y: Array) -> Array:
        """
        Compute Loss

        Parameters
        ----------
        x : vulkpy.Array
            Input features
        y : vulkpy.Array
            Output target/label

        Returns
        -------
        loss : vulkpy.Array
            Loss

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        Notes
        -----
        Subclass must implement this method.
        """
        raise NotImplementedError

    def grad(self) -> Array:
        """
        Compute Gradient

        Returns
        -------
        grad : vulkpy.Array
            Gradient

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        Notes
        -----
        Subclass must implement this method.
        """
        raise NotImplementedError
171 | 
class Regularizer:
    """
    Abstract base class for Regularizer

    See Also
    --------
    vulkpy.nn.Lasso : Lasso (L1) Regularizer
    vulkpy.nn.Ridge : Ridge (L2) Regularizer
    vulkpy.nn.Elastic : Elastic (L1 + L2) Regularizer

    Notes
    -----
    Subclass must implement ``loss()`` and ``grad()``.
    Both methods receive the parameter array as their only argument.
    """
    def loss(self, param: Array) -> Array:
        """
        Compute Regularizer Loss

        Parameters
        ----------
        param : vulkpy.Array
            Parameters

        Returns
        -------
        loss : vulkpy.Array
            Loss

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        Notes
        -----
        Subclass must implement this method.
        """
        raise NotImplementedError

    def grad(self, param: Array) -> Array:
        """
        Compute Gradient

        Parameters
        ----------
        param : vulkpy.Array
            Parameters

        Returns
        -------
        grad : vulkpy.Array
            Gradient

        Raises
        ------
        NotImplementedError
            Always, in this base class.

        Notes
        -----
        Subclass must implement this method.
        """
        raise NotImplementedError
225 | 
226 | 
class Module:
    """
    Abstract base class for Module

    See Also
    --------
    vulkpy.nn.Dense : Dense Layer (subclass)
    vulkpy.nn.ReLU : ReLU Layer (subclass)
    vulkpy.nn.Sigmoid : Sigmoid Layer (subclass)
    vulkpy.nn.Softmax : Softmax Layer (subclass)
    vulkpy.nn.Sequence : Sequential Model

    Notes
    -----
    ``Module`` is designed as the base of Neural Network layers.

    A subclass has to provide ``forward()`` and ``backward()``.
    ``zero_grad()`` and ``update()`` do nothing here and may be
    overridden when the layer owns trainable parameters.
    """

    def __call__(self, x: Array) -> Array:
        """
        Run the layer on ``x`` and remember input and output

        Parameters
        ----------
        x : vulkpy.Array
            Input

        Returns
        -------
        y : vulkpy.Array
            Output of ``forward(x)``

        Raises
        ------
        ValueError
            If input (``x``) has fewer than 2 dimensions.

        Notes
        -----
        The input is kept as ``self._x`` and the output as ``self._y``
        so that they are available for training.
        """
        n_dims = len(x.shape)
        if n_dims < 2:
            raise ValueError("Input must have at least 2-dimensions.")

        # Store the input first: it is already set while forward() runs.
        self._x = x
        out = self.forward(x)
        self._y = out
        return out

    def forward(self, x: Array) -> Array:
        """
        Forward Calculation

        Parameters
        ----------
        x : vulkpy.Array
            Input features

        Returns
        -------
        y : vulkpy.Array
            Output

        Notes
        -----
        Abstract here; every subclass has to override it.
        """
        raise NotImplementedError

    def backward(self, dy: Array) -> Array:
        """
        Backward Calculation

        Parameters
        ----------
        dy : vulkpy.Array
            dL/dy propagated from following layer

        Returns
        -------
        dx : vulkpy.Array
            dL/dx propagated to previous layer

        Notes
        -----
        Abstract here; every subclass has to override it.
        """
        raise NotImplementedError

    def zero_grad(self):
        """
        Reset accumulated gradients to 0.

        Notes
        -----
        No-operation in the base class; subclasses may override.
        """
        return None

    def update(self):
        """
        Update parameters based on accumulated gradients

        Notes
        -----
        No-operation in the base class; subclasses may override.
        """
        return None
338 | 


--------------------------------------------------------------------------------
/vulkpy/nn/initializers.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Neural Network Initializer Module (:mod:`vulkpy.nn.initializers`)
 3 | =================================================================
 4 | """
 5 | from __future__ import annotations
 6 | from typing import Iterable, Optional
 7 | 
 8 | import numpy as np
 9 | 
10 | from vulkpy.vkarray import GPU, Array
11 | from vulkpy.random import Xoshiro128pp
12 | 
13 | 
14 | __all__ = ["Constant", "HeNormal"]
15 | 
class Initializer:
    """
    Abstract base class for parameter initializers

    Notes
    -----
    Subclass must implement ``__call__()``.
    """
    def __call__(self, gpu: GPU, shape: Iterable[int]) -> Array:
        """
        Initialize new parameters

        Parameters
        ----------
        gpu : vulkpy.GPU
            GPU
        shape : iterable of ints
            Parameter shape

        Returns
        -------
        vulkpy.Array
            Newly initialized parameters

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError
19 | 
20 | 
class Constant(Initializer):
    """
    Initializer which fills every element with one fixed value
    """
    def __init__(self, value: float):
        """
        Initialize Constant Initializer

        Parameters
        ----------
        value : float
            Value written to every element of a new parameter array
        """
        self.value = value

    def __call__(self, gpu: GPU, shape: Iterable[int]) -> Array:
        """
        Create a new array of ``shape`` filled with the constant

        Parameters
        ----------
        gpu : vulkpy.GPU
            GPU on which the array is created
        shape : iterable of ints
            Parameter shape

        Returns
        -------
        vulkpy.Array
            New array whose elements are all set to ``self.value``
        """
        filled = Array(gpu, shape=shape)
        filled[:] = self.value
        return filled
50 | 
51 | 
class HeNormal(Initializer):
    r"""
    He Normal Initializer

    Note
    ----
    Samples come from the generator's ``normal()`` with standard
    deviation :math:`\sigma`;

    .. math:: \sigma = \sqrt{2/d_{\text{in}}}
    """
    def __init__(self, gpu: GPU, input_dim: int, *, seed: Optional[int] = None):
        """
        Initialize He Normal Initializer

        Parameters
        ----------
        gpu : vulkpy.GPU
            GPU used to construct the random number generator
        input_dim : int
            Input dimension (:math:`d_{\\text{in}}` above)
        seed : int, optional
            Initial seed for PRNG
        """
        self.rng = Xoshiro128pp(gpu, seed=seed)

        variance = 2 / input_dim
        self.stddev = np.sqrt(variance)

    def __call__(self, gpu: GPU, shape: Iterable[int]):
        """
        Draw new parameters of ``shape``

        Parameters
        ----------
        gpu : vulkpy.GPU
            GPU. Not used here; sampling goes through the generator
            created in the constructor.
        shape : iterable of ints
            Parameter shape

        Returns
        -------
        Result of ``self.rng.normal(shape=shape, stddev=self.stddev)``
        """
        sampled = self.rng.normal(shape=shape, stddev=self.stddev)
        return sampled
90 | 


--------------------------------------------------------------------------------
/vulkpy/nn/layers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Neural Network Layer Module (:mod:`vulkpy.nn.layers`)
  3 | =====================================================
  4 | """
  5 | from __future__ import annotations
  6 | from typing import Callable, Iterable, Optional
  7 | 
  8 | from vulkpy.util import getShader
  9 | from vulkpy.vkarray import GPU, Array, DataShape, BatchAffineParams
 10 | from .core import Module, Optimizer, Regularizer
 11 | from .parameters import Parameter
 12 | from .initializers import HeNormal
 13 | 
 14 | 
 15 | __all__ = ["Dense", "ReLU", "Sigmoid", "Softmax"]
 16 | 
 17 | 
 18 | class Dense(Module):
 19 |     """
 20 |     Fully connected Dense Layer
 21 |     """
 22 | 
 23 |     _batch_affine = getShader("batch_affine.spv")
 24 | 
 25 |     def __init__(self, gpu: GPU, input_dim: int, output_dim: int, *,
 26 |                  w_init: Optional[Callable[[GPU, Iterable[int]], Array]] = None,
 27 |                  b_init: Optional[Callable[[GPU, Iterable[int]], Array]] = None,
 28 |                  w_opt: Optional[Optimizer] = None,
 29 |                  b_opt: Optional[Optimizer] = None,
 30 |                  w_reg: Optional[Regularizer] = None,
 31 |                  b_reg: Optional[Regularizer] = None):
 32 |         """
 33 |         Initialize Dense
 34 | 
 35 |         Parameters
 36 |         ----------
 37 |         gpu : vulkpy.GPU
 38 |             GPU
 39 |         input_dim : int
 40 |             Input dimension
 41 |         output_dim : int
 42 |             Output dimension
 43 |         w_init Callable, optional
 44 |             Weight initializer. If ``None`` (default),
 45 |             ``vulkpy.nn.HeNormal`` is used.
 46 |         b_init Callable, optional
 47 |             Bias initializer. If ``None`` (default),
 48 |             bias is initialized with ``0``.
 49 |         w_opt : vulkpy.nn.Optimizer, optional
 50 |             Weight Optimizer. If ``None`` (default),
 51 |             ``vulkpy.nn.Adam`` is used.
 52 |         b_opt : vulkpy.nn.Optimizer, optional
 53 |             Bias Optimizer. If ``None`` (default),
 54 |             ``vulkpy.nn.Adam`` is used.
 55 |         w_reg : vulkpy.nn.Regularizer, optional
 56 |             Weight Regularizer.
 57 |         b_reg : vulkpy.nn.Regularizer, optional
 58 |             Bias Regularizer
 59 |         """
 60 |         self.input_dim = int(input_dim)
 61 |         self.output_dim = int(output_dim)
 62 | 
 63 |         if w_init is None:
 64 |             w_init = HeNormal(gpu, self.input_dim)
 65 | 
 66 |         self.w = Parameter(gpu, shape=(self.output_dim, self.input_dim),
 67 |                            initializer=w_init, opt=w_opt, regularizer=w_reg)
 68 |         self.b = Parameter(gpu, shape=(self.output_dim,),
 69 |                            initializer=b_init, opt=b_opt, regularizer=b_reg)
 70 | 
 71 |     def forward(self, x: Array) -> Array:
 72 |         r"""
 73 |         Forward
 74 | 
 75 |         Parameters
 76 |         ----------
 77 |         x : vulkpy.Array
 78 |             Batch input
 79 | 
 80 |         Returns
 81 |         -------
 82 |         vulkpy.Array
 83 |             Batch output
 84 | 
 85 |         Notes
 86 |         -----
 87 |         .. math:: y = Wx + b
 88 | 
 89 |         .. warning::
 90 | 
 91 |              Generally, users should not call this method directly.
 92 |              Use ``__call__`` instead, where input / output are stored for training.
 93 |         """
 94 |         y = Array(x._gpu, shape=(x.shape[0], self.output_dim))
 95 |         y.job = x._gpu._submit(self._batch_affine, 1, 64, 1,
 96 |                                [self.w.value, self.b.value, x, y],
 97 |                                DataShape(x.shape[0], self.output_dim, 1),
 98 |                                BatchAffineParams(x.shape[0],
 99 |                                                  x.shape[1],
100 |                                                  self.output_dim))
101 |         y._keep.extend([self.w.value, self.b.value, x])
102 |         return y
103 | 
104 |     def backward(self, dy: Array) -> Array:
105 |         r"""
106 |         Backward
107 | 
108 |         Parameters
109 |         ----------
110 |         dy : vulkpy.Array
111 |             Batch grad
112 | 
113 |         Returns
114 |         -------
115 |         vulkpy.Array
116 |             Batch grad
117 | 
118 |         Notes
119 |         -----
120 |         .. math::
121 | 
122 |             dx = dy W\\
123 |             dW = dy ^T \cdot x\\
124 |             db = dy
125 |         """
126 |         db = dy.sum(axis=0) # Allocate
127 |         self.b.add_grad(db)
128 | 
129 |         x_shape = self._x.shape
130 |         dy_shape = dy.shape
131 |         dy.reshape((dy.shape[0], dy.shape[1], 1))
132 |         self._x.reshape((self._x.shape[0], 1, self._x.shape[1]))
133 | 
134 |         dW = dy * self._x # Allocate
135 |         dW = dW.sum(axis=0) # Allocate
136 |         self.w.add_grad(dW)
137 | 
138 |         self._x.reshape(x_shape)
139 |         dy.reshape(dy_shape)
140 | 
141 |         return dy @ self.w.value # Allocate
142 | 
143 |     def zero_grad(self):
144 |         """
145 |         Clear accumulated gradients
146 |         """
147 |         self.w.zero_grad()
148 |         self.b.zero_grad()
149 | 
150 |     def update(self):
151 |         """
152 |         Update values with accumulated gradients
153 |         """
154 |         self.w.update()
155 |         self.b.update()
156 | 
157 | 
class ReLU(Module):
    """
    Rectified Linear Unit (ReLU)
    """
    def forward(self, x: Array) -> Array:
        r"""
        Forward

        Parameters
        ----------
        x : vulkpy.Array
            Batch input

        Returns
        -------
        vulkpy.Array
            Batch output

        Notes
        -----
        .. math:: y = \max(x, 0)

        .. warning::

             Users normally go through ``__call__``, which stores the
             input / output needed for training, rather than calling
             this method directly.
        """
        activated = x.max(0.0) # Allocate
        return activated

    def backward(self, dy: Array) -> Array:
        r"""
        Backward

        Parameters
        ----------
        dy : vulkpy.Array
            Batch grad

        Returns
        -------
        vulkpy.Array
            Batch grad

        Notes
        -----
        .. math:: dx = dy \cdot \max(\rm{sign}(y), 0)

        if x == 0, dy/dx => 0
        """
        # Start from sign(y) of the stored output, clamp it at 0 from below,
        # then scale by the incoming gradient.
        grad = self._y.sign() # Allocate
        grad.max(0.0, inplace=True)
        grad *= dy
        return grad
211 | 
212 | 
class Sigmoid(Module):
    """
    Sigmoid
    """
    def forward(self, x: Array) -> Array:
        r"""
        Forward

        Parameters
        ----------
        x : vulkpy.Array
            Batch input

        Returns
        -------
        vulkpy.Array
            Batch output

        Notes
        -----
        .. math:: y = 1/(1 + \exp (-x))

        .. warning::

             Users normally go through ``__call__``, which stores the
             input / output needed for training, rather than calling
             this method directly.
        """
        # Build the denominator 1 + exp(-x) step by step.
        denom = 0.0 - x # Allocate
        denom.exp(inplace=True)
        denom += 1.0
        return 1.0 / denom # Allocate

    def backward(self, dy: Array) -> Array:
        r"""
        Backward

        Parameters
        ----------
        dy : vulkpy.Array
            Batch grad

        Returns
        -------
        vulkpy.Array
            Batch grad

        Notes
        -----
        .. math:: dx = dy \cdot y(1 - y)
        """
        # (1 - y), then times y, then times dy, using the stored output.
        grad = 1.0 - self._y
        grad *= self._y
        grad *= dy
        return grad
268 | 
269 | 
class Softmax(Module):
    """
    SoftMax
    """
    def forward(self, x: Array) -> Array:
        r"""
        Forward

        Parameters
        ----------
        x : vulkpy.Array
            Batch input

        Returns
        -------
        vulkpy.Array
            Batch output

        Notes
        -----
        .. math:: y = \exp (x) / \sum _i \exp(x_i)

        The maximum along ``axis=1`` is subtracted before ``exp``; this
        leaves the result unchanged mathematically.

        .. warning::

             Generally, users should not call this method directly.
             Use ``__call__`` instead, where input / output are stored for training.
        """
        X = x - x.maximum(axis=1, rebroadcast=True)
        X.exp(inplace=True)
        X /= X.sum(axis=1, rebroadcast=True)
        return X

    def backward(self, dy: Array) -> Array:
        r"""
        Backward

        Parameters
        ----------
        dy : vulkpy.Array
            Batch grad

        Returns
        -------
        vulkpy.Array
            Batch grad

        Notes
        -----
        .. math:: dx_i = y_i \left( dy_i - \sum _j dy_j y_j \right)

        Every output :math:`y_j` depends on every input :math:`x_i`
        (:math:`\partial y_j / \partial x_i = y_j (\delta _{ij} - y_i)`),
        so the diagonal term :math:`y(1 - y)` alone is not sufficient.
        The sum runs over ``axis=1``, the axis normalized in ``forward()``.
        """
        # Per-sample inner product <dy, y>, broadcast back to dy's shape.
        weighted = dy * self._y # Allocate
        weighted = weighted.sum(axis=1, rebroadcast=True) # Allocate

        dx = dy - weighted # Allocate
        dx *= self._y
        return dx
324 | 


--------------------------------------------------------------------------------
/vulkpy/nn/losses.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Neural Network Loss Module (:mod:`vulkpy.nn.losses`)
  3 | ====================================================
  4 | 
  5 | Examples
  6 | --------
  7 | >>> import vulkpy as vk
  8 | >>> from vulkpy import nn
  9 | >>> gpu = vk.GPU()
 10 | >>> x = vk.Array(gpu, data=[[ ... ]]) # Predicted
 11 | >>> y = vk.Array(gpu, data=[[ ... ]]) # True
 12 | 
 13 | Loss class takes predicted values and true labels/targets, then returns scalar loss.
 14 | 
>>> L = nn.CrossEntropyLoss()
 16 | >>> loss = L(x, y)
 17 | 
 18 | Gradients can be computed with `grad()` method
 19 | 
 20 | >>> dx = L.grad()
 21 | """
 22 | from __future__ import annotations
 23 | from typing import cast, Callable, Iterable, Literal, Optional, Tuple
 24 | 
 25 | from vulkpy.util import getShader
 26 | from vulkpy.vkarray import Array, DataShape, VectorParams
 27 | from .core import Loss
 28 | from .layers import Softmax
 29 | 
 30 | __all__ = [
 31 |     "CrossEntropyLoss",
 32 |     "SoftmaxCrossEntropyLoss",
 33 |     "MSELoss",
 34 |     "HuberLoss",
 35 |     "MixLoss",
 36 | ]
 37 | 
 38 | 
 39 | F = Callable[[Array], Array]
 40 | class ReduceLoss(Loss):
 41 |     def __init__(self, reduce: Literal["mean", "sum"] = "mean"):
 42 |         tmp: Tuple[F, Optional[F]] = {
 43 |             "mean": (lambda _L: _L.mean(axis=0), lambda _dx: 1/_dx.shape[0]),
 44 |             "sum": (lambda _L: _L.sum(axis=0), None),
 45 |         }[reduce]
 46 |         self.reduce, self.scale_backward = tmp
 47 | 
 48 | 
 49 |     def __call__(self, x: Array, y: Array) -> Array:
 50 |         r"""
 51 |         Compute Loss
 52 | 
 53 |         Parameters
 54 |         ----------
 55 |         x : vulkpy.Array
 56 |             Batch input features
 57 |         y : vulkpy.Array
 58 |             Batch labels/targets
 59 | 
 60 |         Returns
 61 |         -------
 62 |         loss : vulkpy.Array
 63 |             Loss
 64 |         """
 65 |         self._x = x
 66 |         self._y = y
 67 |         L = self.forward(x, y)
 68 |         return self.reduce(L)
 69 | 
 70 |     def grad(self) -> Array:
 71 |         r"""
 72 |         Compute Gradients
 73 | 
 74 |         Returns
 75 |         -------
 76 |         dx : vulkpy.Array
 77 |             Batch gradients of dL/dx
 78 | 
 79 |         Notes
 80 |         -----
 81 |         This method calculates gradients for the last ``__call__(x, y)``.
 82 |         """
 83 |         dx = self.backward()
 84 |         if self.scale_backward is not None:
 85 |             dx *= self.scale_backward(dx)
 86 |         return dx
 87 | 
 88 |     def forward(self, x: Array, y: Array) -> Array:
 89 |         raise NotImplementedError
 90 | 
 91 |     def backward(self) -> Array:
 92 |         raise NotImplementedError
 93 | 
 94 | 
 95 | class CrossEntropyLoss(ReduceLoss):
 96 |     """
 97 |     Cross Entropy Loss (without Softmax)
 98 |     """
 99 |     _forward = getShader("nn_cross_entropy.spv")           # Used by forward()
100 |     _backward = getShader("nn_cross_entropy_backward.spv") # Used by backward()
101 | 
102 |     def __init__(self, *args, **kwargs):
103 |         """
104 |         Initialize Cross Entropy Loss
105 | 
106 |         Parameters
107 |         ----------
108 |         reduce : {"mean", "sum"}, optional
109 |             Reduction method over batch. The default is ``"mean"``.
110 |         """
111 |         super().__init__(*args, **kwargs)
112 | 
113 |     def forward(self, x: Array, y: Array) -> Array:
114 |         r"""
115 |         Forward
116 | 
117 |         Parameters
118 |         ----------
119 |         x : vulkpy.Array
120 |             Batch input features
121 |         y : vulkpy.Array
122 |             Batch input labels as One hot vector
123 | 
124 |         Returns
125 |         -------
126 |         loss : vulkpy.Array
127 |             Cross Entropy Loss summed over axis 1 (not yet reduced over batch)
128 | 
129 |         Notes
130 |         -----
131 |         .. math::
132 | 
133 |              L = - f _{\text{reduce}} ( y_i \log (x_i) )
134 | 
135 |         .. warning::
136 | 
137 |              Generally, users should not call this method directly.
138 |              Use ``__call__`` instead, where ``x`` and ``y`` are stored for ``grad()``.
139 |         """
140 |         size = x.buffer.size() # NOTE(review): assumed to be element count of x
141 |         L = Array(x._gpu, shape=x.shape) # Element-wise output, same shape as x
142 |         L.job = x._gpu._submit(self._forward, 64, 1, 1, # 64: presumably local size
143 |                                [x, y, L],
144 |                                DataShape(size, 1, 1),
145 |                                VectorParams(size))
146 |         L._keep.extend([x, y]) # Presumably keeps inputs alive while job runs
147 |         return L.sum(axis=1)
148 | 
149 |     def backward(self) -> Array:
150 |         r"""
151 |         Backward for ``x`` and ``y`` stored at the last ``__call__``
152 | 
153 |         Returns
154 |         -------
155 |         dx : vulkpy.Array
156 |            Batch gradients
157 | 
158 |         Notes
159 |         -----
160 |         .. math::
161 | 
162 |              dx = \frac{-y}{x + \epsilon}
163 | 
164 |         .. warning::
165 | 
166 |              Generally, users should not call this method directly.
167 |              Use ``grad()`` instead, where reduction scale is corrected.
168 |         """
169 |         size = self._x.buffer.size()
170 |         dx = Array(self._x._gpu, shape=self._x.shape) # Same shape as stored x
171 |         dx.job = self._x._gpu._submit(self._backward, 64, 1, 1,
172 |                                       [self._x, self._y, dx],
173 |                                       DataShape(size, 1, 1),
174 |                                       VectorParams(size))
175 |         dx._keep.extend([self._x, self._y]) # Presumably keeps inputs alive
176 |         return dx
177 | 
178 | 
179 | class SoftmaxCrossEntropyLoss(CrossEntropyLoss):
180 |     """
181 |     Softmax Cross Entropy Loss
182 | 
183 |     See Also
184 |     --------
185 |     vulkpy.nn.Softmax : Softmax layer
186 |     vulkpy.nn.CrossEntropyLoss : Cross Entropy loss without Softmax
187 |     """
188 |     def __init__(self, *args, **kwargs):
189 |         """
190 |         Initialize Softmax Cross Entropy Loss
191 | 
192 |         Parameters
193 |         ----------
194 |         reduce : {"mean", "sum"}
195 |             Reduction method over batch. The default is ``"mean"``.
196 |         """
197 |         super().__init__(*args, **kwargs)
198 |         self._sm = Softmax()
199 | 
200 |     def forward(self, x: Array, y: Array) -> Array:
201 |         r"""
202 |         Forward
203 | 
204 |         Parameters
205 |         ----------
206 |         x : vulkpy.Array
207 |             Batch input features
208 |         y : vulkpy.Array
209 |             Batch labels
210 | 
211 |         Returns
212 |         -------
213 |         loss : vulkpy.Array
214 |             Loss
215 | 
216 |         Notes
217 |         -----
218 |         .. math::
219 | 
220 |              L = - f _{\text{reduce}} (y_i \log (\rm{softmax}(x) _i))
221 | 
222 |         .. warning::
223 | 
224 |              Generally, users should not call this method directly.
225 |              Use ``__call__`` instead, where input / output are stored for training.
226 |         """
227 |         return super().forward(self._sm(x), y)
228 | 
229 |     def backward(self) -> Array:
230 |         r"""
231 |         Backward
232 | 
233 |         Returns
234 |         -------
235 |         loss : vulkpy.Array
236 |            Batch gradients
237 | 
238 |         Notes
239 |         -----
240 |         .. math::
241 | 
242 |              dx = \rm{softmax}(x) - y
243 | 
244 |         .. warning::
245 | 
246 |              Generally, users should not call this method directly.
247 |              Use ``grad()`` instead, where reduction scale is corrected.
248 |         """
249 |         return cast(Array, self._sm._y) - self._y
250 | 
251 | 
252 | class MSELoss(ReduceLoss):
253 |     """
254 |     Mean Squared Loss
255 |     """
256 |     def __init__(self, *args, **kwargs):
257 |         """
258 |         Initialize MSE Loss
259 | 
260 |         Parameters
261 |         ----------
262 |         reduce : {"mean", "sum"}
263 |             Reduction method over batch. The default is ``"mean"``.
264 |         """
265 |         super().__init__(*args, **kwargs)
266 | 
267 |     def forward(self, x: Array, y: Array) -> Array:
268 |         r"""
269 |         Forward
270 | 
271 |         Parameters
272 |         ----------
273 |         x : vulkpy.Array
274 |             Batch input features
275 |         y : vulkpy.Array
276 |             Batch labels
277 | 
278 |         Returns
279 |         -------
280 |         loss : vulkpy.Array
281 |             Loss
282 | 
283 |         Notes
284 |         -----
285 |         .. math::
286 | 
287 |              L = f _{\text{reduce}} |x - y|^2
288 | 
289 |         .. warning::
290 | 
291 |              Generally, users should not call this method directly.
292 |              Use ``__call__`` instead, where input / output are stored for training.
293 |         """
294 |         L = (y - x)          # Allocate
295 |         L **= 2.0
296 |         return L.sum(axis=1) # Allocate
297 | 
298 |     def backward(self) -> Array:
299 |         r"""
300 |         Backward
301 | 
302 |         Returns
303 |         -------
304 |         loss : vulkpy.Array
305 |            Batch gradients
306 | 
307 |         Notes
308 |         -----
309 |         .. math::
310 | 
311 |              dx = 2 (x - y)
312 | 
313 |         .. warning::
314 | 
315 |              Generally, users should not call this method directly.
316 |              Use ``grad()`` instead, where reduction scale is corrected.
317 |         """
318 |         dx = self._x - self._y # Allocate
319 |         dx *= 2
320 |         return dx
321 | 
322 | 
323 | class HuberLoss(ReduceLoss):
324 |     """
325 |     Huber Loss
326 |     """
327 |     def __init__(self, *args, **kwargs):
328 |         """
329 |         Initialize Huber Loss
330 | 
331 |         Parameters
332 |         ----------
333 |         reduce : {"mean", "sum"}
334 |             Reduction method over batch. The default is ``"mean"``.
335 |         """
336 |         super().__init__(*args, **kwargs)
337 | 
338 |     def forward(self, x: Array, y: Array) -> Array:
339 |         r"""
340 |         Forward
341 | 
342 |         Parameters
343 |         ----------
344 |         x : vulkpy.Array
345 |             Batch input features
346 |         y : vulkpy.Array
347 |             Batch labels
348 | 
349 |         Returns
350 |         -------
351 |         loss : vulkpy.Array
352 |             Loss
353 | 
354 |         Notes
355 |         -----
356 |         .. math::
357 | 
358 |              L = 0.5 f _{\text{reduce}} \min(|x - y|^2, |x - y|)
359 | 
360 |         .. warning::
361 | 
362 |              Generally, users should not call this method directly.
363 |              Use ``__call__`` instead, where input / output are stored for training.
364 |         """
365 |         delta = y - x # Allocate
366 |         delta.abs(inplace=True)               # |y-x|
367 |         delta.min(delta ** 2.0, inplace=True) # min(|y-x|^2, |y-x|)
368 |         delta *= 0.5                          # min(|y-x|^2, |y-x|) * 0.5
369 |         return delta.sum(axis=1) # Allocate
370 | 
371 |     def backward(self) -> Array:
372 |         r"""
373 |         Backward
374 | 
375 |         Returns
376 |         -------
377 |         loss : vulkpy.Array
378 |            Batch gradients
379 | 
380 |         Notes
381 |         -----
382 |         .. math::
383 | 
384 |              dx = \text{clamp}(x - y, -1.0, 1.0)
385 | 
386 |         .. warning::
387 | 
388 |              Generally, users should not call this method directly.
389 |              Use ``grad()`` instead, where reduction scale is corrected.
390 |         """
391 |         delta = self._x - self._y
392 |         delta.clamp(-1.0, 1.0, inplace=True)
393 |         return delta
394 | 
395 | 
396 | class MixLoss(Loss):
397 |     """
398 |     Mixing Loss class
399 |     """
400 |     def __init__(self, losses: Iterable[Tuple[float, Loss]]):
401 |         """
402 |         Initializer MixLoss
403 | 
404 |         Parameters
405 |         ----------
406 |         losses : iterable of tuple of float and vulkpy.Loss
407 |             Sets of coefficient and loss.
408 | 
409 |         Raises
410 |         ------
411 |         ValueError
412 |             When losses is empty
413 |         """
414 |         self.L: Tuple[Tuple[float, Loss], ...] = tuple(losses)
415 |         if len(self.L) < 1:
416 |             raise ValueError(f"losses should not empty")
417 | 
418 |     def __call__(self, x: Array, y: Array) -> Array:
419 |         r"""
420 |         Compute Loss
421 | 
422 |         Parameters
423 |         ----------
424 |         x : vulkpy.Array
425 |             Batch input features
426 |         y : vulkpy.Array
427 |             Batch labels/targets
428 | 
429 |         Returns
430 |         -------
431 |         loss : vulkpy.Array
432 |             Loss
433 |         """
434 |         return self._sum(lambda _L: _L(x, y))
435 | 
436 |     def grad(self) -> Array:
437 |         r"""
438 |         Compute Gradients
439 | 
440 |         Returns
441 |         -------
442 |         dx : vulkpy.Array
443 |             Batch gradients of dL/dx
444 | 
445 |         Notes
446 |         -----
447 |         This method calculates gradients for the last ``__call__(x, y)``.
448 |         """
449 |         return self._sum(lambda _L: _L.grad())
450 | 
451 |     def _sum(self, F: Callable[[Loss], Array]) -> Array:
452 |         coeff, _L = self.L[0]
453 |         s = coeff * F(_L)
454 | 
455 |         for coeff, _L in self.L[1:]:
456 |             s += coeff * F(_L)
457 | 
458 |         return s
459 | 


--------------------------------------------------------------------------------
/vulkpy/nn/models.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Neural Network Model Module (:mod:`vulkpy.nn.models`)
  3 | =====================================================
  4 | """
  5 | from __future__ import annotations
  6 | from typing import Iterable, Optional, Tuple, Union
  7 | 
  8 | from vulkpy import Array
  9 | from .core import Module, Loss, Regularizer
 10 | 
 11 | 
 12 | __all__ = ["Sequence"]
 13 | 
 14 | 
 15 | class Sequence:
 16 |     """
 17 |     Sequential Model
 18 | 
 19 |     All layers sequentially connceted.
 20 |     """
 21 |     def __init__(self,
 22 |                  layers: Iterable[Module],
 23 |                  loss: Loss):
 24 |         """
 25 |         Initialize Sequence
 26 | 
 27 |         Parameters
 28 |         ----------
 29 |         layers : iterable of vulkpy.nn.Module
 30 |             Layers to be called sequentially
 31 |         loss : vulkpy.nn.Loss
 32 |             Loss layer
 33 |         """
 34 |         self.L: Tuple[Module, ...] = tuple(layers)
 35 |         self.loss: Loss = loss
 36 | 
 37 |     def _forward(self, x: Array) -> Array:
 38 |         for _L in self.L:
 39 |             x = _L(x)
 40 |         return x
 41 | 
 42 |     def _backward(self):
 43 |         dx = self.loss.grad()
 44 |         for _L in self.L[::-1]:
 45 |             dx = _L.backward(dx)
 46 | 
 47 |     def _zero_grad(self):
 48 |         for _L in self.L:
 49 |             _L.zero_grad()
 50 | 
 51 |     def _update(self):
 52 |         for _L in self.L:
 53 |             _L.update()
 54 | 
 55 |     def train(self, x: Array, y: Array) -> Tuple[Array, Array]:
 56 |         """
 57 |         Train model
 58 | 
 59 |         Parameters
 60 |         ----------
 61 |         x, y : vulkpy.Array
 62 |             Features and Labels/Targets
 63 | 
 64 |         Returns
 65 |         -------
 66 |         y : vulkpy.Array
 67 |             Predicted Labels/Targets
 68 |         loss : vulkpy.Array
 69 |             Loss
 70 |         """
 71 |         _y = self._forward(x)
 72 |         _loss = self.loss(_y, y)
 73 | 
 74 |         self._zero_grad()
 75 |         self._backward()
 76 |         self._update()
 77 | 
 78 |         return _y, _loss
 79 | 
 80 |     def predict(self,
 81 |                 x: Array,
 82 |                 y: Optional[Array] = None) -> Union[Array, Tuple[Array, Array]]:
 83 |         """
 84 |         Predict Label/Target
 85 | 
 86 |         Parameters
 87 |         ----------
 88 |         x : vulkpy.Array
 89 |             Features
 90 |         y : vulkpy.Array, optional
 91 |             Labels/Targets.
 92 | 
 93 |         Returns
 94 |         -------
 95 |         pred_y : vulkpy.Array
 96 |             Predicted Labels/Targets
 97 |         loss : vulkpy.Array
 98 |             Loss. Return only if ``y`` is specified.
 99 |         """
100 |         _y = self._forward(x)
101 |         if y is None:
102 |             return _y
103 | 
104 |         _loss = self.loss(_y, y)
105 |         return _y, _loss
106 | 


--------------------------------------------------------------------------------
/vulkpy/nn/optimizers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Neural Network Optimizer Module (:mod:`vulkpy.nn.optimizers`)
  3 | =============================================================
  4 | """
  5 | from __future__ import annotations
  6 | from dataclasses import dataclass
  7 | from typing import Iterable, Union
  8 | 
  9 | from wblog import getLogger
 10 | 
 11 | from vulkpy.vkarray import GPU, Array, zeros
 12 | from .core import Optimizer, OptimizerState
 13 | 
 14 | __all__ = [
 15 |     "SGD", "SGDState",
 16 |     "AdaGrad", "AdaGradState",
 17 |     "Adam", "AdamState",
 18 |     "Optimizer", "OptimizerState",
 19 | ]
 20 | 
 21 | logger = getLogger()
 22 | 
 23 | 
 24 | class SGDState(OptimizerState):
 25 |     """
 26 |     Optimizer State for SGD
 27 |     """
 28 |     def __init__(self, opt: SGD):
 29 |         """
 30 |         Initialize SGD state
 31 | 
 32 |         Parameters
 33 |         ----------
 34 |         opt : vulkpy.SGD
 35 |             SGD Optimizer
 36 |         """
 37 |         self.opt: SGD = opt
 38 | 
 39 |     def grad2diff(self, grad: Array) -> Array:
 40 |         """
 41 |         Compute diff from gradient
 42 | 
 43 |         Parameters
 44 |         ----------
 45 |         grad : vulkpy.Array
 46 |             Gradient
 47 | 
 48 |         Returns
 49 |         -------
 50 |         diff : vulkpy.Array
 51 |             Update diff
 52 |         """
 53 |         return (-self.opt.lr) * grad
 54 | 
 55 | class SGD(Optimizer):
 56 |     """
 57 |     SGD Optimizer
 58 | 
 59 |     Use constant learning rate
 60 | 
 61 |     See Also
 62 |     --------
 63 |     vulkpy.nn.Adam : Adam optimizer
 64 |     """
 65 |     def __init__(self, lr: float):
 66 |         """
 67 |         Initialize Stachostic Gradient Decent (SGD) Optimizer
 68 | 
 69 |         Parameters
 70 |         ----------
 71 |         lr : float
 72 |             Learning rate
 73 |         """
 74 |         self.lr: float = lr
 75 |         logger.debug("SGD(lr=%f)", self.lr)
 76 | 
 77 |     def init_state(self, shape: Iterable[int]) -> SGDState:
 78 |         """
 79 |         Initialize Optimizer state
 80 | 
 81 |         Parameters
 82 |         ----------
 83 |         shape : iterable of ints
 84 |             Shape of parameter
 85 | 
 86 |         Returns
 87 |         -------
 88 |         SGDState
 89 |             Optimizer state
 90 | 
 91 |         Notes
 92 |         -----
 93 |         Currently SGDState is empty, however,
 94 |         we might add some field like momentum in future.
 95 |         """
 96 |         return SGDState(self)
 97 | 
 98 | 
 99 | class AdaGradState(OptimizerState):
100 |     """
101 |     Optimizer State for AdaGrad
102 |     """
103 |     def __init__(self, opt: AdaGrad, shape: Iterable[int], tau: float):
104 |         """
105 |         Initialize AdaGrad
106 | 
107 |         Parameters
108 |         ----------
109 |         opt : vulkpy.AdaGrad
110 |             AdaGrad Optimizer
111 |         shape : iterable of ints
112 |             Value shape
113 |         tau : float
114 |             Initial summation
115 |         """
116 |         self.opt: AdaGrad = opt
117 |         self.h: Array = zeros(self.opt.gpu, shape=shape)
118 |         self.h[:] = tau
119 | 
120 |     def grad2diff(self, grad: Array) -> Array:
121 |         """
122 |         Compute diff from gradient
123 | 
124 |         Parameters
125 |         ----------
126 |         grad : vulkpy.Array
127 |             Gradient
128 | 
129 |         Returns
130 |         -------
131 |         diff : vulkpy.Array
132 |             Update diff
133 |         """
134 |         self.h += (grad ** 2)
135 | 
136 |         sqrt = self.h.sqrt()  #               sqrt(sum)
137 |         sqrt += self.opt.eps  #               sqrt(sum) + eps
138 |         ret = grad / sqrt     #       grad / (sqrt(sum) + eps)
139 |         ret *= (-self.opt.lr) # -lr * grad / (sqrt(sum) + eps)
140 |         return ret
141 | 
142 | class AdaGrad(Optimizer):
143 |     r"""
144 |     AdaGrad Optimizer
145 | 
146 |     Notes
147 |     -----
148 |     This class implement AdaGrad [adagrad1]_.
149 | 
150 |     References
151 |     ----------
152 |     .. [adagrad1]
153 |     """
154 |     def __init__(self,
155 |                  gpu: GPU, *,
156 |                  lr: float = 0.01,
157 |                  tau: float = 0.0,
158 |                  eps: float = 1e-8):
159 |         """
160 |         Initialize AdaGrad
161 | 
162 |         Parameters
163 |         ----------
164 |         gpu : vulkpy.GPU
165 |             GPU
166 |         lr : float, optional
167 |             AdaGrad parameter (learning rate). The default is ``0.01``.
168 |         tau : float, optional
169 |             AdaGrad parameter (initialial accumulator).
170 |             The default is ``0``.
171 |         eps : float, optional
172 |             AdaGrad parameter (small positive).
173 |             The default is ``1e-8``
174 |         """
175 |         self.gpu: GPU = gpu
176 |         self.lr: float = lr
177 |         self.tau: float = tau
178 |         self.eps: float = eps
179 | 
180 |         logger.debug("AdaGrad(lr=%f, tau=%f, eps=%f)",
181 |                      self.lr, self.tau, self.eps)
182 | 
183 |     def init_state(self, shape: Iterable[int]) -> AdaGradState:
184 |         """
185 |         Initialize Optimizer state
186 | 
187 |         Parameters
188 |         ----------
189 |         shape : iterable of ints
190 |             Shape of parameter
191 | 
192 |         Returns
193 |         -------
194 |         AdaGradState
195 |             Optimizer state
196 |         """
197 |         return AdaGradState(opt=self, shape=shape, tau=self.tau)
198 | 
199 | 
200 | class AdamState(OptimizerState):
201 |     """
202 |     Optimizer State for Adam
203 |     """
204 |     def __init__(self, opt: Adam, shape: Iterable[int]):
205 |         """
206 |         Initialize Adam state
207 | 
208 |         Parameters
209 |         ----------
210 |         opt : vulkpy.Adam
211 |             Adam Optimizer
212 |         shape : iterable of ints
213 |             Value shape
214 |         """
215 |         self.opt: Adam = opt
216 |         self.m: Array = zeros(self.opt.gpu, shape=shape)
217 |         self.v: Array = zeros(self.opt.gpu, shape=shape)
218 |         self.beta1t: float = 1.0
219 |         self.beta2t: float = 1.0
220 | 
221 |     def grad2diff(self, grad: Array) -> Array:
222 |         """
223 |         Compute diff from gradient
224 | 
225 |         Parameters
226 |         ----------
227 |         grad : vulkpy.Array
228 |             Gradient
229 | 
230 |         Returns
231 |         -------
232 |         diff : vulkpy.Array
233 |             Update diff
234 |         """
235 |         self.m *= self.opt.beta1
236 |         self.m += (1 - self.opt.beta1) * grad        # Allocate
237 | 
238 |         self.v *= self.opt.beta2
239 |         self.v += (1 - self.opt.beta2) * (grad ** 2) # Allocate
240 | 
241 |         self.beta1t *= self.opt.beta1
242 |         self.beta2t *= self.opt.beta2
243 | 
244 |         mhat = self.m / (1 - self.beta1t) # Allocate
245 |         vhat = self.v / (1 - self.beta2t) # Allocate
246 | 
247 |         vhat.sqrt(inplace=True) # sqrt(vhat)
248 |         vhat += self.opt.eps    # sqrt(vhat) + eps
249 | 
250 |         mhat *= (-self.opt.lr)  # -lr * mhat
251 |         mhat /= vhat            # -lr * mhat / (sqrt(vhat) + eps)
252 | 
253 |         return mhat
254 | 
255 | 
256 | class Adam(Optimizer):
257 |     r"""
258 |     Adam Optimizer
259 | 
260 |     See Also
261 |     --------
262 |     vulkpy.nn.SGD : SGD optimizer
263 | 
264 |     Notes
265 |     -----
266 |     This class implement Adam [adam1]_.
267 |     The algorithm utilizes moving averages of the 1st and 2nd order moment.
268 |     The 1st (:math:`m_t`) and 2nd (:math:`v_t`) order moment are updated as follows;
269 | 
270 |     .. math::
271 | 
272 |          m_t = \beta _1 m_{t-1} + (1 - \beta _1) g_t\\
273 |          v_t = \beta _2 v_{t-1} + (1 - \beta _2) g_t ^2
274 | 
275 |     where :math:`g_t` is gradient.
276 | 
277 |     To mitigate initial underestimation,
278 |     corrected :math:`\hat{m_t}` and :math:`\hat{v_t}` are used for parameter update.
279 | 
280 |     .. math::
281 | 
282 |          \hat{m}_t = m_t / (1 - \beta _1 ^t)\\
283 |          \hat{v}_t = v_t / (1 - \beta _2 ^t)
284 | 
285 |     Finally, parameter :math:`\theta _t` is updated by
286 | 
287 |     .. math::
288 | 
289 |          \theta _t = \theta _{t-1} - \text{lr} \times
290 |          \hat{m}_t/(\sqrt{\hat{v}_t} + \epsilon)
291 | 
292 | 
293 |     References
294 |     ----------
295 |     .. [adam1] D. Kingma and J. Ba, "Adam: A Method for Stochastic Optimization",
296 |        ICLR (Poster) 2015, https://dblp.org/rec/journals/corr/KingmaB14.html
297 | 
298 |     Examples
299 |     --------
300 |     >>> import vulkpy.vk
301 |     >>> from vulkpy import nn
302 |     >>> gpu = vk.GPU()
303 |     >>> adam = nn.Adam(gpu, lr=0.001, beta1=0.9, beta2=0.999)
304 |     """
305 |     def __init__(self,
306 |                  gpu: GPU, *,
307 |                  lr: float = 0.001,
308 |                  beta1: float = 0.9,
309 |                  beta2: float = 0.999,
310 |                  eps: float = 1e-8):
311 |         """
312 |         Initialize Adam Optimizer
313 | 
314 |         Parameters
315 |         ----------
316 |         gpu : vulkpy.GPU
317 |             GPU
318 |         lr : float, optional
319 |             Adam parameter. The default is ``0.001``.
320 |         beta1 : float, optional
321 |             Adam parameter. The default is ``0.9``.
322 |         beta2 : float, optional
323 |             Adam parameter. The defeault is ``0.999``.
324 |         eps : float, optional
325 |             Adam parameter. The default is ``1e-8``.
326 |         """
327 |         self.gpu: GPU = gpu
328 |         self.lr: float = lr
329 |         self.beta1: float = beta1
330 |         self.beta2: float = beta2
331 |         self.eps: float = eps
332 | 
333 |         logger.debug("Adam(lr=%f, beta1=%f, beta2=%f, eps=%f)",
334 |                      self.lr, self.beta1, self.beta2, self.eps)
335 | 
336 |     def init_state(self, shape: Iterable[int]) -> AdamState:
337 |         """
338 |         Initialize Optimizer state
339 | 
340 |         Parameters
341 |         ----------
342 |         shape : iterable of ints
343 |             Shape of parameter
344 | 
345 |         Returns
346 |         -------
347 |         AdamState
348 |             Optimizer state
349 |         """
350 |         return AdamState(opt=self, shape=shape)
351 | 


--------------------------------------------------------------------------------
/vulkpy/nn/parameters.py:
--------------------------------------------------------------------------------
  1 | from __future__ import annotations
  2 | from typing import Callable, Iterable, Optional
  3 | 
  4 | from vulkpy.vkarray import GPU, Array, zeros
  5 | from .core import Optimizer, OptimizerState, Regularizer
  6 | from .optimizers import Adam
  7 | 
  8 | 
  9 | __all__ = [
 10 |     "Parameter"
 11 | ]
 12 | 
 13 | 
 14 | class Parameter:
 15 |     """
 16 |     Neural Network Parameter
 17 |     """
 18 |     def __init__(self,
 19 |                  gpu: GPU,
 20 |                  shape: Iterable[int],
 21 |                  trainable: bool = True,
 22 |                  opt: Optional[Optimizer] = None,
 23 |                  initializer: Optional[Callable[[GPU, Iterable[int]], Array]]=None,
 24 |                  regularizer: Optional[Regularizer] = None):
 25 |         """
 26 |         Initialize Parameter
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         gpu : vulkpy.GPU
 31 |             GPU
 32 |         shape : iterable of ints
 33 |             Shape of parameter
 34 |         trainable : bool, optional
 35 |             If ``True`` (default), track gradient
 36 |         opt : vulkpy.nn.Optimizer, optional
 37 |             Optimizer. If ``None`` (default), ``vulkpy.nn.Adam`` is used.
 38 |         initializer : callable, optional
 39 |             Initializer function. If ``None`` (default), initialized with ``0.0``.
 40 |         regularizer : vulkpy.nn.Regularizer, optional
 41 |             Regularizer. If ``None`` (default), no regularization is applied.
 42 |         """
 43 |         if initializer is None:
 44 |             initializer = zeros
 45 |         self.value: Array = initializer(gpu, shape)
 46 | 
 47 |         self.grad: Optional[Array] = None
 48 |         self.opt_state: Optional[OptimizerState] = None
 49 |         if trainable:
 50 |             self.grad = zeros(gpu, shape=shape)
 51 | 
 52 |             if opt is None:
 53 |                 opt = Adam(gpu)
 54 |             self.opt_state = opt.init_state(shape)
 55 | 
 56 |         self.R: Optional[Regularizer] = regularizer
 57 | 
 58 |     def is_trainable(self) -> bool:
 59 |         """
 60 |         Whether this parameter is trainable
 61 | 
 62 |         Returns
 63 |         -------
 64 |         bool
 65 |             Is trainable
 66 |         """
 67 |         return self.grad is not None
 68 | 
 69 |     def add_grad(self, grad: Array):
 70 |         """
 71 |         Add gradient
 72 | 
 73 |         Parameters
 74 |         ----------
 75 |         grad : vulkpy.Array
 76 |             Gradient to be accumulated
 77 |         """
 78 |         if self.grad is not None:
 79 |             self.grad += grad
 80 | 
 81 |     def zero_grad(self):
 82 |         """
 83 |         Clear gradient to 0.0
 84 |         """
 85 |         if self.grad is not None:
 86 |             self.grad[:] = 0.0
 87 | 
 88 |     def update(self):
 89 |         """
 90 |         Update value
 91 | 
 92 |         Update value with accumulated gradients only if this value is trainable.
 93 |         """
 94 |         if self.grad is not None:
 95 |             self.value += self.opt_state.grad2diff(self.grad)
 96 | 
 97 |     def regular_loss(self) -> Array:
 98 |         """
 99 |         Regularization Loss
100 | 
101 |         Returns
102 |         -------
103 |         vulkpy.nn.Array
104 |             Loss
105 |         """
106 |         if self.R is not None:
107 |             return self.R.loss(self.value)
108 | 
109 |         return zeros(self.value._gpu, shape=(1,))
110 | 
111 |     def regular_grad(self):
112 |         """
113 |         Add Regularization Gradients
114 |         """
115 |         if self.R is not None:
116 |             self.add_grad(self.R.grad(self.value))
117 | 


--------------------------------------------------------------------------------
/vulkpy/nn/regularizers.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Neural Network Regularizer Module (:mod:`vulkpy.nn.regularizers`)
  3 | =================================================================
  4 | """
  5 | from __future__ import annotations
  6 | from typing import Iterable, Tuple
  7 | from typing_extensions import Protocol
  8 | 
  9 | import wblog
 10 | 
 11 | from vulkpy import Array
 12 | from .core import Regularizer
 13 | 
 14 | __all__ = [
 15 |     "Lasso",
 16 |     "Ridge",
 17 |     "Elastic",
 18 | ]
 19 | 
 20 | logger = wblog.getLogger()
 21 | 
 22 | 
 23 | class Lasso(Regularizer):
 24 |     r"""
 25 |     Lasso (L1) Regularization
 26 | 
 27 |     Notes
 28 |     -----
 29 |     .. math::
 30 | 
 31 |          L = \text{coeff} \times \sum_i |W_i|\\
 32 |          dL/dW_i = \text{coeff} \times \rm{sign}(W_i)
 33 |     """
 34 |     def __init__(self, coeff: float = 1.0):
 35 |         """
 36 |         Initialize Lasso Regularizer
 37 | 
 38 |         Parameters
 39 |         ----------
 40 |         coeff : float, optional
 41 |             L1 Coefficient
 42 |         """
 43 |         logger.debug(f"Lasso(L1={coeff})")
 44 |         self.coeff: float = coeff
 45 | 
 46 |     def loss(self, param: Array) -> Array:
 47 |         """
 48 |         L1 Regularization Loss
 49 | 
 50 |         Parameters
 51 |         ----------
 52 |         param : vulkpy.Array
 53 |             Parameter
 54 | 
 55 |         Returns
 56 |         -------
 57 |         loss : vulkpy.Array
 58 |             L1 Regularization Loss
 59 |         """
 60 |         L = param.abs().sum()
 61 |         L *= self.coeff
 62 |         return L
 63 | 
 64 |     def grad(self, param: Array) -> Array:
 65 |         """
 66 |         Gradient of L1 Regularization Loss
 67 | 
 68 |         Parameters
 69 |         ----------
 70 |         param : vulkpy.Array
 71 |             Parameter
 72 | 
 73 |         Returns
 74 |         -------
 75 |         dW : vulkpy.Array
 76 |             Gradient for L1 Regularization Loss
 77 |         """
 78 |         return self.coeff * param.sign()
 79 | 
 80 | class Ridge(Regularizer):
 81 |     r"""
 82 |     Ridge (L2) Regularization
 83 | 
 84 |     Notes
 85 |     -----
 86 |     .. math::
 87 | 
 88 |          L = \text{coeff} \times \sum_i |W_i|^2\\
 89 |          dL/dW_i = 2 \cdot \text{coeff} \times W_i
 90 |     """
 91 |     def __init__(self, coeff: float = 1.0):
 92 |         """
 93 |         Initialize Ridge Regularizer
 94 | 
 95 |         Parameters
 96 |         ----------
 97 |         coef : float, optional
 98 |             L2 Coefficient
 99 |         """
100 |         logger.debug(f"Ridge(L2={coeff})")
101 |         self.coeff: float = coeff
102 | 
103 |     def loss(self, param: Array) -> Array:
104 |         """
105 |         L2 Regularization Loss
106 | 
107 |         Parameters
108 |         ----------
109 |         param : vulkpy.Array
110 |             Parameter
111 | 
112 |         Returns
113 |         -------
114 |         loss : vulkpy.Array
115 |             L2 Regularization Loss
116 |         """
117 |         L = (param ** 2).sum()
118 |         L *= self.coeff
119 |         return L
120 | 
121 |     def grad(self, param: Array) -> Array:
122 |         """
123 |         Gradient of L2 Regularization Loss
124 | 
125 |         Parameters
126 |         ----------
127 |         param : vulkpy.Array
128 |             Parameter
129 | 
130 |         Returns
131 |         -------
132 |         dW : vulkpy.Array
133 |             Gradient for L2 Regularization Loss
134 |         """
135 |         return (2 * self.coeff) * param
136 | 
class Elastic(Regularizer):
    r"""
    Elastic (L1 + L2) Regularization

    Notes
    -----
    .. math::

         L = \alpha \sum _i |W_i| + \beta \sum _i |W_i|^2\\
         dL/dW_i = \alpha \rm{sign}(W_i) + 2 \beta W_i
    """
    def __init__(self, L1: float = 1.0, L2: float = 1.0):
        """
        Initialize Elastic Regularizer

        Parameters
        ----------
        L1 : float, optional
            L1 Coefficient, forwarded to ``Lasso``. The default is ``1.0``.
        L2 : float, optional
            L2 Coefficient, forwarded to ``Ridge``. The default is ``1.0``.
        """
        # Both terms are delegated to the dedicated regularizers
        self.L1 = Lasso(L1)
        self.L2 = Ridge(L2)

    def loss(self, param: Array) -> Array:
        """
        L1 + L2 Regularization Loss

        Parameters
        ----------
        param : vulkpy.Array
            Parameter

        Returns
        -------
        loss : vulkpy.Array
            Sum of the Lasso loss and the Ridge loss of ``param``
        """
        l1_term = self.L1.loss(param)
        l2_term = self.L2.loss(param)
        return l1_term + l2_term

    def grad(self, param: Array) -> Array:
        """
        Gradient of L1 + L2 Regularization Loss

        Parameters
        ----------
        param : vulkpy.Array
            Parameter

        Returns
        -------
        dW : vulkpy.Array
            Sum of the Lasso gradient and the Ridge gradient of ``param``
        """
        l1_term = self.L1.grad(param)
        l2_term = self.L2.grad(param)
        return l1_term + l2_term
193 | 


--------------------------------------------------------------------------------
/vulkpy/random.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Random Module (:mod:`vulkpy.random`)
  3 | ====================================
  4 | 
  5 | GPU-based Pseudo Random Number Generator (PRNG)
  6 | 
  7 | 
  8 | Examples
  9 | --------
 10 | >>> import vulkpy as vk
 11 | >>> gpu = vk.GPU()
 12 | >>> r = vk.random.Xoshiro128pp(gpu, seed=0)
 13 | 
 14 | [0, 1) uniform random numbers can be generated by
 15 | ``random(shape=None, buffer=None)``.
 16 | 
 17 | >>> print(r.random(shape=(3,)))
 18 | [0.42977667 0.8235899  0.90622926]
 19 | 
 20 | Gaussian random numbers can be generated by
 21 | ``normal(shape=None, buffer=None, mean=0.0, stddev=1.0)``.
 22 | 
 23 | >>> print(r.normal(shape=(3,)))
 24 | [-2.3403292  0.7247794  0.7118352]
 25 | """
 26 | 
 27 | from __future__ import annotations
 28 | 
 29 | import os
 30 | from typing import cast, Iterable, Optional
 31 | 
 32 | import numpy as np
 33 | 
 34 | from . import _vkarray
 35 | from . import vkarray as vk
 36 | from .util import getShader
 37 | 
 38 | __all__ = ["Xoshiro128pp"]
 39 | 
 40 | class PRNG(vk.Resource):
 41 |     _box_muller = getShader("prng_box_muller.spv")
 42 |     _ibox_muller = getShader("prng_ibox_muller.spv")
 43 |     _randrange = getShader("prng_randrange.spv")
 44 | 
 45 |     _2p32 = int(2 ** 32)
 46 | 
 47 |     def __init__(self, gpu: vk.GPU):
 48 |         self._gpu = gpu
 49 | 
 50 |     def random(self, *,
 51 |                shape: Optional[Iterable[int]] = None,
 52 |                buffer: Optional[vk.Array] = None) -> vk.Array:
 53 |         raise NotImplementedError
 54 | 
 55 |     def randint(self, *,
 56 |                 shape: Optional[Iterable[int]] = None,
 57 |                 buffer: Optional[vk.U32Array] = None) -> vk.U32Array:
 58 |         raise NotImplementedError
 59 | 
 60 |     def normal(self, *,
 61 |                shape: Optional[Iterable[int]] = None,
 62 |                buffer: Optional[vk.Array] = None,
 63 |                mean: float = 0.0,
 64 |                stddev: float = 1.0) -> vk.Array:
 65 |         """
 66 |         Generate Normal Distributing numbers
 67 | 
 68 |         Parameters
 69 |         ----------
 70 |         shape : iterable of ints, optional
 71 |             If specified, new ``vulkpy.Array`` with ``shape`` will be returned.
 72 |         buffer : vulkpy.Array
 73 |             If specified, generated numbers will be stored.
 74 | 
 75 |         Returns
 76 |         -------
 77 |         vulkpy.Array
 78 |             Array which will get random numbers.
 79 | 
 80 |         Raises
 81 |         ------
 82 |         ValueError
 83 |             If neither ``shape`` or ``buffer`` are specified
 84 | 
 85 |         Notes
 86 |         -----
 87 |         This method first generates [0, 1) uniform random numbers,
 88 |         then transforms them to normal distribution with Box-Muller method.
 89 |         Box-Muller might have problem in terms of random number quality,
 90 |         however, it is quite GPU friendly.
 91 |         """
 92 |         _local_size = 64
 93 |         if buffer is None:
 94 |             if shape is None:
 95 |                 raise ValueError("One of `shape` and  `buffer` must be specified.")
 96 | 
 97 |             buffer = vk.Array(self._gpu, shape=shape)
 98 |         else:
 99 |             # For safety, we wait output buffer job.
100 |             buffer.wait()
101 | 
102 |         # After checking, coarse type to Array
103 |         buffer = cast(vk.Array, buffer)
104 | 
105 |         n = int(np.prod(buffer.shape))
106 |         floor_n = n // 2
107 |         dshape = _vkarray.DataShape(floor_n, 1, 1)
108 |         p = _vkarray.VectorScalar2Params(n, mean, stddev)
109 |         if n % 2 == 0:
110 |             # Even: Reuse `buffer`
111 |             buffer = self.random(buffer=buffer)
112 |             buffer.job = self._gpu._submit(self._ibox_muller,
113 |                                            _local_size, 1, 1,
114 |                                            [buffer], dshape, p)
115 |             buffer._keep = []
116 |         else:
117 |             # Odd: Require additional space for intermediate [0, 1)
118 |             rng = self.random(shape=(2*(floor_n + 1),))
119 |             buffer.job = self._gpu._submit(self._box_muller,
120 |                                            _local_size, 1, 1,
121 |                                            [rng, buffer], dshape, p)
122 | 
123 |             buffer._keep = [rng]
124 |         return buffer
125 | 
126 |     def randrange(self, *,
127 |                   shape: Optional[Iterable[int]] = None,
128 |                   buffer: Optional[vk.U32Array] = None,
129 |                   low: int = 0,
130 |                   high: int = int(2 ** 32)) -> vk.U32Array:
131 |         """
132 |         Generate [low, high) random numbers
133 | 
134 |         Parameters
135 |         ----------
136 |         shape : iterable of ints, optional
137 |             If specified, new ``vulkpy.U32Array`` with ``shape`` will be returned.
138 |         buffer : vulkpy.Array
139 |             If specified, generated numbers will be stored.
140 |         low : int, optional
141 |             Inclusive lowest value. The default is ``0``.
142 |         high : int, optional
143 |             Exclusive highest value. The default is ``2^32``.
144 | 
145 |         Returns
146 |         -------
147 |         vulkpy.U32Array
148 |             Array which will get random numbers.
149 | 
150 |         Raises
151 |         ------
152 |         ValueError
153 |             If neither ``shape`` or ``buffer`` are specified.
154 |         ValueError
155 |             If not 0 <= low < high <= 2^32.
156 |         """
157 |         if low < 0:
158 |             raise ValueError(f"`low` must be non negative integer, but {low}")
159 |         if high > self._2p32:
160 |             raise ValueError(f"`high` must not be greater than 2^32, but {high}")
161 |         if low >= high:
162 |             raise ValueError(f"`low` must be smaller than `high`, but {low}, {high}")
163 | 
164 |         if (low == 0) and (high == self._2p32):
165 |             return self.randint(shape=shape, buffer=buffer)
166 | 
167 |         if buffer is None:
168 |             if shape is None:
169 |                 raise ValueError("One of `shape` and `buffer` must be specified.")
170 | 
171 |             buffer = vk.U32Array(self._gpu, shape=shape)
172 |         else:
173 |             # For safety, we wait output buffer job.
174 |             buffer.wait()
175 | 
176 |         # After checking, coarse type to U32Array
177 |         buffer = cast(vk.U32Array, buffer)
178 | 
179 |         size = buffer.buffer.size()
180 |         rng = self.random(shape=buffer.shape)
181 |         buffer.job = self._gpu._submit(self._randrange, 64, 1, 1,
182 |                                        [rng, buffer],
183 |                                        _vkarray.DataShape(size, 1, 1),
184 |                                        _vkarray.VectorRangeParams(size, low, high-1))
185 |         buffer._keep = [rng]
186 |         return buffer
187 | 
188 |     def wait(self):
189 |         pass
190 | 
191 | 
class Xoshiro128pp(PRNG):
    """
    xoshiro128++: Pseudo Random Number Generator

    Notes
    -----
    This class implements xoshiro128++ [1]_. Initial internal states are
    sequentially generated during construction on CPU and are spaced 2^64 steps.
    Generating (pseudo-)random numbers are executed parallelly on GPU.

    References
    ----------
    .. [1] S. Vigna "xoshiro / xoroshiro generators and the PRNG shootout",
       https://prng.di.unimi.it/
    """
    _spv_uint32 = getShader("prng_xoshiro128pp_uint32.spv")
    _spv_float  = getShader("prng_xoshiro128pp_float.spv")

    def __init__(self, gpu: vk.GPU, size: int = 64, *, seed: Optional[int] = None):
        """
        Initialize Xoshiro128pp

        Parameters
        ----------
        gpu : vulkpy.GPU
            GPU where PRNG allocates
        size : int
            Number of internal states. These states generate random number parallelly.
        seed : int, optional
            Random seed. If ``None`` (default), use hardware random instead.
        """
        super().__init__(gpu)

        # ``seed`` is forwarded only when the caller specified one
        args = [self._gpu.gpu, self._spv_uint32, self._spv_float, size]
        if seed is not None:
            args.append(seed)

        self.rng = _vkarray.Xoshiro128pp(*args)

    def random(self, *,
               shape: Optional[Iterable[int]] = None,
               buffer: Optional[vk.Array] = None) -> vk.Array:
        """
        Generate [0, 1) floating numbers

        Parameters
        ----------
        shape : iterable of ints, optional
            If specified, new ``vulkpy.Array`` with ``shape`` will be returned.
            Ignored when ``buffer`` is given.
        buffer : vulkpy.Array
            If specified, generated numbers will be stored.

        Returns
        -------
        vulkpy.Array
            Array which will get random numbers.

        Raises
        ------
        ValueError
            If neither ``shape`` or ``buffer`` are specified.
        """
        if buffer is not None:
            # For safety, we wait output buffer job.
            buffer.wait()
        elif shape is None:
            raise ValueError("One of `shape` and  `buffer` must be specified.")
        else:
            buffer = vk.Array(self._gpu, shape=shape)

        # After checking, coarse type to Array
        out = cast(vk.Array, buffer)

        count = int(np.prod(out.shape))
        out.job = self.rng.random_float(count, out.buffer.info())

        # The output holds a reference to this generator
        out._keep = [self]
        return out

    def randint(self, *,
                shape: Optional[Iterable[int]] = None,
                buffer: Optional[vk.U32Array] = None) -> vk.U32Array:
        """
        Generate [0, 2^32) unsigned integer numbers

        Parameters
        ----------
        shape : iterable of ints, optional
            If specified, new ``vulkpy.U32Array`` with ``shape`` will be returned.
            Ignored when ``buffer`` is given.
        buffer : vulkpy.U32Array
            If specified, generated numbers will be stored.

        Returns
        -------
        vulkpy.U32Array
            Array which will get random numbers.

        Raises
        ------
        ValueError
            If neither ``shape`` or ``buffer`` are specified.
        """
        if buffer is not None:
            # For safety, we wait output buffer job
            buffer.wait()
        elif shape is None:
            raise ValueError("One of `shape` and `buffer` must be specified.")
        else:
            buffer = vk.U32Array(self._gpu, shape=shape)

        # After checking, coarse type to U32Array
        out = cast(vk.U32Array, buffer)

        count = int(np.prod(out.shape))
        out.job = self.rng.random_uint32(count, out.buffer.info())

        # The output holds a reference to this generator
        out._keep = [self]
        return out
313 | 


--------------------------------------------------------------------------------
/vulkpy/shader/abs.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = abs(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/acos.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = acos(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/acosh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = acosh(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/add.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer C {
17 |   float c[];
18 | };
19 | 
20 | 
21 | void main(){
22 |   uint i = gl_GlobalInvocationID.x;
23 |   if(i >= params.size){ return; }
24 | 
25 |   c[i] = a[i] + b[i];
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/add_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[3];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | layout(std430, binding = 3) readonly buffer D {
21 |   uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
22 | };
23 | 
24 | 
25 | void main(){
26 |   const uint ci = gl_GlobalInvocationID.x;
27 |   if(ci >= params.size[2]){ return; }
28 |   uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]);
29 | 
30 |   uvec2 abi = uvec2(0, 0);
31 |   uint ci_tmp = ci;
32 |   for(uint dim = 0; dim < params.ndim; dim++){
33 |     uvec3 sABC = uvec3(shapeABC[dim],
34 |                        shapeABC[dim + params.ndim],
35 |                        shapeABC[dim + params.ndim * 2]);
36 |     size = size / sABC;
37 | 
38 |     uint d = ci_tmp / size.z;
39 |     abi += size.xy * min(uvec2(d, d), sABC.xy - 1);
40 | 
41 |     ci_tmp = ci_tmp % size.z;
42 |   }
43 | 
44 |   c[ci] = a[abi.x] + b[abi.y];
45 | }
46 | 


--------------------------------------------------------------------------------
/vulkpy/shader/add_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | void main(){
20 |   uint i = gl_GlobalInvocationID.x;
21 |   if(i >= params.size){ return; }
22 | 
23 |   b[i] = a[i] + params.scalar;
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/asin.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = asin(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/asinh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = asinh(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/atan.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = atan(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/atanh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = atanh(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/batch_affine.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint batch_size;
 7 |   uint input_size;
 8 |   uint output_size;
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer W {
13 |   float w[]; // [output_size, input_size]
14 | };
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[]; // [output_size]
17 | };
18 | layout(std430, binding = 2) readonly buffer X {
19 |   float x[]; // [batch_size, input_size]
20 | };
21 | layout(std430, binding = 3) writeonly buffer Y {
22 |   float y[]; // [batch_size, output_size]
23 | };
24 | 
25 | void main(){
26 |   const uint b_idx = gl_GlobalInvocationID.x;
27 |   const uint o_idx = gl_GlobalInvocationID.y;
28 |   if((b_idx >= params.batch_size) || (o_idx >= params.output_size)){ return; }
29 | 
30 |   const uint batch = b_idx * params.input_size;
31 |   const uint elem = o_idx * params.input_size;
32 | 
33 |   float sum = 0.0;
34 |   for(uint i = 0; i < params.input_size; i++){
35 |     sum += w[elem + i] * x[batch + i];
36 |   }
37 | 
38 |   y[b_idx * params.output_size + o_idx] = sum + b[o_idx];
39 | }
40 | 


--------------------------------------------------------------------------------
/vulkpy/shader/broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[2];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) readonly buffer C {
18 |   uint a_shape[];
19 | };
20 | layout(std430, binding = 3) readonly buffer D {
21 |   uint b_shape[];
22 | };
23 | 
24 | 
25 | void main(){
26 |   uint i = gl_GlobalInvocationID.x;
27 |   if(i >= params.size[1]){ return; }
28 | 
29 |   uint i_tmp = i;
30 |   uint j = 0;
31 |   uint sizeA = params.size[0];
32 |   uint sizeB = params.size[1];
33 |   for(uint dim = 0; dim < params.ndim; dim++){
34 |     sizeA = sizeA / a_shape[dim];
35 |     sizeB = sizeB / b_shape[dim];
36 | 
37 |     uint d = min(i_tmp / sizeB, a_shape[dim]-1);
38 |     j += d * sizeA;
39 | 
40 |     i_tmp = i_tmp % sizeB;
41 |   }
42 | 
43 |   b[i] = a[j];
44 | }
45 | 


--------------------------------------------------------------------------------
/vulkpy/shader/clamp.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[]; // min
15 | };
16 | layout(std430, binding = 2) readonly buffer C {
17 |   float c[]; // max
18 | };
19 | layout(std430, binding = 3) writeonly buffer D {
20 |   float d[];
21 | };
22 | 
23 | 
24 | void main(){
25 |   uint i = gl_GlobalInvocationID.x;
26 |   if(i >= params.size){ return; }
27 | 
28 |   d[i] = clamp(a[i], b[i], c[i]);
29 | }
30 | 


--------------------------------------------------------------------------------
/vulkpy/shader/clamp_ss.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar[2];
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | void main(){
20 |   uint i = gl_GlobalInvocationID.x;
21 |   if(i >= params.size){ return; }
22 | 
23 |   b[i] = clamp(a[i], params.scalar[0], params.scalar[1]);
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/clamp_sv.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[]; // max
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | 
21 | 
22 | void main(){
23 |   uint i = gl_GlobalInvocationID.x;
24 |   if(i >= params.size){ return; }
25 | 
26 |   c[i] = clamp(a[i], params.scalar, b[i]);
27 | }
28 | 


--------------------------------------------------------------------------------
/vulkpy/shader/clamp_vs.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[]; // min
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | 
21 | 
22 | void main(){
23 |   uint i = gl_GlobalInvocationID.x;
24 |   if(i >= params.size){ return; }
25 | 
26 |   c[i] = clamp(a[i], b[i], params.scalar);
27 | }
28 | 


--------------------------------------------------------------------------------
/vulkpy/shader/cos.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = cos(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/cosh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = cosh(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/div.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer C {
17 |   float c[];
18 | };
19 | 
20 | 
21 | void main(){
22 |   uint i = gl_GlobalInvocationID.x;
23 |   if(i >= params.size){ return; }
24 | 
25 |   c[i] = a[i] / b[i];
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/div_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // div_broadcast.comp: division with broadcasting, c[ci] = a[ai] / b[bi],
 3 | // where ai / bi are the elements of `a` / `b` that line up with element ci.
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // size[2] bounds the index into `c`. All three entries are divided by the
 7 | // per-dimension extents below to obtain strides, so they are presumably the
 8 | // total element counts of `a`, `b` and `c` (confirm against the host code).
 9 | layout(push_constant) uniform constants {
10 |   uint size[3];
11 |   uint ndim;
12 | } params;
13 | 
14 | // Numerators (read only).
15 | layout(std430, binding = 0) readonly buffer A {
16 |   float a[];
17 | };
18 | // Denominators (read only).
19 | layout(std430, binding = 1) readonly buffer B {
20 |   float b[];
21 | };
22 | // Quotients (write only).
23 | layout(std430, binding = 2) writeonly buffer C {
24 |   float c[];
25 | };
26 | layout(std430, binding = 3) readonly buffer D {
27 |   uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
28 | };
29 | 
30 | void main(){
31 |   const uint ci = gl_GlobalInvocationID.x;
32 |   if(ci >= params.size[2]){ return; }
33 | 
34 |   // Dividing by the extents of each dimension in turn turns the sizes into
35 |   // the number of elements covered by one step along that dimension.
36 |   uvec3 stride = uvec3(params.size[0], params.size[1], params.size[2]);
37 |   uvec2 abi = uvec2(0, 0); // (index into a, index into b)
38 |   uint rest = ci;          // part of ci not yet split into coordinates
39 |   for(uint dim = 0; dim < params.ndim; ++dim){
40 |     const uvec3 extent = uvec3(shapeABC[dim],
41 |                                shapeABC[params.ndim + dim],
42 |                                shapeABC[params.ndim * 2 + dim]);
43 |     stride /= extent;
44 | 
45 |     // Coordinate of ci along `dim`, capped at the last coordinate of a / b so
46 |     // that an extent-1 dimension always contributes coordinate 0.
47 |     const uint coord = rest / stride.z;
48 |     abi += stride.xy * min(uvec2(coord, coord), extent.xy - 1);
49 |     rest %= stride.z;
50 |   }
51 | 
52 |   c[ci] = a[abi.x] / b[abi.y];
53 | }
54 | 


--------------------------------------------------------------------------------
/vulkpy/shader/div_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // div_scalar.comp: divide every element by one scalar, b[i] = a[i] / scalar.
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constants: element count and the scalar divisor.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 |   float scalar;
 9 | } params;
10 | 
11 | // Numerators (read only).
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[];
14 | };
15 | // Quotients (write only).
16 | layout(std430, binding = 1) writeonly buffer B {
17 |   float b[];
18 | };
19 | 
20 | void main(){
21 |   // One element per invocation; indices at or past `size` are skipped.
22 |   const uint idx = gl_GlobalInvocationID.x;
23 |   if(idx < params.size){
24 |     b[idx] = a[idx] / params.scalar;
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/exp.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // exp.comp: element-wise natural exponential, b[i] = exp(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Source values (read only).
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | // Destination values (write only).
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     b[idx] = exp(a[idx]);
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/exp2.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // exp2.comp: element-wise base-2 exponential, b[i] = exp2(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Source values (read only).
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | // Destination values (write only).
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     b[idx] = exp2(a[idx]);
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/gather.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // gather.comp: flat gather, c[i] = a[b[i]].
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of indices in `b` (and outputs in `c`) to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values to pick from (read only).
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | // Positions into `a` (read only); this shader does not range-check them.
15 | layout(std430, binding = 1) readonly buffer B {
16 |   uint b[];
17 | };
18 | // Gathered values (write only).
19 | layout(std430, binding = 2) writeonly buffer C {
20 |   float c[];
21 | };
22 | 
23 | void main(){
24 |   // One output per invocation; indices at or past `size` are skipped.
25 |   const uint idx = gl_GlobalInvocationID.x;
26 |   if(idx < params.size){
27 |     const uint src = b[idx];
28 |     c[idx] = a[src];
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/vulkpy/shader/gather_axis.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // gather_axis.comp: gather along one axis, c[k, i, j] = a[i, b[k], j], with
 3 | // i < prev_prod, j < post_prod and k < index_size (row-major flattening).
 4 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 5 | 
 6 | 
 7 | layout(push_constant) uniform constants {
 8 |   uint prev_prod; // Global x
 9 |   uint post_prod; // Global y
10 |   uint axis_size;
11 |   uint index_size;// Global z
12 | } params;
13 | 
14 | 
15 | layout(std430, binding = 0) readonly buffer A {
16 |   float a[]; // [prev..., axis_size, post...]
17 | };
18 | layout(std430, binding = 1) readonly buffer B {
19 |   uint b[]; // [index_size] positions along the gathered axis
20 | };
21 | layout(std430, binding = 2) writeonly buffer C {
22 |   float c[]; // [index_size, prev..., post...]
23 | };
24 | 
25 | 
26 | void main(){
27 |   const uint i = gl_GlobalInvocationID.x;
28 |   const uint j = gl_GlobalInvocationID.y;
29 |   const uint k = gl_GlobalInvocationID.z;
30 |   if((i >= params.prev_prod) || (j >= params.post_prod) || (k >= params.index_size)){
31 |     return;
32 |   }
33 | 
34 |   // Valid positions along the axis are 0 .. axis_size-1. Capping at axis_size
35 |   // itself would let an out-of-range index read the next slice of `a` (or
36 |   // past its end), so cap at axis_size-1. max(.., 1u) keeps the subtraction
37 |   // from wrapping around when axis_size is 0.
38 |   const uint bk = min(b[k], max(params.axis_size, 1u) - 1u);
39 |   const uint a_idx =
40 |     i  * params.axis_size * params.post_prod +
41 |     bk * params.post_prod +
42 |     j;
43 |   const uint c_idx =
44 |     k * params.prev_prod * params.post_prod +
45 |     i * params.post_prod +
46 |     j;
47 | 
48 |   c[c_idx] = a[a_idx];
49 | }
50 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iabs.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iabs.comp: in-place element-wise absolute value, a[i] = abs(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = abs(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iacos.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iacos.comp: in-place element-wise arc cosine, a[i] = acos(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = acos(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iacosh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iacosh.comp: in-place element-wise inverse hyperbolic cosine,
 3 | // a[i] = acosh(a[i]).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constant: number of elements to process.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | 
16 | void main(){
17 |   // One element per invocation; indices at or past `size` are skipped.
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   if(idx < params.size){
20 |     a[idx] = acosh(a[idx]);
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iadd.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iadd.comp: in-place element-wise addition, a[i] += b[i].
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Left operand, overwritten with the result.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | // Right operand (read only).
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     a[idx] += b[idx];
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iadd_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iadd_broadcast.comp: in-place addition with broadcasting, a[ai] += b[bi],
 3 | // where bi is the element of `b` that lines up with element ai of `a`.
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // size[0] bounds the index into `a`. Both entries are divided by the
 7 | // per-dimension extents below to obtain strides, so they are presumably the
 8 | // total element counts of `a` and `b` (confirm against the host code).
 9 | layout(push_constant) uniform constants {
10 |   uint size[2];
11 |   uint ndim;
12 | } params;
13 | 
14 | // Left operand, overwritten with the result.
15 | layout(std430, binding = 0) buffer A {
16 |   float a[];
17 | };
18 | // Right operand (read only).
19 | layout(std430, binding = 1) readonly buffer B {
20 |   float b[];
21 | };
22 | layout(std430, binding = 2) readonly buffer D {
23 |   uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1
24 | };
25 | 
26 | void main(){
27 |   const uint ai = gl_GlobalInvocationID.x;
28 |   if(ai >= params.size[0]){ return; }
29 | 
30 |   // Dividing by the extents of each dimension in turn turns the sizes into
31 |   // the number of elements covered by one step along that dimension.
32 |   uvec2 stride = uvec2(params.size[0], params.size[1]);
33 |   uint bi = 0;
34 |   uint rest = ai; // part of ai not yet split into coordinates
35 |   for(uint dim = 0; dim < params.ndim; ++dim){
36 |     const uvec2 extent = uvec2(shapeAB[dim], shapeAB[params.ndim + dim]);
37 |     stride /= extent;
38 | 
39 |     // Coordinate of ai along `dim`, capped at b's last coordinate so that
40 |     // an extent-1 dimension of `b` always contributes coordinate 0.
41 |     const uint coord = rest / stride.x;
42 |     bi += stride.y * min(coord, extent.y - 1);
43 |     rest %= stride.x;
44 |   }
45 | 
46 |   a[ai] += b[bi];
47 | }
48 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iadd_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iadd_scalar.comp: in-place addition of one scalar, a[i] += scalar.
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constants: element count and the scalar addend.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 |   float scalar;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | 
16 | void main(){
17 |   // One element per invocation; indices at or past `size` are skipped.
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   if(idx < params.size){
20 |     a[idx] += params.scalar;
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iasin.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iasin.comp: in-place element-wise arc sine, a[i] = asin(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = asin(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iasinh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iasinh.comp: in-place element-wise inverse hyperbolic sine,
 3 | // a[i] = asinh(a[i]).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constant: number of elements to process.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | 
16 | void main(){
17 |   // One element per invocation; indices at or past `size` are skipped.
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   if(idx < params.size){
20 |     a[idx] = asinh(a[idx]);
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iatan.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iatan.comp: in-place element-wise arc tangent, a[i] = atan(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = atan(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iatanh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iatanh.comp: in-place element-wise inverse hyperbolic tangent,
 3 | // a[i] = atanh(a[i]).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constant: number of elements to process.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | 
16 | void main(){
17 |   // One element per invocation; indices at or past `size` are skipped.
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   if(idx < params.size){
20 |     a[idx] = atanh(a[idx]);
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iclamp.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iclamp.comp: in-place element-wise clamp with per-element bounds,
 3 | // a[i] = clamp(a[i], b[i], c[i]).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constant: number of elements to process.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[]; // min
17 | };
18 | layout(std430, binding = 2) readonly buffer C {
19 |   float c[]; // max
20 | };
21 | 
22 | void main(){
23 |   // One element per invocation; indices at or past `size` are skipped.
24 |   const uint idx = gl_GlobalInvocationID.x;
25 |   if(idx < params.size){
26 |     a[idx] = clamp(a[idx], b[idx], c[idx]);
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iclamp_ss.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iclamp_ss.comp: in-place clamp with scalar bounds,
 3 | // a[i] = clamp(a[i], scalar[0], scalar[1]).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constants: element count, then scalar[0] = min and scalar[1] = max.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 |   float scalar[2];
10 | } params;
11 | 
12 | // Values, read and overwritten in place.
13 | layout(std430, binding = 0) buffer A {
14 |   float a[];
15 | };
16 | 
17 | void main(){
18 |   // One element per invocation; indices at or past `size` are skipped.
19 |   const uint idx = gl_GlobalInvocationID.x;
20 |   if(idx < params.size){
21 |     a[idx] = clamp(a[idx], params.scalar[0], params.scalar[1]);
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iclamp_sv.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iclamp_sv.comp: in-place clamp with a scalar min and a per-element max,
 3 | // a[i] = clamp(a[i], scalar, b[i]).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constants: element count and the scalar min.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 |   float scalar;
10 | } params;
11 | 
12 | // Values, read and overwritten in place.
13 | layout(std430, binding = 0) buffer A {
14 |   float a[];
15 | };
16 | layout(std430, binding = 1) readonly buffer B {
17 |   float b[]; // max
18 | };
19 | 
20 | void main(){
21 |   // One element per invocation; indices at or past `size` are skipped.
22 |   const uint idx = gl_GlobalInvocationID.x;
23 |   if(idx < params.size){
24 |     a[idx] = clamp(a[idx], params.scalar, b[idx]);
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iclamp_vs.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iclamp_vs.comp: in-place clamp with a per-element min and a scalar max,
 3 | // a[i] = clamp(a[i], b[i], scalar).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constants: element count and the scalar max.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 |   float scalar;
10 | } params;
11 | 
12 | // Values, read and overwritten in place.
13 | layout(std430, binding = 0) buffer A {
14 |   float a[];
15 | };
16 | layout(std430, binding = 1) readonly buffer B {
17 |   float b[]; // min
18 | };
19 | 
20 | void main(){
21 |   // One element per invocation; indices at or past `size` are skipped.
22 |   const uint idx = gl_GlobalInvocationID.x;
23 |   if(idx < params.size){
24 |     a[idx] = clamp(a[idx], b[idx], params.scalar);
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/icos.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // icos.comp: in-place element-wise cosine, a[i] = cos(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = cos(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/icosh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // icosh.comp: in-place element-wise hyperbolic cosine, a[i] = cosh(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = cosh(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/idiv.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // idiv.comp: in-place element-wise division, a[i] /= b[i].
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Numerators, overwritten with the quotients.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | // Denominators (read only).
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     a[idx] /= b[idx];
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/idiv_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // idiv_broadcast.comp: in-place division with broadcasting, a[ai] /= b[bi],
 3 | // where bi is the element of `b` that lines up with element ai of `a`.
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // size[0] bounds the index into `a`. Both entries are divided by the
 7 | // per-dimension extents below to obtain strides, so they are presumably the
 8 | // total element counts of `a` and `b` (confirm against the host code).
 9 | layout(push_constant) uniform constants {
10 |   uint size[2];
11 |   uint ndim;
12 | } params;
13 | 
14 | // Numerators, overwritten with the quotients.
15 | layout(std430, binding = 0) buffer A {
16 |   float a[];
17 | };
18 | // Denominators (read only).
19 | layout(std430, binding = 1) readonly buffer B {
20 |   float b[];
21 | };
22 | layout(std430, binding = 2) readonly buffer D {
23 |   uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1
24 | };
25 | 
26 | void main(){
27 |   const uint ai = gl_GlobalInvocationID.x;
28 |   if(ai >= params.size[0]){ return; }
29 | 
30 |   // Dividing by the extents of each dimension in turn turns the sizes into
31 |   // the number of elements covered by one step along that dimension.
32 |   uvec2 stride = uvec2(params.size[0], params.size[1]);
33 |   uint bi = 0;
34 |   uint rest = ai; // part of ai not yet split into coordinates
35 |   for(uint dim = 0; dim < params.ndim; ++dim){
36 |     const uvec2 extent = uvec2(shapeAB[dim], shapeAB[params.ndim + dim]);
37 |     stride /= extent;
38 | 
39 |     // Coordinate of ai along `dim`, capped at b's last coordinate so that
40 |     // an extent-1 dimension of `b` always contributes coordinate 0.
41 |     const uint coord = rest / stride.x;
42 |     bi += stride.y * min(coord, extent.y - 1);
43 |     rest %= stride.x;
44 |   }
45 | 
46 |   a[ai] /= b[bi];
47 | }
48 | 


--------------------------------------------------------------------------------
/vulkpy/shader/idiv_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // idiv_scalar.comp: in-place division by one scalar, a[i] /= scalar.
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constants: element count and the scalar divisor.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 |   float scalar;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | 
16 | void main(){
17 |   // One element per invocation; indices at or past `size` are skipped.
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   if(idx < params.size){
20 |     a[idx] /= params.scalar;
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iexp.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iexp.comp: in-place element-wise natural exponential, a[i] = exp(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = exp(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iexp2.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iexp2.comp: in-place element-wise base-2 exponential, a[i] = exp2(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = exp2(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/iinvsqrt.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // iinvsqrt.comp: in-place element-wise inverse square root,
 3 | // a[i] = inversesqrt(a[i]).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constant: number of elements to process.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | 
16 | void main(){
17 |   // One element per invocation; indices at or past `size` are skipped.
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   if(idx < params.size){
20 |     a[idx] = inversesqrt(a[idx]);
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/vulkpy/shader/ilog.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // ilog.comp: in-place element-wise natural logarithm, a[i] = log(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = log(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/ilog2.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // ilog2.comp: in-place element-wise base-2 logarithm, a[i] = log2(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Values, read and overwritten in place.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | void main(){
16 |   // One element per invocation; indices at or past `size` are skipped.
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   if(idx < params.size){
19 |     a[idx] = log2(a[idx]);
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imax.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imax.comp: in-place element-wise maximum, a[i] = max(a[i], b[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // First operand, overwritten with the result.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | // Second operand (read only).
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     a[idx] = max(a[idx], b[idx]);
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imax_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imax_broadcast.comp: in-place maximum with broadcasting,
 3 | // a[ai] = max(a[ai], b[bi]), where bi is the element of `b` that lines up
 4 | // with element ai of `a`.
 5 | layout(local_size_x = 64) in;
 6 | 
 7 | // size[0] bounds the index into `a`. Both entries are divided by the
 8 | // per-dimension extents below to obtain strides, so they are presumably the
 9 | // total element counts of `a` and `b` (confirm against the host code).
10 | layout(push_constant) uniform constants {
11 |   uint size[2];
12 |   uint ndim;
13 | } params;
14 | 
15 | // First operand, overwritten with the result.
16 | layout(std430, binding = 0) buffer A {
17 |   float a[];
18 | };
19 | // Second operand (read only).
20 | layout(std430, binding = 1) readonly buffer B {
21 |   float b[];
22 | };
23 | layout(std430, binding = 2) readonly buffer D {
24 |   uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1
25 | };
26 | 
27 | void main(){
28 |   const uint ai = gl_GlobalInvocationID.x;
29 |   if(ai >= params.size[0]){ return; }
30 | 
31 |   // Dividing by the extents of each dimension in turn turns the sizes into
32 |   // the number of elements covered by one step along that dimension.
33 |   uvec2 stride = uvec2(params.size[0], params.size[1]);
34 |   uint bi = 0;
35 |   uint rest = ai; // part of ai not yet split into coordinates
36 |   for(uint dim = 0; dim < params.ndim; ++dim){
37 |     const uvec2 extent = uvec2(shapeAB[dim], shapeAB[params.ndim + dim]);
38 |     stride /= extent;
39 | 
40 |     // Coordinate of ai along `dim`, capped at b's last coordinate so that
41 |     // an extent-1 dimension of `b` always contributes coordinate 0.
42 |     const uint coord = rest / stride.x;
43 |     bi += stride.y * min(coord, extent.y - 1);
44 |     rest %= stride.x;
45 |   }
46 | 
47 |   a[ai] = max(a[ai], b[bi]);
48 | }
49 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imax_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imax_scalar.comp: in-place maximum against one scalar,
 3 | // a[i] = max(a[i], scalar).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constants: element count and the scalar operand.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 |   float scalar;
10 | } params;
11 | 
12 | // Values, read and overwritten in place.
13 | layout(std430, binding = 0) buffer A {
14 |   float a[];
15 | };
16 | 
17 | void main(){
18 |   // One element per invocation; indices at or past `size` are skipped.
19 |   const uint idx = gl_GlobalInvocationID.x;
20 |   if(idx < params.size){
21 |     a[idx] = max(a[idx], params.scalar);
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imin.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imin.comp: in-place element-wise minimum, a[i] = min(a[i], b[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // First operand, overwritten with the result.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | // Second operand (read only).
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     a[idx] = min(a[idx], b[idx]);
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imin_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imin_broadcast.comp: in-place minimum with broadcasting,
 3 | // a[ai] = min(a[ai], b[bi]), where bi is the element of `b` that lines up
 4 | // with element ai of `a`.
 5 | layout(local_size_x = 64) in;
 6 | 
 7 | // size[0] bounds the index into `a`. Both entries are divided by the
 8 | // per-dimension extents below to obtain strides, so they are presumably the
 9 | // total element counts of `a` and `b` (confirm against the host code).
10 | layout(push_constant) uniform constants {
11 |   uint size[2];
12 |   uint ndim;
13 | } params;
14 | 
15 | // First operand, overwritten with the result.
16 | layout(std430, binding = 0) buffer A {
17 |   float a[];
18 | };
19 | // Second operand (read only).
20 | layout(std430, binding = 1) readonly buffer B {
21 |   float b[];
22 | };
23 | layout(std430, binding = 2) readonly buffer D {
24 |   uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1
25 | };
26 | 
27 | void main(){
28 |   const uint ai = gl_GlobalInvocationID.x;
29 |   if(ai >= params.size[0]){ return; }
30 | 
31 |   // Dividing by the extents of each dimension in turn turns the sizes into
32 |   // the number of elements covered by one step along that dimension.
33 |   uvec2 stride = uvec2(params.size[0], params.size[1]);
34 |   uint bi = 0;
35 |   uint rest = ai; // part of ai not yet split into coordinates
36 |   for(uint dim = 0; dim < params.ndim; ++dim){
37 |     const uvec2 extent = uvec2(shapeAB[dim], shapeAB[params.ndim + dim]);
38 |     stride /= extent;
39 | 
40 |     // Coordinate of ai along `dim`, capped at b's last coordinate so that
41 |     // an extent-1 dimension of `b` always contributes coordinate 0.
42 |     const uint coord = rest / stride.x;
43 |     bi += stride.y * min(coord, extent.y - 1);
44 |     rest %= stride.x;
45 |   }
46 | 
47 |   a[ai] = min(a[ai], b[bi]);
48 | }
49 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imin_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imin_scalar.comp: in-place minimum against one scalar,
 3 | // a[i] = min(a[i], scalar).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constants: element count and the scalar operand.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 |   float scalar;
10 | } params;
11 | 
12 | // Values, read and overwritten in place.
13 | layout(std430, binding = 0) buffer A {
14 |   float a[];
15 | };
16 | 
17 | void main(){
18 |   // One element per invocation; indices at or past `size` are skipped.
19 |   const uint idx = gl_GlobalInvocationID.x;
20 |   if(idx < params.size){
21 |     a[idx] = min(a[idx], params.scalar);
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imul.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imul.comp: in-place element-wise multiplication, a[i] *= b[i].
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Left operand, overwritten with the result.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | // Right operand (read only).
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     a[idx] *= b[idx];
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imul_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imul_broadcast.comp: in-place multiplication with broadcasting,
 3 | // a[ai] *= b[bi], where bi is the element of `b` that lines up with
 4 | // element ai of `a`.
 5 | layout(local_size_x = 64) in;
 6 | 
 7 | // size[0] bounds the index into `a`. Both entries are divided by the
 8 | // per-dimension extents below to obtain strides, so they are presumably the
 9 | // total element counts of `a` and `b` (confirm against the host code).
10 | layout(push_constant) uniform constants {
11 |   uint size[2];
12 |   uint ndim;
13 | } params;
14 | 
15 | // Left operand, overwritten with the result.
16 | layout(std430, binding = 0) buffer A {
17 |   float a[];
18 | };
19 | // Right operand (read only).
20 | layout(std430, binding = 1) readonly buffer B {
21 |   float b[];
22 | };
23 | layout(std430, binding = 2) readonly buffer D {
24 |   uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1
25 | };
26 | 
27 | void main(){
28 |   const uint ai = gl_GlobalInvocationID.x;
29 |   if(ai >= params.size[0]){ return; }
30 | 
31 |   // Dividing by the extents of each dimension in turn turns the sizes into
32 |   // the number of elements covered by one step along that dimension.
33 |   uvec2 stride = uvec2(params.size[0], params.size[1]);
34 |   uint bi = 0;
35 |   uint rest = ai; // part of ai not yet split into coordinates
36 |   for(uint dim = 0; dim < params.ndim; ++dim){
37 |     const uvec2 extent = uvec2(shapeAB[dim], shapeAB[params.ndim + dim]);
38 |     stride /= extent;
39 | 
40 |     // Coordinate of ai along `dim`, capped at b's last coordinate so that
41 |     // an extent-1 dimension of `b` always contributes coordinate 0.
42 |     const uint coord = rest / stride.x;
43 |     bi += stride.y * min(coord, extent.y - 1);
44 |     rest %= stride.x;
45 |   }
46 | 
47 |   a[ai] *= b[bi];
48 | }
49 | 


--------------------------------------------------------------------------------
/vulkpy/shader/imul_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // imul_scalar.comp: in-place multiplication by one scalar, a[i] *= scalar.
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constants: element count and the scalar factor.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 |   float scalar;
 9 | } params;
10 | 
11 | // Values, read and overwritten in place.
12 | layout(std430, binding = 0) buffer A {
13 |   float a[];
14 | };
15 | 
16 | void main(){
17 |   // One element per invocation; indices at or past `size` are skipped.
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   if(idx < params.size){
20 |     a[idx] *= params.scalar;
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/vulkpy/shader/invsqrt.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // invsqrt.comp: element-wise inverse square root, b[i] = inversesqrt(a[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Source values (read only).
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | // Destination values (write only).
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     b[idx] = inversesqrt(a[idx]);
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/ipow.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // ipow.comp: in-place element-wise power, a[i] = pow(a[i], b[i]).
 3 | layout(local_size_x = 64) in;
 4 | 
 5 | // Push constant: number of elements to process.
 6 | layout(push_constant) uniform constants {
 7 |   uint size;
 8 | } params;
 9 | 
10 | // Bases, overwritten with the results.
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | // Exponents (read only).
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | void main(){
20 |   // One element per invocation; indices at or past `size` are skipped.
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   if(idx < params.size){
23 |     a[idx] = pow(a[idx], b[idx]);
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/vulkpy/shader/ipow_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // ipow_broadcast.comp: in-place power with broadcasting,
 3 | // a[ai] = pow(a[ai], b[bi]), where bi is the element of `b` that lines up
 4 | // with element ai of `a`.
 5 | layout(local_size_x = 64) in;
 6 | 
 7 | // size[0] bounds the index into `a`. Both entries are divided by the
 8 | // per-dimension extents below to obtain strides, so they are presumably the
 9 | // total element counts of `a` and `b` (confirm against the host code).
10 | layout(push_constant) uniform constants {
11 |   uint size[2];
12 |   uint ndim;
13 | } params;
14 | 
15 | // Bases, overwritten with the results.
16 | layout(std430, binding = 0) buffer A {
17 |   float a[];
18 | };
19 | // Exponents (read only).
20 | layout(std430, binding = 1) readonly buffer B {
21 |   float b[];
22 | };
23 | layout(std430, binding = 2) readonly buffer D {
24 |   uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1
25 | };
26 | 
27 | void main(){
28 |   const uint ai = gl_GlobalInvocationID.x;
29 |   if(ai >= params.size[0]){ return; }
30 | 
31 |   // Dividing by the extents of each dimension in turn turns the sizes into
32 |   // the number of elements covered by one step along that dimension.
33 |   uvec2 stride = uvec2(params.size[0], params.size[1]);
34 |   uint bi = 0;
35 |   uint rest = ai; // part of ai not yet split into coordinates
36 |   for(uint dim = 0; dim < params.ndim; ++dim){
37 |     const uvec2 extent = uvec2(shapeAB[dim], shapeAB[params.ndim + dim]);
38 |     stride /= extent;
39 | 
40 |     // Coordinate of ai along `dim`, capped at b's last coordinate so that
41 |     // an extent-1 dimension of `b` always contributes coordinate 0.
42 |     const uint coord = rest / stride.x;
43 |     bi += stride.y * min(coord, extent.y - 1);
44 |     rest %= stride.x;
45 |   }
46 | 
47 |   a[ai] = pow(a[ai], b[bi]);
48 | }
49 | 


--------------------------------------------------------------------------------
/vulkpy/shader/ipow_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | // ipow_scalar.comp: in-place power with one scalar exponent,
 3 | // a[i] = pow(a[i], scalar).
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | // Push constants: element count and the scalar exponent.
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 |   float scalar;
10 | } params;
11 | 
12 | // Bases, read and overwritten in place.
13 | layout(std430, binding = 0) buffer A {
14 |   float a[];
15 | };
16 | 
17 | void main(){
18 |   // One element per invocation; indices at or past `size` are skipped.
19 |   const uint idx = gl_GlobalInvocationID.x;
20 |   if(idx < params.size){
21 |     a[idx] = pow(a[idx], params.scalar);
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/isign.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) buffer A {
11 |   float a[];
12 | };
13 | 
14 | 
15 | // In-place element-wise sign: a[idx] <- sign(a[idx]).
16 | void main(){
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   // Invocations with idx >= params.size leave the buffer untouched.
19 |   if(idx < params.size){ a[idx] = sign(a[idx]); }
20 | }
21 | 


--------------------------------------------------------------------------------
/vulkpy/shader/isin.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) buffer A {
11 |   float a[];
12 | };
13 | 
14 | 
15 | // In-place element-wise sine: a[idx] <- sin(a[idx]).
16 | void main(){
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   // Invocations with idx >= params.size leave the buffer untouched.
19 |   if(idx < params.size){ a[idx] = sin(a[idx]); }
20 | }
21 | 


--------------------------------------------------------------------------------
/vulkpy/shader/isinh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) buffer A {
11 |   float a[];
12 | };
13 | 
14 | 
15 | // In-place element-wise hyperbolic sine: a[idx] <- sinh(a[idx]).
16 | void main(){
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   // Invocations with idx >= params.size leave the buffer untouched.
19 |   if(idx < params.size){ a[idx] = sinh(a[idx]); }
20 | }
21 | 


--------------------------------------------------------------------------------
/vulkpy/shader/isqrt.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) buffer A {
11 |   float a[];
12 | };
13 | 
14 | 
15 | // In-place element-wise square root: a[idx] <- sqrt(a[idx]).
16 | void main(){
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   // Invocations with idx >= params.size leave the buffer untouched.
19 |   if(idx < params.size){ a[idx] = sqrt(a[idx]); }
20 | }
21 | 


--------------------------------------------------------------------------------
/vulkpy/shader/isub.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | // In-place element-wise subtraction: a[idx] <- a[idx] - b[idx].
19 | void main(){
20 |   const uint idx = gl_GlobalInvocationID.x;
21 |   // Invocations with idx >= params.size leave the buffers untouched.
22 |   if(idx < params.size){ a[idx] -= b[idx]; }
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/isub_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[2];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) readonly buffer D {
18 |   uint shapeAB[]; // [a0, ..., an, b0, ..., bn] for n = ndim-1
19 | };
20 | 
21 | 
22 | void main(){ // In-place a[ai] -= b[bi], where bi is the broadcast position in b matching ai
23 |   const uint ai = gl_GlobalInvocationID.x; // flat index into a
24 |   if(ai >= params.size[0]){ return; }
25 |   uvec2 size = uvec2(params.size[0], params.size[1]); // element count of a and (presumably) of b; reduced to per-dim strides below
26 | 
27 |   uint bi = 0; // flat index into b, accumulated one dimension at a time
28 |   uint ai_tmp = ai; // remainder of ai not yet decomposed into coordinates
29 |   for(uint dim = 0; dim < params.ndim; dim++){
30 |     uvec2 sAB = uvec2(shapeAB[dim],
31 |                       shapeAB[dim + params.ndim]); // extents of a (x) and b (y) along dim
32 |     size = size / sAB; // dividing out this extent leaves the stride of dim in a (x) and b (y)
33 | 
34 |     uint d = ai_tmp / size.x; // coordinate along dim inside a
35 |     bi += size.y * min(d, sAB.y - 1); // clamp: when b has extent 1 along dim the coordinate collapses to 0
36 | 
37 |     ai_tmp = ai_tmp % size.x; // keep only the part belonging to the remaining dims
38 |   }
39 | 
40 |   a[ai] -= b[bi];
41 | }
42 | 


--------------------------------------------------------------------------------
/vulkpy/shader/isub_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) buffer A {
12 |   float a[];
13 | };
14 | 
15 | 
16 | // In-place scalar subtraction: a[idx] <- a[idx] - params.scalar.
17 | void main(){
18 |   const uint idx = gl_GlobalInvocationID.x;
19 |   // Invocations with idx >= params.size leave the buffer untouched.
20 |   if(idx < params.size){ a[idx] -= params.scalar; }
21 | }
22 | 


--------------------------------------------------------------------------------
/vulkpy/shader/itan.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) buffer A {
11 |   float a[];
12 | };
13 | 
14 | 
15 | // In-place element-wise tangent: a[idx] <- tan(a[idx]).
16 | void main(){
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   // Invocations with idx >= params.size leave the buffer untouched.
19 |   if(idx < params.size){ a[idx] = tan(a[idx]); }
20 | }
21 | 


--------------------------------------------------------------------------------
/vulkpy/shader/itanh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) buffer A {
11 |   float a[];
12 | };
13 | 
14 | 
15 | // In-place element-wise hyperbolic tangent: a[idx] <- tanh(a[idx]).
16 | void main(){
17 |   const uint idx = gl_GlobalInvocationID.x;
18 |   // Invocations with idx >= params.size leave the buffer untouched.
19 |   if(idx < params.size){ a[idx] = tanh(a[idx]); }
20 | }
21 | 


--------------------------------------------------------------------------------
/vulkpy/shader/log.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | // Element-wise natural logarithm: b[idx] <- log(a[idx]).
19 | void main(){
20 |   const uint idx = gl_GlobalInvocationID.x;
21 |   // Invocations with idx >= params.size neither read nor write.
22 |   if(idx < params.size){ b[idx] = log(a[idx]); }
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/log2.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | // Element-wise base-2 logarithm: b[idx] <- log2(a[idx]).
19 | void main(){
20 |   const uint idx = gl_GlobalInvocationID.x;
21 |   // Invocations with idx >= params.size neither read nor write.
22 |   if(idx < params.size){ b[idx] = log2(a[idx]); }
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/matmul.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint rowA;
 7 |   uint contractSize;
 8 |   uint columnB;
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [rowA, contractSize]
14 | };
15 | layout(std430, binding = 1) readonly buffer B {
16 |   float b[]; // [contractSize, columnB]
17 | };
18 | layout(std430, binding = 2) writeonly buffer C {
19 |   float c[]; // [rowA, columnB]
20 | };
21 | 
22 | 
23 | // Each invocation computes one element of c = a * b: c[r, q] = sum over t of a[r, t] * b[t, q].
24 | void main(){
25 |   const uvec2 rq = gl_GlobalInvocationID.xy; // (row of a, column of b)
26 |   if((rq.x >= params.rowA) || (rq.y >= params.columnB)){ return; }
27 | 
28 |   float acc = 0.0;
29 |   for(uint t = 0; t < params.contractSize; t++){
30 |     acc += a[rq.x * params.contractSize + t] * b[t * params.columnB + rq.y];
31 |   }
32 |   c[rq.x * params.columnB + rq.y] = acc;
33 | }
34 | 


--------------------------------------------------------------------------------
/vulkpy/shader/max.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer C {
17 |   float c[];
18 | };
19 | 
20 | 
21 | // Element-wise maximum of two buffers: c[idx] <- max(a[idx], b[idx]).
22 | void main(){
23 |   const uint idx = gl_GlobalInvocationID.x;
24 |   // Invocations with idx >= params.size neither read nor write.
25 |   if(idx < params.size){ c[idx] = max(a[idx], b[idx]); }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/max_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[3];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | layout(std430, binding = 3) readonly buffer D {
21 |   uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
22 | };
23 | 
24 | 
25 | void main(){ // c[ci] = max(a[abi.x], b[abi.y]), where abi holds the broadcast positions in a and b matching ci
26 |   const uint ci = gl_GlobalInvocationID.x; // flat index into c
27 |   if(ci >= params.size[2]){ return; }
28 |   uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); // presumably element counts of a, b, c; reduced to per-dim strides below
29 | 
30 |   uvec2 abi = uvec2(0, 0); // flat indices into a (x) and b (y), accumulated one dimension at a time
31 |   uint ci_tmp = ci; // remainder of ci not yet decomposed into coordinates
32 |   for(uint dim = 0; dim < params.ndim; dim++){
33 |     uvec3 sABC = uvec3(shapeABC[dim],
34 |                        shapeABC[dim + params.ndim],
35 |                        shapeABC[dim + params.ndim * 2]); // extents of a, b, c along dim
36 |     size = size / sABC; // dividing out this extent leaves the stride of dim in a, b, c
37 | 
38 |     uint d = ci_tmp / size.z; // coordinate along dim inside c
39 |     abi += size.xy * min(uvec2(d, d), sABC.xy - 1); // clamp: an input with extent 1 along dim always uses coordinate 0
40 | 
41 |     ci_tmp = ci_tmp % size.z; // keep only the part belonging to the remaining dims
42 |   }
43 | 
44 |   c[ci] = max(a[abi.x], b[abi.y]);
45 | }
46 | 


--------------------------------------------------------------------------------
/vulkpy/shader/max_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | // Element-wise maximum against a scalar: b[idx] <- max(a[idx], params.scalar).
20 | void main(){
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   // Invocations with idx >= params.size neither read nor write.
23 |   if(idx < params.size){ b[idx] = max(a[idx], params.scalar); }
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/maximum.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[2];
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | // Strided partial max-reduction: b[t] = max of a[t], a[t + size[1]], a[t + 2*size[1]], ... below size[0].
19 | void main(){
20 |   const uint t = gl_GlobalInvocationID.x;
21 |   const uvec2 n = uvec2(params.size[0], params.size[1]); // (index bound on a, index bound on b and stride)
22 |   if(t >= n.y){ return; }
23 | 
24 |   float best = a[t]; // seed value; the first loop iteration revisits a[t]
25 |   for(uint pos = t; pos < n.x; pos += n.y){
26 |     best = max(best, a[pos]);
27 |   }
28 | 
29 |   b[t] = best;
30 | }
31 | 


--------------------------------------------------------------------------------
/vulkpy/shader/maximum_axis.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., post...]
17 | };
18 | 
19 | 
20 | // Max-reduce a[prev, axis, post] along `axis`; one invocation per (prev, post) pair.
21 | void main(){
22 |   const uvec2 pq = gl_GlobalInvocationID.xy; // (prev index, post index)
23 |   if((pq.x >= params.prev_prod) || (pq.y >= params.post_prod)){ return; }
24 |   // Offset of element [prev, 0, post]; consecutive axis entries are post_prod apart.
25 |   const uint base = (pq.x * params.axis_size * params.post_prod) + pq.y;
26 | 
27 |   float best = a[base];
28 |   for(uint k = 0; k < params.axis_size; k++){
29 |     best = max(best, a[(k * params.post_prod) + base]);
30 |   }
31 |   b[(pq.x * params.post_prod) + pq.y] = best;
32 | }
33 | 


--------------------------------------------------------------------------------
/vulkpy/shader/maximum_axis_rebroadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., axis, post...] (reduced value repeated along axis)
17 | };
18 | 
19 | 
20 | // Max-reduce a[prev, axis, post] along `axis` and write the result to every
21 | // position along that axis, so b is indexed exactly like a.
22 | void main(){
23 |   const uvec2 pq = gl_GlobalInvocationID.xy; // (prev index, post index)
24 |   if((pq.x >= params.prev_prod) || (pq.y >= params.post_prod)){ return; }
25 |   // Offset of element [prev, 0, post]; consecutive axis entries are post_prod apart.
26 |   const uint base = (pq.x * params.axis_size * params.post_prod) + pq.y;
27 |   float best = a[base];
28 |   for(uint k = 0; k < params.axis_size; k++){
29 |     best = max(best, a[(k * params.post_prod) + base]);
30 |   }
31 | 
32 |   for(uint k = 0; k < params.axis_size; k++){
33 |     b[(k * params.post_prod) + base] = best;
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/vulkpy/shader/maximum_v1.3.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | #extension GL_KHR_shader_subgroup_arithmetic: enable
 3 | 
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | 
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[];
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | 
20 | // Partial max-reduction: every subgroup writes the maximum of its in-range elements of a to b.
21 | void main(){
22 |   const uint i = gl_GlobalInvocationID.x;
23 |   if(i >= params.size){ return; } // out-of-range invocations are inactive and excluded from subgroupMax
24 |   const float partial_max = subgroupMax(a[i]);
25 |   // gl_SubgroupID only numbers subgroups inside one workgroup; offset by the workgroup so that
26 |   // workgroups do not overwrite each other's slot (index is unchanged when a single workgroup runs).
27 |   const uint slot = gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID; // b needs one slot per subgroup - TODO confirm host sizing
28 |   if(subgroupElect()){ b[slot] = partial_max; }
29 | }
30 | 


--------------------------------------------------------------------------------
/vulkpy/shader/min.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer C {
17 |   float c[];
18 | };
19 | 
20 | 
21 | // Element-wise minimum of two buffers: c[idx] <- min(a[idx], b[idx]).
22 | void main(){
23 |   const uint idx = gl_GlobalInvocationID.x;
24 |   // Invocations with idx >= params.size neither read nor write.
25 |   if(idx < params.size){ c[idx] = min(a[idx], b[idx]); }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/min_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[3];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | layout(std430, binding = 3) readonly buffer D {
21 |   uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
22 | };
23 | 
24 | 
25 | void main(){ // c[ci] = min(a[abi.x], b[abi.y]), where abi holds the broadcast positions in a and b matching ci
26 |   const uint ci = gl_GlobalInvocationID.x; // flat index into c
27 |   if(ci >= params.size[2]){ return; }
28 |   uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); // presumably element counts of a, b, c; reduced to per-dim strides below
29 | 
30 |   uvec2 abi = uvec2(0, 0); // flat indices into a (x) and b (y), accumulated one dimension at a time
31 |   uint ci_tmp = ci; // remainder of ci not yet decomposed into coordinates
32 |   for(uint dim = 0; dim < params.ndim; dim++){
33 |     uvec3 sABC = uvec3(shapeABC[dim],
34 |                        shapeABC[dim + params.ndim],
35 |                        shapeABC[dim + params.ndim * 2]); // extents of a, b, c along dim
36 |     size = size / sABC; // dividing out this extent leaves the stride of dim in a, b, c
37 | 
38 |     uint d = ci_tmp / size.z; // coordinate along dim inside c
39 |     abi += size.xy * min(uvec2(d, d), sABC.xy - 1); // clamp: an input with extent 1 along dim always uses coordinate 0
40 | 
41 |     ci_tmp = ci_tmp % size.z; // keep only the part belonging to the remaining dims
42 |   }
43 | 
44 |   c[ci] = min(a[abi.x], b[abi.y]);
45 | }
46 | 


--------------------------------------------------------------------------------
/vulkpy/shader/min_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | // Element-wise minimum against a scalar: b[idx] <- min(a[idx], params.scalar).
20 | void main(){
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   // Invocations with idx >= params.size neither read nor write.
23 |   if(idx < params.size){ b[idx] = min(a[idx], params.scalar); }
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/minimum.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[2];
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | // Strided partial min-reduction: b[t] = min of a[t], a[t + size[1]], a[t + 2*size[1]], ... below size[0].
19 | void main(){
20 |   const uint t = gl_GlobalInvocationID.x;
21 |   const uvec2 n = uvec2(params.size[0], params.size[1]); // (index bound on a, index bound on b and stride)
22 |   if(t >= n.y){ return; }
23 | 
24 |   float best = a[t]; // seed value; the first loop iteration revisits a[t]
25 |   for(uint pos = t; pos < n.x; pos += n.y){
26 |     best = min(best, a[pos]);
27 |   }
28 | 
29 |   b[t] = best;
30 | }
31 | 


--------------------------------------------------------------------------------
/vulkpy/shader/minimum_axis.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., post...]
17 | };
18 | 
19 | 
20 | // Min-reduce a[prev, axis, post] along `axis`; one invocation per (prev, post) pair.
21 | void main(){
22 |   const uvec2 pq = gl_GlobalInvocationID.xy; // (prev index, post index)
23 |   if((pq.x >= params.prev_prod) || (pq.y >= params.post_prod)){ return; }
24 |   // Offset of element [prev, 0, post]; consecutive axis entries are post_prod apart.
25 |   const uint base = (pq.x * params.axis_size * params.post_prod) + pq.y;
26 | 
27 |   float best = a[base];
28 |   for(uint k = 0; k < params.axis_size; k++){
29 |     best = min(best, a[(k * params.post_prod) + base]);
30 |   }
31 |   b[(pq.x * params.post_prod) + pq.y] = best;
32 | }
33 | 


--------------------------------------------------------------------------------
/vulkpy/shader/minimum_axis_rebroadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., axis, post...] (reduced value repeated along axis)
17 | };
18 | 
19 | 
20 | // Min-reduce a[prev, axis, post] along `axis` and write the result to every
21 | // position along that axis, so b is indexed exactly like a.
22 | void main(){
23 |   const uvec2 pq = gl_GlobalInvocationID.xy; // (prev index, post index)
24 |   if((pq.x >= params.prev_prod) || (pq.y >= params.post_prod)){ return; }
25 |   // Offset of element [prev, 0, post]; consecutive axis entries are post_prod apart.
26 |   const uint base = (pq.x * params.axis_size * params.post_prod) + pq.y;
27 |   float best = a[base];
28 |   for(uint k = 0; k < params.axis_size; k++){
29 |     best = min(best, a[(k * params.post_prod) + base]);
30 |   }
31 | 
32 |   for(uint k = 0; k < params.axis_size; k++){
33 |     b[(k * params.post_prod) + base] = best;
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/vulkpy/shader/minimum_v1.3.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | #extension GL_KHR_shader_subgroup_arithmetic: enable
 3 | 
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | 
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[];
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | 
20 | // Partial min-reduction: every subgroup writes the minimum of its in-range elements of a to b.
21 | void main(){
22 |   const uint i = gl_GlobalInvocationID.x;
23 |   if(i >= params.size){ return; } // out-of-range invocations are inactive and excluded from subgroupMin
24 |   const float partial_min = subgroupMin(a[i]);
25 |   // gl_SubgroupID only numbers subgroups inside one workgroup; offset by the workgroup so that
26 |   // workgroups do not overwrite each other's slot (index is unchanged when a single workgroup runs).
27 |   const uint slot = gl_WorkGroupID.x * gl_NumSubgroups + gl_SubgroupID; // b needs one slot per subgroup - TODO confirm host sizing
28 |   if(subgroupElect()){ b[slot] = partial_min; }
29 | }
30 | 


--------------------------------------------------------------------------------
/vulkpy/shader/mul.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer C {
17 |   float c[];
18 | };
19 | 
20 | 
21 | // Element-wise product of two buffers: c[idx] <- a[idx] * b[idx].
22 | void main(){
23 |   const uint idx = gl_GlobalInvocationID.x;
24 |   // Invocations with idx >= params.size neither read nor write.
25 |   if(idx < params.size){ c[idx] = a[idx] * b[idx]; }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/mul_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[3];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | layout(std430, binding = 3) readonly buffer D {
21 |   uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
22 | };
23 | 
24 | 
25 | void main(){ // c[ci] = a[abi.x] * b[abi.y], where abi holds the broadcast positions in a and b matching ci
26 |   const uint ci = gl_GlobalInvocationID.x; // flat index into c
27 |   if(ci >= params.size[2]){ return; }
28 |   uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); // presumably element counts of a, b, c; reduced to per-dim strides below
29 | 
30 |   uvec2 abi = uvec2(0, 0); // flat indices into a (x) and b (y), accumulated one dimension at a time
31 |   uint ci_tmp = ci; // remainder of ci not yet decomposed into coordinates
32 |   for(uint dim = 0; dim < params.ndim; dim++){
33 |     uvec3 sABC = uvec3(shapeABC[dim],
34 |                        shapeABC[dim + params.ndim],
35 |                        shapeABC[dim + params.ndim * 2]); // extents of a, b, c along dim
36 |     size = size / sABC; // dividing out this extent leaves the stride of dim in a, b, c
37 | 
38 |     uint d = ci_tmp / size.z; // coordinate along dim inside c
39 |     abi += size.xy * min(uvec2(d, d), sABC.xy - 1); // clamp: an input with extent 1 along dim always uses coordinate 0
40 | 
41 |     ci_tmp = ci_tmp % size.z; // keep only the part belonging to the remaining dims
42 |   }
43 | 
44 |   c[ci] = a[abi.x] * b[abi.y];
45 | }
46 | 


--------------------------------------------------------------------------------
/vulkpy/shader/mul_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | // Scale every element by a scalar: b[idx] <- a[idx] * params.scalar.
20 | void main(){
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   // Invocations with idx >= params.size neither read nor write.
23 |   if(idx < params.size){ b[idx] = a[idx] * params.scalar; }
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/nn_cross_entropy.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer X {
11 |   float x[];
12 | };
13 | layout(std430, binding = 1) readonly buffer Y {
14 |   float y[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer Loss {
17 |   float L[];
18 | };
19 | 
20 | 
21 | // Element-wise cross-entropy term L = -y * log(x + 1e-8); the offset keeps log() away from log(0).
22 | void main(){
23 |   const uint idx = gl_GlobalInvocationID.x;
24 |   // Invocations with idx >= params.size neither read nor write.
25 |   if(idx < params.size){ L[idx] = - y[idx] * log(x[idx] + 1e-8); }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/nn_cross_entropy_backward.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer X {
11 |   float x[];
12 | };
13 | layout(std430, binding = 1) readonly buffer Y {
14 |   float y[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer dX {
17 |   float dx[];
18 | };
19 | 
20 | 
21 | // Gradient of -y * log(x + 1e-8) with respect to x: dx = -y / (x + 1e-8).
22 | void main(){
23 |   const uint idx = gl_GlobalInvocationID.x;
24 |   // Invocations with idx >= params.size neither read nor write.
25 |   if(idx < params.size){ dx[idx] = - y[idx] / (x[idx] + 1e-8); }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/pow.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer C {
17 |   float c[];
18 | };
19 | 
20 | 
21 | // Element-wise power of two buffers: c[idx] <- pow(a[idx], b[idx]).
22 | void main(){
23 |   const uint idx = gl_GlobalInvocationID.x;
24 |   // Invocations with idx >= params.size neither read nor write.
25 |   if(idx < params.size){ c[idx] = pow(a[idx], b[idx]); }
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/pow_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[3];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | layout(std430, binding = 3) readonly buffer D {
21 |   uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
22 | };
23 | 
24 | 
25 | void main(){ // c[ci] = pow(a[abi.x], b[abi.y]), where abi holds the broadcast positions in a and b matching ci
26 |   const uint ci = gl_GlobalInvocationID.x; // flat index into c
27 |   if(ci >= params.size[2]){ return; }
28 |   uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]); // presumably element counts of a, b, c; reduced to per-dim strides below
29 | 
30 |   uvec2 abi = uvec2(0, 0); // flat indices into a (x) and b (y), accumulated one dimension at a time
31 |   uint ci_tmp = ci; // remainder of ci not yet decomposed into coordinates
32 |   for(uint dim = 0; dim < params.ndim; dim++){
33 |     uvec3 sABC = uvec3(shapeABC[dim],
34 |                        shapeABC[dim + params.ndim],
35 |                        shapeABC[dim + params.ndim * 2]); // extents of a, b, c along dim
36 |     size = size / sABC; // dividing out this extent leaves the stride of dim in a, b, c
37 | 
38 |     uint d = ci_tmp / size.z; // coordinate along dim inside c
39 |     abi += size.xy * min(uvec2(d, d), sABC.xy - 1); // clamp: an input with extent 1 along dim always uses coordinate 0
40 | 
41 |     ci_tmp = ci_tmp % size.z; // keep only the part belonging to the remaining dims
42 |   }
43 | 
44 |   c[ci] = pow(a[abi.x], b[abi.y]);
45 | }
46 | 


--------------------------------------------------------------------------------
/vulkpy/shader/pow_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | // Raise every element to a scalar exponent: b[idx] <- pow(a[idx], params.scalar).
20 | void main(){
21 |   const uint idx = gl_GlobalInvocationID.x;
22 |   // Invocations with idx >= params.size neither read nor write.
23 |   if(idx < params.size){ b[idx] = pow(a[idx], params.scalar); }
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prng_box_muller.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar[2]; // [mean, stddev]
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[]; // Uniform distribution on [0, 1).
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | // Box-Muller transform: each invocation turns the uniform pair (a[j], a[k]) into
20 | // two outputs b[j], b[k] scaled by params.scalar[1] (stddev) and shifted by params.scalar[0] (mean).
21 | void main(){
22 |   const uint j = 2 * gl_GlobalInvocationID.x;
23 |   const uint k = j + 1;
24 |   if(j >= params.size){ return; }
25 |   const float mean = params.scalar[0];
26 |   const float radius = sqrt(-2 * log(1.0 - a[j])) * params.scalar[1];
27 |   const float theta = 6.28318530718f * a[k]; // 2*pi * a[k]; read even when k == params.size
28 | 
29 |   b[j] = mean + radius * sin(theta);
30 |   // With an odd size the final pair has no second output slot.
31 |   if(k < params.size){ b[k] = mean + radius * cos(theta); }
32 | }
33 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prng_ibox_muller.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar[2]; // [mean, stddev]
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) buffer A {
12 |   float a[]; // Uniform distribution on [0, 1).
13 | };
14 | 
15 | 
16 | // In-place Box-Muller: replaces the uniform pair (a[j], a[k]) with two normally distributed samples.
17 | void main(){
18 |   const uint j = 2 * gl_GlobalInvocationID.x;
19 |   const uint k = j + 1;
20 |   if(j >= params.size){ return; }
21 |   // NOTE(review): a[k] is still read when k == params.size (odd size) - confirm the buffer is padded.
22 |   const float r = sqrt(-2 * log(1.0 - a[j])) * params.scalar[1]; // radius scaled by stddev
23 |   const float angle = 6.28318530718f * a[k];                     // 2*pi * second uniform value
24 |   a[j] = params.scalar[0] + r * sin(angle);
25 |   // Odd size: the last pair has no second slot, so never write at or past params.size
26 |   // (same guard as the out-of-place Box-Muller kernel).
27 |   if(k < params.size){ a[k] = params.scalar[0] + r * cos(angle); }
28 | }
28 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prng_randrange.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   uint low;
 8 |   uint high;
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [0, 1)
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   uint b[]; // [low, high]
17 | };
18 | 
19 | 
20 | void main(){
21 |   uint i = gl_GlobalInvocationID.x;
22 |   if(i >= params.size){ return; }
23 | 
24 |   const uint range = params.high - params.low + 1;
25 | 
26 |   b[i] = params.low + uint(range * a[i]);
27 | }
28 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prng_xoshiro128pp_float.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint shift;
 7 |   uint size;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) buffer A {
12 |   uint a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | uint rotl(uint x, int k){
20 |   return (x << k) | (x >> (32 - k));
21 | }
22 | 
23 | 
24 | // xoshiro128++
25 | // https://prng.di.unimi.it/xoshiro128plusplus.c
26 | void main(){
27 |   uint i = gl_GlobalInvocationID.x;
28 |   uint shifted_i = i+params.shift;
29 |   if(i >= params.size){ return; }
30 |   uint j = 4 * i;
31 | 
32 |   uint result = rotl(a[j] + a[j+3], 7) + a[j];
33 |   b[shifted_i] = uintBitsToFloat((result >> 9) | 0x3f800000) - 1.0;
34 | 
35 |   uint t = (a[j+1] << 9);
36 | 
37 |   a[j+2] ^= a[j  ];
38 |   a[j+3] ^= a[j+1];
39 |   a[j+1] ^= a[j+2];
40 |   a[j  ] ^= a[j+3];
41 | 
42 |   a[j+2] ^= t;
43 |   a[j+3] = rotl(a[j+3], 11);
44 | }
45 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prng_xoshiro128pp_uint32.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint shift;
 7 |   uint size;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) buffer A {
12 |   uint a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   uint b[];
16 | };
17 | 
18 | 
19 | uint rotl(uint x, int k){
20 |   return (x << k) | (x >> (32 - k));
21 | }
22 | 
23 | 
24 | // xoshiro128++
25 | // https://prng.di.unimi.it/xoshiro128plusplus.c
26 | void main(){
27 |   uint i = gl_GlobalInvocationID.x;
28 |   uint shifted_i = i+params.shift;
29 |   if(i >= params.size){ return; }
30 |   uint j = 4 * i;
31 | 
32 |   b[shifted_i] = rotl(a[j] + a[j+3], 7) + a[j];
33 | 
34 |   uint t = (a[j+1] << 9);
35 | 
36 |   a[j+2] ^= a[j  ];
37 |   a[j+3] ^= a[j+1];
38 |   a[j+1] ^= a[j+2];
39 |   a[j  ] ^= a[j+3];
40 | 
41 |   a[j+2] ^= t;
42 |   a[j+3] = rotl(a[j+3], 11);
43 | }
44 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prod.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[2];
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   uint sizeA = params.size[0];
21 |   uint sizeB = params.size[1];
22 |   if(i >= sizeB){ return; }
23 | 
24 |   float partial_prod = 1.0f;
25 |   for(uint j = i; j < sizeA; j += sizeB){
26 |     partial_prod *= a[j];
27 |   }
28 | 
29 |   b[i] = partial_prod;
30 | }
31 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prod_axis.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., post...]
17 | };
18 | 
19 | 
20 | void main(){
21 |   uint i = gl_GlobalInvocationID.x;
22 |   uint j = gl_GlobalInvocationID.y;
23 |   if((i >= params.prev_prod) || (j >= params.post_prod)){ return; }
24 | 
25 |   const uint ij = (i * params.axis_size * params.post_prod) + j;
26 | 
27 |   float partial_prod = 1.0f;
28 |   for(uint k=0; k<params.axis_size; k++){
29 |     partial_prod *= a[(k * params.post_prod) + ij];
30 |   }
31 |   b[(i * params.post_prod) + j] = partial_prod;
32 | }
33 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prod_axis_rebroadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., post...]
17 | };
18 | 
19 | 
20 | void main(){
21 |   uint i = gl_GlobalInvocationID.x;
22 |   uint j = gl_GlobalInvocationID.y;
23 |   if((i >= params.prev_prod) || (j >= params.post_prod)){ return; }
24 | 
25 |   const uint ij = (i * params.axis_size * params.post_prod) + j;
26 | 
27 |   float partial_prod = 1.0f;
28 |   for(uint k=0; k<params.axis_size; k++){
29 |     partial_prod *= a[(k * params.post_prod) + ij];
30 |   }
31 | 
32 |   for(uint k=0; k<params.axis_size; k++){
33 |     b[(k * params.post_prod) + ij] = partial_prod;
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/vulkpy/shader/prod_v1.3.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | #extension GL_KHR_shader_subgroup_arithmetic: enable
 3 | 
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | 
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[];
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | 
20 | void main(){
21 |   uint i = gl_GlobalInvocationID.x;
22 |   if(i >= params.size){ return; }
23 | 
24 |   float partial_prod = subgroupMul(a[i]);
25 | 
26 |   if(subgroupElect()){
27 |     b[gl_SubgroupID] = partial_prod;
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/vulkpy/shader/rdiv_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | void main(){
20 |   uint i = gl_GlobalInvocationID.x;
21 |   if(i >= params.size){ return; }
22 | 
23 |   b[i] = params.scalar / a[i];
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/rpow_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | void main(){
20 |   uint i = gl_GlobalInvocationID.x;
21 |   if(i >= params.size){ return; }
22 | 
23 |   b[i] = pow(params.scalar, a[i]);
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/rsub_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | void main(){
20 |   uint i = gl_GlobalInvocationID.x;
21 |   if(i >= params.size){ return; }
22 | 
23 |   b[i] = params.scalar - a[i];
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sign.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = sign(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sin.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = sin(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sinh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = sinh(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sqrt.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = sqrt(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sub.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) readonly buffer B {
14 |   float b[];
15 | };
16 | layout(std430, binding = 2) writeonly buffer C {
17 |   float c[];
18 | };
19 | 
20 | 
21 | void main(){
22 |   uint i = gl_GlobalInvocationID.x;
23 |   if(i >= params.size){ return; }
24 | 
25 |   c[i] = a[i] - b[i];
26 | }
27 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sub_broadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[3];
 7 |   uint ndim;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) readonly buffer B {
15 |   float b[];
16 | };
17 | layout(std430, binding = 2) writeonly buffer C {
18 |   float c[];
19 | };
20 | layout(std430, binding = 3) readonly buffer D {
21 |   uint shapeABC[]; // [a0, ..., an, b0, ..., bn, c0, ..., cn] for n = ndim-1
22 | };
23 | 
24 | 
25 | void main(){
26 |   const uint ci = gl_GlobalInvocationID.x;
27 |   if(ci >= params.size[2]){ return; }
28 |   uvec3 size = uvec3(params.size[0], params.size[1], params.size[2]);
29 | 
30 |   uvec2 abi = uvec2(0, 0);
31 |   uint ci_tmp = ci;
32 |   for(uint dim = 0; dim < params.ndim; dim++){
33 |     uvec3 sABC = uvec3(shapeABC[dim],
34 |                        shapeABC[dim + params.ndim],
35 |                        shapeABC[dim + params.ndim * 2]);
36 |     size = size / sABC;
37 | 
38 |     uint d = ci_tmp / size.z;
39 |     abi += size.xy * min(uvec2(d, d), sABC.xy - 1);
40 | 
41 |     ci_tmp = ci_tmp % size.z;
42 |   }
43 | 
44 |   c[ci] = a[abi.x] - b[abi.y];
45 | }
46 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sub_scalar.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 |   float scalar;
 8 | } params;
 9 | 
10 | 
11 | layout(std430, binding = 0) readonly buffer A {
12 |   float a[];
13 | };
14 | layout(std430, binding = 1) writeonly buffer B {
15 |   float b[];
16 | };
17 | 
18 | 
19 | void main(){
20 |   uint i = gl_GlobalInvocationID.x;
21 |   if(i >= params.size){ return; }
22 | 
23 |   b[i] = a[i] - params.scalar;
24 | }
25 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sum.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size[2];
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   uint sizeA = params.size[0];
21 |   uint sizeB = params.size[1];
22 |   if(i >= sizeB){ return; }
23 | 
24 |   float partial_sum = 0.0f;
25 |   for(uint j = i; j < sizeA; j += sizeB){
26 |     partial_sum += a[j];
27 |   }
28 | 
29 |   b[i] = partial_sum;
30 | }
31 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sum_axis.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., post...]
17 | };
18 | 
19 | 
20 | void main(){
21 |   uint i = gl_GlobalInvocationID.x;
22 |   uint j = gl_GlobalInvocationID.y;
23 |   if((i >= params.prev_prod) || (j >= params.post_prod)){ return; }
24 | 
25 |   const uint ij = (i * params.axis_size * params.post_prod) + j;
26 | 
27 |   float partial_sum = 0.0f;
28 |   for(uint k=0; k<params.axis_size; k++){
29 |     partial_sum += a[(k * params.post_prod) + ij];
30 |   }
31 |   b[(i * params.post_prod) + j] = partial_sum;
32 | }
33 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sum_axis_rebroadcast.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 1, local_size_y = 64, local_size_z = 1) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint prev_prod; // Global x
 7 |   uint axis_size;
 8 |   uint post_prod; // Global y
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[]; // [prev..., axis, post...]
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[]; // [prev..., post...]
17 | };
18 | 
19 | 
20 | void main(){
21 |   uint i = gl_GlobalInvocationID.x;
22 |   uint j = gl_GlobalInvocationID.y;
23 |   if((i >= params.prev_prod) || (j >= params.post_prod)){ return; }
24 | 
25 |   const uint ij = (i * params.axis_size * params.post_prod) + j;
26 | 
27 |   float partial_sum = 0.0f;
28 |   for(uint k=0; k<params.axis_size; k++){
29 |     partial_sum += a[(k * params.post_prod) + ij];
30 |   }
31 | 
32 |   for(uint k=0; k<params.axis_size; k++){
33 |     b[(k * params.post_prod) + ij] = partial_sum;
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/vulkpy/shader/sum_v1.3.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | #extension GL_KHR_shader_subgroup_arithmetic: enable
 3 | 
 4 | layout(local_size_x = 64) in;
 5 | 
 6 | 
 7 | layout(push_constant) uniform constants {
 8 |   uint size;
 9 | } params;
10 | 
11 | 
12 | layout(std430, binding = 0) readonly buffer A {
13 |   float a[];
14 | };
15 | layout(std430, binding = 1) writeonly buffer B {
16 |   float b[];
17 | };
18 | 
19 | 
20 | void main(){
21 |   uint i = gl_GlobalInvocationID.x;
22 |   if(i >= params.size){ return; }
23 | 
24 |   float partial_sum = subgroupAdd(a[i]);
25 | 
26 |   if(subgroupElect()){
27 |     b[gl_SubgroupID] = partial_sum;
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/vulkpy/shader/tan.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = tan(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/shader/tanh.comp:
--------------------------------------------------------------------------------
 1 | #version 460
 2 | layout(local_size_x = 64) in;
 3 | 
 4 | 
 5 | layout(push_constant) uniform constants {
 6 |   uint size;
 7 | } params;
 8 | 
 9 | 
10 | layout(std430, binding = 0) readonly buffer A {
11 |   float a[];
12 | };
13 | layout(std430, binding = 1) writeonly buffer B {
14 |   float b[];
15 | };
16 | 
17 | 
18 | void main(){
19 |   uint i = gl_GlobalInvocationID.x;
20 |   if(i >= params.size){ return; }
21 | 
22 |   b[i] = tanh(a[i]);
23 | }
24 | 


--------------------------------------------------------------------------------
/vulkpy/util.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility Module (:mod:`vulkpy.util`)
 3 | ===================================
 4 | 
 5 | 
 6 | Examples
 7 | --------
 8 | >>> from vulkpy.util import enable_debug
 9 | >>> enable_debug(api_dump=False)
10 | """
11 | 
12 | import os
13 | import logging
14 | 
15 | import wblog
16 | logger = wblog.getLogger()
17 | 
18 | 
def enable_debug(*, validation: bool = True, api_dump: bool = True):
    """
    Enable debug message

    Start ``DEBUG`` level logging for ``"vulkpy"`` and request the selected
    Vulkan layers through the ``VK_INSTANCE_LAYERS`` environment variable.

    Parameters
    ----------
    validation : bool, optional
        If ``True`` (default), enable Vulkan validation.
    api_dump : bool, optional
        If ``True`` (default), enable Vulkan API dump.

    Notes
    -----
    ``validation`` requires validation layer [1]_.
    ``api_dump`` requires LunarG API dump layer [2]_.
    If required layers are not installed, the options are ignored.

    When at least one layer is selected, any existing value of
    ``VK_INSTANCE_LAYERS`` is overwritten. When both options are ``False``,
    the environment is left untouched.

    References
    ----------
    .. [1] VK_LAYER_KHRONOS_validation
       https://github.com/KhronosGroup/Vulkan-ValidationLayers
    .. [2] VK_LAYER_LUNARG_api_dump
       https://github.com/LunarG/VulkanTools/blob/main/layersvt/api_dump_layer.md
    """
    wblog.start_logging("vulkpy", level=logging.DEBUG)
    logger.debug("Enable debug mode")

    layers = []
    if validation:
        layers.append("VK_LAYER_KHRONOS_validation")
        logger.debug("Enable Vulkan Validation")
    if api_dump:
        layers.append("VK_LAYER_LUNARG_api_dump")
        logger.debug("Enable Vulkan API dump")

    if layers:
        # The Vulkan loader splits this list with the OS-specific separator
        # (":" on Linux/macOS, ";" on Windows), which is what os.pathsep is.
        os.environ["VK_INSTANCE_LAYERS"] = os.pathsep.join(layers)
56 | 
57 | 
def getShader(name: str) -> str:
    """
    Build the path of a shader file shipped with this package

    Parameters
    ----------
    name : str
        SPIR-V (.spv) file name

    Returns
    -------
    str
        ``name`` joined onto the ``shader`` directory located next to
        this module. Existence of the file is not checked.
    """
    shader_dir = os.path.join(os.path.dirname(__file__), "shader")
    return os.path.join(shader_dir, name)
73 | 


--------------------------------------------------------------------------------
/vulkpy/vktyping.py:
--------------------------------------------------------------------------------
 1 | from __future__ import annotations
 2 | from typing import Tuple, Union
 3 | from typing_extensions import Protocol
 4 | 
 5 | import numpy as np
 6 | 
 7 | 
# Alias for ``int``, ``numpy.ndarray`` or ``slice``.
# NOTE(review): presumably the index types accepted for array item access
# -- confirm against the callers.
KeyType = Union[int, np.ndarray, slice]
# Alias for ``int``, ``float``, ``numpy.ndarray`` or ``Tuple``.
# NOTE(review): presumably the value types accepted for array item assignment
# -- confirm against the callers.
ValueType = Union[int, float, np.ndarray, Tuple]
10 | 
class Resource:
    """
    Empty marker class without attributes or methods

    Notes
    -----
    NOTE(review): presumably a common base for objects that must be kept
    alive while in use elsewhere -- confirm against the subclasses.
    """
13 | 
class ArrayProtocol(Protocol):
    """
    Structural type for array-like objects

    Any object exposing the ``shape`` and ``array`` properties and a
    ``wait()`` method conforms to this protocol.
    """

    @property
    def shape(self) -> Tuple[int, ...]:
        """Shape as a tuple of ints"""

    @property
    def array(self) -> np.ndarray:
        """Contents as a ``numpy.ndarray``"""

    def wait(self):
        """
        Wait operation

        Notes
        -----
        NOTE(review): presumably blocks until pending work on the array
        has finished -- confirm against the implementations.
        """
22 | 


--------------------------------------------------------------------------------