├── .cargo
│   └── config.toml
├── .gitattributes
├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── crates
│   ├── altius_py
│   │   ├── .gitignore
│   │   ├── Cargo.toml
│   │   ├── altius_py
│   │   │   └── __init__.py
│   │   ├── bert.py
│   │   ├── deeplab.py
│   │   ├── deit.py
│   │   ├── export-bert.sh
│   │   ├── export-fugumt.sh
│   │   ├── export-gpt2.sh
│   │   ├── export-tinystories.sh
│   │   ├── export_vit.py
│   │   ├── fastvit.py
│   │   ├── fcn.py
│   │   ├── fuse_attn.py
│   │   ├── gpt2.py
│   │   ├── mandelbrot.py
│   │   ├── mobilenet.py
│   │   ├── pyproject.toml
│   │   ├── real-esrgan.py
│   │   ├── resnet50.py
│   │   ├── show-ort-profile.py
│   │   ├── src
│   │   │   └── lib.rs
│   │   ├── test.sh
│   │   ├── tests
│   │   │   ├── test.rs
│   │   │   ├── test_ops_bin.py
│   │   │   ├── test_ops_concat.py
│   │   │   ├── test_ops_conv.py
│   │   │   ├── test_ops_elemwise.py
│   │   │   ├── test_ops_gather.py
│   │   │   ├── test_ops_gemm.py
│   │   │   ├── test_ops_matmul.py
│   │   │   ├── test_ops_norm.py
│   │   │   ├── test_ops_pool.py
│   │   │   ├── test_ops_reduce.py
│   │   │   ├── test_ops_resize.py
│   │   │   ├── test_ops_transpose.py
│   │   │   └── test_ops_where.py
│   │   ├── translation.py
│   │   ├── uv.lock
│   │   ├── vit.py
│   │   └── yolov5.py
│   ├── core
│   │   ├── Cargo.toml
│   │   ├── build.rs
│   │   └── src
│   │       ├── analysis
│   │       │   ├── mod.rs
│   │       │   └── shape.rs
│   │       ├── dim.rs
│   │       ├── fixed_dim.rs
│   │       ├── flops.rs
│   │       ├── graph.rs
│   │       ├── lib.rs
│   │       ├── model.rs
│   │       ├── node.rs
│   │       ├── onnx
│   │       │   ├── load.rs
│   │       │   ├── mod.rs
│   │       │   ├── onnx.proto
│   │       │   └── save.rs
│   │       ├── op.rs
│   │       ├── optimize
│   │       │   ├── conv_act_fusion.rs
│   │       │   ├── elemwise_fusion.rs
│   │       │   ├── fast_gelu_fusion.rs
│   │       │   ├── gelu_fusion.rs
│   │       │   ├── identity_elim.rs
│   │       │   ├── layer_norm_fusion.rs
│   │       │   ├── mod.rs
│   │       │   └── transpose_fusion.rs
│   │       ├── snapshots
│   │       │   ├── altius_core__model__mnist_model.snap
│   │       │   ├── altius_core__tensor__dump_bool_tensor.snap
│   │       │   ├── altius_core__tensor__dump_f32_tensor.snap
│   │       │   ├── altius_core__tensor__dump_i32_tensor.snap
│   │       │   └── altius_core__tensor__dump_i64_tensor.snap
│   │       ├── tensor.rs
│   │       └── value.rs
│   ├── session
│   │   ├── Cargo.toml
│   │   ├── src
│   │   │   ├── lib.rs
│   │   │   └── plan.rs
│   │   └── tests
│   │       └── ort.rs
│   ├── session_clang
│   │   ├── Cargo.toml
│   │   ├── examples
│   │   │   ├── deit_cpu.rs
│   │   │   ├── mnist_cpu.rs
│   │   │   ├── mobilenet_cpu.rs
│   │   │   └── vit_cpu.rs
│   │   ├── src
│   │   │   ├── builder.rs
│   │   │   ├── lib.rs
│   │   │   ├── session.rs
│   │   │   └── translator.rs
│   │   └── tests
│   │       ├── ops_bin.rs
│   │       └── ops_conv.rs
│   ├── session_interpreter
│   │   ├── Cargo.toml
│   │   ├── benches
│   │   │   └── interpreter.rs
│   │   ├── examples
│   │   │   ├── deit.rs
│   │   │   ├── infer.rs
│   │   │   ├── mnist.rs
│   │   │   ├── mobilenet.rs
│   │   │   └── vit.rs
│   │   ├── src
│   │   │   ├── builder.rs
│   │   │   ├── conv2d.rs
│   │   │   ├── fast_math.rs
│   │   │   ├── gemm.rs
│   │   │   ├── lib.rs
│   │   │   ├── session.rs
│   │   │   └── thread.rs
│   │   └── tests
│   │       ├── mobilenet.rs
│   │       └── op_bin.rs
│   └── wasm
│       ├── .gitignore
│       ├── Cargo.toml
│       ├── package.json
│       ├── src
│       │   ├── index.tsx
│       │   └── lib.rs
│       ├── static
│       │   ├── index.css
│       │   └── index.html
│       ├── tsconfig.json
│       ├── webpack.config.ts
│       └── yarn.lock
├── models
│   ├── MNIST_test.txt
│   ├── download.sh
│   └── imagenet_classes.txt
├── rust-toolchain.toml
└── snippets
    ├── coreml
    │   ├── mobilenet.py
    │   └── requirements.txt
    ├── cuda
    │   ├── Makefile
    │   ├── cuda-gemm-act.cu
    │   └── main.c
    ├── float.c
    ├── onnx_float16.py
    ├── q.cc
    ├── sgemm
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── gemm.deit.cc
    │   └── main.cc
    ├── softmax.c
    ├── softmax.cc
    └── test_nchwc.py
/.cargo/config.toml:
--------------------------------------------------------------------------------
1 | [build]
2 | rustflags = ["-Ctarget-cpu=native"]
3 |
4 | [env]
5 | RUST_LOG = "debug"
6 | CC = "clang"
7 | CXX = "clang++"
8 | GOMP_CPU_AFFINITY = "0-7"
9 | MACOSX_DEPLOYMENT_TARGET = "14.0"
10 |
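11 | # Note (editorial sketch): `-Ctarget-cpu=native` tunes codegen for the build
12 | # machine, and GOMP_CPU_AFFINITY pins OpenMP threads to cores 0-7 (fastvit.py
13 | # sets the same variable at runtime).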
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | core/examples/MNIST_test.txt filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches: [ '*' ]
6 | pull_request:
7 | branches: [ '*' ]
8 |
9 | concurrency:
10 | group: ${{ github.workflow }}-${{ github.ref }}
11 | cancel-in-progress: true
12 |
13 | env:
14 | CARGO_TERM_COLOR: always
15 |
16 | jobs:
17 | Linux:
18 | runs-on: ubuntu-latest
19 | steps:
20 | - uses: actions/checkout@v4
21 | - uses: actions/cache@v4
22 | with:
23 | path: |
24 | ~/.cargo/registry
25 | ~/.cargo/git
26 | target
27 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
28 | - name: Download large files
29 | working-directory: ./models
30 | run: ./download.sh CI
31 | - name: Add llvm-tools-preview
32 | run: rustup component add llvm-tools-preview
33 | - name: Install grcov
34 | run: cargo install grcov
35 | - name: Install dependencies
36 |         run: sudo apt install -y libomp-dev xz-utils
37 | - name: Install uv
38 | uses: astral-sh/setup-uv@v6
39 | - name: Free up disk
40 | run: sudo rm -rf /usr/local/lib/android || true
41 | - name: Test
42 | run: |
43 | cargo test --release
44 | ALTIUS_ENABLE_CLIF=1 cargo test --release
45 | env:
46 | RUSTFLAGS: -Cinstrument-coverage
47 | LLVM_PROFILE_FILE: coverage-%p-%m.profraw
48 | - name: Run examples
49 | run: |
50 | (cd crates/altius_py && uv run python deit.py)
51 | # (cd crates/altius_py && uv run python resnet50.py)
52 | (cd crates/altius_py && uv run python export_vit.py)
53 | cargo run --release --example mnist
54 | cargo run --release --example mobilenet
55 | cargo run --release --example deit
56 | cargo run --release --example mnist_cpu
57 | cargo run --release --example mobilenet_cpu
58 | cargo run --release --example deit_cpu
59 | cargo run --release --example vit_cpu
60 | cargo run --release --example vit
61 | cargo run --release --example infer -- ./models/mnist-8.onnx
62 | env:
63 | RUSTFLAGS: -Cinstrument-coverage
64 | LLVM_PROFILE_FILE: coverage-%p-%m.profraw
65 | - name: Submit coverage
66 | run: |
67 | mkdir -p /tmp/cov/
68 | cp -rf ./target/release/* /tmp/cov/
69 | grcov . --binary-path /tmp/cov/ -s . -t cobertura --branch --ignore-not-existing --ignore "*cargo*" -o coverage.xml
70 | bash <(curl -s https://codecov.io/bash)
71 | env:
72 | RUSTFLAGS: -Cinstrument-coverage
73 | LLVM_PROFILE_FILE: coverage-%p-%m.profraw
74 |
75 | macOS:
76 | runs-on: macos-14
77 | steps:
78 | - uses: actions/checkout@v4
79 | - uses: actions/cache@v4
80 | with:
81 | path: |
82 | ~/.cargo/registry
83 | ~/.cargo/git
84 | target
85 | key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
86 | - name: Download large files
87 | working-directory: ./models
88 | run: ./download.sh CI
89 | - name: Install dependencies
90 | run: |
91 | brew install llvm libomp
92 | echo "PATH=$(brew --prefix llvm)/bin:${PATH}" >> $GITHUB_ENV
93 | echo "CPPFLAGS=-I$(brew --prefix libomp)/include" >> $GITHUB_ENV
94 | echo "LDFLAGS=-L$(brew --prefix libomp)/lib" >> $GITHUB_ENV
95 | - name: Install uv
96 | uses: astral-sh/setup-uv@v6
97 | - name: Setup Python environment
98 | working-directory: ./crates/altius_py
99 | run: uv sync
100 | - name: Test
101 | run: |
102 | cargo test --release
103 | ALTIUS_ENABLE_CLIF=1 cargo test --release
104 | env:
105 | RUSTFLAGS: "-C target-cpu=apple-m1"
106 | PYO3_PYTHON: ${{ github.workspace }}/crates/altius_py/.venv/bin/python
107 | - name: Run examples
108 | run: |
109 | (cd crates/altius_py && uv run python deit.py)
110 | (cd crates/altius_py && uv run python export_vit.py)
111 | cargo run --release --example mnist
112 | cargo run --release --example mobilenet
113 | cargo run --release --example deit
114 | cargo run --release --example mnist_cpu
115 | cargo run --release --example mobilenet_cpu
116 | cargo run --release --example deit_cpu
117 | cargo run --release --example vit_cpu
118 | cargo run --release --example vit
119 | cargo run --release --example infer -- ./models/mnist-8.onnx
120 | env:
121 | RUSTFLAGS: "-C target-cpu=apple-m1"
122 | PYO3_PYTHON: ${{ github.workspace }}/crates/altius_py/.venv/bin/python
123 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /pkg
3 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | members = [
3 | "crates/core",
4 | "crates/session",
5 | "crates/session_clang",
6 | "crates/session_interpreter",
7 | "crates/altius_py",
8 | "crates/wasm"
9 | ]
10 | resolver = "2"
11 |
12 | [workspace.dependencies]
13 | thiserror = "^1.0.31"
14 | log = "^0.4.17"
15 | rustc-hash = "^1.1.0"
16 | cranelift = "^0.111.0"
17 | cranelift-module = "^0.111.0"
18 | cranelift-object = "^0.111.0"
19 | cranelift-codegen = "^0.111.0"
20 | ndarray = "^0.15.6"
21 |
22 | [profile.release]
23 | opt-level = 3
24 | overflow-checks = false
25 | codegen-units = 8
26 | debug = true
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 uint256_t
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
21 |
22 | # Requirements
23 |
24 | - cargo
25 | - uv
26 |
27 | # Run
28 |
29 | ```sh
30 | # Download models.
31 | (cd models && ./download.sh)
32 | # Download minimum models.
33 | # (cd models && ./download.sh CI)
34 |
35 | # Run examples.
36 | # {mnist, mobilenet, deit, vit} are available.
37 | # You can specify the number of threads for computation by editing the code.
38 | cargo run --release --example mnist
39 | cargo run --release --example mobilenet
40 | cargo run --release --example deit
41 | cargo run --release --example vit
42 |
43 | # Experimental CPU backend (generates C code)
44 | cargo run --release --example mnist_cpu -- --iters 10
45 | cargo run --release --example mobilenet_cpu -- --iters 10 --profile
46 | cargo run --release --example deit_cpu -- --iters 10 --threads 8 --profile
47 | ```
48 |
49 | # Run from WebAssembly
50 |
51 | Currently, MobileNetV3 runs in web browsers.
52 |
53 | ```sh
54 | cd crates/wasm
55 | cargo install wasm-pack
56 | wasm-pack build --target web
57 | yarn
58 | yarn serve
59 | ```
60 |
61 | # Run from Python
62 |
63 | ```sh
64 | cd ./crates/altius_py
65 | uv sync
66 | uv run maturin develop -r
67 | uv run python mobilenet.py
68 | ```
69 |
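70 | Once built, the module can be used directly. A minimal sketch (assuming
71 | `models/mobilenetv3.onnx` was fetched by `download.sh`; the input name
72 | `input` follows `mobilenet.py`):
73 |
74 | ```py
75 | import numpy as np
76 | import altius_py
77 |
78 | sess = altius_py.InferenceSession("../../models/mobilenetv3.onnx")
79 | (logits,) = sess.run(None, {"input": np.zeros((1, 3, 224, 224), dtype=np.float32)})
80 | print(logits.shape)  # (1, 1000)
81 | ```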
--------------------------------------------------------------------------------
/crates/altius_py/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 |
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | .pytest_cache/
6 | *.py[cod]
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | .venv/
14 | env/
15 | bin/
16 | build/
17 | develop-eggs/
18 | dist/
19 | eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | include/
26 | man/
27 | venv/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | pip-selfcheck.json
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 |
45 | # Translations
46 | *.mo
47 |
48 | # Mr Developer
49 | .mr.developer.cfg
50 | .project
51 | .pydevproject
52 |
53 | # Rope
54 | .ropeproject
55 |
56 | # Django stuff:
57 | *.log
58 | *.pot
59 |
60 | .DS_Store
61 |
62 | # Sphinx documentation
63 | docs/_build/
64 |
65 | # PyCharm
66 | .idea/
67 |
68 | # VSCode
69 | .vscode/
70 |
71 | # Pyenv
72 | .python-version
73 |
74 | # Environments
75 | .env
76 | .venv
77 | env/
78 | venv/
79 | ENV/
80 | env.bak/
81 | venv.bak/
82 |
--------------------------------------------------------------------------------
/crates/altius_py/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "altius_py"
3 | version = "0.1.0"
4 | edition = "2021"
5 |
6 | [lib]
7 | name = "altius_py"
8 | crate-type = ["cdylib"]
9 | test = false
10 |
11 | [dependencies]
12 | altius-core = { path = "../core" }
13 | altius_session = { path = "../session" }
14 | altius_session_clang = { path = "../session_clang" }
15 | altius_session_interpreter = { path = "../session_interpreter" }
16 | pyo3 = { version = "^0.20.0", features = ["extension-module"] }
17 | pyo3-log = "^0.9.0"
18 | numpy = "^0.20.0"
19 |
20 | [dev-dependencies]
21 | cargo-util = "^0.2.1"
22 |
23 | [features]
24 | default = ["cblas"]
25 | matrixmultiply-threading = [ "altius_session_interpreter/matrixmultiply-threading" ]
26 | cuda = [ "altius_session_interpreter/cuda" ]
27 | heavy-log = [ "altius_session_interpreter/heavy-log" ]
28 | cblas = []
29 |
--------------------------------------------------------------------------------
/crates/altius_py/altius_py/__init__.py:
--------------------------------------------------------------------------------
1 | from .altius_py import load, session
2 |
3 |
4 | class InferenceSession:
5 | """
6 | ``InferenceSession`` is the class used to run a model.
7 | """
8 |
9 | def __init__(self, model_path, enable_profile=False, intra_op_num_threads=1, backend="interpreter"):
10 | self.model_path = model_path
11 | self.model = load(model_path)
12 | self.session = session(self.model, enable_profile, intra_op_num_threads, backend)
13 |
14 | def run(self, output, input):
15 | """
16 | Compute the predictions.
17 |
18 | Args:
19 |             output (Optional[list[str]]): Names of the outputs to return; must be None for now.
20 | input (dict[str, numpy.ndarray]): Dictionary ``{ input_name: input_value }``.
21 |
22 | Returns:
23 | list[numpy.ndarray]: Output values.
24 | """
25 |
26 | assert output is None
27 | return self.session.run(input)
28 |
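29 | # Usage sketch (illustrative; assumes an ONNX model whose single input is
30 | # named "x" -- substitute your model's real input names):
31 | #
32 | #   import numpy as np
33 | #   sess = InferenceSession("model.onnx", backend="interpreter")
34 | #   (out,) = sess.run(None, {"x": np.zeros((1, 3), dtype=np.float32)})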
--------------------------------------------------------------------------------
/crates/altius_py/bert.py:
--------------------------------------------------------------------------------
1 | # python -m transformers.onnx --model=bert-base-cased --feature=masked-lm ./a
2 |
3 | import time
4 | import logging
5 | import os
6 | import sys
7 |
8 | from transformers import AutoTokenizer, BertTokenizer
9 | import onnxruntime as ort
10 | import numpy as np
11 | import altius_py
12 |
13 | logging.basicConfig(level=logging.INFO)
14 |
15 | tokenizer = BertTokenizer.from_pretrained("bert-base-cased", mask_token="[MASK]")
16 |
17 | if not os.path.exists("../../models/bert.onnx"):
18 | print("Run ../../models/download.sh to download ../../models/bert.onnx")
19 | sys.exit(0)
20 |
21 | # session = ort.InferenceSession("../../models/bert.onnx")
22 | session = altius_py.InferenceSession(
23 | "../../models/bert.onnx", intra_op_num_threads=8, enable_profile=True
24 | )
25 |
26 | # msg = "Paris is the [MASK] city of France"
27 | # msg = "Deep [MASK] network has been widely used"
28 | msg = "We usually use a [MASK] to input characters to a computer"
29 | # msg = "The number [MASK] is famous as the ultimate answer of everything"
30 | mask_pos = msg.split().index("[MASK]") + 1  # +1 accounts for the [CLS] token the tokenizer prepends
31 | print(f"Masked sentence (up to 20 tokens): {msg}")
32 |
33 | inputs = tokenizer(msg, return_tensors="np")
34 | for name in ["input_ids", "attention_mask", "token_type_ids"]:
35 | input = np.zeros((1, 20), dtype=np.int64)
36 | input[0, : inputs[name].shape[1]] = inputs[name]
37 | inputs[name] = input
38 |
39 | repeat = 10  # Tip: the first run is usually slow.
40 | for _ in range(repeat):
41 | start = time.time()
42 | outputs = session.run(None, dict(inputs))
43 | end = time.time()
44 | print(f"Inference time: {end - start}")
45 |
46 | ids = np.argsort(-outputs[0][0, mask_pos])[:5]
47 | for i, tok in enumerate(tokenizer.convert_ids_to_tokens(ids.tolist())):
48 | print(f"Top{i+1}: {msg.replace('[MASK]', tok.upper())}")
49 |
--------------------------------------------------------------------------------
/crates/altius_py/deeplab.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os
3 | import random
4 | import logging
5 | from itertools import cycle
6 |
7 | import numpy as np
8 | import torch
9 | from torchvision.transforms.functional import to_pil_image
10 | from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
11 | from torchvision import transforms
12 | from matplotlib import colors as mcolors
13 | from PIL import Image
14 |
15 | import onnxruntime as ort
16 | import altius_py
17 |
18 |
19 | def main():
20 | logging.basicConfig(level=logging.INFO)
21 |
22 | path = "../../models/cat.png"
23 | image = Image.open(path).resize((520, 520))
24 |
25 | weights = FCN_ResNet50_Weights.DEFAULT
26 | preprocess = weights.transforms()
27 | input = np.ascontiguousarray((preprocess(image).unsqueeze(0)))
28 |
29 | # sess_options = ort.SessionOptions()
30 | # # sess_options.intra_op_num_threads = 1
31 | # sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
32 | # sess = ort.InferenceSession(
33 | # "../../models/deeplab_mobilenetv3.onnx", sess_options=sess_options
34 | # )
35 | sess = altius_py.InferenceSession(
36 | "../../models/deeplab_mobilenetv3.onnx", enable_profile=True
37 | )
38 |
39 | inputs = {"input.1": input}
40 |
41 | start = time.time()
42 | output = sess.run(None, inputs)[0]
43 | print(f"Inference elapsed: {time.time() - start}")
44 |
45 | prediction = torch.tensor(output)
46 | normalized_masks = prediction.softmax(dim=1)
47 | class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])}
48 | colors = cycle(mcolors.BASE_COLORS.values())
49 | color_like = lambda input: [torch.full_like(input, c) for c in next(colors)]
50 |
51 | for klass, idx in class_to_idx.items():
52 | if klass == "__background__":
53 | continue
54 |
55 | mask = normalized_masks[0, idx]
56 | if torch.max(mask) < 0.2:
57 | # No objects of this class
58 | continue
59 |
60 | mask_img = to_pil_image(
61 | torch.stack(color_like(mask) + [mask * 0.5]),
62 | mode="RGBA",
63 | )
64 | image = Image.alpha_composite(image.convert("RGBA"), mask_img)
65 |
66 | image.save("masked.png")
67 | image.show()
68 |
69 |
70 | if __name__ == "__main__":
71 | main()
72 |
--------------------------------------------------------------------------------
/crates/altius_py/deit.py:
--------------------------------------------------------------------------------
1 | import time
2 | import logging
3 | import os
4 |
5 | import onnx
6 | import altius_py
7 | import torch
8 |
9 | from PIL import Image
10 | from torchvision import transforms
11 |
12 |
13 | def main():
14 | logging.basicConfig(level=logging.INFO)
15 |
16 | image = Image.open("../../models/cat.png")
17 | labels = open("../../models/imagenet_classes.txt").readlines()
18 |
19 | preprocess = transforms.Compose(
20 | [
21 | transforms.Resize(224),
22 | transforms.ToTensor(),
23 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
24 | ]
25 | )
26 | input = preprocess(image)
27 | input = input.unsqueeze(0).numpy()
28 |
29 | onnx_path = "../../models/deit.onnx"
30 |
31 | if not os.path.exists(onnx_path):
32 | import onnxsim
33 | from transformers import ViTImageProcessor, ViTForImageClassification
34 |
35 | model = ViTForImageClassification.from_pretrained(
36 | "facebook/deit-small-patch16-224"
37 | )
38 | torch.onnx.export(model, torch.randn(1, 3, 224, 224), onnx_path)
39 | simplified_model, success = onnxsim.simplify(onnx_path)
40 | assert success
41 | onnx.save(simplified_model, onnx_path)
42 |
43 | altius_model = altius_py.InferenceSession(
44 | onnx_path, intra_op_num_threads=1, enable_profile=True
45 | )
46 |
47 | with torch.no_grad():
48 | for i in range(1):
49 | output = altius_model.run(None, {"input.1": input})
50 | pred = torch.tensor(output).reshape((-1,)).argsort().numpy()[::-1][:5]
51 | top5 = [labels[i].strip() for i in pred]
52 | print(top5)
53 |
54 |
55 | if __name__ == "__main__":
56 | main()
57 |
--------------------------------------------------------------------------------
/crates/altius_py/export-bert.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eux
2 |
3 | EXPORTER_VENV=.exporter.venv
4 |
5 | if [ ! -d ${EXPORTER_VENV} ]; then
6 | python3 -m venv ${EXPORTER_VENV}
7 | source ${EXPORTER_VENV}/bin/activate
8 | pip install -U pip
9 | pip install onnx onnxruntime onnxsim optimum==1.16.2
10 | fi
11 |
12 | source ${EXPORTER_VENV}/bin/activate
13 |
14 | DIR=bert-onnx
15 |
16 | python -m optimum.exporters.onnx --model "bert-base-uncased" --task fill-mask --opset 14 ${DIR}
17 |
18 | ONNXSIM_FIXED_POINT_ITERS=1000 \
19 | onnxsim ./${DIR}/model.onnx ./${DIR}/model.onnx --overwrite-input-shape input_ids:1,100 attention_mask:1,100 token_type_ids:1,100
20 |
21 | printf "\e[1;32mExported in ${DIR}\e[0m\n"
22 |
--------------------------------------------------------------------------------
/crates/altius_py/export-fugumt.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eux
2 |
3 | EXPORTER_VENV=.exporter.venv
4 |
5 | if [ ! -d ${EXPORTER_VENV} ]; then
6 | python3 -m venv ${EXPORTER_VENV}
7 | source ${EXPORTER_VENV}/bin/activate
8 | pip install -U pip
9 | pip install onnx onnxruntime optimum==1.16.2
10 | fi
11 |
12 | source ${EXPORTER_VENV}/bin/activate
13 |
14 | DIR=fugumt-en-ja
15 |
16 | python -m optimum.exporters.onnx --model "staka/${DIR}" ${DIR}
17 |
18 | onnxsim ./${DIR}/encoder_model.onnx ./${DIR}/encoder_model.onnx --overwrite-input-shape input_ids:1,100 attention_mask:1,100
19 | onnxsim ./${DIR}/decoder_model.onnx ./${DIR}/decoder_model.onnx --overwrite-input-shape encoder_attention_mask:1,100 input_ids:1,100 encoder_hidden_states:1,100,512
20 |
21 | onnxsim ./${DIR}/decoder_model.onnx ./${DIR}/decoder_model.onnx --unused-output \
22 | present.0.encoder.key present.1.encoder.key present.2.encoder.key present.3.encoder.key present.4.encoder.key present.5.encoder.key \
23 | present.0.encoder.value present.1.encoder.value present.2.encoder.value present.3.encoder.value present.4.encoder.value present.5.encoder.value \
24 | present.0.decoder.key present.1.decoder.key present.2.decoder.key present.3.decoder.key present.4.decoder.key present.5.decoder.key \
25 | present.0.decoder.value present.1.decoder.value present.2.decoder.value present.3.decoder.value present.4.decoder.value present.5.decoder.value
26 |
27 | printf "\e[1;32mExported in ${DIR}\e[0m\n"
28 |
--------------------------------------------------------------------------------
/crates/altius_py/export-gpt2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eux
2 |
3 | EXPORTER_VENV=.exporter.venv
4 |
5 | if [ ! -d ${EXPORTER_VENV} ]; then
6 | python3 -m venv ${EXPORTER_VENV}
7 | source ${EXPORTER_VENV}/bin/activate
8 | pip install -U pip
9 | pip install onnx onnxruntime onnxsim optimum==1.16.2
10 | fi
11 |
12 | source ${EXPORTER_VENV}/bin/activate
13 |
14 | DIR=gpt2-onnx
15 |
16 | python -m optimum.exporters.onnx --model "gpt2" --task text-generation --opset 14 ${DIR}
17 |
18 | ONNXSIM_FIXED_POINT_ITERS=1000 \
19 | onnxsim ./${DIR}/model.onnx ./${DIR}/model.onnx --overwrite-input-shape input_ids:1,100 attention_mask:1,100 position_ids:1,100
20 |
21 | printf "\e[1;32mExported in ${DIR}\e[0m\n"
22 |
--------------------------------------------------------------------------------
/crates/altius_py/export-tinystories.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -eux
2 |
3 | DIR=TinyStories-33M
4 |
5 | optimum-cli export onnx \
6 | -m 'roneneldan/TinyStories-33M' \
7 | --opset 13 \
8 | --task causal-lm \
9 | $DIR
10 |
11 | onnxsim \
12 | $DIR/decoder_model.onnx $DIR/decoder_model.onnxsim.onnx \
13 | --overwrite-input-shape input_ids:1,100 attention_mask:1,100
14 |
15 |
--------------------------------------------------------------------------------
/crates/altius_py/export_vit.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 |
4 | import onnx, onnx.checker, onnx.shape_inference
5 | import onnxsim
6 | from torchvision.models import ViT_B_16_Weights
7 |
8 | model = torchvision.models.vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_SWAG_LINEAR_V1)
9 | model.eval()
10 |
11 | path = "../../models/vit_b_16.onnx"
12 | torch.onnx.export(model, torch.randn(1, 3, 224, 224), path, opset_version=14)
13 |
14 | model, ok = onnxsim.simplify(path)
15 | assert ok
16 |
17 | onnx.checker.check_model(model)
18 | model = onnx.shape_inference.infer_shapes(model)
19 | onnx.save(model, path)
20 |
--------------------------------------------------------------------------------
/crates/altius_py/fastvit.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | from urllib.request import urlopen
5 | from PIL import Image
6 |
7 | import onnx
8 | import onnxsim
9 |
10 | import torch
11 | import timm
12 | import onnxruntime as ort
13 | import altius_py as alt
14 |
15 |
16 | def main():
17 | img = Image.open(
18 | urlopen(
19 | "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png"
20 | )
21 | )
22 |
23 | model = timm.create_model("fastvit_s12.apple_in1k", pretrained=True)
24 | model = model.eval()
25 |
26 | path = "../../models/fastvit.onnx"
27 | if not os.path.exists(path):
28 | torch.onnx.export(
29 | model,
30 | torch.randn(1, 3, 256, 256),
31 | path,
32 | input_names=["input"],
33 | output_names=["output"],
34 | opset_version=12,
35 | )
36 | _model, check = onnxsim.simplify(onnx.load(path))
37 | assert check, "Failed to simplify model"
38 | onnx.save(_model, path)
39 |
40 | data_config = timm.data.resolve_model_data_config(model)
41 | transforms = timm.data.create_transform(**data_config, is_training=False)
42 |
43 | # For altius
44 | os.environ["GOMP_WAIT_POLICY"] = "ACTIVE"
45 | os.environ["GOMP_CPU_AFFINITY"] = "0-7"
46 |
47 | # ort_sess = ort.InferenceSession("fastvit.onnx", providers=["CPUExecutionProvider"])
48 | alt_sess = alt.InferenceSession(path, backend="cpu", intra_op_num_threads=8)
49 |
50 | with open("../../models/imagenet_classes.txt") as f:
51 | class_names = [line.strip() for line in f.readlines()]
52 | class_idx_to_label = {i: class_names[i] for i in range(len(class_names))}
53 |
54 | output = alt_sess.run(
55 | None,
56 | {"input": transforms(img).unsqueeze(0).numpy()},
57 | )[0]
58 |
59 | top5_probabilities, top5_class_indices = torch.topk(
60 | torch.tensor(output).softmax(dim=1) * 100, k=5
61 | )
62 | print(f"top 5 probs: {top5_probabilities}")
63 | print(
64 | f"top 5 labels: {[class_idx_to_label[idx] for idx in top5_class_indices.squeeze(0).tolist()]}"
65 | )
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
--------------------------------------------------------------------------------
/crates/altius_py/fcn.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os
3 | import random
4 | import logging
5 | from itertools import cycle
6 |
7 | import numpy as np
8 | import torch
9 | from torchvision.transforms.functional import to_pil_image
10 | from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
11 | from torchvision import transforms
12 | from matplotlib import colors as mcolors
13 | from PIL import Image
14 |
15 | import onnxruntime as ort
16 | import altius_py
17 |
18 |
19 | def main():
20 | logging.basicConfig(level=logging.INFO)
21 |
22 | path = "../../models/cat.png"
23 | image = Image.open(path).resize((520, 520))
24 |
25 | weights = FCN_ResNet50_Weights.DEFAULT
26 | preprocess = weights.transforms()
27 | input = np.ascontiguousarray((preprocess(image).unsqueeze(0)))
28 |
29 | # sess_options = ort.SessionOptions()
30 | # sess_options.intra_op_num_threads = 1
31 | # sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
32 | # sess = ort.InferenceSession(
33 | # "../../models/fcn-resnet50.onnx", sess_options=sess_options
34 | # )
35 | sess = altius_py.InferenceSession("../../models/fcn-resnet50.onnx")
36 |
37 | inputs = {"input.1": input}
38 |
39 | start = time.time()
40 | output = sess.run(None, inputs)[0]
41 | print(f"Inference elapsed: {time.time() - start}")
42 |
43 | prediction = torch.tensor(output)
44 | normalized_masks = prediction.softmax(dim=1)
45 | class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])}
46 | colors = cycle(mcolors.BASE_COLORS.values())
47 | color_like = lambda input: [torch.full_like(input, c) for c in next(colors)]
48 |
49 | for klass, idx in class_to_idx.items():
50 | if klass == "__background__":
51 | continue
52 |
53 | mask = normalized_masks[0, idx]
54 | if torch.max(mask) < 0.2:
55 | # No objects of this class
56 | continue
57 |
58 | mask_img = to_pil_image(
59 | torch.stack(color_like(mask) + [mask * 0.5]),
60 | mode="RGBA",
61 | )
62 | image = Image.alpha_composite(image.convert("RGBA"), mask_img)
63 |
64 | image.save("masked.png")
65 | image.show()
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
--------------------------------------------------------------------------------
/crates/altius_py/fuse_attn.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import copy
3 | from typing import DefaultDict, Dict, List, Optional, Set, Tuple
4 |
5 | import onnx
6 | from onnx import ModelProto, NodeProto, helper
7 | import onnxruntime as ort
8 |
9 | # from onnxruntime.transformers.onnx_model import OnnxModel
10 | # from onnxruntime.transformers.fusion_attention import FusionAttention
11 | # from onnxruntime.transformers.fusion_attention import AttentionMask
12 |
13 | # class DeiT(OnnxModel):
14 | # def __init__(self, model: ModelProto):
15 | # super().__init__(model)
16 | # self.attn_mask = AttentionMask(self)
17 | # self.attn_fusion = FusionAttention(self, 384, 6, self.attn_mask)
18 | #
19 | # def fuse(self):
20 | # self.attn_fusion.apply()
21 |
22 |
23 | def create_value_to_users(
24 | model: onnx.ModelProto,
25 | ) -> DefaultDict[str, List[onnx.NodeProto]]:
26 | value_to_users = collections.defaultdict(lambda: [])
27 | for node in model.graph.node:
28 | for input in node.input:
29 | value_to_users[input].append(node)
30 | return value_to_users
31 |
32 |
33 | def fuse_mha(
34 | root: NodeProto,
35 | visited: Set[str],
36 | value_to_users: DefaultDict[str, List[NodeProto]],
37 | ) -> Optional[NodeProto]:
38 | if root.op_type != "LayerNormalization":
39 | return None
40 |
41 | ln = root
42 | if len(value_to_users[ln.output[0]]) != 3:
43 | return None
44 |
45 | mm1, mm2, mm3 = value_to_users[ln.output[0]]
46 | if mm1.op_type != "MatMul" or mm2.op_type != "MatMul" or mm3.op_type != "MatMul":
47 | return None
48 |     # The three users are the Q/K/V projection MatMuls; each must feed
49 |     # exactly one Add (its bias addition).
50 |
51 | add1, add2, add3 = (
52 | value_to_users[mm1.output[0]],
53 | value_to_users[mm2.output[0]],
54 | value_to_users[mm3.output[0]],
55 | )
56 | if len(add1) != 1 or len(add2) != 1 or len(add3) != 1:
57 | return None
58 | add1, add2, add3 = add1[0], add2[0], add3[0]
59 | if add1.op_type != "Add" or add2.op_type != "Add" or add3.op_type != "Add":
60 | return None
61 |
62 | key = None
63 | query = None
64 | value = None
65 | for out in [add1.output[0], add2.output[0], add3.output[0]]:
66 | if "attention/key" in out:
67 | key = out
68 | elif "attention/query" in out:
69 | query = out
70 | elif "attention/value" in out:
71 | value = out
72 | if key is None or query is None or value is None:
73 | return None
74 |
75 | que = []
76 | que.extend(value_to_users[key])
77 | que.extend(value_to_users[query])
78 | que.extend(value_to_users[value])
79 | exit_reshape_node = None
80 | while que:
81 | node = que.pop(0)
82 | visited.add(node.name)
83 | if node.op_type == "Reshape" and "attention/attention/Reshape_3" in node.name:
84 | print(node.name)
85 | exit_reshape_node = node
86 | break
87 | users = value_to_users[node.output[0]]
88 | que.extend(users)
89 | assert exit_reshape_node is not None
90 |
91 | num_heads = 6
92 | mha_node = helper.make_node(
93 | "MultiHeadAttention",
94 | inputs=[query, key, value],
95 | outputs=[exit_reshape_node.output[0]],
96 | name=f"MultiHeadAttention@{ln.name}",
97 | )
98 | mha_node.domain = "com.microsoft"
99 | mha_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])
100 |
101 | print("Fusing MHA")
102 |
103 | return mha_node
104 |
105 |
106 | def topo_sort(
107 | model: onnx.ModelProto, nodes: List[onnx.NodeProto]
108 | ) -> List[onnx.NodeProto]:
109 | node_to_order = {}
110 | for i, n in enumerate(model.graph.node):
111 | node_to_order[n.output[0]] = i
112 |
113 | order_and_nodes = []
114 | for n in nodes:
115 | order_and_nodes.append((node_to_order[n.output[0]], n))
116 |
117 | order_and_nodes.sort(key=lambda x: x[0])
118 | return [n for _, n in order_and_nodes]
119 |
120 |
121 | def fuse(model: ModelProto) -> ModelProto:
122 | users = create_value_to_users(model)
123 | new_model = copy.deepcopy(model)
124 | del new_model.graph.node[:]
125 |
126 | visited: Set[str] = set()
127 | nodes = []
128 | for node in model.graph.node:
129 | if node.name in visited:
130 | continue
131 |
132 | nodes.append(node)
133 |
134 | mha = fuse_mha(node, visited, users)
135 | if mha is not None:
136 | nodes.append(mha)
137 |
138 | sorted_nodes = topo_sort(model, nodes)
139 | for node in sorted_nodes:
140 | new_model.graph.node.add().CopyFrom(node)
141 |
142 | new_model.opset_import.append(helper.make_opsetid("com.microsoft", 1))
143 |
144 | onnx.checker.check_model(new_model)
145 |
146 | return new_model
147 |
148 |
149 | def main():
150 | model = onnx.load("../../models/deit.onnx")
151 | # deit = DeiT(copy.deepcopy(model))
152 | # deit.fuse()
153 |
154 | new_model = fuse(model)
155 |
156 | onnx.save(new_model, "./fused_deit.onnx")
157 |
158 |
159 | if __name__ == "__main__":
160 | main()
161 |
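162 | # Illustrative self-check for the helpers above (hypothetical two-node graph,
163 | # Relu -> Neg; not part of the DeiT flow). `create_value_to_users` maps a value
164 | # name to its consumer nodes, and `topo_sort` restores graph order so the fused
165 | # MHA node can be spliced back into a valid topological position:
166 | #
167 | #   x = helper.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [1])
168 | #   y = helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1])
169 | #   relu = helper.make_node("Relu", ["x"], ["a"], name="relu")
170 | #   neg = helper.make_node("Neg", ["a"], ["y"], name="neg")
171 | #   m = helper.make_model(helper.make_graph([relu, neg], "g", [x], [y]))
172 | #   assert [n.name for n in create_value_to_users(m)["a"]] == ["neg"]
173 | #   assert [n.name for n in topo_sort(m, [neg, relu])] == ["relu", "neg"]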
--------------------------------------------------------------------------------
/crates/altius_py/gpt2.py:
--------------------------------------------------------------------------------
1 | # python -m transformers.onnx --model=gpt2 --feature=causal-lm ./a
2 |
3 | import time
4 | import logging
5 | import os
6 | import sys
7 |
8 | from transformers import AutoTokenizer, BertTokenizer, top_k_top_p_filtering
9 | from transformers import GPT2Tokenizer, GPT2Model, GPT2LMHeadModel
10 | import onnxruntime as ort
11 | import numpy as np
12 | import altius_py
13 | import torch
14 | from torch.nn import functional as F
15 |
16 |
17 | logging.basicConfig(level=logging.INFO)
18 |
19 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
20 | sess = altius_py.InferenceSession(
21 | "./gpt2-onnx/model.onnx", intra_op_num_threads=16, enable_profile=True, backend="cpu"
22 | )
23 | # sess = ort.InferenceSession("./gpt2-onnx/model.onnx", providers=["CPUExecutionProvider"])
24 |
25 | torch.manual_seed(42)
26 |
27 | max_tokens = 100
28 | text = "Rust is a multi-paradigm, general-purpose programming language. Rust emphasizes performance,"
29 | for _ in range(1000):
30 | inputs = tokenizer(text, return_tensors="np")
31 |     seq_len = inputs["input_ids"].shape[1]  # avoid shadowing the builtin `len`
32 |
33 |     if seq_len >= max_tokens:
34 | break
35 |
36 | for name in ["input_ids", "attention_mask"]:
37 | input = np.zeros((1, max_tokens), dtype=np.int64)
38 | input[0, : inputs[name].shape[1]] = inputs[name]
39 | inputs[name] = input
40 |
41 | inputs["position_ids"] = np.arange(max_tokens).reshape((1, -1))
42 |
43 | outputs = sess.run(None, dict(inputs))
44 |
45 |     next_token_logits = outputs[0][:, seq_len - 1, :]
46 |
47 | filtered_next_token_logits = top_k_top_p_filtering(
48 | torch.tensor(next_token_logits), top_k=50, top_p=1.0
49 | )
50 | probs = F.softmax(filtered_next_token_logits, dim=-1)
51 | next_token = torch.multinomial(probs, num_samples=1)
52 |     generated = torch.cat([torch.tensor(inputs["input_ids"][0, :seq_len]), next_token[0]])
53 | resulting_string = tokenizer.decode(generated.tolist())
54 | print(resulting_string)
55 | text = resulting_string
56 |
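57 | # Note: export-gpt2.sh freezes the exported model's input shapes to (1, 100)
58 | # with onnxsim, which is why inputs are zero-padded up to max_tokens = 100 and
59 | # the next-token logits are read at position seq_len - 1 instead of at the end
60 | # of the padded buffer.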
--------------------------------------------------------------------------------
/crates/altius_py/mandelbrot.py:
--------------------------------------------------------------------------------
1 | import os
2 | import matplotlib.pyplot as plt
3 |
4 | import torch.nn as nn
5 | import onnxsim
6 | import onnx
7 | import torch
8 | import onnxruntime as ort
9 | import altius_py
10 |
11 | W = 320 * 3
12 | H = 240 * 3
13 | XMIN = -2.4
14 | XMAX = 1.2
15 | YMIN = -1.2
16 | YMAX = 1.2
17 |
18 |
19 | class Mandelbrot(nn.Module):
20 | def forward(self, k, zx, zy):
21 | w = W
22 | h = H
23 | x = torch.linspace(XMIN, XMAX, W, dtype=torch.float32)
24 | y = torch.linspace(YMIN, YMAX, H, dtype=torch.float32)
25 | cx, cy = torch.meshgrid([x, y])
26 | cx = cx.to(torch.float32)
27 | cy = cy.to(torch.float32)
28 |
29 | zx2 = zx**2
30 | zy2 = zy**2
31 | inf = (zx2 + zy2) > 4
32 | max = torch.max(k)
33 | k[inf] = max + 1
34 | zxn = zx2 - zy2 + cx
35 | zyn = 2 * zx * zy + cy
36 | return k, zxn, zyn
37 |
38 |
39 | if __name__ == "__main__":
40 | model = Mandelbrot()
41 |
42 | zx = torch.zeros(W * H, dtype=torch.float32).reshape(W, H)
43 | zy = torch.zeros(W * H, dtype=torch.float32).reshape(W, H)
44 | k = torch.zeros(W * H, dtype=torch.float32).reshape(W, H)
45 | path = "/tmp/mandelbrot.onnx"
46 |
47 | if not os.path.exists(path):
48 | torch.onnx.export(
49 | model,
50 | {"k": k, "zx": zx, "zy": zy},
51 | path,
52 | input_names=["k", "zx", "zy"],
53 | opset_version=12,
54 | )
55 | simplified, ok = onnxsim.simplify(onnx.load(path))
56 | assert ok
57 | onnx.save(simplified, path)
58 |
59 | model = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
60 | # model = altius_py.InferenceSession(path)
61 |
62 | k = k.numpy()
63 | zx = zx.numpy()
64 | zy = zy.numpy()
65 |
66 | for i in range(100):
67 | k, zxn, zyn = model.run(None, {"k": k, "zx": zx, "zy": zy})
68 | zx = zxn
69 | zy = zyn
70 |
71 | mandelbrot = k.T
72 |
73 | plt.figure(figsize=(3.200, 2.400), dpi=1000)
74 | img = plt.imshow(mandelbrot)
75 | img.set_cmap("hot")
76 | plt.axis("off")
77 | # plt.savefig("mandel.png", dpi=100)
78 | plt.show()
79 |
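80 | # For reference: with z = zx + i*zy and c = cx + i*cy, one Mandelbrot step
81 | # z <- z**2 + c expands in real arithmetic to
82 | #   zxn = zx**2 - zy**2 + cx,   zyn = 2*zx*zy + cy,
83 | # which is exactly the update in Mandelbrot.forward; `k` records the iteration
84 | # at which |z|**2 first exceeds 4 (the `inf` escape test). Quick check:
85 | #
86 | #   z, c = complex(0.3, 0.5), complex(-0.7, 0.2)
87 | #   assert abs(z * z + c - complex(0.3**2 - 0.5**2 - 0.7, 2 * 0.3 * 0.5 + 0.2)) < 1e-12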
--------------------------------------------------------------------------------
/crates/altius_py/mobilenet.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import time
4 |
5 | from matplotlib import pyplot as plt
6 | from PIL import Image
7 | import numpy as np
8 |
9 | from torchvision import transforms
10 | import onnxruntime as ort
11 | import altius_py
12 |
13 |
14 | def main():
15 | labels = open("../../models/imagenet_classes.txt").readlines()
16 | image = Image.open("../../models/cat.png")
17 |
18 | preprocess = transforms.Compose(
19 | [
20 | transforms.Resize(256),
21 | transforms.CenterCrop(224),
22 | transforms.ToTensor(),
23 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
24 | ]
25 | )
26 | input = preprocess(image)
27 | input = input.unsqueeze(0).numpy()
28 |
29 | sess = altius_py.InferenceSession("../../models/mobilenetv3.onnx")
30 |
31 | inputs = {"input": input}
32 | output = sess.run(None, inputs)[0][0]
33 | output = np.argsort(output)[::-1][:5]
34 | output = [labels[i].strip() for i in output]
35 | print(f"top5: {output}")
36 |
37 |
38 | if __name__ == "__main__":
39 | main()
40 |
--------------------------------------------------------------------------------
/crates/altius_py/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "altius_py"
3 | version = "0.1.0"
4 | authors = [
5 | { name = "maekawatoshiki" }
6 | ]
7 | dependencies = [
8 | "pillow>=9.2.0",
9 | "matplotlib>=3.5.3",
10 | "maturin>=1.1.0",
11 | "onnx>=1.15.0",
12 | "transformers==4.41.2",
13 | "onnxsim==0.4.17",
14 | "numpy>=1.25.2",
15 | "onnxruntime>=1.15.1",
16 | "pytest>=7.4.4",
17 | "pytest-xdist>=3.3.1",
18 | "torch>=2.2.2",
19 | "onnxscript>=0.1.0.dev20240227",
20 | "pip>=24.0",
21 | "optimum>=1.18.1",
22 | "torchvision>=0.17.2",
23 | "tabulate>=0.9.0",
24 | "timm>=0.9.16",
25 | "packaging>=24.1",
26 | "einops>=0.8.0",
27 | "fpzip>=1.2.4",
28 | "zfpy>=1.0.0",
29 | "onnxruntime-genai>=0.3.0; sys_platform == 'linux'",
30 | "huggingface>=0.0.1",
31 | "netron>=7.8.3",
32 | "patchelf>=0.17.2.1; sys_platform == 'linux'",
33 | ]
34 | requires-python = "==3.12.*"
35 |
36 | [build-system]
37 | requires = ["maturin>=1.1.0"]
38 | build-backend = "maturin"
39 |
--------------------------------------------------------------------------------
/crates/altius_py/real-esrgan.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import time
3 | import numpy as np
4 | from PIL import Image
5 | from torchvision import transforms
6 | import os, random
7 | from matplotlib import pyplot as plt
8 | import onnxruntime as ort
9 | import logging
10 | from torchvision.transforms.functional import to_pil_image
11 | import torch
12 |
13 |
14 | def main():
15 | logging.basicConfig(level=logging.INFO)
16 | image = Image.open("../../models/cat.png").convert("RGB")
17 |
18 | preprocess = transforms.Compose(
19 | [
20 | transforms.Resize(256),
21 | transforms.CenterCrop(256),
22 | transforms.ToTensor(),
23 | # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
24 | ]
25 | )
26 | input = preprocess(image)
27 | input = input.unsqueeze(0).numpy()
28 | print(input.shape)
29 |
30 | path = "../../models/realesrgan_256x256.onnx"
31 | sess = altius_py.InferenceSession(
32 | path, intra_op_num_threads=32, enable_profile=True
33 | )
34 | # sess = ort.InferenceSession(path, providers=["CUDAExecutionProvider"])
35 | # sess_options = ort.SessionOptions()
36 | # # sess_options.enable_profiling = True
37 | # sess_options.intra_op_num_threads = 16
38 | # sess_options.inter_op_num_threads = 1
39 | # sess = ort.InferenceSession(
40 | # path,
41 | # providers=["CPUExecutionProvider"],
42 | # # providers=["CUDAExecutionProvider"],
43 | # sess_options=sess_options,
44 | # )
45 |
46 | inputs = {"input.1": input}
47 | start = time.time()
48 | output = sess.run(None, inputs)[0]
49 | print(f"elapsed: {time.time() - start}")
50 |
51 | # print(output.shape)
52 | # print(output.max())
53 | # print(output.min())
54 | img = to_pil_image(torch.tensor(output.clip(0, 1)).squeeze())
55 |
56 | img.save("a.png")
57 | img.show()
58 |
59 |
60 | if __name__ == "__main__":
61 | main()
62 |
--------------------------------------------------------------------------------
/crates/altius_py/resnet50.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os
3 |
4 | # os.environ["GOMP_CPU_AFFINITY"] = "0-7"
5 | # os.environ["OMP_WAIT_POLICY"] = "active"
6 |
7 | from PIL import Image
8 | import numpy as np
9 |
10 | import torch
11 | from torchvision import transforms
12 | from torchvision.models import resnet50
13 |
14 | import onnxruntime as ort
15 | import altius_py
16 |
17 |
18 | def main():
19 | model_path = "../../models/resnet50.onnx"
20 |
21 | if not os.path.exists(model_path):
22 | with torch.no_grad():
23 | model = resnet50(pretrained=True)
24 | torch.onnx.export(
25 | model,
26 | torch.randn(1, 3, 224, 224, dtype=torch.float32),
27 | model_path,
28 | verbose=True,
29 | )
30 |
31 | labels = open("../../models/imagenet_classes.txt").readlines()
32 | image = Image.open("../../models/cat.png")
33 |
34 | preprocess = transforms.Compose(
35 | [
36 | transforms.Resize(256),
37 | transforms.CenterCrop(224),
38 | transforms.ToTensor(),
39 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
40 | ]
41 | )
42 | input = preprocess(image).unsqueeze(0).numpy()
43 |
44 | use_ort = False
45 | if use_ort:
46 | sess = ort.InferenceSession(model_path)
47 | else:
48 | sess = altius_py.InferenceSession(
49 | model_path,
50 | intra_op_num_threads=1,
51 | backend="cpu",
52 | )
53 |
54 | inputs = {"input.1": input}
55 | for _ in range(10):
56 | start = time.time()
57 | output = sess.run(None, inputs)[0][0]
58 | print(f"Elapsed: {(time.time() - start) * 1000.0:.3f} [ms]")
59 | output = np.argsort(output)[::-1][:5]
60 | output = [labels[i].strip() for i in output]
61 | print(f"Top-5: {output}")
62 |
63 |
64 | if __name__ == "__main__":
65 | main()
66 |
--------------------------------------------------------------------------------
/crates/altius_py/show-ort-profile.py:
--------------------------------------------------------------------------------
1 | import json
2 | import argparse
3 | from collections import defaultdict
4 |
5 | from tabulate import tabulate
6 |
7 |
8 | def main():
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument("filepath", help="onnxruntime profile json", type=str)
11 | args = parser.parse_args()
12 |
13 | profile = json.load(open(args.filepath))
14 | durations = defaultdict(lambda: 0)
15 |
16 | for elem in profile:
17 |         attrs = elem.get("args")  # renamed to avoid clobbering argparse's `args`
18 |         if attrs:
19 |             op = attrs.get("op_name")
20 | if op:
21 | dur = int(elem.get("dur"))
22 | durations[op] += dur
23 |
24 | table = [(op, dur / 1000.0) for op, dur in durations.items()]
25 | table.append(("*Total*", sum(dur for _, dur in table)))
26 | table = sorted(table, key=lambda x: x[1])
27 |
28 | print(tabulate(table, tablefmt="simple_outline", headers=["Op", "Duration [ms]"]))
29 |
30 |
31 | if __name__ == "__main__":
32 | main()
33 |
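34 | # The profile file is a JSON array of chrome-trace events. The entries
35 | # aggregated here look roughly like this (illustrative subset of fields):
36 | #
37 | #   {"cat": "Node", "dur": 1500, "args": {"op_name": "Conv"}}
38 | #
39 | # "dur" is in microseconds, hence the division by 1000.0 to report milliseconds.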
--------------------------------------------------------------------------------
/crates/altius_py/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -eux
2 |
3 | if [ ! -d ".venv" ]; then
4 | uv sync
5 | fi
6 |
7 | export RUST_LOG=INFO
8 |
9 | if [ "${1:-nobuild}" = "build" ]; then
10 |   if [ -z "${GITHUB_ACTIONS:-}" ]; then  # `:-` keeps `set -u` from aborting when unset
11 | uv run maturin develop -r --target-dir ./target > /dev/null
12 | else
13 | uv run maturin develop -r > /dev/null
14 | fi
15 | fi
16 |
17 | unset GOMP_CPU_AFFINITY
18 |
19 | n=$(nproc)
20 | uv run python -m pytest . -n $((n > 16 ? 16 : n))
21 |
--------------------------------------------------------------------------------
/crates/altius_py/tests/test.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 | env,
3 | fs::read_dir,
4 | io,
5 | path::{Path, PathBuf},
6 | };
7 |
8 | use cargo_util::paths::mtime_recursive;
9 |
10 | #[test]
11 | fn run_python_tests() {
12 | // If build artifacts are modified, run `maturin develop -r` by passing `build` option to
13 | // `./test.sh`.
14 | let root = get_project_root().unwrap();
15 | let target_mtime = mtime_recursive(&root.join("target/")).unwrap();
16 | // TODO: Better not hard-code venv dir `.venv`.
17 | let build = mtime_recursive(Path::new(".venv")).map_or("build", |src_mtime| {
18 | if target_mtime > src_mtime {
19 | "build"
20 | } else {
21 | ""
22 | }
23 | });
24 | assert!(std::process::Command::new("bash")
25 | .arg("./test.sh")
26 | .arg(build)
27 | .spawn()
28 | .unwrap()
29 | .wait()
30 | .unwrap()
31 | .success())
32 | }
33 |
34 | #[cfg(test)]
35 | fn get_project_root() -> io::Result<PathBuf> {
36 | let path = env::current_dir()?;
37 | let path_ancestors = path.as_path().ancestors();
38 |
39 | for p in path_ancestors {
40 | let has_cargo = read_dir(p)?.any(|p| p.unwrap().file_name() == *"Cargo.lock");
41 | if has_cargo {
42 | return Ok(PathBuf::from(p));
43 | }
44 | }
45 |
46 | Err(io::Error::new(
47 | io::ErrorKind::NotFound,
48 | "Cargo.lock not found",
49 | ))
50 | }
51 |
--------------------------------------------------------------------------------
/crates/altius_py/tests/test_ops_concat.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_concat_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_concat(
14 | os.path.join(tmpdir, "model.onnx"),
15 | [1, 1, 10],
16 | [1, 3, 10],
17 | [1, 4, 10],
18 | axis=1,
19 | )
20 |
21 |
22 | def op_concat(filepath, shape_x, shape_y, shape_z, **kwargs):
23 | inputs = [
24 | helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x),
25 | helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y),
26 | ]
27 | outputs = [helper.make_tensor_value_info("z", TensorProto.FLOAT, shape_z)]
28 | nodes = [helper.make_node("Concat", ["x", "y"], ["z"], **kwargs)]
29 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
30 | model = helper.make_model(graph)
31 |
32 | onnx.save(model, filepath)
33 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
34 |
35 | for backend in ["interpreter", "cpu"]:
36 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
37 |
38 | x = np.random.random_sample(shape_x).astype(np.float32)
39 | y = np.random.random_sample(shape_y).astype(np.float32)
40 | inputs = {"x": x, "y": y}
41 | expected = ort_sess.run(None, inputs)
42 | actual = altius_sess.run(None, inputs)
43 |
44 | for expected, actual in zip(expected, actual):
45 | assert np.allclose(expected, actual)
46 |
--------------------------------------------------------------------------------
/crates/altius_py/tests/test_ops_conv.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | @pytest.mark.parametrize("bias", [False, True])
12 | def test_conv2d_1(bias):
13 | with tempfile.TemporaryDirectory() as tmpdir:
14 | op_conv2d(
15 | os.path.join(tmpdir, "model.onnx"),
16 | [1, 3, 224, 224],
17 | [16, 3, 3, 3],
18 | [1, 16, 112, 112],
19 | bias=bias,
20 | pads=[1, 1, 1, 1],
21 | strides=[2, 2],
22 | )
23 |
24 |
25 | @pytest.mark.parametrize("bias", [False, True])
26 | def test_conv2d_2(bias):
27 | with tempfile.TemporaryDirectory() as tmpdir:
28 | op_conv2d(
29 | os.path.join(tmpdir, "model.onnx"),
30 | [1, 16, 112, 112],
31 | [16, 1, 3, 3],
32 | [1, 16, 112, 112],
33 | bias=bias,
34 | group=16,
35 | pads=[1, 1, 1, 1],
36 | )
37 |
38 |
39 | def op_conv2d(filepath, shape_x, shape_w, shape_y, bias=False, **kwargs):
40 | inputs = [
41 | helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x),
42 | helper.make_tensor_value_info("w", TensorProto.FLOAT, shape_w),
43 | ]
44 | if bias:
45 | inputs.append(
46 | helper.make_tensor_value_info("b", TensorProto.FLOAT, [shape_w[0]])
47 | )
48 |
49 | outputs = [helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y)]
50 | nodes = [
51 | helper.make_node(
52 | "Conv",
53 | ["x", "w", "b"] if bias else ["x", "w"],
54 | ["y"],
55 | kernel_shape=[shape_w[2], shape_w[3]],
56 | **kwargs,
57 | )
58 | ]
59 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
60 | model = helper.make_model(graph)
61 |
62 | onnx.save(model, filepath)
63 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
64 | altius_sess = altius_py.InferenceSession(filepath)
65 |
66 | x = np.random.random_sample(shape_x).astype(np.float32)
67 | w = np.random.random_sample(shape_w).astype(np.float32)
68 | b = np.random.random_sample(shape_w[0]).astype(np.float32) if bias else None
69 | inputs = {"x": x, "w": w, "b": b} if bias else {"x": x, "w": w}
70 | expected = ort_sess.run(None, inputs)
71 | actual = altius_sess.run(None, inputs)
72 |
73 | for expected, actual in zip(expected, actual):
74 | assert np.allclose(expected, actual)
75 |
--------------------------------------------------------------------------------
/crates/altius_py/tests/test_ops_gather.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_gather_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_gather(
14 | os.path.join(tmpdir, "model.onnx"),
15 | [1, 5, 10],
16 | 2,
17 | [1, 1, 10],
18 | axis=1,
19 | )
20 |
21 |
22 | def test_gather_2():
23 | with tempfile.TemporaryDirectory() as tmpdir:
24 | op_gather(
25 | os.path.join(tmpdir, "model.onnx"),
26 | [5, 10],
27 | [1, 3],
28 | [2, 10],
29 | axis=0,
30 | )
31 |
32 |
33 | def op_gather(filepath, shape_x, indices, shape_z, **kwargs):
34 | shape_y = [] if isinstance(indices, int) else [1, len(indices)]
35 | inputs = [
36 | helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x),
37 | helper.make_tensor_value_info("y", TensorProto.INT64, shape_y),
38 | ]
39 | outputs = [helper.make_tensor_value_info("z", TensorProto.FLOAT, shape_z)]
40 | nodes = [helper.make_node("Gather", ["x", "y"], ["z"], **kwargs)]
41 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
42 | model = helper.make_model(graph)
43 |
44 | onnx.save(model, filepath)
45 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
46 |
47 | for backend in ["interpreter", "cpu"]:
48 |         altius_sess = altius_py.InferenceSession(filepath, backend=backend)
49 |
50 | x = np.random.random_sample(shape_x).astype(np.float32)
51 | y = np.array(indices).astype(np.int64).reshape(shape_y)
52 | inputs = {"x": x, "y": y}
53 | expected = ort_sess.run(None, inputs)
54 | actual = altius_sess.run(None, inputs)
55 |
56 | for expected, actual in zip(expected, actual):
57 | assert np.allclose(expected, actual)
58 |
--------------------------------------------------------------------------------
/crates/altius_py/tests/test_ops_gemm.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_gemm_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_gemm(os.path.join(tmpdir, "model.onnx"), [5, 10], [10, 15], [5, 15], [15])
14 |
15 |
16 | def test_gemm_2():
17 | with tempfile.TemporaryDirectory() as tmpdir:
18 | op_gemm(os.path.join(tmpdir, "model.onnx"), [5, 10], [10, 15], [5, 15], [5, 15])
19 |
20 |
21 | def test_gemm_3():
22 | with tempfile.TemporaryDirectory() as tmpdir:
23 | op_gemm(os.path.join(tmpdir, "model.onnx"), [5, 10], [10, 15], [5, 15])
24 |
25 |
26 | # TODO
27 | # def test_gemm_4():
28 | # with tempfile.TemporaryDirectory() as tmpdir:
29 | # op_gemm(
30 | # os.path.join(tmpdir, "model.onnx"), [3, 5, 10], [3, 10, 15], [3, 5, 15]
31 | # )
32 | #
33 | #
34 | # def test_gemm_5():
35 | # with tempfile.TemporaryDirectory() as tmpdir:
36 | # op_gemm(os.path.join(tmpdir, "model.onnx"), [1, 5, 10], [10, 15], [1, 5, 15])
37 |
38 |
39 | def op_gemm(filepath, shape_a, shape_b, shape_y, shape_c=None):
40 | inputs = [
41 | helper.make_tensor_value_info("a", TensorProto.FLOAT, shape_a),
42 | helper.make_tensor_value_info("b", TensorProto.FLOAT, shape_b),
43 | ]
44 | if shape_c:
45 | inputs.append(helper.make_tensor_value_info("c", TensorProto.FLOAT, shape_c))
46 | outputs = [helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y)]
47 | nodes = [
48 | helper.make_node(
49 | "Gemm",
50 | ["a", "b", "c"] if shape_c else ["a", "b"],
51 | ["y"],
52 | )
53 | ]
54 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
55 | model = helper.make_model(graph)
56 |
57 | onnx.save(model, filepath)
58 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
59 |
60 | for backend in ["interpreter", "cpu"]:
61 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
62 |
63 | a = np.random.random_sample(shape_a).astype(np.float32)
64 | b = np.random.random_sample(shape_b).astype(np.float32)
65 |         inputs = {"a": a, "b": b}
66 |         if shape_c:
67 |             inputs["c"] = np.random.random_sample(shape_c).astype(np.float32)
68 |
69 |         expected = ort_sess.run(None, inputs)
70 |         actual = altius_sess.run(None, inputs)
71 |
72 |         for expected, actual in zip(expected, actual):
73 |             assert np.allclose(expected, actual)
74 |
--------------------------------------------------------------------------------
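
For reference, the Gemm node built by op_gemm above uses the ONNX defaults (alpha=1, beta=1, no transposes), so it computes Y = A @ B + C with C broadcast to [M, N]. A numpy-only sketch of what the comparison checks (illustrative values):

import numpy as np

a = np.random.random_sample([5, 10]).astype(np.float32)
b = np.random.random_sample([10, 15]).astype(np.float32)
c = np.random.random_sample([15]).astype(np.float32)  # broadcast over rows

y = a @ b + c  # Gemm with alpha=1, beta=1, transA=0, transB=0
assert y.shape == (5, 15)
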
/crates/altius_py/tests/test_ops_matmul.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_matmul_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_matmul(os.path.join(tmpdir, "model.onnx"), [5, 10], [10, 15], [5, 15])
14 |
15 |
16 | def test_matmul_2():
17 | with tempfile.TemporaryDirectory() as tmpdir:
18 | op_matmul(os.path.join(tmpdir, "model.onnx"), [3, 5, 10], [10, 15], [3, 5, 15])
19 |
20 |
21 | def test_matmul_3():
22 | with tempfile.TemporaryDirectory() as tmpdir:
23 | op_matmul(
24 | os.path.join(tmpdir, "model.onnx"), [3, 5, 10], [3, 10, 15], [3, 5, 15]
25 | )
26 |
27 |
28 | def test_matmul_4():
29 | with tempfile.TemporaryDirectory() as tmpdir:
30 | op_matmul(os.path.join(tmpdir, "model.onnx"), [1, 5, 10], [10, 15], [1, 5, 15])
31 |
32 |
33 | def op_matmul(
34 | filepath,
35 | shape_x,
36 | shape_y,
37 | shape_z,
38 | ):
39 | inputs = [
40 | helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x),
41 | helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y),
42 | ]
43 | outputs = [helper.make_tensor_value_info("z", TensorProto.FLOAT, shape_z)]
44 | nodes = [
45 | helper.make_node(
46 | "MatMul",
47 | ["x", "y"],
48 | ["z"],
49 | )
50 | ]
51 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
52 | model = helper.make_model(graph)
53 |
54 | onnx.save(model, filepath)
55 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
56 |
57 | for backend in ["interpreter", "cpu"]:
58 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
59 |
60 | x = np.random.random_sample(shape_x).astype(np.float32)
61 | y = np.random.random_sample(shape_y).astype(np.float32)
62 | inputs = {"x": x, "y": y}
63 | expected = ort_sess.run(None, inputs)
64 | actual = altius_sess.run(None, inputs)
65 |
66 | for expected, actual in zip(expected, actual):
67 | assert np.allclose(expected, actual)
68 |
--------------------------------------------------------------------------------
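
test_matmul_2 and test_matmul_4 exercise batched inputs against a 2-D right operand; ONNX MatMul follows numpy.matmul broadcasting, so leading dimensions broadcast while the last two act as a plain matrix product. A short sketch (illustrative shapes):

import numpy as np

x = np.random.random_sample([3, 5, 10]).astype(np.float32)
w = np.random.random_sample([10, 15]).astype(np.float32)

z = np.matmul(x, w)  # w is broadcast across the leading batch dimension
assert z.shape == (3, 5, 15)
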
/crates/altius_py/tests/test_ops_norm.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_batch_norm_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_batch_norm(os.path.join(tmpdir, "model.onnx"), [1, 20, 10, 10])
14 |
15 |
16 | def test_layer_norm_1():
17 | with tempfile.TemporaryDirectory() as tmpdir:
18 | op_layer_norm(os.path.join(tmpdir, "model.onnx"), [1, 20, 10])
19 |
20 |
21 | def op_batch_norm(filepath, shape, **kwargs):
22 | assert len(shape) == 4
23 | inputs = [
24 | helper.make_tensor_value_info("x", TensorProto.FLOAT, shape),
25 | helper.make_tensor_value_info("scale", TensorProto.FLOAT, [shape[1]]),
26 | helper.make_tensor_value_info("bias", TensorProto.FLOAT, [shape[1]]),
27 | helper.make_tensor_value_info("mean", TensorProto.FLOAT, [shape[1]]),
28 | helper.make_tensor_value_info("var", TensorProto.FLOAT, [shape[1]]),
29 | ]
30 | outputs = [helper.make_tensor_value_info("z", TensorProto.FLOAT, shape)]
31 | nodes = [
32 | helper.make_node(
33 | "BatchNormalization", ["x", "scale", "bias", "mean", "var"], ["z"], **kwargs
34 | )
35 | ]
36 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
37 | model = helper.make_model(graph)
38 |
39 | onnx.save(model, filepath)
40 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
41 |
42 | for backend in ["interpreter", "cpu"]:
43 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
44 |
45 | x = np.random.random_sample(shape).astype(np.float32)
46 | scale = np.random.random_sample(shape[1]).astype(np.float32)
47 | bias = np.random.random_sample(shape[1]).astype(np.float32)
48 | mean = np.random.random_sample(shape[1]).astype(np.float32)
49 | var = np.random.random_sample(shape[1]).astype(np.float32)
50 | inputs = {"x": x, "scale": scale, "bias": bias, "mean": mean, "var": var}
51 | expected = ort_sess.run(None, inputs)
52 | actual = altius_sess.run(None, inputs)
53 |
54 | for expected, actual in zip(expected, actual):
55 | assert np.allclose(expected, actual, rtol=1e-4, atol=1e-5)
56 |
57 |
58 | def op_layer_norm(filepath, shape, **kwargs):
59 | shape_scale = [1] * (len(shape) - 1) + [shape[-1]]
60 | inputs = [
61 | helper.make_tensor_value_info("x", TensorProto.FLOAT, shape),
62 | helper.make_tensor_value_info("scale", TensorProto.FLOAT, shape_scale),
63 | helper.make_tensor_value_info("bias", TensorProto.FLOAT, shape_scale),
64 | ]
65 | outputs = [helper.make_tensor_value_info("z", TensorProto.FLOAT, shape)]
66 | nodes = [
67 | helper.make_node("LayerNormalization", ["x", "scale", "bias"], ["z"], **kwargs)
68 | ]
69 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
70 | model = helper.make_model(graph)
71 |
72 | onnx.save(model, filepath)
73 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
74 |
75 | for backend in ["interpreter", "cpu"]:
76 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
77 |
78 | x = np.random.random_sample(shape).astype(np.float32)
79 | scale = np.random.random_sample(shape_scale).astype(np.float32)
80 | bias = np.random.random_sample(shape_scale).astype(np.float32)
81 | inputs = {"x": x, "scale": scale, "bias": bias}
82 | expected = ort_sess.run(None, inputs)
83 | actual = altius_sess.run(None, inputs)
84 |
85 | for expected, actual in zip(expected, actual):
86 | assert np.allclose(expected, actual, rtol=1e-4, atol=1e-5)
87 |
--------------------------------------------------------------------------------
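
op_layer_norm relies on onnxruntime for reference outputs; conceptually, LayerNormalization with the ONNX default axis=-1 normalizes over the last dimension and then applies scale and bias. A numpy sketch, assuming the ONNX default epsilon of 1e-5 (illustrative, not part of the tests):

import numpy as np

def layer_norm(x, scale, bias, eps=1e-5):
    # Normalize over the last axis, then apply the elementwise affine params.
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(var + eps) * scale + bias

x = np.random.random_sample([1, 20, 10]).astype(np.float32)
scale = np.ones([1, 1, 10], dtype=np.float32)
bias = np.zeros([1, 1, 10], dtype=np.float32)
assert layer_norm(x, scale, bias).shape == (1, 20, 10)
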
/crates/altius_py/tests/test_ops_pool.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_maxpool_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_maxpool(
14 | os.path.join(tmpdir, "model.onnx"),
15 | [1, 3, 224, 224],
16 | [1, 3, 112, 112],
17 | kernel_shape=[2, 2],
18 | pads=[0, 0, 0, 0],
19 | strides=[2, 2],
20 | auto_pad="NOTSET",
21 | )
22 |
23 |
24 | def test_maxpool_2():
25 | with tempfile.TemporaryDirectory() as tmpdir:
26 | op_maxpool(
27 | os.path.join(tmpdir, "model.onnx"),
28 | [1, 256, 20, 20],
29 | [1, 256, 20, 20],
30 | kernel_shape=[5, 5],
31 | pads=[2, 2, 2, 2],
32 | strides=[1, 1],
33 | ceil_mode=0,
34 | )
35 |
36 |
37 | def op_maxpool(filepath, shape_x, shape_y, **kwargs):
38 | inputs = [helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x)]
39 | outputs = [helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y)]
40 | nodes = [
41 | helper.make_node(
42 | "MaxPool",
43 | ["x"],
44 | ["y"],
45 | **kwargs,
46 | )
47 | ]
48 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
49 | model = helper.make_model(graph)
50 |
51 | onnx.save(model, filepath)
52 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
53 |
54 | for backend in ["interpreter", "cpu"]:
55 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
56 |
57 | x = np.random.random_sample(shape_x).astype(np.float32)
58 | inputs = {"x": x}
59 | expected = ort_sess.run(None, inputs)
60 | actual = altius_sess.run(None, inputs)
61 |
62 | for expected, actual in zip(expected, actual):
63 | assert np.allclose(expected, actual)
64 |
--------------------------------------------------------------------------------
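
The two MaxPool cases above choose kernel/stride/padding so the spatial dims either halve (224 -> 112) or stay fixed (20 -> 20). The relationship is the standard pooling output-size arithmetic, not anything altius-specific; a quick check:

def pooled_size(in_size, kernel, stride, pad_begin, pad_end):
    # floor rounding, i.e. ceil_mode=0 as in both tests above
    return (in_size + pad_begin + pad_end - kernel) // stride + 1

assert pooled_size(224, kernel=2, stride=2, pad_begin=0, pad_end=0) == 112
assert pooled_size(20, kernel=5, stride=1, pad_begin=2, pad_end=2) == 20
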
/crates/altius_py/tests/test_ops_reduce.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import os
6 | import numpy as np
7 | from onnx import helper, TensorProto
8 |
9 |
10 | def test_reduce_mean_1():
11 | with tempfile.TemporaryDirectory() as tmpdir:
12 | op_reduce(
13 | os.path.join(tmpdir, "model.onnx"),
14 | "ReduceMean",
15 | [1, 50, 70],
16 | [1, 50, 1],
17 | axes=[-1],
18 | )
19 |
20 |
21 | def test_reduce_mean_2():
22 | with tempfile.TemporaryDirectory() as tmpdir:
23 | op_reduce(
24 | os.path.join(tmpdir, "model.onnx"),
25 | "ReduceMean",
26 | [8, 4, 5, 5],
27 | [8, 4, 1, 1],
28 | axes=[2, 3],
29 | backends=["cpu"],
30 | )
31 |
32 |
33 | def test_reduce_max_1():
34 | with tempfile.TemporaryDirectory() as tmpdir:
35 | op_reduce(
36 | os.path.join(tmpdir, "model.onnx"),
37 | "ReduceMax",
38 | [1, 50, 70],
39 | [],
40 | keepdims=0,
41 | )
42 |
43 |
44 | def op_reduce(
45 | filepath, op_type, shape_x, shape_y, backends=["interpreter", "cpu"], **kwargs
46 | ):
47 | inputs = [helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x)]
48 | outputs = [helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y)]
49 | nodes = [helper.make_node(op_type, ["x"], ["y"], **kwargs)]
50 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
51 | model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
52 |
53 | onnx.checker.check_model(model)
54 | onnx.save(model, filepath)
55 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
56 |
57 | for backend in backends:
58 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
59 |
60 | x = np.random.random_sample(shape_x).astype(np.float32)
61 | inputs = {"x": x}
62 | expected = ort_sess.run(None, inputs)
63 | actual = altius_sess.run(None, inputs)
64 |
65 | for expected, actual in zip(expected, actual):
66 | assert np.allclose(expected, actual)
67 |
--------------------------------------------------------------------------------
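
ReduceMean and ReduceMax map directly onto numpy reductions; keepdims controls whether reduced axes collapse, and omitting axes (as in test_reduce_max_1) reduces over all of them. A numpy sketch of the shapes the tests expect:

import numpy as np

x = np.random.random_sample([1, 50, 70]).astype(np.float32)
assert np.mean(x, axis=-1, keepdims=True).shape == (1, 50, 1)
assert np.max(x).shape == ()  # keepdims=0, no axes: reduce everything
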
/crates/altius_py/tests/test_ops_resize.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | import pytest
3 | import os
4 |
5 | import numpy as np
6 |
7 | import onnxruntime as ort
8 | import onnx
9 | from onnx import helper, ValueInfoProto, TensorProto, numpy_helper
10 | import altius_py
11 |
12 |
13 | def test_resize_1():
14 | with tempfile.TemporaryDirectory() as tmpdir:
15 | op_resize(
16 | os.path.join(tmpdir, "model.onnx"),
17 | [1, 256, 20, 20],
18 | [1, 256, 40, 40],
19 | np.array([1.0, 1.0, 2.0, 2.0], dtype=np.float32),
20 | coordinate_transformation_mode="asymmetric",
21 | cubic_coeff_a=-0.75,
22 | mode="nearest",
23 | nearest_mode="floor",
24 | )
25 |
26 |
27 | def op_resize(filepath, shape_x, shape_y, scales, **kwargs):
28 | inputs = [helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x)]
29 | outputs = [helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y)]
30 | nodes = [
31 | helper.make_node(
32 | "Resize",
33 | ["x", "roi", "scales"],
34 | ["y"],
35 | **kwargs,
36 | )
37 | ]
38 |
39 | roi = numpy_helper.from_array(np.array([], dtype=np.float32), name="roi")
40 | scales = numpy_helper.from_array(scales, name="scales")
41 | graph = helper.make_graph(
42 | nodes, "graph", inputs, outputs, initializer=[roi, scales]
43 | )
44 | model = helper.make_model(graph)
45 |
46 | onnx.save(model, filepath)
47 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
48 | altius_sess = altius_py.InferenceSession(filepath)
49 |
50 | x = np.random.random_sample(shape_x).astype(np.float32)
51 | inputs = {"x": x}
52 | expected = ort_sess.run(None, inputs)
53 | actual = altius_sess.run(None, inputs)
54 |
55 | for expected, actual in zip(expected, actual):
56 | assert np.allclose(expected, actual)
57 |
--------------------------------------------------------------------------------
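
For the single Resize case above (mode="nearest", nearest_mode="floor", asymmetric coordinates, 2x scale), each output pixel (i, j) simply reads the input pixel (i//2, j//2). A numpy sketch of that index mapping, on a smaller illustrative input:

import numpy as np

x = np.random.random_sample([1, 1, 4, 4]).astype(np.float32)
scale = 2
# asymmetric + nearest/floor: out[i, j] = in[floor(i/scale), floor(j/scale)]
y = x[:, :, np.arange(8) // scale][:, :, :, np.arange(8) // scale]
assert y.shape == (1, 1, 8, 8)
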
/crates/altius_py/tests/test_ops_transpose.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_transpose_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_transpose(
14 | os.path.join(tmpdir, "model.onnx"),
15 | [50, 12, 64],
16 | [12, 64, 50],
17 | perm=[1, 2, 0],
18 | )
19 |
20 |
21 | def test_transpose_2():
22 | with tempfile.TemporaryDirectory() as tmpdir:
23 | op_transpose(
24 | os.path.join(tmpdir, "model.onnx"),
25 | [12, 64],
26 | [64, 12],
27 | perm=[1, 0],
28 | )
29 |
30 |
31 | def test_transpose_3():
32 | with tempfile.TemporaryDirectory() as tmpdir:
33 | op_transpose(
34 | os.path.join(tmpdir, "model.onnx"),
35 | [12, 64, 3, 5],
36 | [3, 64, 12, 5],
37 | perm=[2, 1, 0, 3],
38 | )
39 |
40 |
41 | def test_transpose_4():
42 | with tempfile.TemporaryDirectory() as tmpdir:
43 | op_transpose(
44 | os.path.join(tmpdir, "model.onnx"),
45 | [50, 12],
46 | [50, 12],
47 | perm=[0, 1],
48 | )
49 |
50 |
51 | def op_transpose(filepath, shape_x, shape_y, **kwargs):
52 | inputs = [helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x)]
53 | outputs = [helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y)]
54 | nodes = [helper.make_node("Transpose", ["x"], ["y"], **kwargs)]
55 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
56 | model = helper.make_model(graph)
57 |
58 | onnx.checker.check_model(model)
59 | onnx.save(model, filepath)
60 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
61 |
62 | for backend in ["interpreter", "cpu"]:
63 | altius_sess = altius_py.InferenceSession(filepath, backend=backend)
64 |
65 | x = np.random.random_sample(shape_x).astype(np.float32)
66 |         inputs = {"x": x}
67 |         expected = ort_sess.run(None, inputs)
68 |         actual = altius_sess.run(None, inputs)
69 |
70 |         for expected, actual in zip(expected, actual):
71 |             assert np.allclose(expected, actual)
72 |
--------------------------------------------------------------------------------
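
Each Transpose case above is equivalent to numpy.transpose with the same perm, including the identity permutation in test_transpose_4. A one-line sketch:

import numpy as np

x = np.random.random_sample([12, 64, 3, 5]).astype(np.float32)
y = np.transpose(x, (2, 1, 0, 3))  # perm from test_transpose_3
assert y.shape == (3, 64, 12, 5)
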
/crates/altius_py/tests/test_ops_where.py:
--------------------------------------------------------------------------------
1 | import altius_py
2 | import onnxruntime as ort
3 | import onnx
4 | import tempfile
5 | import pytest
6 | import os
7 | import numpy as np
8 | from onnx import helper, ValueInfoProto, TensorProto
9 |
10 |
11 | def test_where_1():
12 | with tempfile.TemporaryDirectory() as tmpdir:
13 | op_where(
14 | os.path.join(tmpdir, "model.onnx"),
15 | [1, 1, 10, 10],
16 | [1, 128, 10, 10],
17 | [1],
18 | )
19 |
20 |
21 | def test_where_2():
22 | with tempfile.TemporaryDirectory() as tmpdir:
23 | op_where(
24 | os.path.join(tmpdir, "model.onnx"),
25 | [1, 1, 1, 1],
26 | [1, 128, 1, 1],
27 | [1],
28 | )
29 |
30 |
31 | def op_where(filepath, shape_c, shape_x, shape_y, **kwargs):
32 | inputs = [
33 | helper.make_tensor_value_info("c", TensorProto.BOOL, shape_c),
34 | helper.make_tensor_value_info("x", TensorProto.FLOAT, shape_x),
35 | helper.make_tensor_value_info("y", TensorProto.FLOAT, shape_y),
36 | ]
37 | outputs = [helper.make_tensor_value_info("z", TensorProto.FLOAT, shape_x)]
38 | nodes = [helper.make_node("Where", ["c", "x", "y"], ["z"], **kwargs)]
39 | graph = helper.make_graph(nodes, "graph", inputs, outputs)
40 | model = helper.make_model(graph)
41 |
42 | onnx.checker.check_model(model)
43 | onnx.save(model, filepath)
44 | ort_sess = ort.InferenceSession(filepath, providers=["CPUExecutionProvider"])
45 | altius_sess = altius_py.InferenceSession(filepath)
46 |
47 | c = np.random.choice(a=[False, True], size=shape_c)
48 | x = np.random.random_sample(shape_x).astype(np.float32)
49 | y = np.random.random_sample(shape_y).astype(np.float32)
50 | inputs = {"c": c, "x": x, "y": y}
51 | expected = ort_sess.run(None, inputs)
52 | actual = altius_sess.run(None, inputs)
53 |
54 | for expected, actual in zip(expected, actual):
55 | assert np.allclose(expected, actual)
56 |
--------------------------------------------------------------------------------
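
Where broadcasts all three inputs; in test_where_1 the [1, 1, 10, 10] condition and the [1]-shaped else-branch both broadcast against the [1, 128, 10, 10] x, matching numpy.where. A sketch (scalar stands in for the [1]-shaped input):

import numpy as np

c = np.random.choice([False, True], size=[1, 1, 10, 10])
x = np.random.random_sample([1, 128, 10, 10]).astype(np.float32)
y = np.float32(0.5)

z = np.where(c, x, y)
assert z.shape == (1, 128, 10, 10)
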
/crates/altius_py/translation.py:
--------------------------------------------------------------------------------
1 | # python -m optimum.exporters.onnx --model "staka/fugumt-en-ja" --for-ort fugu
2 |
3 | import time
4 | import logging
5 | import os
6 | import sys
7 |
8 | from transformers import pipeline
9 | from transformers import MarianTokenizer
10 | import onnxruntime as ort
11 | import numpy as np
12 |
13 | import torch
14 | from torch.nn import functional as F
15 |
16 |
17 | def translate_baseline(text_en):
18 | fugu_translator = pipeline("translation", model="staka/fugumt-en-ja", device="cpu")
19 | result = fugu_translator(text_en)[0]["translation_text"]
20 | return result
21 |
22 |
23 | def translate_onnx(text_en):
24 | tokenizer = MarianTokenizer.from_pretrained("staka/fugumt-en-ja")
25 |
26 | use_altius = False
27 | if use_altius:
28 | import altius_py
29 |
30 | os.environ["GOMP_CPU_AFFINITY"] = "0-7"
31 | encoder = altius_py.InferenceSession(
32 | "./fugumt-en-ja/encoder_model.onnx",
33 | intra_op_num_threads=8,
34 | enable_profile=True,
35 | # backend="cpu"
36 | )
37 | decoder = altius_py.InferenceSession(
38 | "./fugumt-en-ja/decoder_model.onnx",
39 | intra_op_num_threads=8,
40 | enable_profile=True,
41 | # backend="cpu"
42 | )
43 | else:
44 | encoder = ort.InferenceSession(
45 | "./fugumt-en-ja/encoder_model.onnx", providers=["CPUExecutionProvider"]
46 | )
47 | decoder = ort.InferenceSession(
48 | "./fugumt-en-ja/decoder_model.onnx", providers=["CPUExecutionProvider"]
49 | )
50 |
51 | max_tokens = 100
52 | text = text_en
53 | text += ""
54 |
55 | inputs = tokenizer(
56 | text,
57 | return_tensors="np",
58 | padding=False,
59 | add_special_tokens=False,
60 | )
61 | len_ = inputs["input_ids"].shape[1]
62 |
63 |     # Reject inputs that do not fit in the fixed-length buffers below.
64 |     if len_ >= max_tokens:
65 |         raise ValueError(f"Input too long ({len_} tokens; max {max_tokens})")
66 |
67 |
68 | for name in ["input_ids", "attention_mask"]:
69 | input = np.zeros((1, max_tokens), dtype=np.int64)
70 | input[0, : inputs[name].shape[1]] = inputs[name]
71 | inputs[name] = input
72 |
73 | last_hidden_state = encoder.run(None, dict(inputs))[0]
74 |
75 | translated_text = ""
76 |     for i in range(max_tokens):
77 | decoder_text = tokenizer(
78 | translated_text,
79 | return_tensors="np",
80 | padding=False,
81 | text_target="ja",
82 | add_special_tokens=False,
83 | )
84 | len_ = decoder_text["input_ids"].shape[1]
85 |
86 | for name in ["input_ids", "attention_mask"]:
87 | input = np.zeros((1, max_tokens), dtype=np.int64)
88 | input[0, : decoder_text[name].shape[1]] = decoder_text[name]
89 | decoder_text[name] = input
90 |
91 | outputs = decoder.run(
92 | None,
93 | {
94 | "encoder_attention_mask": inputs["attention_mask"],
95 | "input_ids": decoder_text["input_ids"].reshape(1, -1),
96 | "encoder_hidden_states": last_hidden_state,
97 | },
98 | )
99 |
100 | if i >= len_:
101 | break
102 |
103 | next_token_logits = outputs[0][:, i, :32000]
104 |
105 | probs = F.softmax(torch.tensor(next_token_logits), dim=-1)
106 | ids = torch.argsort(-probs[0])
107 |         for cand in ids:  # candidates, most probable first
108 |             if cand == 2:
109 |                 continue
110 |             if cand == tokenizer.pad_token_id:
111 |                 print("PAD!")
112 |                 continue
113 |             token_id = cand
114 |             break
115 |         resulting_string = tokenizer.decode(
116 |             [token_id],
117 | skip_special_tokens=True, # clean_up_tokenization_spaces=False
118 | )
119 | print(resulting_string)
120 | translated_text += resulting_string
121 |
122 |     _, translated_text = translated_text.split("<pad>")  # drop the decoder-start marker
123 |
124 | return translated_text
125 |
126 |
127 | def main():
128 | text = "Attention is all you need."
129 |
130 | baseline_result = translate_baseline(text)
131 | onnx_result = translate_onnx(text)
132 | print(f"baseline: {baseline_result}")
133 | print(f"onnx: {onnx_result}")
134 |
135 |
136 | if __name__ == "__main__":
137 | logging.basicConfig(level=logging.INFO)
138 |
139 | main()
140 |
--------------------------------------------------------------------------------
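
translate_onnx implements greedy decoding by hand: at step i it re-runs the full decoder on the tokens generated so far and takes the most probable next token, skipping special ids. The core of that step in isolation (a sketch; `banned_ids` and the shapes are illustrative, and any callable returning logits of shape [1, seq, vocab] would do):

import numpy as np

def greedy_step(logits, step, banned_ids=(2,)):
    # logits: [1, seq_len, vocab]; pick the argmax at position `step`,
    # skipping banned special-token ids, as translation.py does.
    scores = logits[0, step]
    for tok in np.argsort(-scores):
        if tok not in banned_ids:
            return int(tok)
    raise RuntimeError("all candidate tokens were banned")
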
/crates/altius_py/vit.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import sys
4 | import logging
5 | import time
6 |
7 | import numpy as np
8 | from torchvision import transforms
9 | from PIL import Image
10 |
11 | import altius_py
12 | import onnxruntime as ort
13 |
14 |
15 | def main():
16 | logging.basicConfig(level=logging.INFO)
17 | os.environ["OMP_PROC_BIND"] = "TRUE"
18 | os.environ["BLIS_NUM_THREADS"] = "1" # Increase this number
19 |
20 | labels = open("../../models/imagenet_classes.txt").readlines()
21 | image = Image.open("../../models/cat.png")
22 |
23 | preprocess = transforms.Compose(
24 | [
25 | transforms.Resize(224),
26 | transforms.CenterCrop(224),
27 | transforms.ToTensor(),
28 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
29 | ]
30 | )
31 | input = preprocess(image)
32 | input = input.unsqueeze(0).numpy()
33 |
34 | # opt = ort.SessionOptions()
35 | # # opt.intra_op_num_threads = 1
36 | # # opt.inter_op_num_threads = 1
37 | # sess = ort.InferenceSession("../../models/vit_b_16.onnx", sess_options=opt)
38 |     sess = altius_py.InferenceSession("../../models/vit_b_16.onnx", enable_profile=True)
39 |
40 | inputs = {"x": input}
41 | start = time.time()
42 | output = sess.run(None, inputs)[0].reshape(1000)
43 | print(f"elapsed: {time.time() - start}")
44 | output = np.argsort(output)[::-1][:5]
45 | output = [labels[i].strip() for i in output]
46 | print(f"top5: {output}")
47 |
48 |
49 | if __name__ == "__main__":
50 | main()
51 |
--------------------------------------------------------------------------------
/crates/core/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "altius-core"
3 | version = "0.1.0"
4 | edition = "2024"
5 |
6 | [dependencies]
7 | id-arena = "^2.2.1"
8 | rustc-hash = { workspace = true }
9 | prost = "^0.10"
10 | thiserror = { workspace = true }
11 | log = { workspace = true }
12 | rand = "^0.8.5"
13 | ndarray = { workspace = true }
14 |
15 | [build-dependencies]
16 | prost-build = "^0.10"
17 |
18 | [dev-dependencies]
19 | insta = "^1.14.1"
20 |
--------------------------------------------------------------------------------
/crates/core/build.rs:
--------------------------------------------------------------------------------
1 | fn main() {
2 |     prost_build::compile_protos(&["src/onnx/onnx.proto"], &["src/"]).unwrap();
3 | }
4 |
--------------------------------------------------------------------------------
/crates/core/src/analysis/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod shape;
2 |
--------------------------------------------------------------------------------
/crates/core/src/dim.rs:
--------------------------------------------------------------------------------
1 | use std::{
2 | fmt,
3 | ops::{Deref, Index, IndexMut},
4 | slice::SliceIndex,
5 | };
6 |
7 | use crate::fixed_dim::{FixedDimension, FixedDimensions};
8 |
9 | #[derive(Clone, PartialEq, Eq, Hash)]
10 | pub enum Dimension {
11 | Fixed(FixedDimension),
12 | Dynamic(String),
13 | }
14 |
15 | /// An alternative to `FixedDimensions` that allows dynamic shape.
16 | #[derive(Clone, PartialEq, Eq, Hash)]
17 | pub struct Dimensions(pub Vec<Dimension>);
18 |
19 | impl Dimensions {
20 |     pub const fn new(dims: Vec<Dimension>) -> Self {
21 | Self(dims)
22 | }
23 |
24 | pub fn is_fixed(&self) -> bool {
25 | self.0.iter().all(|d| matches!(d, Dimension::Fixed(_)))
26 | }
27 |
28 | pub fn is_dynamic(&self) -> bool {
29 | self.0.iter().any(|d| matches!(d, Dimension::Dynamic(_)))
30 | }
31 |
32 |     pub fn as_fixed_dims(&self) -> Option<FixedDimensions> {
33 | if self.is_dynamic() {
34 | return None;
35 | }
36 |
37 | Some(FixedDimensions(
38 | self.iter()
39 | .map(|d| match d {
40 | Dimension::Fixed(d) => *d,
41 | Dimension::Dynamic(_) => unreachable!(),
42 | })
43 | .collect(),
44 | ))
45 | }
46 | }
47 |
48 | impl AsRef<Dimensions> for Dimensions {
49 | fn as_ref(&self) -> &Dimensions {
50 | self
51 | }
52 | }
53 |
54 | impl<I> Index<I> for Dimensions
55 | where
56 |     I: SliceIndex<[Dimension]>,
57 | {
58 |     type Output = <I as SliceIndex<[Dimension]>>::Output;
59 |
60 | fn index(&self, index: I) -> &Self::Output {
61 | &self.0[index]
62 | }
63 | }
64 |
65 | impl<I> IndexMut<I> for Dimensions
66 | where
67 | I: SliceIndex<[Dimension]>,
68 | {
69 | fn index_mut(&mut self, index: I) -> &mut Self::Output {
70 | &mut self.0[index]
71 | }
72 | }
73 |
74 | impl From<Vec<Dimension>> for Dimensions {
75 |     fn from(v: Vec<Dimension>) -> Dimensions {
76 | Dimensions(v)
77 | }
78 | }
79 |
80 | impl Deref for Dimensions {
81 |     type Target = Vec<Dimension>;
82 | fn deref(&self) -> &Self::Target {
83 | &self.0
84 | }
85 | }
86 |
87 | impl fmt::Debug for Dimension {
88 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
89 | match self {
90 | Dimension::Fixed(d) => write!(f, "{d}"),
91 | Dimension::Dynamic(s) => write!(f, "{s}"),
92 | }
93 | }
94 | }
95 |
96 | impl fmt::Debug for Dimensions {
97 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98 | write!(f, "{:?}", self.0)
99 | }
100 | }
101 |
102 | #[test]
103 | fn use_symdims() {
104 | let _ = Dimensions(vec![
105 | Dimension::Dynamic("batch".into()),
106 | Dimension::Fixed(8),
107 | ]);
108 | }
109 |
--------------------------------------------------------------------------------
/crates/core/src/flops.rs:
--------------------------------------------------------------------------------
1 | use rustc_hash::FxHashMap;
2 |
3 | use crate::{
4 | analysis::shape::{ShapeError, infer_shapes},
5 | model::Model,
6 | op::Op,
7 | };
8 |
9 | pub fn compute_flops(model: &Model) -> Result<usize, ShapeError> {
10 | let nodes = model.topo_sort_nodes(); // TODO: Dead node elimination
11 | let mut inferred_shapes = FxHashMap::default();
12 | let mut value_shapes = FxHashMap::default();
13 | infer_shapes(model, &mut inferred_shapes, &mut value_shapes)?;
14 | let mut flops = 0;
15 | for node_id in nodes {
16 | let node = &model.graph.nodes[node_id];
17 | flops += match &node.op {
18 | Op::MatMul => {
19 | let a_shape = &value_shapes[&node.inputs[0]];
20 | let b_shape = &value_shapes[&node.inputs[1]];
21 | let m = a_shape.dims[a_shape.dims.len() - 2];
22 | let k = a_shape.dims[a_shape.dims.len() - 1];
23 | let n = b_shape.dims[b_shape.dims.len() - 1];
24 | let rem = a_shape.dims[..a_shape.dims.len() - 2]
25 | .iter()
26 |                     .product::<usize>();
27 | 2 * rem * m * n * k
28 | }
29 | Op::Conv2d(c) => {
30 | let input_shape = &value_shapes[&node.inputs[0]];
31 | let kernel_shape = &value_shapes[&node.inputs[1]];
32 | let output_shape = &value_shapes[&node.outputs[0]];
33 | output_shape.dims.total_elems()
34 | * (input_shape.dims[1] / c.group as usize
35 |                         * kernel_shape.dims[2..].iter().product::<usize>())
36 | * (1 + (node.inputs.len() == 3) as usize)
37 | }
38 | Op::Gemm(_) => {
39 | let a_shape = &value_shapes[&node.inputs[0]];
40 | let b_shape = &value_shapes[&node.inputs[1]];
41 | assert_eq!(a_shape.dims.len(), 2);
42 | assert_eq!(b_shape.dims.len(), 2);
43 | let m = a_shape.dims[0];
44 | let k = a_shape.dims[1];
45 | let n = b_shape.dims[1];
46 | 2 * m * n * k + 3 * m * n
47 | }
48 | _ => 0,
49 | };
50 | }
51 | Ok(flops)
52 | }
53 |
54 | #[test]
55 | fn test_compute_flops() {
56 | let model = Model::default();
57 | let flops = compute_flops(&model).unwrap();
58 | assert_eq!(flops, 0);
59 | }
60 |
--------------------------------------------------------------------------------
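
compute_flops above counts 2*m*n*k for MatMul (with leading batch dims multiplied in), 2*m*n*k + 3*m*n for Gemm, and for Conv2d one multiply per output element per (in_channels/group x kernel-area) tap, with the whole count doubled when a bias input is present. A quick numeric check of the Conv2d formula in Python (a sketch mirroring the Rust, not part of the crate):

def conv2d_flops(out_elems, in_ch, group, kh, kw, has_bias):
    # out_elems * (in_ch/group * kernel area), doubled if a bias input exists,
    # exactly as the Op::Conv2d arm above computes.
    return out_elems * (in_ch // group * kh * kw) * (1 + has_bias)

# A 1x16x8x8 output from a 3x3 conv over 16 input channels, group=1, with bias:
assert conv2d_flops(16 * 8 * 8, 16, 1, 3, 3, True) == 16 * 8 * 8 * 144 * 2
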
/crates/core/src/graph.rs:
--------------------------------------------------------------------------------
1 | use rustc_hash::FxHashMap as HashMap;
2 |
3 | use crate::{
4 | node::{Node, NodeArena, NodeId},
5 | tensor::Tensor,
6 | value::{ValueArena, ValueId},
7 | };
8 |
9 | #[derive(Default, Clone)]
10 | pub struct Graph {
11 | pub nodes: NodeArena,
12 | pub values: ValueArena,
13 |     pub inits: HashMap<ValueId, Tensor>,
14 | pub inputs: Vec,
15 | pub outputs: Vec,
16 | }
17 |
18 | impl Graph {
19 | pub fn add_node(&mut self, node: Node) -> NodeId {
20 | self.nodes.alloc(node)
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/crates/core/src/lib.rs:
--------------------------------------------------------------------------------
1 | #![allow(clippy::excessive_precision)]
2 |
3 | pub mod analysis;
4 | pub mod dim;
5 | pub mod fixed_dim;
6 | pub mod flops;
7 | pub mod graph;
8 | pub mod model;
9 | pub mod node;
10 | pub mod onnx;
11 | pub mod op;
12 | pub mod optimize;
13 | pub mod tensor;
14 | pub mod value;
15 |
--------------------------------------------------------------------------------
/crates/core/src/node.rs:
--------------------------------------------------------------------------------
1 | use crate::{op::Op, value::ValueId};
2 | use id_arena::{Arena, Id};
3 |
4 | pub type NodeId = Id;
5 | pub type NodeArena = Arena;
6 |
7 | #[derive(Debug, Clone)]
8 | pub struct Node {
9 | pub op: Op,
10 |     pub name: Option<String>,
11 | pub inputs: Vec,
12 | pub outputs: Vec,
13 | pub deleted: bool,
14 | }
15 |
16 | impl Node {
17 | pub fn new(op: Op) -> Self {
18 | Self {
19 | op,
20 | name: None,
21 | inputs: Vec::new(),
22 | outputs: Vec::new(),
23 | deleted: false,
24 | }
25 | }
26 |
27 | pub fn with_name(mut self, name: impl Into