├── .gitattributes ├── .gitignore ├── .gitmodules ├── README.md ├── archive ├── compute_linear_param_encodings.py ├── convert_model.py ├── export_quantized_model.py └── make_calibration_samples.py ├── assets ├── b_rwkv_vocab_v20230424.txt ├── lambada_test.txt ├── mmlu_dev_dataset │ ├── data-00000-of-00001.arrow │ ├── dataset_info.json │ └── state.json ├── mmlu_test_dataset.json ├── mmlu_test_dataset │ ├── data-00000-of-00001.arrow │ ├── dataset_info.json │ └── state.json ├── rwkv_vocab_v20230424.txt └── rwkv_vocab_v20230424_tts.txt ├── build_hexagon_wkv_kernel.sh ├── compute_quant_encodings_experimental.py ├── convert_model.py ├── convert_model_dlc.py ├── convert_vocab.py ├── docs ├── optrace.md └── xelite_npu_rwkv.png ├── hexagon ├── CPU │ └── RwkvWkvOpPackage │ │ ├── Makefile │ │ ├── config │ │ └── RwkvWkvOpPackageCPU.xml │ │ ├── makefiles │ │ ├── Android.mk │ │ ├── Application.mk │ │ └── Makefile.linux-x86_64 │ │ └── src │ │ ├── CpuCustomOpPackage.cpp │ │ ├── RwkvWkvOpPackageInterface.cpp │ │ ├── ops │ │ ├── wkv6.cpp │ │ ├── wkv7_output.cpp │ │ └── wkv7_state.cpp │ │ └── utils │ │ ├── BackendUtils.hpp │ │ ├── CPU │ │ ├── CpuBackendUtils.cpp │ │ └── CpuBackendUtils.hpp │ │ └── CustomOpUtils.hpp ├── HTP │ ├── RwkvWkvOpPackage │ │ ├── Makefile │ │ ├── config │ │ │ └── RwkvWkvOpPackageHTP.xml │ │ └── src │ │ │ ├── RwkvWkvOpPackageInterface.cpp │ │ │ └── ops │ │ │ ├── wkv6.cpp │ │ │ ├── wkv7.cpp.old │ │ │ ├── wkv7_output.cpp │ │ │ └── wkv7_state.cpp │ └── prebuilt │ │ ├── libQnnRwkvWkvOpPackageV68.so │ │ ├── libQnnRwkvWkvOpPackageV69.so │ │ ├── libQnnRwkvWkvOpPackageV73.so │ │ ├── libQnnRwkvWkvOpPackageV75.so │ │ └── libQnnRwkvWkvOpPackageV79.so └── test │ ├── test_qnn_wkv_kernel.py │ └── wkv_custom.py ├── librwkv-qualcomm ├── CMakeLists.txt ├── Makefile ├── make │ ├── Android-demo.mk │ ├── Android-eval.mk │ ├── Android-mmlu.mk │ ├── Android.mk │ ├── Application.mk │ ├── Makefile.linux-x86_64 │ ├── Makefile.oe-linux-aarch64-gcc11.2 │ ├── 
Makefile.oe-linux-aarch64-gcc8.2 │ ├── Makefile.oe-linux-aarch64-gcc9.3 │ └── Makefile.ubuntu-aarch64-gcc9.4 └── src │ ├── CMakeLists.txt │ ├── Interfaces.hpp │ ├── Log │ ├── LogUtils.cpp │ ├── LogUtils.hpp │ ├── Logger.cpp │ └── Logger.hpp │ ├── PAL │ ├── include │ │ └── PAL │ │ │ ├── Debug.hpp │ │ │ ├── Directory.hpp │ │ │ ├── DynamicLoading.hpp │ │ │ ├── FileOp.hpp │ │ │ ├── Path.hpp │ │ │ └── StringOp.hpp │ └── src │ │ ├── common │ │ └── StringOp.cpp │ │ ├── linux │ │ ├── Directory.cpp │ │ ├── DynamicLoading.cpp │ │ ├── FileOp.cpp │ │ └── Path.cpp │ │ └── windows │ │ ├── Common.cpp │ │ ├── Common.hpp │ │ ├── Directory.cpp │ │ ├── DynamicLoading.cpp │ │ ├── FileOp.cpp │ │ └── Path.cpp │ ├── QnnTypeDef.hpp │ ├── QnnTypeMacros.hpp │ ├── Utils │ ├── BuildId.hpp │ ├── ClientBuffer.cpp │ ├── ClientBuffer.hpp │ ├── DataUtil.cpp │ ├── DataUtil.hpp │ ├── DmaBufAllocator.cpp │ ├── DmaBufAllocator.hpp │ ├── DynamicLoadUtil.cpp │ ├── DynamicLoadUtil.hpp │ ├── IBufferAlloc.hpp │ ├── IOTensor.cpp │ ├── IOTensor.hpp │ ├── RpcMem.cpp │ ├── RpcMem.hpp │ ├── Utils.cpp │ ├── Utils.hpp │ ├── dlwrap.cpp │ └── dlwrap.hpp │ ├── WrapperUtils │ ├── QnnWrapperUtils.cpp │ └── QnnWrapperUtils.hpp │ ├── eval_text.cpp │ ├── half.hpp │ ├── json.hpp │ ├── librwkv-qualcomm-app.cpp │ ├── librwkv-qualcomm-app.hpp │ ├── librwkv-qualcomm.cpp │ ├── librwkv-qualcomm.h │ ├── main.cpp │ ├── mmlu.cpp │ ├── soc_detect.cpp │ ├── soc_detect.h │ ├── tokenizer.cpp │ ├── tokenizer.h │ └── trie.hpp ├── make_context_cache_binary.py ├── make_context_cache_binary_dlc.py ├── quant_encodings └── README.md ├── quantizers ├── advanced_ptq │ └── actmse_quantizer.py ├── base_quantizer.py ├── configs │ ├── backend_aware_htp_quantsim_config_v75.json │ ├── default_per_channel_config.json │ ├── htp_quantsim_config_v75.json │ ├── htp_quantsim_config_v75_per_channel.json │ ├── qsim_config_per_channel_with_exceptions.json │ ├── rwkv_activation_exceptions.json │ └── rwkv_gptq_exceptions.json └── exceptions.py ├── 
"""Compute parameter encodings for the linear modules of an RWKV model.

The script builds an ``RWKV_RNN`` from a ``.pth`` checkpoint, prepares a
wikitext calibration dataset through ``DatasetBuilder`` and passes both to
``ActMSEQuantizer.prepare_quantsim``. Apart from the command-line options,
every quantizer/dataset setting is hard-coded in the ``args`` namespace below.

Usage:
    python compute_linear_param_encodings.py MODEL.pth [--weights_bitwidth N]
        [--no_cuda] [--strategy {symqt,symfp,asym}]
"""
# from rwkv_src.modeling_rwkv6 import Rwkv6ForCausalLM
from rwkv_src.rwkv_model import RWKV_RNN
from transformers import AutoConfig, AutoTokenizer
import types
import torch
import torch.nn as nn
from transformers.tokenization_utils_base import BatchEncoding

from utils.model_utils import get_dummy_input_for_rwkv_causal_llm
from quantizers.advanced_ptq.actmse_quantizer import ActMSEQuantizer
from utils.dataset_builder import DatasetBuilder

import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description='Compute param encodings for linear modules')
parser.add_argument('model', type=Path, help='Path to RWKV pth file')
parser.add_argument('--weights_bitwidth', type=int, default=4, help='Weights bitwidth')
# ``--use_cuda`` is kept for backward compatibility; because it defaults to
# True it could never be switched off, so ``--no_cuda`` is the explicit opt-out.
parser.add_argument('--use_cuda', action='store_true', default=True, help='Use CUDA')
parser.add_argument('--no_cuda', dest='use_cuda', action='store_false', help='Run on CPU even if CUDA is available')
parser.add_argument('--strategy', type=str, choices=['symqt', 'symfp', 'asym'], default='asym', help='Quantization strategy')
args_parser = parser.parse_args()

args = types.SimpleNamespace()
##############################
# Quantizer settings
args.quant_scheme = "tf"
args.activation_bit_width = 32
args.parameter_bit_width = args_parser.weights_bitwidth
args.in_place_quantsim = False
args.config_file = "quantizers/configs/default_per_channel_config.json"
args.num_cands = 20
args.export_dir = "quant_export"
args.output_dir = "quant_export"
# Checkpoint basename without the ".pth" suffix. Using the path's final
# component (instead of splitting on "/") also works with Windows separators.
args.model_name = args_parser.model.name.replace(".pth", "")
args.input_symmetry = args_parser.strategy
args.exceptions_file = "quantizers/configs/rwkv_gptq_exceptions.json"
args.act_mse_loss_type = "mse"
args.parameter_encoding_file = None
args.encoding_path = None
args.do_actmse = True
args.disable_act_quantizers = True
args.fp16 = False
args.do_train = False
args.clip_activation = None
args.load_sim_checkpoint = False
args.save_sim_checkpoint = False
##############################
# Calibration / evaluation dataset settings
args.calib_dataset_name = "wikitext"
args.calib_dataset_config_name = "wikitext-2-raw-v1"
args.dataset_cache_dir = "./dataset_cache"
args.calib_dataset_split = None
args.calib_dataset_preprocessor = "gpt2"
args.eval_dataset_name = "wikitext"
args.eval_dataset_config_name = "wikitext-103-raw-v1"
args.eval_dataset_split = "test"
args.eval_dataset_preprocessor = "gptq"
args.num_calibration_batches = 20
args.per_device_calib_batch_size = 1
args.per_device_eval_batch_size = 1
args.block_size = 1024
args.seed = 1234
##############################

# Fall back to CPU when CUDA was requested but is not available.
device = torch.device("cuda") if args_parser.use_cuda and torch.cuda.is_available() else torch.device("cpu")
args.device = device

model_args = types.SimpleNamespace()
# Follow the resolved device rather than the raw flag, so the model is not
# asked to use CUDA on a machine where the script already fell back to CPU.
model_args.USE_CUDA = device.type == "cuda"
model_args.fp16 = False
model_args.wkv_customop = False
model_args.USE_EMBEDDING = True
model_args.MODEL_NAME = str(args_parser.model)
model_args.RESCALE_LAYER = 0
model_args.eos_token_id = 0
model = RWKV_RNN(model_args)

tokenizer = AutoTokenizer.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True)
tokenizer.model_max_length = 1024

dummy_input = get_dummy_input_for_rwkv_causal_llm(1, 1, device, model_cfg=model.args)

dataset_builder = DatasetBuilder(args)
dataset_builder.make_dataset(tokenizer=tokenizer, args=args, column_name="text", shuffle=True)

quantizer = ActMSEQuantizer(model, args, model.args)
quantizer.orig_model = model
quantizer.prepare_quantsim(dummy_input, args, dataset_builder.train_dataloader, tokenizer)
parser.add_argument('output', type=Path, help='Path to output folder') 16 | parser.add_argument('chunks', type=int, help='Number of chunks') 17 | parser.add_argument('--ext_embedding', action='store_true', default=False, help='Use external embedding') 18 | parser.add_argument('--prefill', action='store_true', default=False, help='Prefill model') 19 | args = parser.parse_args() 20 | 21 | seq_length = 32 if args.prefill else 1 22 | 23 | model_args = types.SimpleNamespace() 24 | model_args.USE_CUDA = torch.cuda.is_available() 25 | model_args.fp16 = False 26 | model_args.USE_EMBEDDING = False if args.ext_embedding else True 27 | model_args.RESCALE_LAYER = 0 28 | model_args.wkv_customop = False 29 | 30 | model_args.MODEL_NAME = str(args.model) 31 | 32 | tokenizer = RWKV_TOKENIZER("./assets/rwkv_vocab_v20230424.txt") 33 | 34 | model = make_chunks(args.chunks, model_args) if args.chunks > 1 else RWKV_RNN(model_args) 35 | 36 | dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train') 37 | print("dataset len:", len(dataset['text'])) 38 | for i in range(20): 39 | run_prompt(model, dataset['text'][i], tokenizer=tokenizer, length=0, seq_length=seq_length, generate_samples=True, samples_output=str(args.output)) 40 | 41 | if __name__ == '__main__': 42 | main() -------------------------------------------------------------------------------- /assets/mmlu_dev_dataset/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/assets/mmlu_dev_dataset/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /assets/mmlu_dev_dataset/dataset_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "builder_name": "parquet", 3 | "citation": "", 4 | "config_name": "all", 5 | "dataset_name": "mmlu", 6 | "dataset_size": 168871380, 7 | 
"description": "", 8 | "download_checksums": { 9 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/test-00000-of-00001.parquet": { 10 | "num_bytes": 3504718, 11 | "checksum": null 12 | }, 13 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/validation-00000-of-00001.parquet": { 14 | "num_bytes": 408449, 15 | "checksum": null 16 | }, 17 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/dev-00000-of-00001.parquet": { 18 | "num_bytes": 76504, 19 | "checksum": null 20 | }, 21 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/auxiliary_train-00000-of-00001.parquet": { 22 | "num_bytes": 47513731, 23 | "checksum": null 24 | } 25 | }, 26 | "download_size": 51503402, 27 | "features": { 28 | "question": { 29 | "dtype": "string", 30 | "_type": "Value" 31 | }, 32 | "subject": { 33 | "dtype": "string", 34 | "_type": "Value" 35 | }, 36 | "choices": { 37 | "feature": { 38 | "dtype": "string", 39 | "_type": "Value" 40 | }, 41 | "_type": "Sequence" 42 | }, 43 | "answer": { 44 | "names": [ 45 | "A", 46 | "B", 47 | "C", 48 | "D" 49 | ], 50 | "_type": "ClassLabel" 51 | } 52 | }, 53 | "homepage": "", 54 | "license": "", 55 | "size_in_bytes": 220374782, 56 | "splits": { 57 | "test": { 58 | "name": "test", 59 | "num_bytes": 6969209, 60 | "num_examples": 14042, 61 | "dataset_name": "mmlu" 62 | }, 63 | "validation": { 64 | "name": "validation", 65 | "num_bytes": 763676, 66 | "num_examples": 1531, 67 | "dataset_name": "mmlu" 68 | }, 69 | "dev": { 70 | "name": "dev", 71 | "num_bytes": 125389, 72 | "num_examples": 285, 73 | "dataset_name": "mmlu" 74 | }, 75 | "auxiliary_train": { 76 | "name": "auxiliary_train", 77 | "num_bytes": 161013106, 78 | "num_examples": 99842, 79 | "dataset_name": "mmlu" 80 | } 81 | }, 82 | "version": { 83 | "version_str": "0.0.0", 84 | "major": 0, 85 | "minor": 0, 86 | "patch": 0 87 | } 88 | } -------------------------------------------------------------------------------- 
/assets/mmlu_dev_dataset/state.json: -------------------------------------------------------------------------------- 1 | { 2 | "_data_files": [ 3 | { 4 | "filename": "data-00000-of-00001.arrow" 5 | } 6 | ], 7 | "_fingerprint": "ca7a71e4c243f30b", 8 | "_format_columns": null, 9 | "_format_kwargs": {}, 10 | "_format_type": null, 11 | "_output_all_columns": false, 12 | "_split": "dev" 13 | } -------------------------------------------------------------------------------- /assets/mmlu_test_dataset/data-00000-of-00001.arrow: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/assets/mmlu_test_dataset/data-00000-of-00001.arrow -------------------------------------------------------------------------------- /assets/mmlu_test_dataset/dataset_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "builder_name": "parquet", 3 | "citation": "", 4 | "config_name": "all", 5 | "dataset_name": "mmlu", 6 | "dataset_size": 168871380, 7 | "description": "", 8 | "download_checksums": { 9 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/test-00000-of-00001.parquet": { 10 | "num_bytes": 3504718, 11 | "checksum": null 12 | }, 13 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/validation-00000-of-00001.parquet": { 14 | "num_bytes": 408449, 15 | "checksum": null 16 | }, 17 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/dev-00000-of-00001.parquet": { 18 | "num_bytes": 76504, 19 | "checksum": null 20 | }, 21 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/auxiliary_train-00000-of-00001.parquet": { 22 | "num_bytes": 47513731, 23 | "checksum": null 24 | } 25 | }, 26 | "download_size": 51503402, 27 | "features": { 28 | "question": { 29 | "dtype": "string", 30 | "_type": "Value" 31 | }, 32 | "subject": { 33 | "dtype": 
"string", 34 | "_type": "Value" 35 | }, 36 | "choices": { 37 | "feature": { 38 | "dtype": "string", 39 | "_type": "Value" 40 | }, 41 | "_type": "Sequence" 42 | }, 43 | "answer": { 44 | "names": [ 45 | "A", 46 | "B", 47 | "C", 48 | "D" 49 | ], 50 | "_type": "ClassLabel" 51 | } 52 | }, 53 | "homepage": "", 54 | "license": "", 55 | "size_in_bytes": 220374782, 56 | "splits": { 57 | "test": { 58 | "name": "test", 59 | "num_bytes": 6969209, 60 | "num_examples": 14042, 61 | "dataset_name": "mmlu" 62 | }, 63 | "validation": { 64 | "name": "validation", 65 | "num_bytes": 763676, 66 | "num_examples": 1531, 67 | "dataset_name": "mmlu" 68 | }, 69 | "dev": { 70 | "name": "dev", 71 | "num_bytes": 125389, 72 | "num_examples": 285, 73 | "dataset_name": "mmlu" 74 | }, 75 | "auxiliary_train": { 76 | "name": "auxiliary_train", 77 | "num_bytes": 161013106, 78 | "num_examples": 99842, 79 | "dataset_name": "mmlu" 80 | } 81 | }, 82 | "version": { 83 | "version_str": "0.0.0", 84 | "major": 0, 85 | "minor": 0, 86 | "patch": 0 87 | } 88 | } -------------------------------------------------------------------------------- /assets/mmlu_test_dataset/state.json: -------------------------------------------------------------------------------- 1 | { 2 | "_data_files": [ 3 | { 4 | "filename": "data-00000-of-00001.arrow" 5 | } 6 | ], 7 | "_fingerprint": "436299c1c09696bb", 8 | "_format_columns": null, 9 | "_format_kwargs": {}, 10 | "_format_type": null, 11 | "_output_all_columns": false, 12 | "_split": "test" 13 | } -------------------------------------------------------------------------------- /build_hexagon_wkv_kernel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf hexagon/HTP/RwkvWkvOpPackage/build 4 | make -C hexagon/HTP/RwkvWkvOpPackage/ htp_x86 htp_v68 htp_v69 htp_v73 htp_v75 htp_v79 -j4 5 | 6 | make -C hexagon/CPU/RwkvWkvOpPackage/ -j4 7 | 8 | rm -rf hexagon/HTP/prebuilt 9 | mkdir -p hexagon/HTP/prebuilt 10 | 11 | cp 
"""Rewrite an RWKV vocabulary file so that every token is a bytes literal.

Each input line has the form ``<idx> <python-literal-token> <length>``, where
the token is a ``str`` or ``bytes`` literal. The output keeps ``idx`` and
``length`` and replaces the token with a ``b'...'`` literal in which every
byte is hex-escaped. The result is written next to the input file under the
same name prefixed with ``b_``.

Usage:
    python convert_vocab.py <vocab_file>
"""
import ast
import sys
from pathlib import Path


def convert_line(line):
    r"""Return one vocabulary line with its token as an escaped bytes literal.

    ``line`` is ``"<idx> <token-literal> <length>"``; the token literal may
    itself contain spaces, so everything between the first and the last field
    is treated as the token. ``str`` tokens are encoded as UTF-8 first.

    >>> convert_line("65 'A' 1\n")
    "65 b'\\x41' 1\n"

    Raises:
        ValueError: if the line has fewer than three space-separated fields.
    """
    parts = line.split(' ')
    if len(parts) < 3:
        raise ValueError(f"malformed vocab line: {line!r}")
    idx = int(parts[0])
    # literal_eval only accepts Python literals, so vocab content is never executed.
    token = ast.literal_eval(' '.join(parts[1:-1]))
    token_len = int(parts[-1])
    if isinstance(token, str):
        token = token.encode("utf-8")
    token_raw = "b'" + "".join(f"\\x{byte:02x}" for byte in token) + "'"
    return f"{idx} {token_raw} {token_len}\n"


def output_path_for(vocab_file):
    """Return the output path: same directory as ``vocab_file``, name prefixed with ``b_``.

    Prefixing the whole path string would turn ``assets/vocab.txt`` into
    ``b_assets/vocab.txt``; only the file name must receive the prefix.
    """
    src = Path(vocab_file)
    return src.with_name("b_" + src.name)


def main(argv=None):
    """Convert the vocabulary file named by the first command-line argument."""
    argv = sys.argv[1:] if argv is None else argv
    if not argv:
        sys.exit("usage: convert_vocab.py <vocab_file>")
    vocab_file = argv[0]

    # The vocabulary holds non-ASCII tokens; do not depend on the locale encoding.
    with open(vocab_file, 'r', encoding="utf-8") as f:
        vocab_new = [convert_line(line) for line in f]

    with open(output_path_for(vocab_file), 'w', encoding="utf-8") as f:
        f.writelines(vocab_new)


if __name__ == "__main__":
    main()
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# All rights reserved.
# Confidential and Proprietary - Qualcomm Technologies, Inc.
#

# Top-level build driver for the CPU op package. It delegates to the
# per-platform makefiles in $(make_dir) for x86 and QNX/QOS, and to
# ndk-build for Android.

# define default
default: all

# define package name
export PACKAGE_NAME := $(notdir $(shell pwd))

# define library prerequisites list
lib_cpu := src
make_dir := makefiles
# (a stray ')' used to make this expand to "src)" instead of "src")
LIB_SOURCES = $(lib_cpu)

# define target_architecture
export TARGET_AARCH_VARS:= -march=x86-64

# define target name
export TARGET = linux-x86_64

# specify compiler
export CXX ?= clang++-9

# define default Android ABI
PLATFORM ?= arm64-v8a

.PHONY: all $(LIB_SOURCES) all_android all_x86 cpu cpu_x86 cpu_android
# None of the targets below produce a file of the same name.
.PHONY: default clean clean_x86 clean_qnx clean_qos clean_android clean_aarch64-android
.PHONY: all_qnx cpu_qnx cpu_qos cpu_aarch64-android check_ndk check_qnx
all: $(LIB_SOURCES) all_x86 all_android

# Combined Targets
cpu: cpu_x86 cpu_android
clean: clean_x86 clean_android clean_qnx clean_qos

# x86 Targets
all_x86: cpu_x86

cpu_x86:
	$(call build_if_exists,$(lib_cpu),-$(MAKE) -f $(make_dir)/Makefile.linux-x86_64)

clean_x86:
	@rm -rf libs obj

# qnx Targets
all_qnx: cpu_qnx cpu_qos

cpu_qnx: check_qnx
	$(call build_if_exists,$(lib_cpu),-$(MAKE) -f $(make_dir)/Makefile.qnx-aarch64)

clean_qnx:
	@rm -rf libs obj

cpu_qos: check_qnx
	$(call build_if_exists,$(lib_cpu),-$(MAKE) -f $(make_dir)/Makefile.qos224-aarch64)

clean_qos:
	@rm -rf libs obj

# Android Targets

all_android: cpu_android

cpu_android: cpu_aarch64-android

cpu_aarch64-android: check_ndk clean_aarch64-android
	$(call build_if_exists,$(lib_cpu),$(ANDROID_NDK_ROOT)/ndk-build APP_ALLOW_MISSING_DEPS=true APP_ABI="arm64-v8a" NDK_PROJECT_PATH=./ NDK_APPLICATION_MK=$(make_dir)/Application.mk APP_BUILD_SCRIPT=$(make_dir)/Android.mk)
	@$(rename_target_dirs)

clean_android: check_ndk clean_aarch64-android

clean_aarch64-android:
	@rm -rf libs/aarch64-android
	@rm -rf obj/local/aarch64-android

# utilities
# Syntax: $(call build_if_exists,<dir>,<cmd>)
# Runs <cmd> only when <dir> exists; otherwise emits a warning and skips the build.
build_if_exists = $(if $(wildcard $(1)),$(2),$(warning WARNING: $(1) does not exist. Skipping Compilation))
# Renames ndk-build's "arm64-v8a" output directories to "aarch64-android".
# (The former trailing line continuation after '\+' was dropped: it only
# spliced in the following blank line.)
rename_target_dirs = find . -type d -execdir rename 's/arm64-v8a/aarch64-android/' '{}' \+

check_ndk:
ifeq ($(ANDROID_NDK_ROOT),)
	$(error ERROR: ANDROID_NDK_ROOT not set, skipping compilation for Android platform(s).)
endif

check_qnx:
ifeq ($(QNX_HOST),)
	$(error ERROR: QNX_HOST not set, skipping compilation for QNX platform.)
endif
ifeq ($(QNX_TARGET),)
	$(error ERROR: QNX_TARGET not set, skipping compilation for QNX platform.)
endif
25 | ifdef QNN_SDK_ROOT 26 | # define directories 27 | CUSTOM_OP_DIR :=$(QNN_SDK_ROOT)/share/QNN/OpPackageGenerator/CustomOp 28 | 29 | # setup include paths 30 | PACKAGE_C_INCLUDES += -I $(QNN_SDK_ROOT)/include/QNN -I $(QNN_SDK_ROOT)/include/QNN/CPU -I $(LOCAL_PATH)/../include/ -I $(UTIL_SRC_DIR) -I $(UTIL_SRC_DIR)/CPU -I $(CUSTOM_OP_DIR) 31 | # copy source files from SDK if not present 32 | $(info Copying custom op source files from SDK) 33 | COPYFILES := $(shell find $(CUSTOM_OP_DIR)/CPU -name "*.cpp" -exec cp -rf {} $(LOCAL_PATH)/../src 2>/dev/null \;) 34 | else 35 | $(error QNN_SDK_ROOT: Please set QNN_SDK_ROOT) 36 | endif 37 | 38 | #========================== Define OpPackage Library Build Variables ============================================= 39 | include $(CLEAR_VARS) 40 | LOCAL_C_INCLUDES := $(PACKAGE_C_INCLUDES) 41 | MY_SRC_FILES = $(wildcard $(LOCAL_PATH)/../src/*.cpp) $(wildcard $(LOCAL_PATH)/../src/utils/*.cpp) $(wildcard $(LOCAL_PATH)/../src/utils/CPU/*.cpp) $(wildcard $(LOCAL_PATH)/../src/ops/*.cpp) 42 | LOCAL_MODULE := RwkvWkvOpPackage 43 | LOCAL_SRC_FILES := $(subst makefiles/,,$(MY_SRC_FILES)) 44 | LOCAL_LDLIBS := -lGLESv2 -lEGL 45 | include $(BUILD_SHARED_LIBRARY) 46 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/makefiles/Application.mk: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # 3 | # Copyright (c) 2020, 2023 Qualcomm Technologies, Inc. 4 | # All Rights Reserved. 5 | # Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | # 7 | # =============================================================== 8 | 9 | APP_ABI := arm64-v8a 10 | APP_STL := c++_shared 11 | APP_PLATFORM := android-21 12 | APP_CPPFLAGS += -std=c++11 -O3 -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))" 13 | APP_LDFLAGS += -lc -lm -ldl -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/makefiles/Makefile.linux-x86_64: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # 3 | # Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | # All rights reserved. 5 | # Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | # 7 | # ============================================================================== 8 | 9 | # define relevant directories 10 | SRC_DIR := src 11 | SRC_DIR_OPS := src/ops 12 | SRC_DIR_UTILS := src/utils/CPU 13 | 14 | # Checking if clang++-9 is present. If not switch to clang++ 15 | ifeq ($(shell $(CXX) -v 2>&1 | grep -c "clang version"), 0) 16 | CXX := clang++ 17 | endif 18 | 19 | # define library name and corresponding directory 20 | QNN_TARGET ?= x86_64-linux-clang 21 | export LIB_DIR := ./libs/$(QNN_TARGET) 22 | 23 | ifdef PACKAGE_NAME 24 | library := $(LIB_DIR)/lib$(PACKAGE_NAME).so 25 | else 26 | library :=$(LIB_DIR)/libCpuCustomPackage.so 27 | endif 28 | 29 | # define target architecture if not previously defined, default is x86 30 | ifndef TARGET_AARCH_VARS 31 | TARGET_AARCH_VARS:= -march=x86-64 32 | endif 33 | 34 | # Include paths 35 | # QNN_SDK_ROOT should be set and points to the SDK path, it will be used. 
36 | ifdef QNN_SDK_ROOT 37 | # setup custom op directory path 38 | CUSTOM_OP_DIR :=$(QNN_SDK_ROOT)/share/QNN/OpPackageGenerator/CustomOp 39 | 40 | # setup include paths 41 | 42 | INCLUDES += -I$(QNN_SDK_ROOT)/include/QNN -I include -I$(QNN_SDK_ROOT)/include/QNN/CPU -I $(CUSTOM_OP_DIR) 43 | INCLUDES += -I $(SRC_DIR)/utils -I $(SRC_DIR)/utils/CPU 44 | 45 | # copy source files from custom op directory 46 | $(info Copying custom op source files from SDK) 47 | COPYFILES := $(shell find $(CUSTOM_OP_DIR)/CPU -name "*.cpp" -exec cp -rf {} $(SRC_DIR) 2>/dev/null \;) 48 | else 49 | $(error QNN_SDK_ROOT: Please set QNN_SDK_ROOT) 50 | endif 51 | 52 | # set compiler flags 53 | COMMON_CXXFLAGS = -std=c++11 -fno-exceptions -fPIC -pg $(INCLUDES) 54 | COMMON_LDFLAGS = -shared -s -fPIC 55 | 56 | ifdef QNN_DEBUG_ENABLE 57 | CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O0 -g -DQNN_API="" 58 | LDFLAGS += $(COMMON_LDFLAGS) 59 | else 60 | CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O3 -Wno-write-strings -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))" 61 | LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto 62 | endif 63 | 64 | # define library sources 65 | SOURCES := $(wildcard $(SRC_DIR)/*.cpp) 66 | SOURCES_OPS := $(wildcard $(SRC_DIR_OPS)/*.cpp) 67 | SOURCE_UTILS := $(wildcard $(SRC_DIR_UTILS)/*.cpp) 68 | 69 | # define object directories 70 | OBJ_DIR := obj/$(QNN_TARGET) 71 | OBJ_DIR_OPS := obj/$(QNN_TARGET)/ops 72 | OBJ_DIR_UTILS := obj/$(QNN_TARGET)/utils 73 | 74 | # setup object files in object directory 75 | OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(foreach x,$(SOURCES),$(notdir $(x)))) 76 | OBJECTS_OPS := $(patsubst %.cpp,$(OBJ_DIR_OPS)/%.o,$(foreach x,$(SOURCES_OPS),$(notdir $(x)))) 77 | OBJECTS_UTILS := $(patsubst %.cpp,$(OBJ_DIR_UTILS)/%.o,$(foreach x,$(SOURCE_UTILS),$(notdir $(x)))) 78 | 79 | # Rule to make library 80 | .PHONY: library 81 | library: $(library) 82 | 83 | # Implicit rule to compile and link object files 84 | $(OBJ_DIR)/%.o: 
$(SRC_DIR)/%.cpp 85 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 86 | 87 | $(OBJ_DIR_UTILS)/%.o: $(SRC_DIR_UTILS)/%.cpp 88 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 89 | 90 | # set up resources 91 | directories := $(LIB_DIR) $(OBJ_DIR) $(OBJ_DIR_OPS) $(OBJ_DIR_UTILS) 92 | 93 | # Compile 94 | $(library): $(OBJECTS) $(OBJECTS_OPS) $(OBJECTS_UTILS) | $(directories) 95 | $(CXX) $(CXXFLAGS) $(LINKFLAGS) -shared $^ -o $@ 96 | 97 | # rule for object directory resource 98 | $(OBJECTS): | $(OBJ_DIR) $(COPYFILES) 99 | $(OBJECTS_OPS): | $(OBJ_DIR_OPS) 100 | $(OBJECTS_UTILS): | $(OBJ_DIR_UTILS) 101 | 102 | # rule to create directories 103 | $(directories): 104 | mkdir -p $@ 105 | 106 | .PHONY: clean 107 | clean: 108 | rm -rf $(OBJ_DIR) $(LIB_DIR) 109 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/CpuCustomOpPackage.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================= 2 | // 3 | // Copyright (c) 2020-2022 Qualcomm Technologies, Inc. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================= 8 | 9 | #include "CPU/QnnCpuOpPackage.h" 10 | #include "CustomBEMacros.hpp" 11 | #include "CustomOpPackage.hpp" 12 | #include "QnnSdkBuildId.h" 13 | 14 | using namespace qnn::custom; 15 | using namespace qnn::custom::utils; 16 | 17 | static Qnn_ErrorHandle_t QnnOpPackage_execute(void* opPkgNodeData) { 18 | auto opPkg = CustomOpPackage::getInstance(); 19 | std::shared_ptr op; 20 | 21 | opPkg->getOpResolver()->getCustomOp((opHandle)opPkgNodeData, op); 22 | auto opRegistration = opPkg->getOpRegistration(op->m_typeName); 23 | 24 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_GENERAL); 25 | QNN_CUSTOM_BE_ENSURE_STATUS(opRegistration->execute(op.get())); 26 | 27 | return QNN_SUCCESS; 28 | } 29 | 30 | std::mutex CustomOpPackage::s_mtx; 31 | std::shared_ptr CustomOpPackage ::s_opPackageInstance; 32 | bool CustomOpPackage::s_isInitialized; 33 | 34 | Qnn_ErrorHandle_t CustomOpPackage::getPackageInfo(const QnnOpPackage_Info_t** info) { 35 | QNN_CUSTOM_BE_ENSURE(info, QNN_OP_PACKAGE_ERROR_INVALID_INFO) 36 | 37 | for (auto op : m_registered_ops) { 38 | m_operationNames.push_back(op.first.c_str()); 39 | } 40 | 41 | m_sdkApiVersion = QNN_CPU_API_VERSION_INIT; 42 | m_packageInfo = QNN_OP_PACKAGE_INFO_INIT; 43 | m_packageInfo.packageName = m_packageName; 44 | m_packageInfo.operationNames = m_operationNames.data(); 45 | m_packageInfo.numOperations = static_cast(m_operationNames.size()); 46 | m_packageInfo.sdkBuildId = QNN_SDK_BUILD_ID; 47 | m_packageInfo.sdkApiVersion = &m_sdkApiVersion; 48 | *info = &m_packageInfo; 49 | 50 | return QNN_SUCCESS; 51 | } 52 | 53 | Qnn_ErrorHandle_t CustomOpPackage::createOpImpl( 54 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure, 55 | QnnOpPackage_Node_t node, 56 | QnnOpPackage_OpImpl_t* opImplPtr) { 57 | // initialize op resolver if not already set 58 | if (!m_opResolver) { 59 | m_opResolver.reset(new CustomOpResolver()); 60 | } 61 | auto cpuNode = 
reinterpret_cast(node); 62 | auto customOp = std::shared_ptr(new CustomOp(cpuNode->name, cpuNode->typeName)); 63 | const auto opRegistration = m_registered_ops[cpuNode->typeName]; 64 | 65 | // Get op from op factory 66 | QNN_CUSTOM_BE_ENSURE_STATUS( 67 | opRegistration->initialize(node, graphInfrastructure, customOp.get())); 68 | 69 | // Update op reference 70 | auto opImpl = std::make_shared(); 71 | opImpl->opImplFn = QnnOpPackage_execute; 72 | opImpl->userData = (void*)m_opResolver->registerCustomOp(std::move(customOp)); 73 | 74 | // update out kernel param 75 | auto cpuImpl = reinterpret_cast(opImplPtr); 76 | *cpuImpl = opImpl.get(); 77 | 78 | // update opImpl list 79 | m_OpImplList.emplace_back(opImpl); 80 | 81 | return QNN_SUCCESS; 82 | } 83 | 84 | Qnn_ErrorHandle_t CustomOpPackage::freeOpImpl(QnnOpPackage_OpImpl_t opImpl) { 85 | QNN_CUSTOM_BE_ENSURE(opImpl, QNN_OP_PACKAGE_ERROR_GENERAL); 86 | 87 | auto op = std::shared_ptr(new CustomOp()); 88 | 89 | auto cpuOpImpl = reinterpret_cast(opImpl); 90 | m_opResolver->getCustomOp((opHandle)cpuOpImpl->userData, op); 91 | 92 | auto opRegistration = m_registered_ops[op->m_typeName]; 93 | QNN_CUSTOM_BE_ENSURE_STATUS(m_opResolver->removeCustomOp((opHandle)cpuOpImpl->userData)); 94 | 95 | if (opRegistration->free) { 96 | opRegistration->free(*op); 97 | } 98 | 99 | return QNN_SUCCESS; 100 | } 101 | 102 | std::shared_ptr CustomOpPackage::getInstance() noexcept { 103 | std::lock_guard locker(s_mtx); 104 | if (!s_opPackageInstance) { 105 | s_opPackageInstance.reset(new (std::nothrow) CustomOpPackage()); 106 | } 107 | return s_opPackageInstance; 108 | } 109 | 110 | void CustomOpPackage::setIsInitialized(bool isInitialized) { 111 | std::lock_guard locker(s_mtx); 112 | s_isInitialized = isInitialized; 113 | } 114 | 115 | bool CustomOpPackage::getIsInitialized() { 116 | std::lock_guard locker(s_mtx); 117 | return s_isInitialized; 118 | } 119 | 120 | void CustomOpPackage::destroyInstance() { 121 | if (s_opPackageInstance && 
s_isInitialized) s_opPackageInstance.reset(); 122 | s_isInitialized = false; 123 | } 124 | 125 | void CustomOpPackage::freeResolver() { 126 | if (m_opResolver) m_opResolver.reset(); 127 | } 128 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/RwkvWkvOpPackageInterface.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Auto Generated Code for RwkvWkvOpPackage 3 | //============================================================================== 4 | #include "QnnCpuOpPackage.h" 5 | #include "CustomOpPackage.hpp" 6 | 7 | using namespace qnn::custom; 8 | using namespace qnn::custom::macros; 9 | 10 | static Qnn_ErrorHandle_t RwkvWkvOpPackageInitialize( 11 | QnnOpPackage_GlobalInfrastructure_t globalInfrastructure) { 12 | 13 | QNN_CUSTOM_BE_ENSURE(!(CustomOpPackage::getIsInitialized()),QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED); 14 | 15 | INIT_BE_OP_PACKAGE(RwkvWkvOpPackage) 16 | 17 | REGISTER_PACKAGE_OP(wkv6) 18 | REGISTER_PACKAGE_OP(wkv7_state) 19 | REGISTER_PACKAGE_OP(wkv7_output) 20 | 21 | // INIT_BE_PACKAGE_OPTIMIZATIONS(); 22 | 23 | CustomOpPackage::setIsInitialized(true); 24 | 25 | return QNN_SUCCESS; 26 | } 27 | 28 | static Qnn_ErrorHandle_t RwkvWkvOpPackageGetInfo(const QnnOpPackage_Info_t** info) { 29 | auto opPkg = CustomOpPackage::getInstance(); 30 | 31 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED); 32 | 33 | QNN_CUSTOM_BE_ENSURE_STATUS(opPkg->getPackageInfo(info)); 34 | 35 | return QNN_SUCCESS; 36 | } 37 | 38 | static Qnn_ErrorHandle_t RwkvWkvOpPackageValidateOpConfig(Qnn_OpConfig_t opConfig) { 39 | auto opPkg = CustomOpPackage::getInstance(); 40 | 41 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED); 42 | 43 | auto opRegistration = opPkg->getOpRegistration(opConfig.v1.typeName); 44 | 45 | 
QNN_CUSTOM_BE_ENSURE(opRegistration, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 46 | 47 | QNN_CUSTOM_BE_ENSURE_STATUS(opRegistration->validateOpConfig(opConfig)); 48 | 49 | return QNN_SUCCESS; 50 | } 51 | 52 | static Qnn_ErrorHandle_t RwkvWkvOpPackageCreateOpImpl( 53 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure, 54 | QnnOpPackage_Node_t node, 55 | QnnOpPackage_OpImpl_t* opImpl) { 56 | auto opPkg = CustomOpPackage::getInstance(); 57 | 58 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED); 59 | 60 | QNN_CUSTOM_BE_ENSURE_STATUS( 61 | opPkg->createOpImpl(graphInfrastructure, node, opImpl)); 62 | 63 | return QNN_SUCCESS; 64 | } 65 | 66 | static Qnn_ErrorHandle_t RwkvWkvOpPackageFreeOpImpl( 67 | QnnCpuOpPackage_OpImpl_t* opImpl) { 68 | auto opPkg = CustomOpPackage::getInstance(); 69 | 70 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED); 71 | 72 | QNN_CUSTOM_BE_ENSURE_STATUS(opPkg->freeOpImpl(opImpl)); 73 | 74 | return QNN_SUCCESS; 75 | } 76 | 77 | static Qnn_ErrorHandle_t RwkvWkvOpPackageTerminate() { 78 | auto opPkg = CustomOpPackage::getInstance(); 79 | 80 | CustomOpPackage::destroyInstance(); 81 | opPkg->freeResolver(); 82 | 83 | return QNN_SUCCESS; 84 | } 85 | 86 | static Qnn_ErrorHandle_t RwkvWkvOpPackageLogInitialize( 87 | QnnLog_Callback_t callback, QnnLog_Level_t maxLogLevel) { 88 | // function should be used if at least two backends support it 89 | // USER SHOULD NOTE THIS FUNCTION IS UNUSED BY BE 90 | 91 | return QNN_SUCCESS; 92 | } 93 | 94 | static Qnn_ErrorHandle_t RwkvWkvOpPackageLogSetLevel( 95 | QnnLog_Level_t maxLogLevel) { 96 | // USER SHOULD NOTE THIS FUNCTION IS UNUSED BY CPU BE 97 | 98 | return QNN_SUCCESS; 99 | } 100 | 101 | static Qnn_ErrorHandle_t RwkvWkvOpPackageLogTerminate() { 102 | // USER SHOULD NOTE THIS FUNCTION IS UNUSED BY CPU BE 103 | 104 | return QNN_SUCCESS; 105 | } 106 | 107 | 108 | extern "C" QNN_API Qnn_ErrorHandle_t RwkvWkvOpPackageInterfaceProvider( 109 | 
QnnOpPackage_Interface_t* interface) { 110 | interface->interfaceVersion.major = 1; 111 | interface->interfaceVersion.minor = 4; 112 | interface->interfaceVersion.patch = 0; 113 | interface->v1_4.init = RwkvWkvOpPackageInitialize; 114 | interface->v1_4.terminate = RwkvWkvOpPackageTerminate; 115 | interface->v1_4.getInfo = RwkvWkvOpPackageGetInfo; 116 | interface->v1_4.validateOpConfig = RwkvWkvOpPackageValidateOpConfig; 117 | interface->v1_4.createOpImpl = RwkvWkvOpPackageCreateOpImpl; 118 | interface->v1_4.freeOpImpl = RwkvWkvOpPackageFreeOpImpl; 119 | interface->v1_4.logInitialize = RwkvWkvOpPackageLogInitialize; 120 | interface->v1_4.logSetLevel = RwkvWkvOpPackageLogSetLevel; 121 | interface->v1_4.logTerminate = RwkvWkvOpPackageLogTerminate; 122 | return QNN_SUCCESS; 123 | } 124 | 125 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/ops/wkv6.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Auto Generated Code for RwkvWkvOpPackage 3 | //============================================================================== 4 | #include 5 | #include 6 | 7 | #include "CpuBackendUtils.hpp" 8 | #include "CustomOpPackage.hpp" 9 | 10 | using namespace qnn::custom; 11 | using namespace qnn::custom::utils; 12 | 13 | namespace wkv6 { 14 | 15 | Qnn_ErrorHandle_t execute(CustomOp* operation) { 16 | /* 17 | * To have good performance and stability, it is required to avoid heap memory 18 | * allocation in this function. The heap memory allocation includes but not 19 | * limited to calling malloc, operator new, constructing STL container objects 20 | * like std::vector with default allocator, and adding items like calling 21 | * std::vector::push_back to STL container objects with default allocator. 22 | * 23 | * Please check in SDK documentation for more information. 
24 | */ 25 | 26 | float* k = (float*)operation->getInput(0)->data; 27 | float* v = (float*)operation->getInput(1)->data; 28 | float* r = (float*)operation->getInput(2)->data; 29 | float* state_in = (float*)operation->getInput(3)->data; 30 | float* tf = (float*)operation->getInput(4)->data; 31 | float* td = (float*)operation->getInput(5)->data; 32 | float* output = (float*)operation->getOutput(0)->data; 33 | float* state_out = (float*)operation->getOutput(1)->data; 34 | 35 | int num_heads = operation->getInput(3)->currentDimensions[0]; 36 | int head_size = operation->getInput(3)->currentDimensions[1]; 37 | int seq_length = operation->getInput(0)->currentDimensions[0] / num_heads; 38 | 39 | memset(output, 0, seq_length * num_heads * head_size * sizeof(float)); 40 | for (int t = 0; t < seq_length; t++) { 41 | if (t > 0) state_in = state_out; 42 | for (int h = 0; h < num_heads; h++) { 43 | for (int i = 0; i < head_size; i++) { 44 | auto k_val = k[t * num_heads * head_size + h * head_size + i]; 45 | auto r_val = r[t * num_heads * head_size + h * head_size + i]; 46 | auto td_val = td[t * num_heads * head_size + h * head_size + i]; 47 | auto tf_val = tf[h * head_size + i]; 48 | for (int j = 0; j < head_size; j++) { 49 | auto v_val = v[t * num_heads * head_size + h * head_size + j]; 50 | auto kv_val = k_val * v_val; 51 | auto prev_state_val = state_in[h * head_size * head_size + i * head_size + j]; 52 | output[t * num_heads * head_size + h * head_size + j] += r_val * (kv_val * tf_val + prev_state_val); 53 | state_out[h * head_size * head_size + i * head_size + j] = prev_state_val * td_val + kv_val; 54 | } 55 | } 56 | } 57 | } 58 | 59 | return QNN_SUCCESS; 60 | } 61 | 62 | Qnn_ErrorHandle_t finalize(const CustomOp* operation) { 63 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numInput(), 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 64 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numOutput(), 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 65 | 66 | /** 67 | * Add code here 68 | **/ 69 | 70 | 
return QNN_SUCCESS; 71 | } 72 | 73 | Qnn_ErrorHandle_t free(CustomOp& operation) { 74 | 75 | /** 76 | * Add code here 77 | **/ 78 | 79 | return QNN_SUCCESS; 80 | } 81 | 82 | Qnn_ErrorHandle_t populateFromNode(const QnnOpPackage_Node_t node, 83 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure, 84 | CustomOp* operation) { 85 | // Add input 86 | for (uint32_t i = 0; i < numInputs(node); i++) { 87 | operation->addInput(getInput(node, i)); 88 | } 89 | 90 | // Add output 91 | for (uint32_t i = 0; i < numOutputs(node); i++) { 92 | operation->addOutput(getOutput(node, i)); 93 | } 94 | 95 | 96 | return QNN_SUCCESS; 97 | } 98 | 99 | Qnn_ErrorHandle_t validateOpConfig(Qnn_OpConfig_t opConfig) { 100 | QNN_CUSTOM_BE_ENSURE_EQ( 101 | strcmp(opConfig.v1.typeName, "wkv6"), 0, QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT) 102 | 103 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfInputs, 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 104 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfOutputs, 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 105 | 106 | return QNN_SUCCESS; 107 | } 108 | } // namespace wkv6 109 | 110 | CustomOpRegistration_t* register_Wkv6CustomOp() { 111 | using namespace wkv6; 112 | static CustomOpRegistration_t WkvRegister = {execute, finalize, free, validateOpConfig, populateFromNode}; 113 | return &WkvRegister; 114 | } 115 | 116 | REGISTER_OP(wkv6, register_Wkv6CustomOp); 117 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/ops/wkv7_output.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Auto Generated Code for RwkvWkvOpPackage 3 | //============================================================================== 4 | #include 5 | #include 6 | 7 | #include "CpuBackendUtils.hpp" 8 | #include "CustomOpPackage.hpp" 9 | 10 | using namespace qnn::custom; 11 | using namespace 
qnn::custom::utils; 12 | 13 | namespace wkv7_output { 14 | 15 | Qnn_ErrorHandle_t execute(CustomOp* operation) { 16 | /* 17 | * To have good performance and stability, it is required to avoid heap memory 18 | * allocation in this function. The heap memory allocation includes but not 19 | * limited to calling malloc, operator new, constructing STL container objects 20 | * like std::vector with default allocator, and adding items like calling 21 | * std::vector::push_back to STL container objects with default allocator. 22 | * 23 | * Please check in SDK documentation for more information. 24 | */ 25 | 26 | 27 | return QNN_SUCCESS; 28 | } 29 | 30 | Qnn_ErrorHandle_t finalize(const CustomOp* operation) { 31 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numInput(), 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 32 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numOutput(), 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 33 | 34 | /** 35 | * Add code here 36 | **/ 37 | 38 | return QNN_SUCCESS; 39 | } 40 | 41 | Qnn_ErrorHandle_t free(CustomOp& operation) { 42 | 43 | /** 44 | * Add code here 45 | **/ 46 | 47 | return QNN_SUCCESS; 48 | } 49 | 50 | Qnn_ErrorHandle_t populateFromNode(const QnnOpPackage_Node_t node, 51 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure, 52 | CustomOp* operation) { 53 | // Add input 54 | for (uint32_t i = 0; i < numInputs(node); i++) { 55 | operation->addInput(getInput(node, i)); 56 | } 57 | 58 | // Add output 59 | for (uint32_t i = 0; i < numOutputs(node); i++) { 60 | operation->addOutput(getOutput(node, i)); 61 | } 62 | 63 | 64 | return QNN_SUCCESS; 65 | } 66 | 67 | Qnn_ErrorHandle_t validateOpConfig(Qnn_OpConfig_t opConfig) { 68 | QNN_CUSTOM_BE_ENSURE_EQ( 69 | strcmp(opConfig.v1.typeName, "wkv7_output"), 0, QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT) 70 | 71 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfInputs, 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 72 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfOutputs, 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 73 | 74 | return 
QNN_SUCCESS; 75 | } 76 | } // namespace wkv7_output 77 | 78 | CustomOpRegistration_t* register_Wkv7OutputCustomOp() { 79 | using namespace wkv7_output; 80 | static CustomOpRegistration_t WkvRegister = {execute, finalize, free, validateOpConfig, populateFromNode}; 81 | return &WkvRegister; 82 | } 83 | 84 | REGISTER_OP(wkv7_output, register_Wkv7OutputCustomOp); 85 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/ops/wkv7_state.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // Auto Generated Code for RwkvWkvOpPackage 3 | //============================================================================== 4 | #include 5 | #include 6 | 7 | #include "CpuBackendUtils.hpp" 8 | #include "CustomOpPackage.hpp" 9 | 10 | using namespace qnn::custom; 11 | using namespace qnn::custom::utils; 12 | 13 | namespace wkv7_state { 14 | 15 | Qnn_ErrorHandle_t execute(CustomOp* operation) { 16 | /* 17 | * To have good performance and stability, it is required to avoid heap memory 18 | * allocation in this function. The heap memory allocation includes but not 19 | * limited to calling malloc, operator new, constructing STL container objects 20 | * like std::vector with default allocator, and adding items like calling 21 | * std::vector::push_back to STL container objects with default allocator. 22 | * 23 | * Please check in SDK documentation for more information. 
24 | */ 25 | 26 | float* r = (float*)operation->getInput(0)->data; 27 | float* w = (float*)operation->getInput(1)->data; 28 | float* k = (float*)operation->getInput(2)->data; 29 | float* v = (float*)operation->getInput(3)->data; 30 | float* a = (float*)operation->getInput(4)->data; 31 | float* b = (float*)operation->getInput(5)->data; 32 | float* state_in = (float*)operation->getInput(6)->data; 33 | float* output = (float*)operation->getOutput(0)->data; 34 | float* state_out = (float*)operation->getOutput(1)->data; 35 | 36 | int num_heads = operation->getInput(6)->currentDimensions[0]; 37 | int head_size = operation->getInput(6)->currentDimensions[1]; 38 | // int seq_length = operation->getInput(0)->currentDimensions[0]; 39 | int seq_length = operation->getInput(0)->currentDimensions[0] / num_heads; 40 | 41 | for (int t = 0; t < seq_length; t++) { 42 | if (t > 0) state_in = state_out; 43 | for (int h = 0; h < num_heads; h++) { 44 | for (int i = 0; i < head_size; i++) { 45 | auto v_val = v[t * num_heads * head_size + h * head_size + i]; 46 | 47 | float sa = 0, result = 0; 48 | for (int j = 0; j < head_size; j++) { 49 | sa += a[t * num_heads * head_size + h * head_size + j] * state_in[h * head_size * head_size + i * head_size + j]; 50 | } 51 | 52 | for (int j = 0; j < head_size; j++) { 53 | auto r_val = r[t * num_heads * head_size + h * head_size + j]; 54 | auto w_val = w[t * num_heads * head_size + h * head_size + j]; 55 | auto k_val = k[t * num_heads * head_size + h * head_size + j]; 56 | auto b_val = b[t * num_heads * head_size + h * head_size + j]; 57 | auto kv_val = k_val * v_val; 58 | auto state_val = state_in[h * head_size * head_size + i * head_size + j] * w_val + kv_val + sa * b_val; 59 | result += state_val * r_val; 60 | state_out[h * head_size * head_size + i * head_size + j] = state_val; 61 | } 62 | output[t * num_heads * head_size + h * head_size + i] = result; 63 | } 64 | } 65 | } 66 | 67 | return QNN_SUCCESS; 68 | } 69 | 70 | Qnn_ErrorHandle_t 
finalize(const CustomOp* operation) { 71 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numInput(), 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 72 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numOutput(), 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 73 | 74 | /** 75 | * Add code here 76 | **/ 77 | 78 | return QNN_SUCCESS; 79 | } 80 | 81 | Qnn_ErrorHandle_t free(CustomOp& operation) { 82 | 83 | /** 84 | * Add code here 85 | **/ 86 | 87 | return QNN_SUCCESS; 88 | } 89 | 90 | Qnn_ErrorHandle_t populateFromNode(const QnnOpPackage_Node_t node, 91 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure, 92 | CustomOp* operation) { 93 | // Add input 94 | for (uint32_t i = 0; i < numInputs(node); i++) { 95 | operation->addInput(getInput(node, i)); 96 | } 97 | 98 | // Add output 99 | for (uint32_t i = 0; i < numOutputs(node); i++) { 100 | operation->addOutput(getOutput(node, i)); 101 | } 102 | 103 | 104 | return QNN_SUCCESS; 105 | } 106 | 107 | Qnn_ErrorHandle_t validateOpConfig(Qnn_OpConfig_t opConfig) { 108 | QNN_CUSTOM_BE_ENSURE_EQ( 109 | strcmp(opConfig.v1.typeName, "wkv7_state"), 0, QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT) 110 | 111 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfInputs, 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 112 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfOutputs, 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE) 113 | 114 | return QNN_SUCCESS; 115 | } 116 | } // namespace wkv7_state 117 | 118 | CustomOpRegistration_t* register_Wkv7StateCustomOp() { 119 | using namespace wkv7_state; 120 | static CustomOpRegistration_t WkvRegister = {execute, finalize, free, validateOpConfig, populateFromNode}; 121 | return &WkvRegister; 122 | } 123 | 124 | REGISTER_OP(wkv7_state, register_Wkv7StateCustomOp); 125 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/utils/BackendUtils.hpp: -------------------------------------------------------------------------------- 1 | 
//============================================================================== 2 | // 3 | // Copyright (c) 2020-2023 Qualcomm Technologies, Inc. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "QnnOpPackage.h" 17 | #include "QnnTypes.h" 18 | 19 | //============================================================================ 20 | // Backend Defined Behavior 21 | //============================================================================= 22 | // A required backend defined tensor object which designates an input or output tensor 23 | typedef struct CustomOpTensor* CustomOpTensorPtr_t; 24 | 25 | // A required backend defined parameter object which designates scalar, tensor and string parameters 26 | typedef struct CustomOpParam* CustomOpParamPtr_t; 27 | 28 | // A backend defined object which contains additional info about an operation such as connectivity, 29 | // buffers etc 30 | typedef struct CustomOpContext* CustomOpContextPtr_t; 31 | 32 | // A backend defined object which contains information about a kernel such as its string path, its 33 | // buffers, assigned memory, local dimensions etc. 34 | typedef struct CustomOpKernelContext* CustomOpKernelContextPtr_t; 35 | 36 | namespace qnn { 37 | 38 | namespace custom { 39 | 40 | namespace utils { 41 | 42 | // Each backend is expected to define these utilities to aid users in accessing basic info about 43 | // an operation package node. 
44 | const CustomOpTensorPtr_t* getInput(QnnOpPackage_Node_t node); 45 | 46 | const CustomOpTensorPtr_t* getOutput(QnnOpPackage_Node_t node); 47 | 48 | const CustomOpParamPtr_t* getParam(QnnOpPackage_Node_t node); 49 | 50 | const CustomOpTensorPtr_t getInput(QnnOpPackage_Node_t node, size_t idx); 51 | 52 | CustomOpTensorPtr_t getOutput(QnnOpPackage_Node_t node, size_t idx); 53 | 54 | const std::pair getParam(QnnOpPackage_Node_t node, 55 | const std::string& paramName); 56 | 57 | uint32_t numInputs(QnnOpPackage_Node_t node); 58 | 59 | uint32_t numOutputs(QnnOpPackage_Node_t node); 60 | 61 | uint32_t numDimensions(CustomOpTensorPtr_t tensor); 62 | 63 | const uint32_t* getTensorShape(CustomOpTensorPtr_t tensor); 64 | 65 | void* getTensorData(CustomOpTensorPtr_t tensor); 66 | 67 | uint32_t numTensorSize(CustomOpTensorPtr_t tensor); 68 | // Additional backend utilities should be included under this namespace 69 | namespace backend_utils {} 70 | } // namespace utils 71 | } // namespace custom 72 | } // namespace qnn 73 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/utils/CPU/CpuBackendUtils.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) 2020, 2023 Qualcomm Technologies, Inc. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include "CpuBackendUtils.hpp" 14 | 15 | namespace qnn { 16 | 17 | namespace custom { 18 | 19 | namespace utils { 20 | 21 | // Each backend is expected to define these utilities to aid users in accessing basic info about 22 | // an operation package node. 
23 | const CustomOpTensorPtr_t* getInput(QnnOpPackage_Node_t node) { 24 | return (CustomOpTensorPtr_t*)reinterpret_cast(node)->inputs; 25 | } 26 | 27 | const CustomOpTensorPtr_t* getOutput(QnnOpPackage_Node_t node) { 28 | return (CustomOpTensorPtr_t*)reinterpret_cast(node)->outputs; 29 | } 30 | 31 | const CustomOpParamPtr_t* getParam(QnnOpPackage_Node_t node) { 32 | return (CustomOpParamPtr_t*)reinterpret_cast(node)->params; 33 | } 34 | 35 | const std::pair getParam(QnnOpPackage_Node_t node, 36 | const std::string& name) { 37 | auto cpuNode = reinterpret_cast(node); 38 | auto params = (CustomOpParamPtr_t*)cpuNode->params; 39 | 40 | for (uint32_t idx = 0; idx < cpuNode->numOfParams; idx++) { 41 | auto paramName = params[idx]->name; 42 | 43 | if (strcmp(paramName, name.c_str()) == 0) { 44 | return {true, params[idx]}; 45 | } 46 | } 47 | 48 | return {false, nullptr}; 49 | } 50 | 51 | const CustomOpTensorPtr_t getInput(QnnOpPackage_Node_t node, size_t idx) { 52 | return (CustomOpTensorPtr_t) reinterpret_cast(node)->inputs[idx]; 53 | } 54 | 55 | CustomOpTensorPtr_t getOutput(QnnOpPackage_Node_t node, size_t idx) { 56 | return (CustomOpTensorPtr_t) reinterpret_cast(node)->outputs[idx]; 57 | } 58 | 59 | uint32_t numInputs(QnnOpPackage_Node_t node) { 60 | return reinterpret_cast(node)->numOfInputs; 61 | } 62 | 63 | uint32_t numOutputs(QnnOpPackage_Node_t node) { 64 | return reinterpret_cast(node)->numOfOutputs; 65 | } 66 | 67 | uint32_t numDimensions(CustomOpTensorPtr_t tensor) { 68 | return reinterpret_cast(tensor)->rank; 69 | } 70 | 71 | uint32_t numTensorSize(CustomOpTensorPtr_t tensor) { 72 | uint32_t size = 1; 73 | auto cpuTensor = reinterpret_cast(tensor); 74 | 75 | for (uint32_t i = 0; i < cpuTensor->rank; i++) { 76 | size *= cpuTensor->currentDimensions[i]; 77 | } 78 | return size; 79 | } 80 | 81 | const uint32_t* getTensorShape(CustomOpTensorPtr_t tensor) { 82 | return reinterpret_cast(tensor)->currentDimensions; 83 | } 84 | 85 | template 86 | const T* 
getTensorData(CustomOpTensorPtr_t tensor) { 87 | auto tempTensor = reinterpret_cast(tensor); 88 | auto dataRef = reinterpret_cast(tempTensor->data); 89 | return const_cast(dataRef); 90 | } 91 | 92 | template 93 | T& getTensorDataRef(CustomOpTensorPtr_t tensor) { 94 | auto tempTensor = reinterpret_cast(tensor); 95 | auto dataRef = reinterpret_cast(tempTensor->data); 96 | return &dataRef; 97 | } 98 | 99 | namespace backend_utils { 100 | 101 | const double getScalarParam(const CustomOpParamPtr_t param) { 102 | auto cpuParam = reinterpret_cast(param); 103 | return cpuParam->scalarParam; 104 | } 105 | 106 | const CustomOpTensorPtr_t getTensorParam(const CustomOpParamPtr_t param) { 107 | auto cpuParam = reinterpret_cast(param); 108 | return (CustomOpTensorPtr_t)cpuParam->tensorParam; 109 | } 110 | 111 | } // namespace backend_utils 112 | } // namespace utils 113 | } // namespace custom 114 | } // namespace qnn 115 | -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/utils/CPU/CpuBackendUtils.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) 2020 Qualcomm Technologies, Inc. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include "BackendUtils.hpp" 12 | #include "QnnCpuOpPackage.h" 13 | 14 | // Tensor and parameter definitions 15 | struct CustomOpTensor : public QnnCpuOpPackage_Tensor_t {}; 16 | 17 | struct CustomOpParam : public QnnCpuOpPackage_Param_t {}; 18 | 19 | namespace qnn { 20 | namespace custom { 21 | namespace utils { 22 | namespace backend_utils { 23 | 24 | const double getScalarParam(const CustomOpParamPtr_t param); 25 | 26 | const CustomOpTensorPtr_t getTensorParam(const CustomOpParamPtr_t param); 27 | } // namespace backend_utils 28 | } // namespace utils 29 | } // namespace custom 30 | } // namespace qnn -------------------------------------------------------------------------------- /hexagon/CPU/RwkvWkvOpPackage/src/utils/CustomOpUtils.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) 2020 Qualcomm Technologies, Inc. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "BackendUtils.hpp" 17 | #include "CustomBEMacros.hpp" 18 | 19 | namespace qnn { 20 | 21 | namespace custom { 22 | 23 | namespace utils { 24 | 25 | /** 26 | * @brief Helper class that can hold information that holds information extracted 27 | * from a QNN node. This class function signatures and private members are freely extensible and 28 | * modifiable. The public class member variables must remain unchanged. 
29 | */ 30 | 31 | class CustomOp { 32 | public: 33 | const char* m_name; 34 | const char* m_typeName; 35 | uint32_t m_numKernels; 36 | 37 | CustomOp() = default; 38 | virtual ~CustomOp() = default; 39 | 40 | /** 41 | * @brief The custom op constructor 42 | * @param name The name of the operation 43 | * @param typeName The type of the operation 44 | * @return 45 | */ 46 | CustomOp(const char* name, const char* typeName) : m_name(name), m_typeName(typeName) {} 47 | 48 | /** 49 | * @brief Adds an input tensor to the operation 50 | * @param inTensor An input tensor to this operation as defined by each backend 51 | * @return 52 | */ 53 | virtual Qnn_ErrorHandle_t addInput(CustomOpTensorPtr_t inTensor) { 54 | m_Inputs.emplace_back(inTensor); 55 | return QNN_SUCCESS; 56 | }; 57 | 58 | /** 59 | * @brief Adds an output tensor to the operation 60 | * @param outTensor An output tensor of this operation as defined by each backend 61 | * @return 62 | */ 63 | virtual Qnn_ErrorHandle_t addOutput(CustomOpTensorPtr_t outTensor) { 64 | m_Outputs.emplace_back(outTensor); 65 | return QNN_SUCCESS; 66 | }; 67 | 68 | /** 69 | * Adds the parameter name 70 | * @param paramName The name of each parameter to be added 71 | * @param param The param object to be added as defined by the backend 72 | * @return 73 | */ 74 | virtual Qnn_ErrorHandle_t addParam(const std::string& paramName, CustomOpParamPtr_t param) { 75 | m_Params[paramName] = param; 76 | return QNN_SUCCESS; 77 | }; 78 | 79 | /** 80 | * Returns a pointer to the input tensor specified by index 81 | * @param index 82 | * @return 83 | */ 84 | const CustomOpTensorPtr_t getInput(size_t index = 0) const { return m_Inputs[index]; } 85 | 86 | /** 87 | * Returns a reference to the output tensor specified by index 88 | * @param index 89 | * @return 90 | */ 91 | CustomOpTensorPtr_t getOutput(size_t index = 0) const { return m_Outputs[index]; } 92 | 93 | /** 94 | * Returns a reference to the output tensor data 95 | * @param index 96 | * @return 
97 | */ 98 | CustomOpTensorPtr_t* getOutputsFlat() { return m_Outputs.data(); } 99 | 100 | /** Returns the requested parameter specified by name 101 | * @param name the name of the parameter 102 | */ 103 | CustomOpParamPtr_t getParam(const std::string& name) { return m_Params[name]; } 104 | 105 | /** 106 | * 107 | * @return The number of inputs 108 | */ 109 | uint32_t numInput() const { return m_Inputs.size(); } 110 | 111 | /** 112 | * 113 | * @return The number of outputs 114 | */ 115 | uint32_t numOutput() const { return m_Outputs.size(); } 116 | 117 | protected: 118 | std::vector m_Inputs; 119 | std::vector m_Outputs; 120 | std::map m_Params; 121 | std::unique_ptr m_tempTensor; 122 | }; 123 | 124 | } // namespace utils 125 | } // namespace custom 126 | } // namespace qnn 127 | -------------------------------------------------------------------------------- /hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV68.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV68.so -------------------------------------------------------------------------------- /hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV69.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV69.so -------------------------------------------------------------------------------- /hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV73.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV73.so -------------------------------------------------------------------------------- 
/hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV75.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV75.so -------------------------------------------------------------------------------- /hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV79.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV79.so -------------------------------------------------------------------------------- /hexagon/test/wkv_custom.py: -------------------------------------------------------------------------------- 1 | wkv_c_impl_src = """ 2 | #include 3 | #include 4 | 5 | std::tuple wkv6( 6 | torch::Tensor k, torch::Tensor v, torch::Tensor r, 7 | torch::Tensor state2, torch::Tensor time_first, 8 | torch::Tensor time_decay) { 9 | state2 = state2.squeeze(0); 10 | auto num_head = state2.size(0); 11 | auto head_size = state2.size(1); 12 | int seq_length = k.size(0); 13 | 14 | k = k.reshape({seq_length, num_head, head_size, 1}); 15 | v = v.reshape({seq_length, num_head, 1, head_size}); 16 | r = r.reshape({seq_length, num_head, 1, head_size}); 17 | time_first = time_first.reshape({num_head, head_size, 1}); 18 | time_decay = time_decay.reshape({seq_length, num_head, head_size, 1}); 19 | auto kv = torch::matmul(k, v); 20 | std::vector wkv; 21 | for (int i = 0; i < seq_length; i++) { 22 | wkv.push_back(torch::matmul(r[i], (time_first * kv[i] + state2))); 23 | state2 = time_decay[i] * state2 + kv[i]; 24 | } 25 | auto wkv_tensor = torch::stack(wkv, 0).reshape({seq_length, num_head, head_size}); 26 | 27 | return std::make_tuple(wkv_tensor, state2); 28 | } 29 | 30 | std::tuple wkv7( 31 | torch::Tensor r, torch::Tensor w, torch::Tensor k, torch::Tensor v, 32 | 
torch::Tensor a, torch::Tensor b, torch::Tensor state2) { 33 | state2 = state2.squeeze(0); 34 | auto num_head = state2.size(0); 35 | auto head_size = state2.size(1); 36 | int seq_length = k.size(0); 37 | 38 | w = w.reshape({seq_length, num_head, 1, head_size}); 39 | k = k.reshape({seq_length, num_head, 1, head_size}); 40 | v = v.reshape({seq_length, num_head, head_size, 1}); 41 | r = r.reshape({seq_length, num_head, head_size, 1}); 42 | b = b.reshape({seq_length, num_head, 1, head_size}); 43 | a = a.reshape({seq_length, num_head, head_size, 1}); 44 | 45 | auto kv = torch::matmul(v, k); 46 | auto ab = torch::matmul(a, b); 47 | std::vector x; 48 | for (int i = 0; i < seq_length; i++) { 49 | state2 = w[i] * state2 + kv[i] + torch::matmul(state2, ab[i]); 50 | x.push_back(torch::matmul(state2, r[i])); 51 | } 52 | auto x_tensor = torch::stack(x, 0).reshape({seq_length, num_head, head_size}); 53 | return std::make_tuple(x_tensor, state2); 54 | } 55 | 56 | TORCH_LIBRARY(rwkv, m) { 57 | m.def("wkv6", &wkv6); 58 | m.def("wkv7", &wkv7); 59 | } 60 | 61 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 62 | } 63 | """ -------------------------------------------------------------------------------- /librwkv-qualcomm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.14) 2 | project(librwkv-qualcomm) 3 | add_subdirectory(src) 4 | -------------------------------------------------------------------------------- /librwkv-qualcomm/Makefile: -------------------------------------------------------------------------------- 1 | default: all 2 | 3 | # define package name 4 | PACKAGE_NAME := $(notdir $(shell pwd)) 5 | 6 | # define library prerequisites list 7 | src_folder := src 8 | make_dir := make 9 | EXE_SOURCES = $(src_folder) 10 | 11 | # define target_architecture 12 | export TARGET_AARCH_VARS:= -march=x86-64 13 | 14 | # define target name 15 | export TARGET = linux-x86_64 16 | 17 | # specify compiler 18 | 
export CXX := clang++-9

# Every target below names a command, not a file. Declaring them all phony
# keeps a stray file or directory of the same name from silently disabling one.
.PHONY: all $(EXE_SOURCES) all_android clean all_x86 clean_x86 \
        aarch64-android aarch64-android-demo aarch64-android-eval aarch64-android-mmlu \
        clean_android clean_aarch64-android \
        all_ubuntu_aarch64_gcc94 clean_ubuntu_aarch64_gcc94 check_ubuntu_aarch64_gcc94 check_ndk

# Default build: the Android library plus the demo, eval and mmlu executables.
all: $(EXE_SOURCES) all_android

# Combined Targets
clean: clean_x86 clean_android

# x86-64 host build; delegates to make/Makefile.linux-x86_64 when src/ exists.
all_x86: clean_x86
	$(call build_if_exists,$(src_folder),-$(MAKE) -f $(make_dir)/Makefile.linux-x86_64)

clean_x86:
	@rm -rf bin obj include

# Android Targets

all_android: aarch64-android aarch64-android-demo aarch64-android-eval aarch64-android-mmlu

# Each Android target removes earlier Android output and then runs ndk-build
# with its own APP_BUILD_SCRIPT (library, demo, eval, mmlu).
aarch64-android: check_ndk clean_aarch64-android
	$(call build_if_exists,$(src_folder),$(ANDROID_NDK_ROOT)/ndk-build APP_ALLOW_MISSING_DEPS=true APP_ABI="arm64-v8a" NDK_PROJECT_PATH=./ NDK_APPLICATION_MK=$(make_dir)/Application.mk APP_BUILD_SCRIPT=$(make_dir)/Android.mk)

aarch64-android-demo: check_ndk clean_aarch64-android
	$(call build_if_exists,$(src_folder),$(ANDROID_NDK_ROOT)/ndk-build APP_ALLOW_MISSING_DEPS=true APP_ABI="arm64-v8a" NDK_PROJECT_PATH=./ NDK_APPLICATION_MK=$(make_dir)/Application.mk APP_BUILD_SCRIPT=$(make_dir)/Android-demo.mk)

aarch64-android-eval: check_ndk clean_aarch64-android
	$(call build_if_exists,$(src_folder),$(ANDROID_NDK_ROOT)/ndk-build APP_ALLOW_MISSING_DEPS=true APP_ABI="arm64-v8a" NDK_PROJECT_PATH=./ NDK_APPLICATION_MK=$(make_dir)/Application.mk APP_BUILD_SCRIPT=$(make_dir)/Android-eval.mk)

aarch64-android-mmlu: check_ndk clean_aarch64-android
	$(call build_if_exists,$(src_folder),$(ANDROID_NDK_ROOT)/ndk-build APP_ALLOW_MISSING_DEPS=true APP_ABI="arm64-v8a" NDK_PROJECT_PATH=./ NDK_APPLICATION_MK=$(make_dir)/Application.mk APP_BUILD_SCRIPT=$(make_dir)/Android-mmlu.mk)

# Cleaning only removes directories, so it must not require the NDK. The former
# check_ndk prerequisite made `make clean` abort whenever ANDROID_NDK_ROOT was unset.
clean_android: clean_aarch64-android

clean_aarch64-android:
	@rm -rf bin/aarch64-android
	@rm -rf obj/local/aarch64-android

# Ubuntu aarch64 (gcc 9.4) cross build; delegates to make/Makefile.ubuntu-aarch64-gcc9.4.
all_ubuntu_aarch64_gcc94: check_ubuntu_aarch64_gcc94
	$(call build_if_exists,$(src_folder),-$(MAKE) -f $(make_dir)/Makefile.ubuntu-aarch64-gcc9.4)

clean_ubuntu_aarch64_gcc94: 59 | @rm -rf bin/aarch64-ubuntu-gcc9.4 obj/aarch64-ubuntu-gcc9.4 60 | 61 | check_ubuntu_aarch64_gcc94: 62 | ifeq ($(QNN_AARCH64_UBUNTU_GCC_94),) 63 | $(error ERROR: QNN_AARCH64_UBUNTU_GCC_94 not set, skipping compilation for Ubuntu platform.) 64 | endif 65 | 66 | # utilities 67 | # Syntax: $(call build_if_exists ,) 68 | build_if_exists = $(if $(wildcard $(1)),$(2),$(warning WARNING: $(1) does not exist. Skipping Compilation)) 69 | 70 | check_ndk: 71 | ifeq ($(ANDROID_NDK_ROOT),) 72 | $(error ERROR: ANDROID_NDK_ROOT not set, skipping compilation for Android platform(s).) 73 | endif 74 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Android-demo.mk: -------------------------------------------------------------------------------- 1 | LOCAL_PATH := $(call my-dir) 2 | SUPPORTED_TARGET_ABI := arm64-v8a 3 | 4 | #============================ Define Common Variables =============================================================== 5 | # Include paths 6 | PACKAGE_C_INCLUDES += -I $(QNN_SDK_ROOT)/include/QNN 7 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/ 8 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/CachingUtil 9 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Log 10 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/PAL/include 11 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Utils 12 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/WrapperUtils 13 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../include/flatbuffers 14 | 15 | #========================== Define OpPackage Library Build Variables ============================================= 16 | include $(CLEAR_VARS) 17 | LOCAL_C_INCLUDES := $(PACKAGE_C_INCLUDES) 18 | MY_SRC_FILES := $(wildcard $(LOCAL_PATH)/../src/main.cpp) 19 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/tokenizer.cpp) 20 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm-app.cpp) 21 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm.cpp) 
22 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/soc_detect.cpp) 23 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Log/*.cpp) 24 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/PAL/src/linux/*.cpp) 25 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/PAL/src/common/*.cpp) 26 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Utils/*.cpp) 27 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/WrapperUtils/*.cpp) 28 | LOCAL_MODULE := rwkv-qualcomm-demo 29 | LOCAL_SRC_FILES := $(subst make/,,$(MY_SRC_FILES)) 30 | LOCAL_LDLIBS := -lGLESv2 -lEGL -llog 31 | include $(BUILD_EXECUTABLE) 32 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Android-eval.mk: -------------------------------------------------------------------------------- 1 | LOCAL_PATH := $(call my-dir) 2 | SUPPORTED_TARGET_ABI := arm64-v8a 3 | 4 | #============================ Define Common Variables =============================================================== 5 | # Include paths 6 | PACKAGE_C_INCLUDES += -I $(QNN_SDK_ROOT)/include/QNN 7 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/ 8 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/CachingUtil 9 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Log 10 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/PAL/include 11 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Utils 12 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/WrapperUtils 13 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../include/flatbuffers 14 | 15 | #========================== Define OpPackage Library Build Variables ============================================= 16 | include $(CLEAR_VARS) 17 | LOCAL_C_INCLUDES := $(PACKAGE_C_INCLUDES) 18 | MY_SRC_FILES := $(wildcard $(LOCAL_PATH)/../src/eval_text.cpp) 19 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/tokenizer.cpp) 20 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm-app.cpp) 21 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm.cpp) 22 | MY_SRC_FILES 
+= $(wildcard $(LOCAL_PATH)/../src/soc_detect.cpp) 23 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Log/*.cpp) 24 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/PAL/src/linux/*.cpp) 25 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/PAL/src/common/*.cpp) 26 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Utils/*.cpp) 27 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/WrapperUtils/*.cpp) 28 | LOCAL_MODULE := rwkv-qualcomm-eval 29 | LOCAL_SRC_FILES := $(subst make/,,$(MY_SRC_FILES)) 30 | LOCAL_LDLIBS := -lGLESv2 -lEGL -llog 31 | include $(BUILD_EXECUTABLE) 32 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Android-mmlu.mk: -------------------------------------------------------------------------------- 1 | LOCAL_PATH := $(call my-dir) 2 | SUPPORTED_TARGET_ABI := arm64-v8a 3 | 4 | #============================ Define Common Variables =============================================================== 5 | # Include paths 6 | PACKAGE_C_INCLUDES += -I $(QNN_SDK_ROOT)/include/QNN 7 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/ 8 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/CachingUtil 9 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Log 10 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/PAL/include 11 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Utils 12 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/WrapperUtils 13 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../include/flatbuffers 14 | 15 | #========================== Define OpPackage Library Build Variables ============================================= 16 | include $(CLEAR_VARS) 17 | LOCAL_C_INCLUDES := $(PACKAGE_C_INCLUDES) 18 | MY_SRC_FILES := $(wildcard $(LOCAL_PATH)/../src/mmlu.cpp) 19 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/tokenizer.cpp) 20 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm-app.cpp) 21 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm.cpp) 22 | MY_SRC_FILES += $(wildcard 
$(LOCAL_PATH)/../src/soc_detect.cpp) 23 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Log/*.cpp) 24 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/PAL/src/linux/*.cpp) 25 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/PAL/src/common/*.cpp) 26 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Utils/*.cpp) 27 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/WrapperUtils/*.cpp) 28 | LOCAL_MODULE := rwkv-qualcomm-mmlu 29 | LOCAL_SRC_FILES := $(subst make/,,$(MY_SRC_FILES)) 30 | LOCAL_LDLIBS := -lGLESv2 -lEGL -llog 31 | include $(BUILD_EXECUTABLE) 32 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Android.mk: -------------------------------------------------------------------------------- 1 | LOCAL_PATH := $(call my-dir) 2 | SUPPORTED_TARGET_ABI := arm64-v8a 3 | 4 | #============================ Define Common Variables =============================================================== 5 | # Include paths 6 | PACKAGE_C_INCLUDES += -I $(QNN_SDK_ROOT)/include/QNN 7 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/ 8 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/CachingUtil 9 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Log 10 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/PAL/include 11 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/Utils 12 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../src/WrapperUtils 13 | PACKAGE_C_INCLUDES += -I $(LOCAL_PATH)/../include/flatbuffers 14 | 15 | #========================== Define OpPackage Library Build Variables ============================================= 16 | include $(CLEAR_VARS) 17 | LOCAL_C_INCLUDES := $(PACKAGE_C_INCLUDES) 18 | MY_SRC_FILES := $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm-app.cpp) 19 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/librwkv-qualcomm.cpp) 20 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/soc_detect.cpp) 21 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Log/*.cpp) 22 | MY_SRC_FILES += $(wildcard 
$(LOCAL_PATH)/../src/PAL/src/linux/*.cpp) 23 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/PAL/src/common/*.cpp) 24 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/Utils/*.cpp) 25 | MY_SRC_FILES += $(wildcard $(LOCAL_PATH)/../src/WrapperUtils/*.cpp) 26 | LOCAL_MODULE := librwkv-qualcomm 27 | LOCAL_SRC_FILES := $(subst make/,,$(MY_SRC_FILES)) 28 | LOCAL_LDLIBS := -lGLESv2 -lEGL -llog 29 | include $(BUILD_STATIC_LIBRARY) 30 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Application.mk: -------------------------------------------------------------------------------- 1 | APP_ABI := arm64-v8a 2 | APP_STL := c++_static 3 | APP_PLATFORM := android-21 4 | APP_CPPFLAGS += -std=c++20 -fexceptions -O3 -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))" -DANDROID 5 | APP_LDFLAGS += -lc -lm -ldl 6 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Makefile.linux-x86_64: -------------------------------------------------------------------------------- 1 | # define relevant directories 2 | SRC_DIR := src 3 | SRC_DIR_LOG := src/Log 4 | SRC_DIR_PAL_LINUX := src/PAL/src/linux 5 | SRC_DIR_PAL_COMMON := src/PAL/src/common 6 | SRC_DIR_UTILS := src/Utils 7 | SRC_DIR_WRAPPER_UTILS := src/WrapperUtils 8 | QNN_API_INCLUDE := $(QNN_SDK_ROOT)/include/QNN 9 | PAL_INCLUDE := src/PAL/include 10 | 11 | # Checking if clang++ is present. 
If not switch to clang++ 12 | ifeq ($(shell $(CXX) -v 2>&1 | grep -c "clang version"), 0) 13 | CXX := clang++ 14 | endif 15 | 16 | QNN_TARGET ?= x86_64-linux-clang 17 | export TARGET_DIR := ./bin/$(QNN_TARGET) 18 | 19 | librwkv-qualcomm := $(TARGET_DIR)/librwkv-qualcomm.a 20 | 21 | # define target architecture if not previously defined, default is x86 22 | ifndef TARGET_AARCH_VARS 23 | TARGET_AARCH_VARS:= -march=x86-64 24 | endif 25 | 26 | .PHONY: librwkv-qualcomm_all 27 | .DEFAULT: librwkv-qualcomm_all 28 | librwkv-qualcomm_all: $(librwkv-qualcomm) 29 | 30 | # Include paths 31 | INCLUDES += -I$(SRC_DIR) -I$(SRC_DIR_LOG) -I$(SRC_DIR_UTILS) -I$(SRC_DIR_WRAPPER_UTILS) -I$(PAL_INCLUDE) -I$(QNN_API_INCLUDE) 32 | 33 | # set compiler flags 34 | # pthread is needed for AIC and HTP-MCP Backend 35 | COMMON_CXXFLAGS = -std=c++20 -fno-rtti -fPIC -Wall -Werror -pg -pthread $(INCLUDES) 36 | COMMON_LDFLAGS = -shared -s -fPIC -pthread 37 | 38 | ifdef QNN_DEBUG_ENABLE 39 | CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O0 -g -DQNN_API="" 40 | LDFLAGS += $(COMMON_LDFLAGS) 41 | else 42 | CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O3 -Wno-write-strings -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))" 43 | LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto 44 | endif 45 | 46 | # define library sources 47 | SOURCES := $(wildcard $(SRC_DIR)/*.cpp) 48 | SOURCES_LOG := $(wildcard $(SRC_DIR_LOG)/*.cpp) 49 | SOURCES_PAL := $(wildcard $(SRC_DIR_PAL_LINUX)/*.cpp) 50 | SOURCES_PAL += $(wildcard $(SRC_DIR_PAL_COMMON)/*.cpp) 51 | SOURCES_UTILS := $(wildcard $(SRC_DIR_UTILS)/*.cpp) 52 | SOURCES_WRAPPER_UTILS := $(wildcard $(SRC_DIR_WRAPPER_UTILS)/*.cpp) 53 | 54 | # define object directory 55 | OBJ_ROOT := obj 56 | OBJ_DIR := obj/$(QNN_TARGET) 57 | OBJ_DIR_LOG := obj/$(QNN_TARGET)/Log/ 58 | OBJ_DIR_PAL := obj/$(QNN_TARGET)/PAL 59 | OBJ_DIR_UTILS := obj/$(QNN_TARGET)/Utils/ 60 | OBJ_DIR_WRAPPER_UTILS := obj/$(QNN_TARGET)/WrapperUtils/ 61 | 62 | # setup object files in 
object directory
# Each object list maps the basenames of one source group into that group's
# object directory (foo.cpp -> <obj dir>/foo.o).
OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(foreach x,$(SOURCES),$(notdir $(x))))
OBJECTS_LOG := $(patsubst %.cpp,$(OBJ_DIR_LOG)/%.o,$(foreach x,$(SOURCES_LOG),$(notdir $(x))))
OBJECTS_PAL := $(patsubst %.cpp,$(OBJ_DIR_PAL)/%.o,$(foreach x,$(SOURCES_PAL),$(notdir $(x))))
OBJECTS_UTILS := $(patsubst %.cpp,$(OBJ_DIR_UTILS)/%.o,$(foreach x,$(SOURCES_UTILS),$(notdir $(x))))
OBJECTS_WRAPPER_UTILS := $(patsubst %.cpp,$(OBJ_DIR_WRAPPER_UTILS)/%.o,$(foreach x,$(SOURCES_WRAPPER_UTILS),$(notdir $(x))))

#LIBS=-l/usr/lib/x86_64-linux-gnu/libflatbuffers.a
LIBS=-ldl

# Convenience alias for building the static library
.PHONY: librwkv-qualcomm
librwkv-qualcomm: $(librwkv-qualcomm)

# Pattern rules: compile one source file into its object file (no linking here)
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp
	$(CXX) $(CXXFLAGS) -c $^ -o $@

$(OBJ_DIR_LOG)/%.o: $(SRC_DIR_LOG)/%.cpp
	$(CXX) $(CXXFLAGS) -c $^ -o $@

$(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_LINUX)/%.cpp
	$(CXX) $(CXXFLAGS) -c $^ -o $@

$(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_COMMON)/%.cpp
	$(CXX) $(CXXFLAGS) -c $^ -o $@

$(OBJ_DIR_UTILS)/%.o: $(SRC_DIR_UTILS)/%.cpp
	$(CXX) $(CXXFLAGS) -c $^ -o $@

$(OBJ_DIR_WRAPPER_UTILS)/%.o: $(SRC_DIR_WRAPPER_UTILS)/%.cpp
	$(CXX) $(CXXFLAGS) -c $^ -o $@

# set up resources
directories := $(TARGET_DIR) $(OBJ_DIR) $(OBJ_DIR_LOG) $(OBJ_DIR_PAL) $(OBJ_DIR_UTILS) $(OBJ_DIR_WRAPPER_UTILS)

# Archive all objects into the static library.
# This target used to carry two recipes. make keeps only the last recipe given
# for a target, and that one ran `ar cr $@ $(LIBS)`, i.e. it passed -ldl to ar
# and archived no object files. The overridden recipe linked an executable
# into the .a path. A single rule now archives the objects themselves.
# $^ lists the normal prerequisites only, so the order-only $(directories)
# are not handed to ar.
$(librwkv-qualcomm): $(OBJECTS) $(OBJECTS_LOG) $(OBJECTS_PAL) $(OBJECTS_UTILS) $(OBJECTS_WRAPPER_UTILS) | $(directories)
	$(AR) rcs $@ $^

# rule for object directory resource
$(OBJECTS): | $(OBJ_DIR)
$(OBJECTS_LOG): | $(OBJ_DIR_LOG)
$(OBJECTS_PAL): | $(OBJ_DIR_PAL)

$(OBJECTS_UTILS): | $(OBJ_DIR_UTILS) 110 | $(OBJECTS_WRAPPER_UTILS): | $(OBJ_DIR_WRAPPER_UTILS) 111 | 112 | # rule to create directories 113 | $(directories): 114 | mkdir -p $@ 115 | 116 | .PHONY: clean 117 | clean: 118 | rm -rf $(OBJ_ROOT) $(TARGET_DIR) 119 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Makefile.oe-linux-aarch64-gcc11.2: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2023 Qualcomm Technologies, Inc. 3 | # All Rights Reserved. 4 | # Confidential and Proprietary - Qualcomm Technologies, Inc. 5 | # 6 | 7 | # define relevant directories 8 | SRC_DIR := src 9 | SRC_DIR_LOG := src/Log 10 | SRC_DIR_PAL_LINUX := src/PAL/src/linux 11 | SRC_DIR_PAL_COMMON := src/PAL/src/common 12 | SRC_DIR_UTILS := src/Utils 13 | SRC_DIR_WRAPPER_UTILS := src/WrapperUtils 14 | ifeq ($(shell test -d ../../../target && echo 0),0) 15 | QNN_API_INCLUDE := ../../../include 16 | else 17 | QNN_API_INCLUDE := ../../../../include/QNN 18 | endif 19 | PAL_INCLUDE := src/PAL/include 20 | 21 | QNN_TARGET ?= aarch64-oe-linux-gcc11.2 22 | export TARGET_DIR := ./bin/$(QNN_TARGET) 23 | CXX=$(QNN_AARCH64_LINUX_OE_GCC_112)/sysroots/x86_64-qtisdk-linux/usr/bin/aarch64-oe-linux/aarch64-oe-linux-g++ --sysroot=$(QNN_AARCH64_LINUX_OE_GCC_112)/sysroots/armv8a-oe-linux 24 | 25 | qnn-sample-app := $(TARGET_DIR)/qnn-sample-app 26 | 27 | .PHONY: sample_app_all 28 | .DEFAULT: sample_app_all 29 | sample_app_all: $(qnn-sample-app) 30 | 31 | # Include paths 32 | INCLUDES += -I$(SRC_DIR) -I$(SRC_DIR_LOG) -I$(SRC_DIR_UTILS) -I$(SRC_DIR_WRAPPER_UTILS) -I$(PAL_INCLUDE) -I$(QNN_API_INCLUDE) 33 | 34 | # set compiler flags 35 | COMMON_CXXFLAGS = -ldl -std=gnu++11 -fPIC -Wl,-lstdc++ -Wall -Werror -fno-exceptions -fno-rtti -fPIC -pg $(INCLUDES) 36 | COMMON_LDFLAGS = -shared -s -fPIC 37 | 38 | ifdef QNN_DEBUG_ENABLE 39 | CXXFLAGS += $(COMMON_CXXFLAGS) -g -DQNN_API="" 40 | LDFLAGS += $(COMMON_LDFLAGS) 
41 | else 42 | CXXFLAGS += $(COMMON_CXXFLAGS) -Wno-write-strings -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))" 43 | LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto 44 | endif 45 | 46 | # define library sources 47 | SOURCES := $(wildcard $(SRC_DIR)/*.cpp) 48 | SOURCES_LOG := $(wildcard $(SRC_DIR_LOG)/*.cpp) 49 | SOURCES_PAL := $(wildcard $(SRC_DIR_PAL_LINUX)/*.cpp) 50 | SOURCES_PAL += $(wildcard $(SRC_DIR_PAL_COMMON)/*.cpp) 51 | SOURCES_UTILS := $(wildcard $(SRC_DIR_UTILS)/*.cpp) 52 | SOURCES_WRAPPER_UTILS := $(wildcard $(SRC_DIR_WRAPPER_UTILS)/*.cpp) 53 | 54 | # define object directory 55 | OBJ_ROOT := obj 56 | OBJ_DIR := obj/$(QNN_TARGET) 57 | OBJ_DIR_LOG := obj/$(QNN_TARGET)/Log/ 58 | OBJ_DIR_PAL := obj/$(QNN_TARGET)/PAL 59 | OBJ_DIR_UTILS := obj/$(QNN_TARGET)/Utils/ 60 | OBJ_DIR_WRAPPER_UTILS := obj/$(QNN_TARGET)/WrapperUtils/ 61 | 62 | # setup object files in object directory 63 | OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(foreach x,$(SOURCES),$(notdir $(x)))) 64 | OBJECTS_LOG := $(patsubst %.cpp,$(OBJ_DIR_LOG)/%.o,$(foreach x,$(SOURCES_LOG),$(notdir $(x)))) 65 | OBJECTS_PAL := $(patsubst %.cpp,$(OBJ_DIR_PAL)/%.o,$(foreach x,$(SOURCES_PAL),$(notdir $(x)))) 66 | OBJECTS_UTILS := $(patsubst %.cpp,$(OBJ_DIR_UTILS)/%.o,$(foreach x,$(SOURCES_UTILS),$(notdir $(x)))) 67 | OBJECTS_WRAPPER_UTILS := $(patsubst %.cpp,$(OBJ_DIR_WRAPPER_UTILS)/%.o,$(foreach x,$(SOURCES_WRAPPER_UTILS),$(notdir $(x)))) 68 | 69 | LIBS=-ldl 70 | 71 | # Rule to make executable 72 | .PHONY: qnn-sample-app 73 | qnn-sample-app: $(qnn-sample-app) 74 | 75 | # Implicit rule to compile and link object files 76 | $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp 77 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 78 | 79 | $(OBJ_DIR_LOG)/%.o: $(SRC_DIR_LOG)/%.cpp 80 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 81 | 82 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_LINUX)/%.cpp 83 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 84 | 85 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_COMMON)/%.cpp 86 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 87 | 88 | 
$(OBJ_DIR_UTILS)/%.o: $(SRC_DIR_UTILS)/%.cpp 89 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 90 | 91 | $(OBJ_DIR_WRAPPER_UTILS)/%.o: $(SRC_DIR_WRAPPER_UTILS)/%.cpp 92 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 93 | 94 | # set up resources 95 | directories := $(TARGET_DIR) $(OBJ_DIR) $(OBJ_DIR_LOG) $(OBJ_DIR_PAL) $(OBJ_DIR_UTILS) $(OBJ_DIR_WRAPPER_UTILS) 96 | 97 | # Compile 98 | $(qnn-sample-app): obj/$(QNN_TARGET)/main.o obj/$(QNN_TARGET)/QnnSampleApp.o $(OBJECTS_LOG) $(OBJECTS_PAL) $(OBJECTS_UTILS) $(OBJECTS_WRAPPER_UTILS) | $(directories) 99 | $(CXX) $(CXXFLAGS) $(LINKFLAGS) -o $@ $^ $(LIBS) 100 | 101 | # rule for object directory resource 102 | $(OBJECTS): | $(OBJ_DIR) 103 | $(OBJECTS_LOG): | $(OBJ_DIR_LOG) 104 | $(OBJECTS_PAL): | $(OBJ_DIR_PAL) 105 | $(OBJECTS_UTILS): | $(OBJ_DIR_UTILS) 106 | $(OBJECTS_WRAPPER_UTILS): | $(OBJ_DIR_WRAPPER_UTILS) 107 | 108 | # rule to create directories 109 | $(directories): 110 | mkdir -p $@ 111 | 112 | .PHONY: clean 113 | clean: 114 | rm -rf $(OBJ_ROOT) $(TARGET_DIR) 115 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Makefile.oe-linux-aarch64-gcc8.2: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021-2023 Qualcomm Technologies, Inc. 3 | # All Rights Reserved. 4 | # Confidential and Proprietary - Qualcomm Technologies, Inc. 
5 | # 6 | 7 | # define relevant directories 8 | SRC_DIR := src 9 | SRC_DIR_LOG := src/Log 10 | SRC_DIR_PAL_LINUX := src/PAL/src/linux 11 | SRC_DIR_PAL_COMMON := src/PAL/src/common 12 | SRC_DIR_UTILS := src/Utils 13 | SRC_DIR_WRAPPER_UTILS := src/WrapperUtils 14 | ifeq ($(shell test -d ../../../target && echo 0),0) 15 | QNN_API_INCLUDE := ../../../include 16 | else 17 | QNN_API_INCLUDE := ../../../../include/QNN 18 | endif 19 | PAL_INCLUDE := src/PAL/include 20 | 21 | QNN_TARGET ?= aarch64-oe-linux-gcc8.2 22 | export TARGET_DIR := ./bin/$(QNN_TARGET) 23 | CXX=$(QNN_AARCH64_LINUX_OE_GCC_82)/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-oe-linux/aarch64-oe-linux-g++ --sysroot=$(QNN_AARCH64_LINUX_OE_GCC_82)/sysroots/aarch64-oe-linux 24 | 25 | qnn-sample-app := $(TARGET_DIR)/qnn-sample-app 26 | 27 | .PHONY: sample_app_all 28 | .DEFAULT: sample_app_all 29 | sample_app_all: $(qnn-sample-app) 30 | 31 | # Include paths 32 | INCLUDES += -I$(SRC_DIR) -I$(SRC_DIR_LOG) -I$(SRC_DIR_UTILS) -I$(SRC_DIR_WRAPPER_UTILS) -I$(PAL_INCLUDE) -I$(QNN_API_INCLUDE) 33 | 34 | # set compiler flags 35 | COMMON_CXXFLAGS = -ldl -std=gnu++11 -fPIC -Wl,-lstdc++ -Wall -Werror -fno-exceptions -fno-rtti -fPIC -pg $(INCLUDES) 36 | COMMON_LDFLAGS = -shared -s -fPIC 37 | 38 | ifdef QNN_DEBUG_ENABLE 39 | CXXFLAGS += $(COMMON_CXXFLAGS) -g -DQNN_API="" 40 | LDFLAGS += $(COMMON_LDFLAGS) 41 | else 42 | CXXFLAGS += $(COMMON_CXXFLAGS) -Wno-write-strings -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))" 43 | LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto 44 | endif 45 | 46 | # define library sources 47 | SOURCES := $(wildcard $(SRC_DIR)/*.cpp) 48 | SOURCES_LOG := $(wildcard $(SRC_DIR_LOG)/*.cpp) 49 | SOURCES_PAL := $(wildcard $(SRC_DIR_PAL_LINUX)/*.cpp) 50 | SOURCES_PAL += $(wildcard $(SRC_DIR_PAL_COMMON)/*.cpp) 51 | SOURCES_UTILS := $(wildcard $(SRC_DIR_UTILS)/*.cpp) 52 | SOURCES_WRAPPER_UTILS := $(wildcard $(SRC_DIR_WRAPPER_UTILS)/*.cpp) 53 | 54 | # define object directory 55 | 
OBJ_ROOT := obj 56 | OBJ_DIR := obj/$(QNN_TARGET) 57 | OBJ_DIR_LOG := obj/$(QNN_TARGET)/Log/ 58 | OBJ_DIR_PAL := obj/$(QNN_TARGET)/PAL 59 | OBJ_DIR_UTILS := obj/$(QNN_TARGET)/Utils/ 60 | OBJ_DIR_WRAPPER_UTILS := obj/$(QNN_TARGET)/WrapperUtils/ 61 | 62 | # setup object files in object directory 63 | OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(foreach x,$(SOURCES),$(notdir $(x)))) 64 | OBJECTS_LOG := $(patsubst %.cpp,$(OBJ_DIR_LOG)/%.o,$(foreach x,$(SOURCES_LOG),$(notdir $(x)))) 65 | OBJECTS_PAL := $(patsubst %.cpp,$(OBJ_DIR_PAL)/%.o,$(foreach x,$(SOURCES_PAL),$(notdir $(x)))) 66 | OBJECTS_UTILS := $(patsubst %.cpp,$(OBJ_DIR_UTILS)/%.o,$(foreach x,$(SOURCES_UTILS),$(notdir $(x)))) 67 | OBJECTS_WRAPPER_UTILS := $(patsubst %.cpp,$(OBJ_DIR_WRAPPER_UTILS)/%.o,$(foreach x,$(SOURCES_WRAPPER_UTILS),$(notdir $(x)))) 68 | 69 | LIBS=-ldl 70 | 71 | # Rule to make executable 72 | .PHONY: qnn-sample-app 73 | qnn-sample-app: $(qnn-sample-app) 74 | 75 | # Implicit rule to compile and link object files 76 | $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp 77 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 78 | 79 | $(OBJ_DIR_LOG)/%.o: $(SRC_DIR_LOG)/%.cpp 80 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 81 | 82 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_LINUX)/%.cpp 83 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 84 | 85 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_COMMON)/%.cpp 86 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 87 | 88 | $(OBJ_DIR_UTILS)/%.o: $(SRC_DIR_UTILS)/%.cpp 89 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 90 | 91 | $(OBJ_DIR_WRAPPER_UTILS)/%.o: $(SRC_DIR_WRAPPER_UTILS)/%.cpp 92 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 93 | 94 | # set up resources 95 | directories := $(TARGET_DIR) $(OBJ_DIR) $(OBJ_DIR_LOG) $(OBJ_DIR_PAL) $(OBJ_DIR_UTILS) $(OBJ_DIR_WRAPPER_UTILS) 96 | 97 | # Compile 98 | $(qnn-sample-app): obj/$(QNN_TARGET)/main.o obj/$(QNN_TARGET)/QnnSampleApp.o $(OBJECTS_LOG) $(OBJECTS_PAL) $(OBJECTS_UTILS) $(OBJECTS_WRAPPER_UTILS) | $(directories) 99 | $(CXX) $(CXXFLAGS) $(LINKFLAGS) -o $@ $^ $(LIBS) 100 | 101 | # rule for object directory resource 102 | 
$(OBJECTS): | $(OBJ_DIR) 103 | $(OBJECTS_LOG): | $(OBJ_DIR_LOG) 104 | $(OBJECTS_PAL): | $(OBJ_DIR_PAL) 105 | $(OBJECTS_UTILS): | $(OBJ_DIR_UTILS) 106 | $(OBJECTS_WRAPPER_UTILS): | $(OBJ_DIR_WRAPPER_UTILS) 107 | 108 | # rule to create directories 109 | $(directories): 110 | mkdir -p $@ 111 | 112 | .PHONY: clean 113 | clean: 114 | rm -rf $(OBJ_ROOT) $(TARGET_DIR) 115 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Makefile.oe-linux-aarch64-gcc9.3: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021-2023 Qualcomm Technologies, Inc. 3 | # All Rights Reserved. 4 | # Confidential and Proprietary - Qualcomm Technologies, Inc. 5 | # 6 | 7 | # define relevant directories 8 | SRC_DIR := src 9 | SRC_DIR_LOG := src/Log 10 | SRC_DIR_PAL_LINUX := src/PAL/src/linux 11 | SRC_DIR_PAL_COMMON := src/PAL/src/common 12 | SRC_DIR_UTILS := src/Utils 13 | SRC_DIR_WRAPPER_UTILS := src/WrapperUtils 14 | ifeq ($(shell test -d ../../../target && echo 0),0) 15 | QNN_API_INCLUDE := ../../../include 16 | else 17 | QNN_API_INCLUDE := ../../../../include/QNN 18 | endif 19 | PAL_INCLUDE := src/PAL/include 20 | 21 | QNN_TARGET ?= aarch64-oe-linux-gcc9.3 22 | export TARGET_DIR := ./bin/$(QNN_TARGET) 23 | CXX=$(QNN_AARCH64_LINUX_OE_GCC_93)/sysroots/x86_64-oesdk-linux/usr/bin/aarch64-oe-linux/aarch64-oe-linux-g++ --sysroot=$(QNN_AARCH64_LINUX_OE_GCC_93)/sysroots/aarch64-oe-linux 24 | 25 | qnn-sample-app := $(TARGET_DIR)/qnn-sample-app 26 | 27 | .PHONY: sample_app_all 28 | .DEFAULT: sample_app_all 29 | sample_app_all: $(qnn-sample-app) 30 | 31 | # Include paths 32 | INCLUDES += -I$(SRC_DIR) -I$(SRC_DIR_LOG) -I$(SRC_DIR_UTILS) -I$(SRC_DIR_WRAPPER_UTILS) -I$(PAL_INCLUDE) -I$(QNN_API_INCLUDE) 33 | 34 | # set compiler flags 35 | COMMON_CXXFLAGS = -ldl -std=gnu++11 -fPIC -Wl,-lstdc++ -Wall -Werror -fno-exceptions -fno-rtti -fPIC -pg $(INCLUDES) 36 | COMMON_LDFLAGS = -shared -s -fPIC 37 | 38 | 
ifdef QNN_DEBUG_ENABLE 39 | CXXFLAGS += $(COMMON_CXXFLAGS) -g -DQNN_API="" 40 | LDFLAGS += $(COMMON_LDFLAGS) 41 | else 42 | CXXFLAGS += $(COMMON_CXXFLAGS) -Wno-write-strings -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))" 43 | LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto 44 | endif 45 | 46 | # define library sources 47 | SOURCES := $(wildcard $(SRC_DIR)/*.cpp) 48 | SOURCES_LOG := $(wildcard $(SRC_DIR_LOG)/*.cpp) 49 | SOURCES_PAL := $(wildcard $(SRC_DIR_PAL_LINUX)/*.cpp) 50 | SOURCES_PAL += $(wildcard $(SRC_DIR_PAL_COMMON)/*.cpp) 51 | SOURCES_UTILS := $(wildcard $(SRC_DIR_UTILS)/*.cpp) 52 | SOURCES_WRAPPER_UTILS := $(wildcard $(SRC_DIR_WRAPPER_UTILS)/*.cpp) 53 | 54 | # define object directory 55 | OBJ_ROOT := obj 56 | OBJ_DIR := obj/$(QNN_TARGET) 57 | OBJ_DIR_LOG := obj/$(QNN_TARGET)/Log/ 58 | OBJ_DIR_PAL := obj/$(QNN_TARGET)/PAL 59 | OBJ_DIR_UTILS := obj/$(QNN_TARGET)/Utils/ 60 | OBJ_DIR_WRAPPER_UTILS := obj/$(QNN_TARGET)/WrapperUtils/ 61 | 62 | # setup object files in object directory 63 | OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(foreach x,$(SOURCES),$(notdir $(x)))) 64 | OBJECTS_LOG := $(patsubst %.cpp,$(OBJ_DIR_LOG)/%.o,$(foreach x,$(SOURCES_LOG),$(notdir $(x)))) 65 | OBJECTS_PAL := $(patsubst %.cpp,$(OBJ_DIR_PAL)/%.o,$(foreach x,$(SOURCES_PAL),$(notdir $(x)))) 66 | OBJECTS_UTILS := $(patsubst %.cpp,$(OBJ_DIR_UTILS)/%.o,$(foreach x,$(SOURCES_UTILS),$(notdir $(x)))) 67 | OBJECTS_WRAPPER_UTILS := $(patsubst %.cpp,$(OBJ_DIR_WRAPPER_UTILS)/%.o,$(foreach x,$(SOURCES_WRAPPER_UTILS),$(notdir $(x)))) 68 | 69 | LIBS=-ldl 70 | 71 | # Rule to make executable 72 | .PHONY: qnn-sample-app 73 | qnn-sample-app: $(qnn-sample-app) 74 | 75 | # Implicit rule to compile and link object files 76 | $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp 77 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 78 | 79 | $(OBJ_DIR_LOG)/%.o: $(SRC_DIR_LOG)/%.cpp 80 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 81 | 82 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_LINUX)/%.cpp 83 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 84 
| 85 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_COMMON)/%.cpp 86 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 87 | 88 | $(OBJ_DIR_UTILS)/%.o: $(SRC_DIR_UTILS)/%.cpp 89 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 90 | 91 | $(OBJ_DIR_WRAPPER_UTILS)/%.o: $(SRC_DIR_WRAPPER_UTILS)/%.cpp 92 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 93 | 94 | # set up resources 95 | directories := $(TARGET_DIR) $(OBJ_DIR) $(OBJ_DIR_LOG) $(OBJ_DIR_PAL) $(OBJ_DIR_UTILS) $(OBJ_DIR_WRAPPER_UTILS) 96 | 97 | # Compile 98 | $(qnn-sample-app): obj/$(QNN_TARGET)/main.o obj/$(QNN_TARGET)/QnnSampleApp.o $(OBJECTS_LOG) $(OBJECTS_PAL) $(OBJECTS_UTILS) $(OBJECTS_WRAPPER_UTILS) | $(directories) 99 | $(CXX) $(CXXFLAGS) $(LINKFLAGS) -o $@ $^ $(LIBS) 100 | 101 | # rule for object directory resource 102 | $(OBJECTS): | $(OBJ_DIR) 103 | $(OBJECTS_LOG): | $(OBJ_DIR_LOG) 104 | $(OBJECTS_PAL): | $(OBJ_DIR_PAL) 105 | $(OBJECTS_UTILS): | $(OBJ_DIR_UTILS) 106 | $(OBJECTS_WRAPPER_UTILS): | $(OBJ_DIR_WRAPPER_UTILS) 107 | 108 | # rule to create directories 109 | $(directories): 110 | mkdir -p $@ 111 | 112 | .PHONY: clean 113 | clean: 114 | rm -rf $(OBJ_ROOT) $(TARGET_DIR) 115 | -------------------------------------------------------------------------------- /librwkv-qualcomm/make/Makefile.ubuntu-aarch64-gcc9.4: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2023 Qualcomm Technologies, Inc. 3 | # All Rights Reserved. 4 | # Confidential and Proprietary - Qualcomm Technologies, Inc. 
#

# define relevant directories
SRC_DIR := src
SRC_DIR_LOG := src/Log
SRC_DIR_PAL_LINUX := src/PAL/src/linux
SRC_DIR_PAL_COMMON := src/PAL/src/common
SRC_DIR_UTILS := src/Utils
SRC_DIR_WRAPPER_UTILS := src/WrapperUtils
QNN_API_INCLUDE := $(QNN_SDK_ROOT)/include/QNN
PAL_INCLUDE := src/PAL/include

# Target triple (overridable from the command line/environment) and output directory.
# The cross compiler and its sysroot are both taken from QNN_AARCH64_UBUNTU_GCC_94.
QNN_TARGET ?= aarch64-ubuntu-gcc9.4
export TARGET_DIR := ./bin/$(QNN_TARGET)
CXX=$(QNN_AARCH64_UBUNTU_GCC_94)/usr/bin/aarch64-linux-gnu-g++ --sysroot=$(QNN_AARCH64_UBUNTU_GCC_94)

rwkv-qualcomm-demo := $(TARGET_DIR)/rwkv-qualcomm-demo

# Select the goal built by a bare `make`. The special target `.DEFAULT` only
# supplies a fallback recipe for targets that have no rule, so it cannot be
# used for this; `.DEFAULT_GOAL` is the variable that names the default goal.
.PHONY: rwkv_qualcomm_demo_all
.DEFAULT_GOAL := rwkv_qualcomm_demo_all
rwkv_qualcomm_demo_all: $(rwkv-qualcomm-demo)

# Include paths
INCLUDES += -I$(SRC_DIR) -I$(SRC_DIR_LOG) -I$(SRC_DIR_UTILS) -I$(SRC_DIR_WRAPPER_UTILS) -I$(PAL_INCLUDE) -I$(QNN_API_INCLUDE)

# set compiler flags
COMMON_CXXFLAGS = -ldl -std=gnu++20 -fPIC -Wl,-lstdc++ -Wall -fno-rtti -fPIC -pg $(INCLUDES)
COMMON_LDFLAGS = -shared -s -fPIC

# Debug builds keep symbols and leave QNN_API empty; release builds hide all
# symbols by default and export only what is tagged with QNN_API.
ifdef QNN_DEBUG_ENABLE
  CXXFLAGS += $(COMMON_CXXFLAGS) -g -DQNN_API=""
  LDFLAGS += $(COMMON_LDFLAGS)
else
  CXXFLAGS += $(COMMON_CXXFLAGS) -Wno-write-strings -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))"
  LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto
endif

# define library sources
SOURCES := $(wildcard $(SRC_DIR)/*.cpp)
SOURCES_LOG := $(wildcard $(SRC_DIR_LOG)/*.cpp)
SOURCES_PAL := $(wildcard $(SRC_DIR_PAL_LINUX)/*.cpp)
SOURCES_PAL += $(wildcard $(SRC_DIR_PAL_COMMON)/*.cpp)
SOURCES_UTILS := $(wildcard $(SRC_DIR_UTILS)/*.cpp)
SOURCES_WRAPPER_UTILS := $(wildcard $(SRC_DIR_WRAPPER_UTILS)/*.cpp)

# define object directory
OBJ_ROOT := obj
OBJ_DIR := obj/$(QNN_TARGET)
OBJ_DIR_LOG := obj/$(QNN_TARGET)/Log/
OBJ_DIR_PAL := obj/$(QNN_TARGET)/PAL
OBJ_DIR_UTILS := obj/$(QNN_TARGET)/Utils/
56 | OBJ_DIR_WRAPPER_UTILS := obj/$(QNN_TARGET)/WrapperUtils/ 57 | 58 | # setup object files in object directory 59 | OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(foreach x,$(SOURCES),$(notdir $(x)))) 60 | OBJECTS_LOG := $(patsubst %.cpp,$(OBJ_DIR_LOG)/%.o,$(foreach x,$(SOURCES_LOG),$(notdir $(x)))) 61 | OBJECTS_PAL := $(patsubst %.cpp,$(OBJ_DIR_PAL)/%.o,$(foreach x,$(SOURCES_PAL),$(notdir $(x)))) 62 | OBJECTS_UTILS := $(patsubst %.cpp,$(OBJ_DIR_UTILS)/%.o,$(foreach x,$(SOURCES_UTILS),$(notdir $(x)))) 63 | OBJECTS_WRAPPER_UTILS := $(patsubst %.cpp,$(OBJ_DIR_WRAPPER_UTILS)/%.o,$(foreach x,$(SOURCES_WRAPPER_UTILS),$(notdir $(x)))) 64 | 65 | LIBS=-ldl 66 | 67 | # Rule to make executable 68 | .PHONY: rwkv-qualcomm-demo 69 | rwkv-qualcomm-demo: $(rwkv-qualcomm-demo) 70 | 71 | # Implicit rule to compile and link object files 72 | $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp 73 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 74 | 75 | $(OBJ_DIR_LOG)/%.o: $(SRC_DIR_LOG)/%.cpp 76 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 77 | 78 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_LINUX)/%.cpp 79 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 80 | 81 | $(OBJ_DIR_PAL)/%.o: $(SRC_DIR_PAL_COMMON)/%.cpp 82 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 83 | 84 | $(OBJ_DIR_UTILS)/%.o: $(SRC_DIR_UTILS)/%.cpp 85 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 86 | 87 | $(OBJ_DIR_WRAPPER_UTILS)/%.o: $(SRC_DIR_WRAPPER_UTILS)/%.cpp 88 | $(CXX) $(CXXFLAGS) -c $^ -o $@ 89 | 90 | # set up resources 91 | directories := $(TARGET_DIR) $(OBJ_DIR) $(OBJ_DIR_LOG) $(OBJ_DIR_PAL) $(OBJ_DIR_UTILS) $(OBJ_DIR_WRAPPER_UTILS) 92 | 93 | # Compile 94 | $(rwkv-qualcomm-demo): obj/$(QNN_TARGET)/main.o obj/$(QNN_TARGET)/librwkv-qualcomm-app.o obj/$(QNN_TARGET)/tokenizer.o obj/$(QNN_TARGET)/librwkv-qualcomm.o obj/$(QNN_TARGET)/soc_detect.o $(OBJECTS_LOG) $(OBJECTS_PAL) $(OBJECTS_UTILS) $(OBJECTS_WRAPPER_UTILS) | $(directories) 95 | $(CXX) $(CXXFLAGS) $(LINKFLAGS) -o $@ $^ $(LIBS) 96 | 97 | # rule for object directory resource 98 | $(OBJECTS): | $(OBJ_DIR) 99 | $(OBJECTS_LOG): | $(OBJ_DIR_LOG) 100 | 
$(OBJECTS_PAL): | $(OBJ_DIR_PAL) 101 | $(OBJECTS_UTILS): | $(OBJ_DIR_UTILS) 102 | $(OBJECTS_WRAPPER_UTILS): | $(OBJ_DIR_WRAPPER_UTILS) 103 | 104 | # rule to create directories 105 | $(directories): 106 | mkdir -p $@ 107 | 108 | .PHONY: clean 109 | clean: 110 | rm -rf $(OBJ_ROOT) $(TARGET_DIR) 111 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_STANDARD 20) 2 | 3 | set(LIB "rwkv-qualcomm") 4 | set(LIB_SOURCES "librwkv-qualcomm-app.cpp" 5 | "librwkv-qualcomm.cpp" 6 | "soc_detect.cpp" 7 | "Log/Logger.cpp" 8 | "Log/LogUtils.cpp" 9 | "PAL/src/windows/Common.cpp" 10 | "PAL/src/windows/Directory.cpp" 11 | "PAL/src/windows/DynamicLoading.cpp" 12 | "PAL/src/windows/FileOp.cpp" 13 | "PAL/src/windows/Path.cpp" 14 | "PAL/src/common/StringOp.cpp" 15 | "Utils/DataUtil.cpp" 16 | "Utils/DynamicLoadUtil.cpp" 17 | "Utils/IOTensor.cpp" 18 | "Utils/Utils.cpp" 19 | "Utils/ClientBuffer.cpp" 20 | "Utils/dlwrap.cpp" 21 | "Utils/RpcMem.cpp" 22 | "WrapperUtils/QnnWrapperUtils.cpp") 23 | 24 | add_library(${LIB} STATIC ${LIB_SOURCES}) 25 | 26 | target_compile_definitions(${LIB} PUBLIC "-DNOMINMAX") 27 | target_link_libraries(${LIB} PRIVATE Shlwapi Shell32) 28 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") 29 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Ob3") 30 | target_include_directories(${LIB} PUBLIC CachingUtil 31 | Log 32 | PAL/include 33 | Utils 34 | WrapperUtils 35 | ${CMAKE_BINARY_DIR} 36 | ${QNN_SDK_ROOT}/include/QNN 37 | ./) 38 | 39 | set(DEMO "rwkv-qualcomm-demo") 40 | set(DEMO_SOURCES "main.cpp" 41 | "tokenizer.cpp" 42 | "soc_detect.cpp" 43 | "librwkv-qualcomm-app.cpp" 44 | "librwkv-qualcomm.cpp" 45 | "soc_detect.cpp" 46 | "Log/Logger.cpp" 47 | "Log/LogUtils.cpp" 48 | "PAL/src/windows/Common.cpp" 49 | "PAL/src/windows/Directory.cpp" 50 | "PAL/src/windows/DynamicLoading.cpp" 51 | 
"PAL/src/windows/FileOp.cpp" 52 | "PAL/src/windows/Path.cpp" 53 | "PAL/src/common/StringOp.cpp" 54 | "Utils/DataUtil.cpp" 55 | "Utils/DynamicLoadUtil.cpp" 56 | "Utils/IOTensor.cpp" 57 | "Utils/Utils.cpp" 58 | "Utils/ClientBuffer.cpp" 59 | "Utils/dlwrap.cpp" 60 | "Utils/RpcMem.cpp" 61 | "WrapperUtils/QnnWrapperUtils.cpp") 62 | add_executable(${DEMO} ${DEMO_SOURCES}) 63 | 64 | target_compile_definitions(${DEMO} PUBLIC "-DNOMINMAX") 65 | target_link_libraries(${DEMO} PRIVATE Shlwapi Shell32) 66 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}") 67 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Ob3") 68 | target_include_directories(${DEMO} PUBLIC CachingUtil 69 | Log 70 | PAL/include 71 | Utils 72 | WrapperUtils 73 | ${CMAKE_BINARY_DIR} 74 | ${QNN_SDK_ROOT}/include/QNN 75 | ./) -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Interfaces.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "QnnInterface.h" 4 | #include "QnnWrapperUtils.hpp" 5 | #include "System/QnnSystemInterface.h" 6 | 7 | namespace qnn { 8 | namespace tools { 9 | namespace rwkv_app { 10 | 11 | // Graph Related Function Handle Types 12 | typedef ModelError_t (*ComposeGraphsFnHandleType_t)( 13 | Qnn_BackendHandle_t, 14 | QNN_INTERFACE_VER_TYPE, 15 | Qnn_ContextHandle_t, 16 | const GraphConfigInfo_t **, 17 | const uint32_t, 18 | GraphInfo_t ***, 19 | uint32_t *, 20 | bool, 21 | QnnLog_Callback_t, 22 | QnnLog_Level_t); 23 | typedef ModelError_t (*FreeGraphInfoFnHandleType_t)( 24 | GraphInfo_t ***, uint32_t); 25 | 26 | typedef struct QnnFunctionPointers { 27 | ComposeGraphsFnHandleType_t composeGraphsFnHandle; 28 | FreeGraphInfoFnHandleType_t freeGraphInfoFnHandle; 29 | QNN_INTERFACE_VER_TYPE qnnInterface; 30 | QNN_SYSTEM_INTERFACE_VER_TYPE qnnSystemInterface; 31 | } QnnFunctionPointers; 32 | 33 | } // namespace rwkv_app 34 | } // namespace tools 35 | } // namespace 
qnn 36 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Log/LogUtils.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #include "LogUtils.hpp" 10 | #ifdef ANDROID 11 | #include 12 | #endif 13 | 14 | void qnn::log::utils::logDefaultCallback(const char* fmt, 15 | QnnLog_Level_t level, 16 | uint64_t timestamp, 17 | va_list argp) { 18 | const char* levelStr = ""; 19 | switch (level) { 20 | case QNN_LOG_LEVEL_ERROR: 21 | levelStr = " ERROR "; 22 | break; 23 | case QNN_LOG_LEVEL_WARN: 24 | levelStr = "WARNING"; 25 | break; 26 | case QNN_LOG_LEVEL_INFO: 27 | levelStr = " INFO "; 28 | break; 29 | case QNN_LOG_LEVEL_DEBUG: 30 | levelStr = " DEBUG "; 31 | break; 32 | case QNN_LOG_LEVEL_VERBOSE: 33 | levelStr = "VERBOSE"; 34 | break; 35 | case QNN_LOG_LEVEL_MAX: 36 | levelStr = "UNKNOWN"; 37 | break; 38 | } 39 | 40 | double ms = (double)timestamp / 1000000.0; 41 | // To avoid interleaved messages 42 | { 43 | std::lock_guard lock(sg_logUtilMutex); 44 | fprintf(stdout, "%8.1fms [%-7s] ", ms, levelStr); 45 | vfprintf(stdout, fmt, argp); 46 | fprintf(stdout, "\n"); 47 | } 48 | } 49 | 50 | #ifdef ANDROID 51 | void qnn::log::utils::logAndroidCallback(const char* fmt, 52 | QnnLog_Level_t level, 53 | uint64_t timestamp, 54 | va_list argp){ 55 | int loglevel = ANDROID_LOG_UNKNOWN; 56 | switch (level) { 57 | case QNN_LOG_LEVEL_ERROR: 58 | loglevel = ANDROID_LOG_ERROR; 59 | break; 60 | case QNN_LOG_LEVEL_WARN: 61 | loglevel = ANDROID_LOG_WARN; 62 | break; 63 | case QNN_LOG_LEVEL_INFO: 64 | loglevel = ANDROID_LOG_INFO; 65 | break; 66 | case 
QNN_LOG_LEVEL_DEBUG: 67 | loglevel = ANDROID_LOG_DEBUG; 68 | break; 69 | case QNN_LOG_LEVEL_VERBOSE: 70 | loglevel = ANDROID_LOG_VERBOSE; 71 | break; 72 | case QNN_LOG_LEVEL_MAX: 73 | loglevel = ANDROID_LOG_UNKNOWN; 74 | break; 75 | } 76 | char logStr[1024]; 77 | vsnprintf(logStr, sizeof(logStr), fmt, argp); 78 | __android_log_print(loglevel, "RWKV-QNN", "%s\n", logStr); 79 | } 80 | #endif 81 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Log/LogUtils.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "QnnLog.h" 17 | 18 | namespace qnn { 19 | namespace log { 20 | namespace utils { 21 | 22 | void logAndroidCallback(const char* fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp); 23 | 24 | // In non-hexagon app stdout is used and for hexagon farf logging is used 25 | void logDefaultCallback(const char* fmt, QnnLog_Level_t level, uint64_t timestamp, va_list argp); 26 | 27 | static std::mutex sg_logUtilMutex; 28 | 29 | } // namespace utils 30 | } // namespace log 31 | } // namespace qnn 32 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Log/Logger.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 
5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "LogUtils.hpp" 15 | #include "Logger.hpp" 16 | 17 | using namespace qnn::log; 18 | 19 | std::shared_ptr Logger::s_logger = nullptr; 20 | 21 | std::mutex Logger::s_logMutex; 22 | 23 | std::shared_ptr Logger::createLogger(QnnLog_Callback_t callback, 24 | QnnLog_Level_t maxLevel, 25 | QnnLog_Error_t* status) { 26 | std::lock_guard lock(s_logMutex); 27 | if ((maxLevel > QNN_LOG_LEVEL_VERBOSE) || (maxLevel == 0)) { 28 | if (status) { 29 | *status = QNN_LOG_ERROR_INVALID_ARGUMENT; 30 | } 31 | return nullptr; 32 | } 33 | if (!s_logger) { 34 | s_logger = std::shared_ptr(new (std::nothrow) Logger(callback, maxLevel, status)); 35 | } 36 | *status = QNN_LOG_NO_ERROR; 37 | return s_logger; 38 | } 39 | 40 | Logger::Logger(QnnLog_Callback_t callback, QnnLog_Level_t maxLevel, QnnLog_Error_t* status) 41 | : m_callback(callback), m_maxLevel(maxLevel), m_epoch(getTimestamp()) { 42 | if (!callback) { 43 | #ifdef ANDROID 44 | m_callback = utils::logAndroidCallback; 45 | #else 46 | m_callback = utils::logDefaultCallback; 47 | #endif 48 | } 49 | } 50 | 51 | void Logger::log(QnnLog_Level_t level, const char* file, long line, const char* fmt, ...) 
{ 52 | if (m_callback) { 53 | if (level > m_maxLevel.load(std::memory_order_seq_cst)) { 54 | return; 55 | } 56 | va_list argp; 57 | va_start(argp, fmt); 58 | std::string logString(fmt); 59 | std::ignore = file; 60 | std::ignore = line; 61 | (*m_callback)(logString.c_str(), level, getTimestamp() - m_epoch, argp); 62 | va_end(argp); 63 | } 64 | } 65 | 66 | uint64_t Logger::getTimestamp() const { 67 | return std::chrono::duration_cast( 68 | std::chrono::system_clock::now().time_since_epoch()) 69 | .count(); 70 | } 71 | 72 | std::shared_ptr<::qnn::log::Logger> g_logger{nullptr}; 73 | 74 | bool qnn::log::initializeLogging() { 75 | QnnLog_Level_t logLevel; 76 | QnnLog_Error_t status; 77 | #ifdef QNN_ENABLE_DEBUG 78 | logLevel = QNN_LOG_LEVEL_DEBUG; 79 | #else 80 | logLevel = QNN_LOG_LEVEL_ERROR; 81 | #endif 82 | // Default log stream is enabled in Core/Logger component 83 | g_logger = ::qnn::log::Logger::createLogger(nullptr, logLevel, &status); 84 | if (QNN_LOG_NO_ERROR != status || !g_logger) { 85 | return false; 86 | } 87 | return true; 88 | } 89 | 90 | QnnLog_Callback_t qnn::log::getLogCallback() { return g_logger->getLogCallback(); } 91 | 92 | QnnLog_Level_t qnn::log::getLogLevel() { return g_logger->getMaxLevel(); } 93 | 94 | bool qnn::log::isLogInitialized() { 95 | if (g_logger == nullptr) { 96 | return false; 97 | } 98 | return true; 99 | } 100 | 101 | bool qnn::log::setLogLevel(QnnLog_Level_t maxLevel) { 102 | if (!::qnn::log::Logger::isValid() || 103 | !(maxLevel >= QNN_LOG_LEVEL_ERROR && maxLevel <= QNN_LOG_LEVEL_DEBUG)) { 104 | return false; 105 | } 106 | 107 | g_logger->setMaxLevel(maxLevel); 108 | return true; 109 | } 110 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Log/Logger.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, 
Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "QnnLog.h" 18 | 19 | #define __FILENAME__ (strrchr(__FILE__, '/') + 1) 20 | 21 | /** 22 | * @brief Log something with the current logger. Always valid to call, though 23 | * it won't do something if no logger has been set. 24 | */ 25 | 26 | #define QNN_LOG_LEVEL(level, fmt, ...) \ 27 | do { \ 28 | auto logger = ::qnn::log::Logger::getLogger(); \ 29 | if (logger) { \ 30 | logger->log(level, __FILENAME__, __LINE__, fmt, ##__VA_ARGS__); \ 31 | } \ 32 | } while (0) 33 | 34 | #define QNN_ERROR(fmt, ...) QNN_LOG_LEVEL(QNN_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) 35 | 36 | #define QNN_ERROR_EXIT(fmt, ...) \ 37 | { \ 38 | QNN_ERROR(fmt, ##__VA_ARGS__); \ 39 | exit(EXIT_FAILURE); \ 40 | } 41 | 42 | #define QNN_WARN(fmt, ...) QNN_LOG_LEVEL(QNN_LOG_LEVEL_WARN, fmt, ##__VA_ARGS__) 43 | 44 | #define QNN_INFO(fmt, ...) QNN_LOG_LEVEL(QNN_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) 45 | 46 | #define QNN_DEBUG(fmt, ...) QNN_LOG_LEVEL(QNN_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) 47 | 48 | #define QNN_VERBOSE(fmt, ...) 
QNN_LOG_LEVEL(QNN_LOG_LEVEL_VERBOSE, fmt, ##__VA_ARGS__) 49 | 50 | #define QNN_FUNCTION_ENTRY_LOG QNN_LOG_LEVEL(QNN_LOG_LEVEL_VERBOSE, "Entering %s", __func__) 51 | 52 | #define QNN_FUNCTION_EXIT_LOG QNN_LOG_LEVEL(QNN_LOG_LEVEL_VERBOSE, "Returning from %s", __func__) 53 | 54 | namespace qnn { 55 | namespace log { 56 | 57 | bool initializeLogging(); 58 | 59 | QnnLog_Callback_t getLogCallback(); 60 | 61 | QnnLog_Level_t getLogLevel(); 62 | 63 | bool isLogInitialized(); 64 | 65 | bool setLogLevel(QnnLog_Level_t maxLevel); 66 | 67 | class Logger final { 68 | public: 69 | Logger(const Logger&) = delete; 70 | Logger& operator=(const Logger&) = delete; 71 | Logger(Logger&&) = delete; 72 | Logger& operator=(Logger&&) = delete; 73 | 74 | void setMaxLevel(QnnLog_Level_t maxLevel) { 75 | m_maxLevel.store(maxLevel, std::memory_order_seq_cst); 76 | } 77 | 78 | QnnLog_Level_t getMaxLevel() { return m_maxLevel.load(std::memory_order_seq_cst); } 79 | 80 | QnnLog_Callback_t getLogCallback() { return m_callback; } 81 | 82 | void log(QnnLog_Level_t level, const char* file, long line, const char* fmt, ...); 83 | 84 | static std::shared_ptr createLogger(QnnLog_Callback_t callback, 85 | QnnLog_Level_t maxLevel, 86 | QnnLog_Error_t* status); 87 | 88 | static bool isValid() { return (s_logger != nullptr); } 89 | 90 | static std::shared_ptr getLogger() { return s_logger; } 91 | 92 | static void reset() { s_logger = nullptr; } 93 | uint64_t getTimestamp() const; 94 | 95 | private: 96 | Logger(QnnLog_Callback_t callback, QnnLog_Level_t maxLevel, QnnLog_Error_t* status); 97 | 98 | QnnLog_Callback_t m_callback; 99 | std::atomic m_maxLevel; 100 | uint64_t m_epoch; 101 | static std::shared_ptr s_logger; 102 | static std::mutex s_logMutex; 103 | }; 104 | 105 | } // namespace log 106 | } // namespace qnn 107 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/include/PAL/Debug.hpp: 
-------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #define DEBUG_ON 0 12 | 13 | #if DEBUG_ON 14 | #define DEBUG_MSG(...) \ 15 | { \ 16 | fprintf(stderr, __VA_ARGS__); \ 17 | fprintf(stderr, "\n"); \ 18 | } 19 | #else 20 | #define DEBUG_MSG(...) 21 | #endif 22 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/include/PAL/Directory.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | //--------------------------------------------------------------------------- 10 | /// @file 11 | /// This file includes APIs for directory operations on supported platforms 12 | //--------------------------------------------------------------------------- 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | #include "PAL/FileOp.hpp" 19 | 20 | namespace pal { 21 | class Directory; 22 | } 23 | 24 | class pal::Directory { 25 | public: 26 | using DirMode = pal::FileOp::FileMode; 27 | //--------------------------------------------------------------------------- 28 | /// @brief 29 | /// Creates a directory in the file system. 30 | /// @param path 31 | /// Name of directory to create. 
32 | /// @param dirmode 33 | /// Directory mode 34 | /// @return 35 | /// True if 36 | /// 1. create a directory successfully 37 | /// 2. or directory exist already 38 | /// False otherwise 39 | /// 40 | /// For example: 41 | /// 42 | /// - Create a directory in default. 43 | /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 44 | /// pal::Directory::Create(path, pal::Directory::DirMode::S_DEFAULT_); 45 | /// pal::Directory::Create(path); 46 | /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 47 | /// 48 | /// - Create a directory with specific permission. 49 | /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | /// pal::Directory::Create(path, pal::Directory::DirMode::S_IRWXU_| 51 | /// pal::Directory::DirMode::S_IRWXG_| 52 | /// pal::Directory::DirMode::S_IRWXO_); 53 | /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 54 | /// 55 | /// @note For windows, dirmode is not used. 56 | /// @note For linux, dirmode is used to set the permission of the folder. 57 | //--------------------------------------------------------------------------- 58 | static bool create(const std::string &path, 59 | pal::Directory::DirMode dirmode = pal::Directory::DirMode::S_DEFAULT_); 60 | 61 | //--------------------------------------------------------------------------- 62 | /// @brief 63 | /// Removes the entire directory whether it's empty or not. 64 | /// @param path 65 | /// Name of directory to delete. 66 | /// @return 67 | /// True if the directory was successfully deleted, false otherwise. 68 | //--------------------------------------------------------------------------- 69 | static bool remove(const std::string &path); 70 | 71 | //--------------------------------------------------------------------------- 72 | /// @brief 73 | /// Creates a directory and all parent directories required. 74 | /// @param path 75 | /// Path of directory to create. 
76 | /// @return 77 | /// True if the directory was successfully created, false otherwise. 78 | //--------------------------------------------------------------------------- 79 | static bool makePath(const std::string &path); 80 | }; 81 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/include/PAL/DynamicLoading.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | //--------------------------------------------------------------------------- 10 | /// @file 11 | /// This file includes APIs for dynamic loading on supported platforms 12 | //--------------------------------------------------------------------------- 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | namespace pal { 19 | namespace dynamicloading { 20 | // we only support subset of POSIX of dlopen/dlsym/dladdr/dlerror/dlclose 21 | // except the following flags for dlopen, others should be done only 22 | // when we really need them 23 | // DL_NOW is MUST 24 | // DL_LOCAL is enabled if not specified 25 | enum { 26 | DL_NOW = 0x0001, 27 | DL_LOCAL = 0x0002, 28 | DL_GLOBAL = 0x0004, 29 | }; 30 | 31 | // specify this address to distinguish from NULL pointer 32 | #define DL_DEFAULT (void *)(0x4) 33 | 34 | //--------------------------------------------------------------------------- 35 | /// @brief 36 | /// Loads the dynamic shared object 37 | /// @param filename 38 | /// If contains path separators, treat it as relative or absolute pathname 39 | /// or search it for the rule of dynamic linker 40 | /// @param flags 41 | /// - DL_NOW: resolve undefined symbols before 
return. MUST be specified. 42 | /// - DL_LOCAL: optional, and the default when not specified. Symbols defined in this 43 | /// shared object are not made available to resolve references in subsequently 44 | /// loaded shared objects 45 | /// - DL_GLOBAL: optional, resolve symbol globally 46 | /// @return 47 | /// On success, a non-NULL handle for the loaded library. 48 | /// On error, NULL 49 | //--------------------------------------------------------------------------- 50 | void *dlOpen(const char *filename, int flags); 51 | 52 | //--------------------------------------------------------------------------- 53 | /// @brief 54 | /// Obtain address of a symbol in a shared object or executable 55 | /// @param handle 56 | /// A handle of a dynamic loaded shared object returned by dlopen 57 | /// @param symbol 58 | /// A null-terminated symbol name 59 | /// @return 60 | /// On success, return the address associated with symbol 61 | /// On error, NULL 62 | //--------------------------------------------------------------------------- 63 | void *dlSym(void *handle, const char *symbol); 64 | 65 | //--------------------------------------------------------------------------- 66 | /// @brief 67 | /// Translate the address of a symbol to the path of the belonging shared object 68 | /// @param addr 69 | /// Address of symbol in a shared object 70 | /// @param name 71 | /// Full name of shared object that contains address, usually it is an absolute path 72 | /// @return 73 | /// On success, return a non-zero value 74 | /// On error, return 0 75 | //--------------------------------------------------------------------------- 76 | int dlAddrToLibName(void *addr, std::string &name); 77 | 78 | //--------------------------------------------------------------------------- 79 | /// @brief 80 | /// Decrements the reference count on the dynamically loaded shared object 81 | /// referred to by handle. If the reference count drops to 0, then the 82 | /// object is unloaded. 
83 | /// @return 84 | /// On success, 0; on error, a nonzero value 85 | //--------------------------------------------------------------------------- 86 | int dlClose(void *handle); 87 | 88 | //--------------------------------------------------------------------------- 89 | /// @brief 90 | /// Obtain error diagnostic for functions in the dl-family APIs. 91 | /// @return 92 | /// Returns a human-readable, null-terminated string describing the most 93 | /// recent error that occurred from a call to one of the functions in the 94 | /// dl-family APIs. 95 | //--------------------------------------------------------------------------- 96 | char *dlError(void); 97 | 98 | } // namespace dynamicloading 99 | } // namespace pal 100 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/include/PAL/Path.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | //------------------------------------------------------------------------------ 10 | /// @file 11 | /// The file includes APIs for path related operations on supported platforms 12 | //------------------------------------------------------------------------------ 13 | 14 | #pragma once 15 | 16 | #include 17 | #include 18 | 19 | namespace pal { 20 | class Path; 21 | } 22 | 23 | class pal::Path { 24 | public: 25 | //--------------------------------------------------------------------------- 26 | /// @brief Returns path separator for the system 27 | //--------------------------------------------------------------------------- 28 | static char getSeparator(); 29 | 30 | //--------------------------------------------------------------------------- 31 | /// @brief Concatenate s1 and s2 32 | //--------------------------------------------------------------------------- 33 | static std::string combine(const std::string &s1, const std::string &s2); 34 | 35 | //--------------------------------------------------------------------------- 36 | /// @brief Get the directory name 37 | //--------------------------------------------------------------------------- 38 | static std::string getDirectoryName(const std::string &path); 39 | 40 | //--------------------------------------------------------------------------- 41 | /// @brief Get absolute path 42 | //--------------------------------------------------------------------------- 43 | static std::string getAbsolute(const std::string &path); 44 | 45 | //--------------------------------------------------------------------------- 46 | /// @brief Check if the input path is absolute path 47 | //--------------------------------------------------------------------------- 48 | static bool isAbsolute(const std::string &path); 49 | 50 | private: 51 | }; 52 | -------------------------------------------------------------------------------- 
/librwkv-qualcomm/src/PAL/include/PAL/StringOp.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | //----------------------------------------------------------------------------- 10 | /// @file 11 | /// The file inludes APIs for string operations on supported platforms 12 | //----------------------------------------------------------------------------- 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | namespace pal { 19 | class StringOp; 20 | } 21 | 22 | //------------------------------------------------------------------------------ 23 | /// @brief 24 | /// FileOp contains OS Specific file system functionality. 25 | //------------------------------------------------------------------------------ 26 | class pal::StringOp { 27 | public: 28 | //--------------------------------------------------------------------------- 29 | /// @brief 30 | /// Copy copy_size bytes from buffer src to buffer dst. Behaviour of the 31 | /// function is undefined if src and dst overlap. 
32 | /// @param dst 33 | /// Destination buffer 34 | /// @param dst_size 35 | /// Size of destination buffer 36 | /// @param src 37 | /// Source buffer 38 | /// @param copy_size 39 | /// Number of bytes to copy 40 | /// @return 41 | /// Number of bytes copied 42 | //--------------------------------------------------------------------------- 43 | static size_t memscpy(void *dst, size_t dstSize, const void *src, size_t copySize); 44 | 45 | //--------------------------------------------------------------------------- 46 | /// @brief 47 | /// Returns a pointer to a null-terminated byte string, which contains copies 48 | /// of at most size bytes from the string pointed to by str. If the null 49 | /// terminator is not encountered in the first size bytes, it is added to the 50 | /// duplicated string. 51 | /// @param source 52 | /// Source string 53 | /// @param maxlen 54 | /// Max number of bytes to copy from str 55 | /// @return 56 | /// A pointer to the newly allocated string, or a null pointer if an error 57 | /// occurred. 58 | //--------------------------------------------------------------------------- 59 | static char *strndup(const char *source, size_t maxlen); 60 | }; 61 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/common/StringOp.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | 12 | #include "PAL/StringOp.hpp" 13 | 14 | //--------------------------------------------------------------------------- 15 | // pal::StringOp::memscpy 16 | //--------------------------------------------------------------------------- 17 | size_t pal::StringOp::memscpy(void *dst, size_t dstSize, const void *src, size_t copySize) { 18 | if (!dst || !src || !dstSize || !copySize) return 0; 19 | 20 | size_t minSize = dstSize < copySize ? dstSize : copySize; 21 | 22 | memcpy(dst, src, minSize); 23 | 24 | return minSize; 25 | } 26 | 27 | #ifdef __hexagon__ 28 | size_t strnlen(const char *s, size_t n) { 29 | size_t i; 30 | for (i = 0; i < n && s[i] != '\0'; i++) continue; 31 | return i; 32 | } 33 | #endif 34 | 35 | //--------------------------------------------------------------------------- 36 | // pal::StringOp::strndup 37 | //--------------------------------------------------------------------------- 38 | char *pal::StringOp::strndup(const char *source, size_t maxlen) { 39 | #ifdef _WIN32 40 | size_t length = ::strnlen(source, maxlen); 41 | 42 | char *destination = (char *)malloc((length + 1) * sizeof(char)); 43 | if (destination == nullptr) return nullptr; 44 | 45 | // copy length bytes to destination and leave destination[length] to be 46 | // null terminator 47 | strncpy_s(destination, length + 1, source, length); 48 | 49 | return destination; 50 | #elif __hexagon__ 51 | size_t length = strnlen(source, maxlen); 52 | 53 | char *destination = (char *)malloc((length + 1) * sizeof(char)); 54 | if (destination == nullptr) return nullptr; 55 | // copy length bytes to destination and leave destination[length] to be 56 | // null terminator 57 | strncpy(destination, source, length); 58 | destination[length] = '\0'; 59 | return destination; 60 | #else 61 | return ::strndup(source, maxlen); 62 | #endif 63 | } 64 | 
-------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/linux/Directory.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | #include 12 | #ifndef __QNXNTO__ 13 | #include 14 | #endif 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "PAL/Directory.hpp" 27 | #include "PAL/FileOp.hpp" 28 | #include "PAL/Path.hpp" 29 | 30 | //------------------------------------------------------------------------------ 31 | //------------------------------------------------------------------------------ 32 | #ifdef __QNXNTO__ 33 | static bool is_qnx_dir(const struct dirent *ep) { 34 | struct dirent_extra *exp; 35 | bool is_dir = false; 36 | 37 | for (exp = _DEXTRA_FIRST(ep); _DEXTRA_VALID(exp, ep); exp = _DEXTRA_NEXT(exp)) { 38 | if (exp->d_type == _DTYPE_STAT || exp->d_type == _DTYPE_LSTAT) { 39 | struct stat *statbuff = &((dirent_extra_stat *)exp)->d_stat; 40 | if (statbuff && S_ISDIR(statbuff->st_mode)) { 41 | is_dir = true; 42 | break; 43 | } 44 | } 45 | } 46 | return is_dir; 47 | } 48 | #endif 49 | 50 | // ------------------------------------------------------------------------------ 51 | // pal::Directory::create 52 | // ------------------------------------------------------------------------------ 53 | bool pal::Directory::create(const std::string &path, pal::Directory::DirMode dirmode) { 54 | struct stat st; 55 | int status = 0; 56 | if (stat(path.c_str(), &st) != 0) { 57 | // Directory does not exist 58 | 
status = mkdir(path.c_str(), static_cast(dirmode)); 59 | } else if (!S_ISDIR(st.st_mode)) { 60 | errno = ENOTDIR; 61 | status = -1; 62 | } 63 | return (status == 0); 64 | } 65 | 66 | //------------------------------------------------------------------------------ 67 | //------------------------------------------------------------------------------ 68 | bool pal::Directory::remove(const std::string &dirName) { 69 | DIR *dir; 70 | struct dirent *entry; 71 | 72 | dir = opendir(dirName.c_str()); 73 | if (dir == nullptr) { 74 | // If the directory doesn't exist then just return true. 75 | if (errno == ENOENT) { 76 | return true; 77 | } 78 | return false; 79 | } 80 | 81 | #ifdef __QNXNTO__ 82 | if (dircntl(dir, D_SETFLAG, D_FLAG_STAT) == -1) { 83 | return false; 84 | } 85 | #endif 86 | 87 | // Recursively traverse the directory tree. 88 | while ((entry = readdir(dir)) != nullptr) { 89 | if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, "..")) { 90 | std::stringstream ss; 91 | ss << dirName << Path::getSeparator() << entry->d_name; 92 | std::string path = ss.str(); 93 | #ifdef __QNXNTO__ 94 | if (is_qnx_dir(entry)) 95 | #else 96 | if (entry->d_type == DT_DIR) 97 | #endif 98 | { 99 | // It's a directory so we need to drill down into it and delete 100 | // its contents. 
101 | if (!remove(path)) { 102 | return false; 103 | } 104 | } else { 105 | if (::remove(path.c_str())) { 106 | return false; 107 | } 108 | } 109 | } 110 | } 111 | 112 | closedir(dir); 113 | 114 | if (::remove(dirName.c_str())) { 115 | return false; 116 | } 117 | 118 | return true; 119 | } 120 | 121 | bool pal::Directory::makePath(const std::string &path) { 122 | struct stat st; 123 | bool rc = false; 124 | 125 | if (path == ".") { 126 | rc = true; 127 | } else if (stat(path.c_str(), &st) == 0) { 128 | if (st.st_mode & S_IFDIR) { 129 | rc = true; 130 | } 131 | } else { 132 | size_t offset = path.find_last_of(Path::getSeparator()); 133 | if (offset != std::string::npos) { 134 | std::string newPath = path.substr(0, offset); 135 | if (!makePath(newPath)) { 136 | return false; 137 | } 138 | } 139 | 140 | // There is a possible race condition, where a file/directory can be 141 | // created in between the stat() above, and the mkdir() call here. 142 | // So, ignore the return code from the mkdir() call, and then re-check 143 | // for existence of the directory after it. Ensure both that it exists 144 | // and that it is a directory - just like above. 145 | mkdir(path.c_str(), 0777); 146 | 147 | if ((stat(path.c_str(), &st) == 0) && (st.st_mode & S_IFDIR)) { 148 | rc = true; 149 | } 150 | } 151 | 152 | return rc; 153 | } 154 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/linux/DynamicLoading.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | 12 | #include "PAL/Debug.hpp" 13 | #include "PAL/DynamicLoading.hpp" 14 | 15 | void *pal::dynamicloading::dlOpen(const char *filename, int flags) { 16 | int realFlags = 0; 17 | 18 | if (flags & DL_NOW) { 19 | realFlags |= RTLD_NOW; 20 | } 21 | 22 | if (flags & DL_LOCAL) { 23 | realFlags |= RTLD_LOCAL; 24 | } 25 | 26 | if (flags & DL_GLOBAL) { 27 | realFlags |= RTLD_GLOBAL; 28 | } 29 | 30 | return ::dlopen(filename, realFlags); 31 | } 32 | 33 | void *pal::dynamicloading::dlSym(void *handle, const char *symbol) { 34 | if (handle == DL_DEFAULT) { 35 | return ::dlsym(RTLD_DEFAULT, symbol); 36 | } 37 | 38 | return ::dlsym(handle, symbol); 39 | } 40 | 41 | int pal::dynamicloading::dlAddrToLibName(void *addr, std::string &name) { 42 | // Clean the output buffer 43 | name = std::string(); 44 | 45 | // If the address is empty, return zero as treating failure 46 | if (!addr) { 47 | DEBUG_MSG("Input address is nullptr."); 48 | return 0; 49 | } 50 | 51 | // Dl_info do not maintain the lifetime of its string members, 52 | // it would be maintained by dlopen() and dlclose(), 53 | // so we do not need to release it manually 54 | Dl_info info; 55 | int result = ::dladdr(addr, &info); 56 | 57 | // If dladdr() successes, set name to the library name 58 | if (result) { 59 | name = std::string(info.dli_fname); 60 | } else { 61 | DEBUG_MSG("Input address could not be matched to a shared object."); 62 | } 63 | 64 | return result; 65 | } 66 | 67 | int pal::dynamicloading::dlClose(void *handle) { 68 | if (!handle) { 69 | return 0; 70 | } 71 | 72 | return ::dlclose(handle); 73 | } 74 | 75 | char *pal::dynamicloading::dlError(void) { return ::dlerror(); } 76 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/linux/Path.cpp: -------------------------------------------------------------------------------- 
1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #include 10 | 11 | #include 12 | #ifndef PATH_MAX 13 | #include 14 | #endif 15 | 16 | #include "PAL/FileOp.hpp" 17 | #include "PAL/Path.hpp" 18 | 19 | char pal::Path::getSeparator() { return '/'; } 20 | 21 | std::string pal::Path::combine(const std::string &s1, const std::string &s2) { 22 | std::stringstream ss; 23 | ss << s1; 24 | if (s1.size() > 0 && s1[s1.size() - 1] != getSeparator()) { 25 | ss << getSeparator(); 26 | } 27 | ss << s2; 28 | return ss.str(); 29 | } 30 | 31 | std::string pal::Path::getDirectoryName(const std::string &path) { 32 | std::string rc = path; 33 | size_t index = path.find_last_of(pal::Path::getSeparator()); 34 | if (index != std::string::npos) { 35 | rc = path.substr(0, index); 36 | } 37 | return rc; 38 | } 39 | 40 | std::string pal::Path::getAbsolute(const std::string &path) { 41 | // Functionality was duplicated of function in FileOp 42 | // Just call that function directly instead 43 | return pal::FileOp::getAbsolutePath(path); 44 | } 45 | 46 | bool pal::Path::isAbsolute(const std::string &path) { 47 | return path.size() > 0 && path[0] == getSeparator(); 48 | } 49 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/windows/Common.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include "Common.hpp" 20 | #include "PAL/Debug.hpp" 21 | 22 | int32_t pal::scanDir(const std::string &path, std::vector &namelist) { 23 | // example : "C:/Users/guest" scan nothing, "C:/Users/guest/*" can scan the 24 | // entire directory instead 25 | std::string scanPath = path + "/*"; 26 | WIN32_FIND_DATAA findFileData; 27 | HANDLE hFind = FindFirstFileA(scanPath.c_str(), &findFileData); 28 | if (hFind == INVALID_HANDLE_VALUE) { 29 | DEBUG_MSG("scanDir fail! Error code : %d", GetLastError()); 30 | return -1; 31 | } 32 | 33 | do { 34 | // will compare char until '\0' to allow filename with first char = '.' 35 | if (strncmp(findFileData.cFileName, ".", 2) == 0 || 36 | strncmp(findFileData.cFileName, "..", 3) == 0) { 37 | continue; 38 | } 39 | namelist.push_back(findFileData); 40 | } while (FindNextFileA(hFind, &findFileData)); 41 | FindClose(hFind); 42 | 43 | return namelist.size(); 44 | } 45 | 46 | void pal::normalizeSeparator(std::string &path) { replace(path.begin(), path.end(), '\\', '/'); } 47 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/windows/Common.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | namespace pal { 18 | /** 19 | * @brief 20 | * Scans elements in a directory. 
21 | * @param path 22 | * Path in string which we are going to scan. 23 | * @param namelist 24 | * Data struct for each element, which will be stored as WIN32_FIND_DATAA. 25 | * @return 26 | * Number of elements in this path, return -1 if fail. 27 | */ 28 | int32_t scanDir(const std::string &path, std::vector &namelist); 29 | 30 | /** 31 | * @brief 32 | * Replace all the '\\' in path with '/' to keep consistency. 33 | * @param path 34 | * The string which you want to format. 35 | */ 36 | void normalizeSeparator(std::string &path); 37 | } // namespace pal 38 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/windows/Directory.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "Common.hpp" 17 | #include "PAL/Debug.hpp" 18 | #include "PAL/Directory.hpp" 19 | #include "PAL/FileOp.hpp" 20 | #include "PAL/Path.hpp" 21 | 22 | //-------------------------------------------------------------------------------------- 23 | // pal::Directory::Create 24 | //-------------------------------------------------------------------------------------- 25 | bool pal::Directory::create(const std::string &path, pal::Directory::DirMode dirmode) { 26 | struct stat st; 27 | // it create a directory successfully or directory exists already, return true. 28 | if ((stat(path.c_str(), &st) != 0 && (CreateDirectoryA(path.c_str(), NULL) != 0)) || 29 | ((st.st_mode & S_IFDIR) != 0)) { 30 | return true; 31 | } else { 32 | DEBUG_MSG("Create Folder fail! 
Error code : %d", GetLastError()); 33 | } 34 | return false; 35 | } 36 | 37 | //-------------------------------------------------------------------------------------- 38 | // pal::Directory::Remove 39 | //-------------------------------------------------------------------------------------- 40 | bool pal::Directory::remove(const std::string &dirName) { 41 | struct stat st; 42 | if (stat(dirName.c_str(), &st) == 0) { 43 | if ((st.st_mode & S_IFDIR) != 0) { 44 | // a directory exist and remove it ! 45 | std::string fullPath = dirName; 46 | if (pal::Path::isAbsolute(dirName) == 0) { 47 | fullPath = pal::Path::getAbsolute(dirName); 48 | } 49 | // Note This string MUST be double-null terminated. 50 | fullPath = fullPath + '\0' + '\0'; 51 | SHFILEOPSTRUCTA fileOp = { 52 | NULL, // hwnd 53 | FO_DELETE, // wFunc, delete usage 54 | fullPath.c_str(), // pFrom, delete target folder 55 | "", // pTo, delete operation can ignore this 56 | FOF_NO_UI, // Perform operation silently, presenting no UI to user 57 | false, // fAnyOperationsAborted, 58 | 0, // hNameMappings 59 | "" // lpszProgressTitle, used only if for FOF_SIMPLEPROGRESS 60 | }; 61 | if (SHFileOperationA(&fileOp) == 0) { 62 | return true; 63 | } else { 64 | DEBUG_MSG("Delete folder fail! Error code : %d", GetLastError()); 65 | } 66 | } 67 | } else { 68 | // If the directory doesn't exist then just, return true. Behaves like Linux 69 | if (errno == ENOENT) { 70 | return true; 71 | } else { 72 | DEBUG_MSG("Remove stat fail! 
Error code : %d", errno); 73 | } 74 | } 75 | return false; 76 | } 77 | 78 | //-------------------------------------------------------------------------------------- 79 | // pal::Directory::MakePath 80 | //-------------------------------------------------------------------------------------- 81 | bool pal::Directory::makePath(const std::string &path) { 82 | struct stat st; 83 | bool rc = false; 84 | if (path == ".") { 85 | rc = true; 86 | } else if (stat(path.c_str(), &st) == 0) { 87 | if ((st.st_mode & S_IFDIR) != 0) { 88 | // if a directory path is already exist 89 | rc = true; 90 | } 91 | } else { 92 | size_t offset = std::min(path.find_last_of('/'), path.find_last_of('\\')); 93 | if (offset != std::string::npos) { 94 | std::string newPath = path.substr(0, offset); 95 | if (!makePath(newPath)) { 96 | return false; 97 | } 98 | } 99 | pal::Directory::create(path.c_str()); 100 | if ((stat(path.c_str(), &st) == 0) && ((st.st_mode & S_IFDIR) != 0)) { 101 | rc = true; 102 | } 103 | } 104 | return rc; 105 | } -------------------------------------------------------------------------------- /librwkv-qualcomm/src/PAL/src/windows/Path.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "Common.hpp" 17 | #include "PAL/FileOp.hpp" 18 | #include "PAL/Path.hpp" 19 | 20 | //------------------------------------------------------------------------------ 21 | // PAL::Path::GetSeparator 22 | //------------------------------------------------------------------------------ 23 | char pal::Path::getSeparator() { return '/'; } 24 | 25 | //------------------------------------------------------------------------------ 26 | // pal::Path::Combine 27 | //------------------------------------------------------------------------------ 28 | std::string pal::Path::combine(const std::string &s1, const std::string &s2) { 29 | std::stringstream ss; 30 | ss << s1; 31 | if (s1.size() > 0 && ((s1[s1.size() - 1] != '/') && (s1[s1.size() - 1] != '\\'))) { 32 | ss << getSeparator(); 33 | } 34 | ss << s2; 35 | return ss.str(); 36 | } 37 | 38 | //------------------------------------------------------------------------------ 39 | // pal::Path::getDirectoryName 40 | //------------------------------------------------------------------------------ 41 | std::string pal::Path::getDirectoryName(const std::string &path) { 42 | std::string rc = path; 43 | int32_t index = std::max(static_cast(path.find_last_of('\\')), 44 | static_cast(path.find_last_of('/'))); 45 | if (index != static_cast(std::string::npos)) { 46 | rc = path.substr(0, index); 47 | } 48 | pal::normalizeSeparator(rc); 49 | return rc; 50 | } 51 | 52 | //------------------------------------------------------------------------------ 53 | // pal::Path::getAbsolute 54 | //------------------------------------------------------------------------------ 55 | std::string pal::Path::getAbsolute(const std::string &path) { 56 | std::string res = pal::FileOp::getAbsolutePath(path); 57 | pal::normalizeSeparator(res); 58 | return res; 59 | } 60 | 61 | 
//------------------------------------------------------------------------------ 62 | // PAL::Path::isAbsolute 63 | // requirement : shlwapi.lib 64 | //------------------------------------------------------------------------------ 65 | bool pal::Path::isAbsolute(const std::string &path) { 66 | std::string windowsPath = path; 67 | // in windows, when we need to check relative or absolute path, 68 | // separator MUST be '\\' rather than '/' 69 | // for more information : https://docs.microsoft.com/en-us/dotnet/standard/io/file-path-formats 70 | replace(windowsPath.begin(), windowsPath.end(), '/', '\\'); 71 | return PathIsRelativeA(windowsPath.c_str()) == false; 72 | } 73 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/QnnTypeDef.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #ifndef QNN_TYPE_DEF_H_ 10 | #define QNN_TYPE_DEF_H_ 11 | 12 | #include "Logger.hpp" 13 | #include "QnnInterface.h" 14 | #include "QnnTypeMacros.hpp" 15 | #include "QnnTypes.h" 16 | 17 | typedef enum ModelError { 18 | MODEL_NO_ERROR = 0, 19 | MODEL_TENSOR_ERROR = 1, 20 | MODEL_PARAMS_ERROR = 2, 21 | MODEL_NODES_ERROR = 3, 22 | MODEL_GRAPH_ERROR = 4, 23 | MODEL_CONTEXT_ERROR = 5, 24 | MODEL_GENERATION_ERROR = 6, 25 | MODEL_SETUP_ERROR = 7, 26 | MODEL_INVALID_ARGUMENT_ERROR = 8, 27 | MODEL_FILE_ERROR = 9, 28 | MODEL_MEMORY_ALLOCATE_ERROR = 10, 29 | // Value selected to ensure 32 bits. 
30 | MODEL_UNKNOWN_ERROR = 0x7FFFFFFF 31 | } ModelError_t; 32 | 33 | using TensorWrapper = Qnn_Tensor_t; 34 | #define GET_TENSOR_WRAPPER_TENSOR(tensorWrapper) tensorWrapper 35 | #define GET_TENSOR_WRAPPER_NAME(tensorWrapper) QNN_TENSOR_GET_NAME(tensorWrapper) 36 | 37 | typedef struct GraphInfo { 38 | Qnn_GraphHandle_t graph; 39 | char* graphName; 40 | TensorWrapper* inputTensors; 41 | uint32_t numInputTensors; 42 | TensorWrapper* outputTensors; 43 | uint32_t numOutputTensors; 44 | } GraphInfo_t; 45 | typedef GraphInfo_t* GraphInfoPtr_t; 46 | 47 | typedef struct GraphConfigInfo { 48 | char* graphName; 49 | const QnnGraph_Config_t** graphConfigs; 50 | } GraphConfigInfo_t; 51 | 52 | #endif // QNN_TYPE_DEF_H_ 53 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/BuildId.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) 2020, 2024 Qualcomm Technologies, Inc. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | namespace qnn { 12 | namespace tools { 13 | 14 | inline std::string getBuildId() { return std::string("v2.31.0.250130151446_114721"); } 15 | 16 | } // namespace tools 17 | } // namespace qnn 18 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/ClientBuffer.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #include "ClientBuffer.hpp" 10 | #include "QnnTypeMacros.hpp" 11 | 12 | void* ClientBuffer::getBuffer(Qnn_Tensor_t* tensor) { 13 | if (!tensor) { 14 | QNN_WARN("getBuffer: received a null pointer to a tensor"); 15 | return nullptr; 16 | } 17 | return QNN_TENSOR_GET_CLIENT_BUF(tensor).data; 18 | } 19 | 20 | size_t ClientBuffer::getBufferSize(Qnn_Tensor_t* tensor) { 21 | if (!tensor) { 22 | QNN_WARN("getBufferSize: received a null pointer to a tensor"); 23 | return 0; 24 | } 25 | return QNN_TENSOR_GET_CLIENT_BUF(tensor).dataSize; 26 | }; 27 | 28 | bool ClientBuffer::allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) { 29 | if (!tensor) { 30 | QNN_ERROR("Received nullptr for tensors"); 31 | return false; 32 | } 33 | QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_RAW); 34 | Qnn_ClientBuffer_t clientBuffer; 35 | clientBuffer.data = malloc(tensorDataSize); 36 | if (nullptr == clientBuffer.data) { 37 | QNN_ERROR("mem alloc failed for clientBuffer.data"); 38 | return false; 39 | } 40 | clientBuffer.dataSize = tensorDataSize; 41 | QNN_TENSOR_SET_CLIENT_BUF(tensor, clientBuffer); 42 | return true; 43 | } 44 | 45 | bool ClientBuffer::freeTensorBuffer(Qnn_Tensor_t* tensor) { 46 | if (!tensor) { 47 | QNN_ERROR("Received nullptr for tensors"); 48 | return false; 49 | } 50 | if (QNN_TENSOR_GET_CLIENT_BUF(tensor).data) { 51 | if (m_sameMemoryFreeTensors.find(tensor) == m_sameMemoryFreeTensors.end()) { 52 | free(QNN_TENSOR_GET_CLIENT_BUF(tensor).data); 53 | } 54 | QNN_TENSOR_SET_CLIENT_BUF(tensor, Qnn_ClientBuffer_t({nullptr, 0u})); 55 | QNN_TENSOR_SET_MEM_TYPE(tensor, QNN_TENSORMEMTYPE_UNDEFINED); 56 | } 57 | return true; 58 | } 59 | 60 | bool ClientBuffer::useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) { 61 | if (nullptr == dest || nullptr == src) { 62 | QNN_ERROR("Received nullptr"); 63 | return false; 64 | } 65 | if (false == freeTensorBuffer(dest)) { 66 | 
return false; 67 | } 68 | 69 | QNN_TENSOR_SET_MEM_TYPE(dest, QNN_TENSOR_GET_MEM_TYPE(src)); 70 | QNN_TENSOR_SET_CLIENT_BUF(dest, QNN_TENSOR_GET_CLIENT_BUF(src)); 71 | m_sameMemoryFreeTensors.insert(dest); 72 | return true; 73 | } 74 | 75 | bool ClientBuffer::useExternalMemory(Qnn_Tensor_t* dest, void* extMem) { 76 | if (nullptr == dest || nullptr == extMem) { 77 | QNN_ERROR("Received nullptr"); 78 | return false; 79 | } 80 | 81 | Qnn_ClientBuffer_t clientBuffer; 82 | clientBuffer.data = extMem; 83 | clientBuffer.dataSize = QNN_TENSOR_GET_CLIENT_BUF(dest).dataSize; 84 | if (false == freeTensorBuffer(dest)) { 85 | return false; 86 | } 87 | 88 | QNN_TENSOR_SET_MEM_TYPE(dest, QNN_TENSORMEMTYPE_RAW); 89 | QNN_TENSOR_SET_CLIENT_BUF(dest, clientBuffer); 90 | m_sameMemoryFreeTensors.insert(dest); 91 | return true; 92 | } 93 | 94 | void* ClientBuffer::allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) { return nullptr; } 95 | 96 | bool ClientBuffer::mapFusedBufferOffset(Qnn_Tensor_t* tensor, 97 | size_t tensorDataSize, 98 | int32_t fd, 99 | uint32_t offset, 100 | uint64_t totalBufferSize, 101 | void* memPointer, 102 | Qnn_ContextHandle_t contextHandle) { 103 | return false; 104 | } 105 | 106 | bool ClientBuffer::deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) { return false; } 107 | 108 | void ClientBuffer::freeFusedBuffers() {} 109 | 110 | size_t ClientBuffer::getOffset(Qnn_Tensor_t* tensor) { return 0; } 111 | 112 | size_t ClientBuffer::getTotalBufferSize(Qnn_Tensor_t* tensor) { return 0; } -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/ClientBuffer.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include "IBufferAlloc.hpp" 16 | #include "Logger.hpp" 17 | 18 | class ClientBuffer final : public IBufferAlloc { 19 | public: 20 | ClientBuffer(){}; 21 | 22 | // Disable copy constructors, r-value referencing, etc 23 | ClientBuffer(const ClientBuffer&) = delete; 24 | 25 | ClientBuffer& operator=(const ClientBuffer&) = delete; 26 | 27 | ClientBuffer(ClientBuffer&&) = delete; 28 | 29 | ClientBuffer& operator=(ClientBuffer&&) = delete; 30 | 31 | bool initialize() override { return true; }; 32 | 33 | void* getBuffer(Qnn_Tensor_t* tensor) override; 34 | 35 | int getFd(Qnn_Tensor_t* tensor) override { 36 | QNN_WARN("getFd: This is not ION memory"); 37 | return -1; 38 | }; 39 | 40 | size_t getOffset(Qnn_Tensor_t* tensor) override; 41 | size_t getBufferSize(Qnn_Tensor_t* tensor) override; 42 | size_t getTotalBufferSize(Qnn_Tensor_t* tensor) override; 43 | 44 | bool allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) override; 45 | 46 | bool freeTensorBuffer(Qnn_Tensor_t* tensor) override; 47 | 48 | bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) override; 49 | bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) override { return false; } 50 | 51 | bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) override; 52 | 53 | void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) override; 54 | bool allocateBuffers(const std::map>& allocs_per_chunk, 55 | std::map>& tensor_offsets) override { 56 | return false; 57 | }; 58 | 59 | bool mapFusedBufferOffset(Qnn_Tensor_t* tensor, 60 | size_t tensorDataSize, 61 | int32_t fd, 62 | uint32_t offset, 63 | uint64_t totalBufferSize, 64 | void* memPointer, 65 | Qnn_ContextHandle_t contextHandle) override; 66 | bool deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) override; 67 | void freeFusedBuffers() override; 68 | 69 | bool 
mapFusedBufferOffset(Qnn_Tensor_t* tensor, 70 | int alloc_idx, 71 | size_t offset, 72 | Qnn_ContextHandle_t ctx, 73 | size_t size) override { 74 | return false; 75 | } 76 | 77 | virtual ~ClientBuffer(){}; 78 | 79 | private: 80 | std::unordered_set m_sameMemoryFreeTensors; 81 | }; 82 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/DataUtil.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include "QnnTypes.h" 15 | 16 | namespace qnn { 17 | namespace tools { 18 | namespace datautil { 19 | enum class StatusCode { 20 | SUCCESS, 21 | DATA_READ_FAIL, 22 | DATA_WRITE_FAIL, 23 | FILE_OPEN_FAIL, 24 | DIRECTORY_CREATE_FAIL, 25 | INVALID_DIMENSIONS, 26 | INVALID_DATA_TYPE, 27 | DATA_SIZE_MISMATCH, 28 | INVALID_BUFFER, 29 | }; 30 | 31 | const size_t g_bitsPerByte = 8; 32 | 33 | using ReadBatchDataRetType_t = std::tuple; 34 | 35 | std::tuple getDataTypeSizeInBytes(Qnn_DataType_t dataType); 36 | 37 | std::tuple calculateLength(std::vector dims, Qnn_DataType_t dataType); 38 | 39 | size_t calculateElementCount(std::vector dims); 40 | 41 | std::tuple getFileSize(std::string filePath); 42 | 43 | StatusCode readDataFromFile(std::string filePath, 44 | std::vector dims, 45 | Qnn_DataType_t dataType, 46 | uint8_t* buffer); 47 | 48 | /* 49 | * Read data in batches from vector and try to matches the model input's 50 | * batches. 
If the vector is empty while matching the batch size of model, 51 | * pad the remaining buffer with zeros 52 | * @param filePaths image paths vector 53 | * @param filePathsIndexOffset index offset in the vector 54 | * @param loopBackToStart loop the vector to fill the remaining tensor data 55 | * @param dims model input dimensions 56 | * @param dataType to create input buffer from file 57 | * @param buffer to fill the input image data 58 | * 59 | * @return ReadBatchDataRetType_t returns numFilesCopied and batchSize along 60 | * with status 61 | */ 62 | ReadBatchDataRetType_t readBatchData(const std::vector& filePaths, 63 | const size_t filePathsIndexOffset, 64 | const bool loopBackToStart, 65 | const std::vector& dims, 66 | const Qnn_DataType_t dataType, 67 | uint8_t* buffer); 68 | 69 | StatusCode readBinaryFromFile(std::string filePath, uint8_t* buffer, size_t bufferSize); 70 | 71 | #ifndef __hexagon__ 72 | StatusCode writeDataToFile(std::string fileDir, 73 | std::string fileName, 74 | std::vector dims, 75 | Qnn_DataType_t dataType, 76 | uint8_t* buffer); 77 | 78 | StatusCode writeBatchDataToFile(std::vector fileDirs, 79 | std::string fileName, 80 | std::vector dims, 81 | Qnn_DataType_t dataType, 82 | uint8_t* buffer, 83 | const size_t batchSize); 84 | 85 | StatusCode writeBinaryToFile(std::string fileDir, 86 | std::string fileName, 87 | uint8_t* buffer, 88 | size_t bufferSize); 89 | #endif 90 | 91 | template 92 | datautil::StatusCode floatToTfN( 93 | T_QuantType* out, float* in, int32_t offset, float scale, size_t numElements); 94 | 95 | template 96 | datautil::StatusCode tfNToFloat( 97 | float* out, T_QuantType* in, int32_t offset, float scale, size_t numElements); 98 | 99 | template 100 | datautil::StatusCode castToFloat(float* out, T_QuantType* in, size_t numElements); 101 | 102 | template 103 | datautil::StatusCode castFromFloat(T_QuantType* out, float* in, size_t numElements); 104 | 105 | const std::map g_dataTypeToSize = { 106 | {QNN_DATATYPE_INT_8, 1}, 107 
| {QNN_DATATYPE_INT_16, 2}, 108 | {QNN_DATATYPE_INT_32, 4}, 109 | {QNN_DATATYPE_INT_64, 8}, 110 | {QNN_DATATYPE_UINT_8, 1}, 111 | {QNN_DATATYPE_UINT_16, 2}, 112 | {QNN_DATATYPE_UINT_32, 4}, 113 | {QNN_DATATYPE_UINT_64, 8}, 114 | {QNN_DATATYPE_FLOAT_16, 2}, 115 | {QNN_DATATYPE_FLOAT_32, 4}, 116 | {QNN_DATATYPE_FLOAT_64, 8}, 117 | {QNN_DATATYPE_SFIXED_POINT_8, 1}, 118 | {QNN_DATATYPE_SFIXED_POINT_16, 2}, 119 | {QNN_DATATYPE_SFIXED_POINT_32, 4}, 120 | {QNN_DATATYPE_UFIXED_POINT_8, 1}, 121 | {QNN_DATATYPE_UFIXED_POINT_16, 2}, 122 | {QNN_DATATYPE_UFIXED_POINT_32, 4}, 123 | {QNN_DATATYPE_BOOL_8, 1}, 124 | }; 125 | } // namespace datautil 126 | } // namespace tools 127 | } // namespace qnn 128 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/DmaBufAllocator.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "IBufferAlloc.hpp" 16 | #include "Logger.hpp" 17 | #include "QnnInterface.h" 18 | 19 | typedef void* (*DmaBufCreateFn_t)(); 20 | typedef int (*DmaBufAllocFn_t)(void*, const char*, size_t, unsigned int, size_t); 21 | typedef void (*DmaBufDeinitFn_t)(void*); 22 | 23 | namespace rwkv_qualcomm { 24 | 25 | struct DmaBufferData { 26 | void* dmaBufferAllocator; 27 | int fd; 28 | void* memPointer; 29 | size_t totalBufferSize; 30 | int offset{0}; 31 | DmaBufferData() : dmaBufferAllocator(nullptr), fd(-1), memPointer(nullptr), totalBufferSize(0) {} 32 | DmaBufferData(void* bufferAllocator, int fdIn, void* memPointerIn, size_t sizeIn) 33 | : dmaBufferAllocator(bufferAllocator), 34 | fd(fdIn), 35 | memPointer(memPointerIn), 36 | totalBufferSize(sizeIn) {} 37 | }; 38 | 39 | class DmaBufferAllocator final : public IBufferAlloc { 40 | public: 41 | DmaBufferAllocator(Qnn_ContextHandle_t contextHandle, QNN_INTERFACE_VER_TYPE* qnnInterface); 42 | // Disable copy constructors, r-value referencing, etc 43 | DmaBufferAllocator(const DmaBufferAllocator&) = delete; 44 | DmaBufferAllocator& operator=(const DmaBufferAllocator&) = delete; 45 | DmaBufferAllocator(DmaBufferAllocator&&) = delete; 46 | DmaBufferAllocator& operator=(DmaBufferAllocator&&) = delete; 47 | 48 | bool initialize() override; 49 | void* getBuffer(Qnn_Tensor_t* tensor) override; 50 | int getFd(Qnn_Tensor_t* tensor) override; 51 | size_t getOffset(Qnn_Tensor_t* tensor) override; 52 | size_t getBufferSize(Qnn_Tensor_t* tensor) override; 53 | size_t getTotalBufferSize(Qnn_Tensor_t* tensor) override; 54 | 55 | bool freeTensorBuffer(Qnn_Tensor_t* tensor) override; 56 | 57 | bool allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) override; 58 | bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) override; 59 | 60 | 
virtual ~DmaBufferAllocator(); 61 | 62 | bool beforeWriteToBuffer(Qnn_Tensor_t* tensor) override; 63 | bool afterWriteToBuffer(Qnn_Tensor_t* tensor) override; 64 | bool beforeReadFromBuffer(Qnn_Tensor_t* tensor) override; 65 | bool afterReadFromBuffer(Qnn_Tensor_t* tensor) override; 66 | 67 | bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) override { 68 | QNN_WARN("Offset based tensors not supported!!"); 69 | return false; 70 | ; 71 | } 72 | bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) override { 73 | QNN_WARN("External Memory not supported!!"); 74 | return false; 75 | ; 76 | } 77 | void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) override { 78 | QNN_WARN("Fused Buffers not supported\n"); 79 | return nullptr; 80 | }; 81 | bool allocateBuffers(const std::map>& allocs_per_chunk, 82 | std::map>& tensor_offsets) override { 83 | QNN_WARN("Fused Buffers not supported\n"); 84 | return false; 85 | }; 86 | bool mapFusedBufferOffset(Qnn_Tensor_t* tensor, 87 | size_t tensorDataSize, 88 | int32_t fd, 89 | uint32_t offset, 90 | uint64_t totalBufferSize, 91 | void* memPointer, 92 | Qnn_ContextHandle_t contextHandle) override { 93 | QNN_WARN("Fused Buffers not supported\n"); 94 | return false; 95 | }; 96 | bool deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) override { 97 | QNN_WARN("Fused Buffers not supported\n"); 98 | return false; 99 | }; 100 | void freeFusedBuffers() override { return; }; 101 | bool mapFusedBufferOffset(Qnn_Tensor_t* tensor, 102 | int alloc_idx, 103 | size_t offset, 104 | Qnn_ContextHandle_t ctx, 105 | size_t size) override { 106 | QNN_WARN("Fused Buffers not supported\n"); 107 | return false; 108 | }; 109 | 110 | private: 111 | DmaBufferData* getDmaBufTensorData(Qnn_Tensor_t* tensor); 112 | 113 | // Pointer to the dlopen'd libdmabufheap.so shared library which contains 114 | // dmaBufCreate, dmaBufAlloc, dmaBufDeinit 115 | void* m_libDmaBufHeapHandle; 116 | DmaBufCreateFn_t m_dmaBufCreate; 117 | 
DmaBufAllocFn_t m_dmaBufAlloc; 118 | DmaBufDeinitFn_t m_dmaBufDeinit; 119 | 120 | QNN_INTERFACE_VER_TYPE* m_qnnInterface; 121 | Qnn_ContextHandle_t m_contextHandle; 122 | 123 | std::unordered_map m_tensorToDmaBufferData; 124 | std::unordered_set m_sameMemoryFreeTensors; 125 | std::unordered_map m_memHandleToDmaBufMem; 126 | }; 127 | 128 | } // namespace rwkv_qualcomm 129 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/DynamicLoadUtil.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include "Interfaces.hpp" 12 | 13 | namespace qnn { 14 | namespace tools { 15 | namespace dynamicloadutil { 16 | enum class StatusCode { 17 | SUCCESS, 18 | FAILURE, 19 | FAIL_LOAD_BACKEND, 20 | FAIL_LOAD_MODEL, 21 | FAIL_SYM_FUNCTION, 22 | FAIL_GET_INTERFACE_PROVIDERS, 23 | FAIL_LOAD_SYSTEM_LIB, 24 | }; 25 | 26 | StatusCode getQnnFunctionPointers(std::string backendPath, 27 | std::string modelPath, 28 | rwkv_app::QnnFunctionPointers* qnnFunctionPointers, 29 | void** backendHandle, 30 | bool loadModelLib, 31 | void** modelHandleRtn); 32 | StatusCode getQnnSystemFunctionPointers(std::string systemLibraryPath, 33 | rwkv_app::QnnFunctionPointers* qnnFunctionPointers); 34 | } // namespace dynamicloadutil 35 | } // namespace tools 36 | } // namespace qnn 37 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/IBufferAlloc.hpp: -------------------------------------------------------------------------------- 1 | 
//============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "QnnTypes.h" 17 | 18 | class IBufferAlloc { 19 | public: 20 | virtual ~IBufferAlloc() {} 21 | IBufferAlloc() {} 22 | virtual bool initialize() = 0; 23 | virtual void* getBuffer(Qnn_Tensor_t* tensor) = 0; 24 | virtual int getFd(Qnn_Tensor_t* tensor) = 0; 25 | virtual size_t getOffset(Qnn_Tensor_t* tensor) = 0; 26 | virtual size_t getBufferSize(Qnn_Tensor_t* tensor) = 0; 27 | virtual size_t getTotalBufferSize(Qnn_Tensor_t* tensor) = 0; 28 | virtual bool allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) = 0; 29 | virtual bool freeTensorBuffer(Qnn_Tensor_t* tensor) = 0; 30 | virtual bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) = 0; 31 | virtual bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) = 0; 32 | virtual bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) = 0; 33 | virtual void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) = 0; 34 | virtual bool allocateBuffers(const std::map>& allocs_per_chunk, 35 | std::map>& tensor_offsets) = 0; 36 | virtual bool mapFusedBufferOffset(Qnn_Tensor_t* tensor, 37 | size_t tensorDataSize, 38 | int32_t fd, 39 | uint32_t offset, 40 | uint64_t totalBufferSize, 41 | void* memPointer, 42 | Qnn_ContextHandle_t contextHandle) = 0; 43 | virtual bool mapFusedBufferOffset( 44 | Qnn_Tensor_t* tensor, int alloc_idx, size_t offset, Qnn_ContextHandle_t ctx, size_t size) = 0; 45 | 46 | virtual bool deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) = 0; 47 | virtual void freeFusedBuffers() = 0; 48 | 49 | // Functions to sync 
memory buffers for Read/Write using DmaBuf. 50 | virtual bool beforeWriteToBuffer(Qnn_Tensor_t* tensor) { return false; }; 51 | virtual bool afterWriteToBuffer(Qnn_Tensor_t* tensor) { return false; }; 52 | virtual bool beforeReadFromBuffer(Qnn_Tensor_t* tensor) { return false; }; 53 | virtual bool afterReadFromBuffer(Qnn_Tensor_t* tensor) { return false; }; 54 | }; 55 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/RpcMem.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | #include "IBufferAlloc.hpp" 14 | #include "Logger.hpp" 15 | #include "QnnInterface.h" 16 | 17 | typedef void* (*RpcMemAllocFn_t)(int, uint32_t, int); 18 | typedef void (*RpcMemFreeFn_t)(void*); 19 | typedef int (*RpcMemToFdFn_t)(void*); 20 | 21 | struct RpcMemTensorData { 22 | int fd; 23 | void* memPointer; 24 | size_t size; 25 | size_t totalBufferSize; 26 | size_t offset; 27 | RpcMemTensorData() : fd(-1), memPointer(nullptr), size(0) {} 28 | RpcMemTensorData(int fdIn, void* memPointerIn, size_t sizeIn) 29 | : fd(fdIn), memPointer(memPointerIn), size(sizeIn) {} 30 | RpcMemTensorData( 31 | int fdIn, void* memPointerIn, size_t sizeIn, size_t totalBufferSizeIn, size_t offsetIn) 32 | : fd(fdIn), 33 | memPointer(memPointerIn), 34 | size(sizeIn), 35 | totalBufferSize(totalBufferSizeIn), 36 | offset(offsetIn) {} 37 | }; 38 | 39 | class RpcMem final : public IBufferAlloc { 40 | public: 41 | RpcMem(Qnn_ContextHandle_t contextHandle, QNN_INTERFACE_VER_TYPE* qnnInterface); 42 | // Disable copy constructors, r-value 
referencing, etc 43 | RpcMem(const RpcMem&) = delete; 44 | RpcMem& operator=(const RpcMem&) = delete; 45 | RpcMem(RpcMem&&) = delete; 46 | RpcMem& operator=(RpcMem&&) = delete; 47 | bool initialize() override; 48 | void* getBuffer(Qnn_Tensor_t* tensor) override; 49 | int getFd(Qnn_Tensor_t* tensor) override; 50 | 51 | size_t getOffset(Qnn_Tensor_t* tensor) override; 52 | 53 | size_t getBufferSize(Qnn_Tensor_t* tensor) override; 54 | 55 | size_t getTotalBufferSize(Qnn_Tensor_t* tensor) override; 56 | 57 | bool allocateTensorBuffer(Qnn_Tensor_t* tensor, size_t tensorDataSize) override; 58 | 59 | bool freeTensorBuffer(Qnn_Tensor_t* tensor) override; 60 | bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src) override; 61 | bool useSameMemory(Qnn_Tensor_t* dest, Qnn_Tensor_t* src, int offset) override; 62 | 63 | bool useExternalMemory(Qnn_Tensor_t* dest, void* extMem) override; 64 | 65 | void* allocateTensorFusedBuffer(uint64_t bufferSize, int32_t* fd) override; 66 | bool allocateBuffers(const std::map>& allocs_per_chunk, 67 | std::map>& tensor_offsets) override; 68 | 69 | bool mapFusedBufferOffset(Qnn_Tensor_t* tensor, 70 | size_t tensorDataSize, 71 | int32_t fd, 72 | uint32_t offset, 73 | uint64_t totalBufferSize, 74 | void* memPointer, 75 | Qnn_ContextHandle_t contextHandle) override; 76 | bool deregisterTensorFusedBuffer(Qnn_Tensor_t* tensor) override; 77 | void freeFusedBuffers() override; 78 | bool mapFusedBufferOffset(Qnn_Tensor_t* tensor, 79 | int alloc_idx, 80 | size_t offset, 81 | Qnn_ContextHandle_t ctx, 82 | size_t size) override; 83 | virtual ~RpcMem(); 84 | 85 | private: 86 | RpcMemTensorData* getRpcMemTensorData(Qnn_Tensor_t* tensor); 87 | 88 | // Pointer to the dlopen'd libcdsprpc.so shared library which contains 89 | // rpcmem_alloc, rpcmem_free, rpcmem_to_fd APIs 90 | void* m_libCdspRpc; 91 | // Function pointer to rpcmem_alloc 92 | RpcMemAllocFn_t m_rpcMemAlloc; 93 | // Function pointer to rpcmem_free 94 | RpcMemFreeFn_t m_rpcMemFree; 95 | // 
Function pointer to rpcmem_to_fd 96 | RpcMemToFdFn_t m_rpcMemToFd; 97 | QNN_INTERFACE_VER_TYPE* m_qnnInterface; 98 | Qnn_ContextHandle_t m_contextHandle; 99 | 100 | std::unordered_map m_tensorToRpcMem; 101 | std::unordered_set m_sameMemoryFreeTensors; 102 | std::vector> m_fusedBuffers; // vector<> 103 | std::vector m_fusedFds; 104 | std::unordered_set m_orphanedMemHandles; 105 | std::unordered_map m_memHandleToRpcMem; 106 | std::map, Qnn_Tensor_t*> memConfigList; 107 | }; 108 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/Utils.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include "Logger.hpp" 19 | 20 | #include "Interfaces.hpp" 21 | 22 | namespace qnn { 23 | namespace tools { 24 | namespace rwkv_app { 25 | 26 | void split(std::vector &splitString, 27 | const std::string &tokenizedString, 28 | const char separator); 29 | 30 | bool copyMetadataToGraphsInfo(const QnnSystemContext_BinaryInfo_t *binaryInfo, 31 | GraphInfo_t **&graphsInfo, 32 | uint32_t &graphsCount); 33 | 34 | bool copyGraphsInfo(const QnnSystemContext_GraphInfo_t *graphsInput, 35 | const uint32_t numGraphs, 36 | GraphInfo_t **&graphsInfo); 37 | 38 | bool copyGraphsInfoV1(const QnnSystemContext_GraphInfoV1_t *graphInfoSrc, 39 | GraphInfo_t *graphInfoDst); 40 | 41 | bool copyGraphsInfoV3(const QnnSystemContext_GraphInfoV3_t *graphInfoSrc, 42 | GraphInfo_t *graphInfoDst); 43 | 44 | bool copyTensorsInfo(const Qnn_Tensor_t 
*tensorsInfoSrc, 45 | Qnn_Tensor_t *&tensorWrappers, 46 | uint32_t tensorsCount); 47 | 48 | bool deepCopyQnnTensorInfo(Qnn_Tensor_t *dst, const Qnn_Tensor_t *src); 49 | 50 | } // namespace rwkv_app 51 | } // namespace tools 52 | } // namespace qnn -------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/dlwrap.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #ifdef _WIN32 10 | 11 | #pragma warning(disable : 4133 4996) 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "dlwrap.hpp" 21 | 22 | static const char* last_func; 23 | static long last_err; 24 | 25 | void* dlopen(const char* dll, int flags) { 26 | HINSTANCE h = LoadLibraryA(dll); 27 | if (h == NULL) { 28 | last_err = GetLastError(); 29 | last_func = "dlopen"; 30 | } 31 | 32 | return h; 33 | } 34 | 35 | int dlclose(void* h) { 36 | if (!FreeLibrary((HINSTANCE)h)) { 37 | last_err = GetLastError(); 38 | last_func = "dlclose"; 39 | return -1; 40 | } 41 | 42 | return 0; 43 | } 44 | 45 | void* dlsym(void* h, const char* name) { 46 | FARPROC p = GetProcAddress((HINSTANCE)h, name); 47 | if (!p) { 48 | last_err = GetLastError(); 49 | last_func = "dlsym"; 50 | } 51 | return (void*)(intptr_t)p; 52 | } 53 | 54 | const char* dlerror(void) { 55 | static char str[88]; 56 | 57 | if (!last_err) return NULL; 58 | 59 | sprintf(str, "%s error #%ld", last_func, last_err); 60 | last_err = 0; 61 | last_func = NULL; 62 | 63 | return str; 64 | } 65 | 66 | #endif // _WIN32 67 | 
-------------------------------------------------------------------------------- /librwkv-qualcomm/src/Utils/dlwrap.hpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All Rights Reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #ifndef DLWRAP_HPP 10 | #define DLWRAP_HPP 11 | 12 | #ifndef _WIN32 13 | 14 | // Just include regular dlfcn 15 | #include 16 | 17 | #else // _WIN32 18 | 19 | // Define basic set dl functions and flags 20 | 21 | #define RTLD_GLOBAL 0x100 22 | #define RTLD_LOCAL 0x000 23 | #define RTLD_LAZY 0x000 24 | #define RTLD_NOW 0x001 25 | 26 | void* dlopen(const char* filename, int flag); 27 | int dlclose(void* handle); 28 | void* dlsym(void* handle, const char* name); 29 | const char* dlerror(void); 30 | 31 | #endif // _WIN32 32 | 33 | #endif // DLWRAP_HPP 34 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/WrapperUtils/QnnWrapperUtils.cpp: -------------------------------------------------------------------------------- 1 | //============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 
6 | // 7 | //============================================================================== 8 | 9 | #include 10 | 11 | #include "QnnTypeMacros.hpp" 12 | #include "QnnWrapperUtils.hpp" 13 | 14 | ModelError_t freeQnnTensor(Qnn_Tensor_t &tensor) { 15 | // free all pointer allocations in struct 16 | free((void *)QNN_TENSOR_GET_NAME(tensor)); 17 | free(QNN_TENSOR_GET_DIMENSIONS(tensor)); 18 | if (QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(tensor)) { 19 | free(QNN_TENSOR_GET_IS_DYNAMIC_DIMENSIONS(tensor)); 20 | } 21 | auto quant = QNN_TENSOR_GET_QUANT_PARAMS(tensor); 22 | auto encoding = quant.quantizationEncoding; 23 | if (encoding == QNN_QUANTIZATION_ENCODING_AXIS_SCALE_OFFSET) { 24 | if (quant.axisScaleOffsetEncoding.scaleOffset != nullptr) { 25 | free(quant.axisScaleOffsetEncoding.scaleOffset); 26 | } 27 | } 28 | return MODEL_NO_ERROR; 29 | } 30 | 31 | ModelError_t freeQnnTensors(Qnn_Tensor_t *&tensors, 32 | uint32_t numTensors) { 33 | // free all pointer allocations in struct 34 | for (size_t i = 0; i < numTensors; i++) { 35 | freeQnnTensor(tensors[i]); 36 | } 37 | free(tensors); 38 | return MODEL_NO_ERROR; 39 | } 40 | 41 | ModelError_t freeGraphsInfo(GraphInfoPtr_t **graphsInfo, 42 | uint32_t numGraphs) { 43 | if (graphsInfo == nullptr || *graphsInfo == nullptr) { 44 | return MODEL_TENSOR_ERROR; 45 | } 46 | for (uint32_t i = 0; i < numGraphs; i++) { 47 | free((*graphsInfo)[i]->graphName); 48 | freeQnnTensors((*graphsInfo)[i]->inputTensors, (*graphsInfo)[i]->numInputTensors); 49 | freeQnnTensors((*graphsInfo)[i]->outputTensors, (*graphsInfo)[i]->numOutputTensors); 50 | } 51 | free(**graphsInfo); 52 | free(*graphsInfo); 53 | *graphsInfo = nullptr; 54 | return MODEL_NO_ERROR; 55 | } 56 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/WrapperUtils/QnnWrapperUtils.hpp: -------------------------------------------------------------------------------- 1 | 
//============================================================================== 2 | // 3 | // Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. 4 | // All rights reserved. 5 | // Confidential and Proprietary - Qualcomm Technologies, Inc. 6 | // 7 | //============================================================================== 8 | 9 | #pragma once 10 | 11 | #include "QnnContext.h" 12 | #include "QnnGraph.h" 13 | #include "QnnTensor.h" 14 | #include "QnnTypes.h" 15 | #include "QnnTypeDef.hpp" 16 | 17 | /** 18 | * @brief Frees all memory allocated tensor attributes. 19 | * 20 | * @param[in] tensor Qnn_Tensor_t object to free 21 | * 22 | * @return Error code 23 | */ 24 | ModelError_t freeQnnTensor(Qnn_Tensor_t &tensor); 25 | 26 | /** 27 | * @brief Loops through and frees all memory allocated tensor attributes for each tensor 28 | * object. 29 | * 30 | * @param[in] tensors array of tensor objects to free 31 | * 32 | * @param[in] numTensors length of the above tensors array 33 | * 34 | * @return Error code 35 | */ 36 | ModelError_t freeQnnTensors(Qnn_Tensor_t *&tensors, uint32_t numTensors); 37 | 38 | /** 39 | * @brief A helper function to free memory malloced for communicating the Graph for a model(s) 40 | * 41 | * @param[in] graphsInfo Pointer pointing to location of graph objects 42 | * 43 | * @param[in] numGraphs The number of graph objects the above pointer is pointing to 44 | * 45 | * @return Error code 46 | * 47 | */ 48 | ModelError_t freeGraphsInfo(GraphInfoPtr_t **graphsInfo, uint32_t numGraphs); 49 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/librwkv-qualcomm-app.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "IOTensor.hpp" 8 | #include "Interfaces.hpp" 9 | #include "half.hpp" 10 | 11 | namespace qnn { 12 | namespace tools { 13 | namespace rwkv_app { 14 | 15 | enum 
class StatusCode { 16 | SUCCESS, 17 | FAILURE, 18 | FAILURE_INPUT_LIST_EXHAUSTED, 19 | FAILURE_SYSTEM_ERROR, 20 | FAILURE_SYSTEM_COMMUNICATION_ERROR, 21 | QNN_FEATURE_UNSUPPORTED 22 | }; 23 | 24 | const int max_chunks = 8; 25 | 26 | class QnnRwkvApp { 27 | public: 28 | QnnRwkvApp(QnnFunctionPointers qnnFunctionPointers, 29 | void *backendHandle, 30 | void *modelHandle, 31 | std::vector> embedding = {}, 32 | std::string cachedBinaryPath = "", 33 | std::string saveBinaryName = ""); 34 | 35 | StatusCode initialize(); 36 | 37 | StatusCode initializeBackend(); 38 | 39 | StatusCode createContext(); 40 | 41 | StatusCode composeGraphs(); 42 | 43 | StatusCode finalizeGraphs(); 44 | 45 | StatusCode createPowerConfigId(); 46 | 47 | StatusCode setPowerConfig(); 48 | 49 | StatusCode destroyPowerConfigId(); 50 | 51 | StatusCode setRpcLatencyAndPolling(); 52 | 53 | StatusCode initializeTensors(); 54 | 55 | StatusCode execute(int token); 56 | 57 | StatusCode executeSequence(std::vector &tokens); 58 | 59 | StatusCode registerOpPackages(); 60 | 61 | StatusCode createFromBinary(uint8_t *binary, size_t binarySize); 62 | 63 | StatusCode saveBinary(); 64 | 65 | StatusCode freeContext(); 66 | 67 | StatusCode terminateBackend(); 68 | 69 | StatusCode freeGraphs(); 70 | 71 | Qnn_ContextHandle_t getContext(); 72 | 73 | std::string getBackendBuildId(); 74 | 75 | StatusCode isDevicePropertySupported(); 76 | 77 | StatusCode createDevice(); 78 | 79 | size_t getQnnDatatypeSize(Qnn_DataType_t dataType); 80 | 81 | StatusCode freeDevice(); 82 | 83 | StatusCode verifyFailReturnStatus(Qnn_ErrorHandle_t errCode); 84 | 85 | void fillQuantizedTensor(float value, Qnn_Tensor_t *tensor); 86 | 87 | virtual ~QnnRwkvApp(); 88 | 89 | std::vector m_lastOutput; 90 | 91 | uint32_t powerConfigId; 92 | uint32_t deviceId = 0; 93 | uint32_t coreId = 0; 94 | 95 | QnnFunctionPointers m_qnnFunctionPointers; 96 | std::string m_outputPath; 97 | std::string m_saveBinaryName; 98 | std::string m_cachedBinaryPath; 99 | 
std::vector m_opPackagePaths; 100 | uint8_t *m_binaryBuffer = nullptr; 101 | uint64_t m_binarySize = 0; 102 | QnnBackend_Config_t **m_backendConfig = nullptr; 103 | Qnn_ContextHandle_t m_context[max_chunks] = {nullptr}; 104 | QnnContext_Config_t **m_contextConfig = nullptr; 105 | GraphInfo_t **m_decodeGraphsInfo; 106 | GraphInfo_t **m_prefillGraphsInfo; 107 | uint32_t m_decodeGraphsCount; 108 | uint32_t m_prefillGraphsCount; 109 | void *m_backendLibraryHandle; 110 | void *m_modelHandle; 111 | IOTensor *m_ioTensor; 112 | Qnn_Tensor_t *m_inputTensors[max_chunks] = {nullptr}; 113 | Qnn_Tensor_t *m_outputTensors[max_chunks] = {nullptr}; 114 | Qnn_Tensor_t *m_prefillInputTensors[max_chunks] = {nullptr}; 115 | Qnn_Tensor_t *m_prefillOutputTensors[max_chunks] = {nullptr}; 116 | std::vector> m_embedding = {}; 117 | bool m_tensorsInitialized = false; 118 | bool m_isBackendInitialized; 119 | bool m_isContextCreated; 120 | 121 | GraphConfigInfo_t **m_graphConfigsInfo = nullptr; 122 | uint32_t m_graphConfigsInfoCount; 123 | Qnn_LogHandle_t m_logHandle = nullptr; 124 | Qnn_BackendHandle_t m_backendHandle = nullptr; 125 | Qnn_DeviceHandle_t m_deviceHandle = nullptr; 126 | 127 | Qnn_Tensor_t *m_logitsOutputTensor = nullptr; 128 | 129 | std::vector> m_decodeGraphsTensorNameToTensorPointer; 130 | std::vector> m_decodeGraphsTensorNameToSize; 131 | std::vector> m_prefillGraphsTensorNameToTensorPointer; 132 | std::vector> m_prefillGraphsTensorNameToSize; 133 | 134 | int m_prefillSequenceLength = 0; 135 | 136 | std::chrono::duration m_lastInferenceTime; 137 | }; 138 | } // namespace rwkv_app 139 | } // namespace tools 140 | } // namespace qnn 141 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/librwkv-qualcomm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | enum class StatusCode { 7 | SUCCESS, 8 | FAILURE, 9 | FAILURE_INPUT_LIST_EXHAUSTED, 
10 | FAILURE_SYSTEM_ERROR, 11 | FAILURE_SYSTEM_COMMUNICATION_ERROR, 12 | QNN_FEATURE_UNSUPPORTED 13 | }; 14 | 15 | typedef void* QnnRwkvBackend_t; 16 | 17 | typedef void* QnnRwkvModel_t; 18 | 19 | StatusCode QnnRwkvBackendCreate(QnnRwkvBackend_t *backend, QnnRwkvModel_t *modelHandle, std::string modelPath, std::string backendPath); 20 | 21 | StatusCode QnnRwkvBackendCreateWithContext(QnnRwkvBackend_t *backend, QnnRwkvModel_t *modelHandle, std::string contextPath, std::string backendPath, std::string systemlibPath); 22 | 23 | StatusCode QnnRwkvGetVocabSize(QnnRwkvBackend_t backend, std::vector& shape); 24 | 25 | StatusCode QnnRwkvCopyLogitsOutput(QnnRwkvBackend_t backend, float* outputBuffer, size_t outputSize); 26 | 27 | StatusCode QnnRwkvExecute(QnnRwkvBackend_t backend, int token); 28 | 29 | StatusCode QnnRwkvExecuteSequence(QnnRwkvBackend_t backend, std::vector tokens); 30 | 31 | double QnnRwkvGetLastInferenceTime(QnnRwkvBackend_t backend); 32 | 33 | StatusCode QnnRwkvResetStates(QnnRwkvBackend_t backend); 34 | 35 | StatusCode QnnRwkvSaveContext(QnnRwkvBackend_t backend, std::string contextPath); 36 | 37 | StatusCode QnnRwkvSetStates(QnnRwkvBackend_t backend, std::vector>> states); 38 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/soc_detect.cpp: -------------------------------------------------------------------------------- 1 | #include "soc_detect.h" 2 | #include 3 | 4 | const char * platform_name[] = { 5 | "Snapdragon", 6 | "Unknown", 7 | }; 8 | 9 | snapdragon_soc_id snapdragon_soc_ids[] = { 10 | {475, "SM7325", "778", "V68"}, 11 | {439, "SM8350", "888", "V68"}, 12 | {457, "SM8450", "8 Gen 1", "V69"}, 13 | {480, "SM8450_2", "8 Gen 1", "V69"}, 14 | {482, "SM8450_3", "8 Gen 1", "V69"}, 15 | {497, "QCM6490", "QCM6490", "V68"}, 16 | {498, "QCS6490", "QCS6490", "V68"}, 17 | {530, "SM8475", "8+ Gen 1", "V69"}, 18 | {531, "SM8475P", "8+ Gen 1", "V69"}, 19 | {540, "SM8475_2", "8+ Gen 1", "V69"}, 20 | {519, 
"SM8550", "8 Gen 2", "V73"}, 21 | {557, "SM8650", "8 Gen 3", "V75"}, 22 | {603, "QCS8550", "QCS8550", "V73"}, 23 | {604, "QCM8550", "QCM8550", "V73"}, 24 | {614, "SM8635", "8s Gen 3", "V73"}, 25 | {642, "SM8635", "8s Gen 3", "V73"}, 26 | {618, "SM8750", "8 Elite", "V79"} 27 | // TODO: add more 28 | }; 29 | 30 | soc_detect::soc_detect() { 31 | } 32 | 33 | soc_detect::~soc_detect() { 34 | } 35 | 36 | int soc_detect::detect_platform() { 37 | #ifndef _WIN32 38 | std::ifstream file("/sys/devices/soc0/family"); 39 | std::string tmp; 40 | if (file.is_open()) { 41 | file >> tmp; 42 | file.close(); 43 | } else { 44 | return -1; 45 | } 46 | 47 | if (tmp == "Snapdragon") { 48 | m_platform_type = PLATFORM_SNAPDRAGON; 49 | } else { 50 | m_platform_type = PLATFORM_UNKNOWN; 51 | } 52 | 53 | if (m_platform_type == PLATFORM_SNAPDRAGON) { 54 | std::ifstream file_soc_id("/sys/devices/soc0/soc_id"); 55 | if (file_soc_id.is_open()) { 56 | file_soc_id >> m_soc_id; 57 | file_soc_id.close(); 58 | } 59 | 60 | for (int i = 0; i < sizeof(snapdragon_soc_ids) / sizeof(snapdragon_soc_ids[0]); i++) { 61 | if (snapdragon_soc_ids[i].soc_id == m_soc_id) { 62 | m_soc_name = snapdragon_soc_ids[i].soc_name; 63 | m_soc_partname = snapdragon_soc_ids[i].soc_partname; 64 | m_htp_arch = snapdragon_soc_ids[i].htp_arch; 65 | break; 66 | } 67 | } 68 | } 69 | #endif 70 | return 0; 71 | } 72 | 73 | platform_type soc_detect::get_platform_type() { 74 | return m_platform_type; 75 | } 76 | 77 | const char * soc_detect::get_platform_name() { 78 | return platform_name[m_platform_type]; 79 | } 80 | 81 | const char * soc_detect::get_soc_name() { 82 | return m_soc_name; 83 | } 84 | 85 | const char * soc_detect::get_soc_partname() { 86 | return m_soc_partname; 87 | } 88 | 89 | const char * soc_detect::get_htp_arch() { 90 | return m_htp_arch; 91 | } 92 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/soc_detect.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | enum platform_type { 4 | PLATFORM_SNAPDRAGON, // lets add snapdragon support first 5 | PLATFORM_UNKNOWN, 6 | }; 7 | 8 | struct snapdragon_soc_id { 9 | int soc_id; 10 | const char * soc_partname; 11 | const char * soc_name; 12 | const char * htp_arch; 13 | }; 14 | 15 | class soc_detect { 16 | public: 17 | soc_detect(); 18 | ~soc_detect(); 19 | 20 | int detect_platform(); 21 | 22 | platform_type get_platform_type(); 23 | const char * get_platform_name(); 24 | const char * get_soc_name(); 25 | const char * get_soc_partname(); 26 | const char * get_htp_arch(); 27 | private: 28 | platform_type m_platform_type = PLATFORM_UNKNOWN; 29 | int m_soc_id = 0; 30 | const char * m_soc_name = "Unknown"; 31 | const char * m_soc_partname = "Unknown"; 32 | const char * m_htp_arch = "Unknown"; 33 | }; 34 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/tokenizer.cpp: -------------------------------------------------------------------------------- 1 | #include "tokenizer.h" 2 | #include "trie.hpp" 3 | 4 | int trie_tokenizer::load(const std::string vocab_file) { 5 | _tokenizer = new TRIE_TOKENIZER(vocab_file); 6 | if (!_tokenizer->inited()) 7 | return 1; 8 | return 0; 9 | } 10 | 11 | bool trie_tokenizer::inited() const { 12 | return _tokenizer->inited(); 13 | } 14 | 15 | std::vector trie_tokenizer::Encode(std::string_view str) const { 16 | auto ids = _tokenizer->encode(std::string(str)); 17 | return ids; 18 | } 19 | 20 | std::string trie_tokenizer::Decode(int id) const { 21 | return _tokenizer->decode(std::vector{id}); 22 | } 23 | 24 | std::string trie_tokenizer::Decode(const std::vector &ids) const { 25 | return _tokenizer->decode(ids); 26 | } 27 | 28 | std::vector abc_tokenizer::Encode(std::string_view str) const { 29 | std::vector ids; 30 | for (int i = 0; i < str.size(); ++i) { 31 | ids.push_back(str[i]); 32 | } 33 | return 
ids; 34 | } 35 | 36 | std::string abc_tokenizer::Decode(int id) const { 37 | if (id <= eos_token_id) { 38 | return ""; 39 | } else { 40 | return std::string(1, static_cast(id)); 41 | } 42 | } 43 | 44 | std::string abc_tokenizer::Decode(const std::vector &ids) const { 45 | std::string str; 46 | for (auto id : ids) { 47 | str += Decode(id); 48 | } 49 | return str; 50 | } 51 | -------------------------------------------------------------------------------- /librwkv-qualcomm/src/tokenizer.h: -------------------------------------------------------------------------------- 1 | #ifndef TOKENIZER_H 2 | #define TOKENIZER_H 3 | 4 | #include 5 | #include 6 | 7 | class TRIE_TOKENIZER; 8 | 9 | class tokenizer_base { 10 | public: 11 | tokenizer_base(int pad_token_id, int bos_token_id, int eos_token_id) 12 | : pad_token_id(pad_token_id), bos_token_id(bos_token_id), 13 | eos_token_id(eos_token_id) {} 14 | virtual ~tokenizer_base() = default; 15 | virtual int load(const std::string vocab_file) = 0; 16 | virtual std::vector Encode(std::string_view str) const = 0; 17 | virtual std::string Decode(const std::vector &ids) const = 0; 18 | virtual std::string Decode(int id) const = 0; 19 | const int pad_token_id; 20 | const int bos_token_id; 21 | const int eos_token_id; 22 | }; 23 | 24 | class trie_tokenizer : public tokenizer_base { 25 | public: 26 | trie_tokenizer() : tokenizer_base(0, 0, 0) {}; 27 | int load(const std::string vocab_file); 28 | std::vector Encode(std::string_view str) const; 29 | std::string Decode(const std::vector &ids) const; 30 | std::string Decode(int id) const; 31 | bool inited() const; 32 | private: 33 | TRIE_TOKENIZER * _tokenizer; 34 | }; 35 | 36 | class abc_tokenizer : public tokenizer_base { 37 | public: 38 | abc_tokenizer() : tokenizer_base(0, 2, 3) {}; 39 | int load(const std::string) { 40 | return 0; 41 | }; 42 | std::vector Encode(std::string_view str) const; 43 | std::string Decode(const std::vector &ids) const; 44 | std::string Decode(int id) const; 45 
| }; 46 | 47 | #endif -------------------------------------------------------------------------------- /quant_encodings/README.md: -------------------------------------------------------------------------------- 1 | Refer to [GDrive](https://drive.google.com/drive/folders/1IXp6FwdiZjV4fn8HXRUoGHM91WzvEwqj?usp=drive_link) -------------------------------------------------------------------------------- /quantizers/configs/default_per_channel_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaults": 3 | { 4 | "ops": 5 | { 6 | "is_output_quantized": "True" 7 | }, 8 | "params": 9 | { 10 | "is_quantized": "True", 11 | "is_symmetric": "True" 12 | }, 13 | "strict_symmetric": "False", 14 | "per_channel_quantization": "False" 15 | }, 16 | 17 | "params": 18 | { 19 | "bias": 20 | { 21 | "is_quantized": "False" 22 | } 23 | }, 24 | 25 | "op_type": 26 | { 27 | "Squeeze": 28 | { 29 | "is_output_quantized": "False" 30 | }, 31 | "Pad": 32 | { 33 | "is_output_quantized": "False" 34 | }, 35 | "Mean": 36 | { 37 | "is_output_quantized": "False" 38 | }, 39 | "Conv": 40 | { 41 | "per_channel_quantization": "True" 42 | }, 43 | "Gemm": 44 | { 45 | "per_channel_quantization": "True" 46 | }, 47 | "Linear": 48 | { 49 | "per_channel_quantization": "True" 50 | }, 51 | "MatMul": 52 | { 53 | "per_channel_quantization": "True" 54 | } 55 | }, 56 | 57 | "supergroups": 58 | [ 59 | { 60 | "op_list": ["Conv", "Relu"] 61 | }, 62 | { 63 | "op_list": ["Conv", "Clip"] 64 | }, 65 | { 66 | "op_list": ["Add", "Relu"] 67 | }, 68 | { 69 | "op_list": ["Gemm", "Relu"] 70 | } 71 | ], 72 | 73 | "model_input": 74 | { 75 | "is_input_quantized": "True" 76 | }, 77 | 78 | "model_output": 79 | {} 80 | } 81 | -------------------------------------------------------------------------------- /quantizers/configs/htp_quantsim_config_v75.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaults": 3 | { 4 | "hw_version": 
"V75", 5 | "ops": 6 | { 7 | "is_output_quantized": "True" 8 | }, 9 | "params": 10 | { 11 | "is_quantized": "True", 12 | "is_symmetric": "True" 13 | }, 14 | "per_channel_quantization": "True", 15 | "strict_symmetric": "False", 16 | "unsigned_symmetric": "False" 17 | }, 18 | 19 | "params": 20 | { 21 | "bias": 22 | { 23 | "is_quantized": "False" 24 | } 25 | }, 26 | 27 | "op_type": 28 | { 29 | "Cast": 30 | { 31 | "is_output_quantized": "False" 32 | }, 33 | "BatchPermutation": 34 | { 35 | "is_output_quantized": "False" 36 | }, 37 | "ChannelShuffle": 38 | { 39 | "is_output_quantized": "False" 40 | }, 41 | "CropAndResize": 42 | { 43 | "is_output_quantized": "False" 44 | }, 45 | "DepthToSpace": 46 | { 47 | "is_output_quantized": "False" 48 | }, 49 | "Dropout": 50 | { 51 | "is_output_quantized": "False" 52 | }, 53 | "Expand": 54 | { 55 | "is_output_quantized": "False" 56 | }, 57 | "Reshape": 58 | { 59 | "is_output_quantized": "False" 60 | }, 61 | "Upsample": 62 | { 63 | "is_output_quantized": "False" 64 | }, 65 | "SpaceToDepth": 66 | { 67 | "is_output_quantized": "False" 68 | }, 69 | "BatchToSpace": 70 | { 71 | "is_output_quantized": "False" 72 | }, 73 | "SpaceToBatch": 74 | { 75 | "is_output_quantized": "False" 76 | }, 77 | "NonMaxSuppression": 78 | { 79 | "is_output_quantized": "False" 80 | }, 81 | "Gather": 82 | { 83 | "is_output_quantized": "False", 84 | "per_channel_quantization": "False" 85 | }, 86 | "GatherND": 87 | { 88 | "is_output_quantized": "False" 89 | }, 90 | "Gemm": 91 | { 92 | "per_channel_quantization": "False" 93 | }, 94 | "GroupNorm": 95 | { 96 | "per_channel_quantization": "False", 97 | "params": { 98 | "bias": 99 | { 100 | "is_quantized": "True" 101 | } 102 | } 103 | }, 104 | "LayerNorm": 105 | { 106 | "per_channel_quantization": "False", 107 | "params": { 108 | "weight": { 109 | "is_symmetric": "False" 110 | } 111 | } 112 | }, 113 | "BatchNormalization": 114 | { 115 | "per_channel_quantization": "False" 116 | }, 117 | "InstanceNormalization": 118 | { 
119 | "per_channel_quantization": "False" 120 | }, 121 | "MaxPool": 122 | { 123 | "is_output_quantized": "False" 124 | }, 125 | "MaxRoiPool": 126 | { 127 | "is_output_quantized": "False" 128 | }, 129 | "Mean": 130 | { 131 | "is_output_quantized": "False" 132 | }, 133 | "NonZero": 134 | { 135 | "is_output_quantized": "False" 136 | }, 137 | "Pad": 138 | { 139 | "is_output_quantized": "False" 140 | }, 141 | "ReduceMax": 142 | { 143 | "is_output_quantized": "False" 144 | }, 145 | "ReduceMin": 146 | { 147 | "is_output_quantized": "False" 148 | }, 149 | "ScatterElements": 150 | { 151 | "is_output_quantized": "False" 152 | }, 153 | "Sigmoid": 154 | { 155 | "encoding_constraints": 156 | { 157 | "min": 0.0, 158 | "max": 1.0 159 | } 160 | }, 161 | "Softmax": 162 | { 163 | "encoding_constraints": 164 | { 165 | "min": 0.0, 166 | "max": 1.0 167 | } 168 | }, 169 | "Slice": 170 | { 171 | "is_output_quantized": "False" 172 | }, 173 | "Split": 174 | { 175 | "is_output_quantized": "False" 176 | }, 177 | "Squeeze": 178 | { 179 | "is_output_quantized": "False" 180 | }, 181 | "Tile": 182 | { 183 | "is_output_quantized": "False" 184 | }, 185 | "TopK": 186 | { 187 | "is_output_quantized": "False" 188 | }, 189 | "Transpose": 190 | { 191 | "is_output_quantized": "False" 192 | } 193 | }, 194 | 195 | "supergroups": 196 | [ 197 | { 198 | "op_list": ["Add", "Relu"] 199 | }, 200 | { 201 | "op_list": ["Conv", "BatchNormalization","HardSwish"] 202 | }, 203 | { 204 | "op_list": ["Conv", "BatchNormalization","PRelu"] 205 | }, 206 | { 207 | "op_list": ["Conv", "BatchNormalization", "Relu"] 208 | }, 209 | { 210 | "op_list": ["Conv", "Clip"] 211 | }, 212 | { 213 | "op_list": ["Conv", "HardSwish"] 214 | }, 215 | { 216 | "op_list": ["Conv", "PRelu"] 217 | }, 218 | { 219 | "op_list": ["Conv", "Relu"] 220 | }, 221 | { 222 | "op_list": ["ConvTranspose", "Relu"] 223 | }, 224 | { 225 | "op_list": ["Gemm", "Relu"] 226 | } 227 | ], 228 | 229 | "model_input": 230 | { 231 | "is_input_quantized": "True" 232 | }, 
233 | 234 | "model_output": 235 | {} 236 | } 237 | -------------------------------------------------------------------------------- /quantizers/configs/htp_quantsim_config_v75_per_channel.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaults": 3 | { 4 | "hw_version": "V75", 5 | "ops": 6 | { 7 | "is_output_quantized": "True" 8 | }, 9 | "params": 10 | { 11 | "is_quantized": "True", 12 | "is_symmetric": "True" 13 | }, 14 | "per_channel_quantization": "True", 15 | "strict_symmetric": "False", 16 | "unsigned_symmetric": "False" 17 | }, 18 | 19 | "params": 20 | { 21 | "bias": 22 | { 23 | "is_quantized": "False" 24 | } 25 | }, 26 | 27 | "op_type": 28 | { 29 | "Cast": 30 | { 31 | "is_output_quantized": "False" 32 | }, 33 | "BatchPermutation": 34 | { 35 | "is_output_quantized": "False" 36 | }, 37 | "ChannelShuffle": 38 | { 39 | "is_output_quantized": "False" 40 | }, 41 | "CropAndResize": 42 | { 43 | "is_output_quantized": "False" 44 | }, 45 | "DepthToSpace": 46 | { 47 | "is_output_quantized": "False" 48 | }, 49 | "Dropout": 50 | { 51 | "is_output_quantized": "False" 52 | }, 53 | "Expand": 54 | { 55 | "is_output_quantized": "False" 56 | }, 57 | "Reshape": 58 | { 59 | "is_output_quantized": "False" 60 | }, 61 | "Upsample": 62 | { 63 | "is_output_quantized": "False" 64 | }, 65 | "SpaceToDepth": 66 | { 67 | "is_output_quantized": "False" 68 | }, 69 | "BatchToSpace": 70 | { 71 | "is_output_quantized": "False" 72 | }, 73 | "SpaceToBatch": 74 | { 75 | "is_output_quantized": "False" 76 | }, 77 | "NonMaxSuppression": 78 | { 79 | "is_output_quantized": "False" 80 | }, 81 | "Gather": 82 | { 83 | "is_output_quantized": "False", 84 | "per_channel_quantization": "False" 85 | }, 86 | "GatherND": 87 | { 88 | "is_output_quantized": "False" 89 | }, 90 | "Conv": 91 | { 92 | "per_channel_quantization": "True" 93 | }, 94 | "Gemm": 95 | { 96 | "per_channel_quantization": "True" 97 | }, 98 | "Linear": 99 | { 100 | "per_channel_quantization": 
"True" 101 | }, 102 | "GroupNorm": 103 | { 104 | "per_channel_quantization": "False", 105 | "params": { 106 | "bias": 107 | { 108 | "is_quantized": "True" 109 | } 110 | } 111 | }, 112 | "LayerNorm": 113 | { 114 | "per_channel_quantization": "False", 115 | "params": { 116 | "weight": { 117 | "is_symmetric": "False" 118 | } 119 | } 120 | }, 121 | "BatchNormalization": 122 | { 123 | "per_channel_quantization": "False" 124 | }, 125 | "InstanceNormalization": 126 | { 127 | "per_channel_quantization": "False" 128 | }, 129 | "MaxPool": 130 | { 131 | "is_output_quantized": "False" 132 | }, 133 | "MaxRoiPool": 134 | { 135 | "is_output_quantized": "False" 136 | }, 137 | "Mean": 138 | { 139 | "is_output_quantized": "False" 140 | }, 141 | "NonZero": 142 | { 143 | "is_output_quantized": "False" 144 | }, 145 | "Pad": 146 | { 147 | "is_output_quantized": "False" 148 | }, 149 | "ReduceMax": 150 | { 151 | "is_output_quantized": "False" 152 | }, 153 | "ReduceMin": 154 | { 155 | "is_output_quantized": "False" 156 | }, 157 | "ScatterElements": 158 | { 159 | "is_output_quantized": "False" 160 | }, 161 | "Sigmoid": 162 | { 163 | "encoding_constraints": 164 | { 165 | "min": 0.0, 166 | "max": 1.0 167 | } 168 | }, 169 | "Softmax": 170 | { 171 | "encoding_constraints": 172 | { 173 | "min": 0.0, 174 | "max": 1.0 175 | } 176 | }, 177 | "Slice": 178 | { 179 | "is_output_quantized": "False" 180 | }, 181 | "Split": 182 | { 183 | "is_output_quantized": "False" 184 | }, 185 | "Squeeze": 186 | { 187 | "is_output_quantized": "False" 188 | }, 189 | "Tile": 190 | { 191 | "is_output_quantized": "False" 192 | }, 193 | "TopK": 194 | { 195 | "is_output_quantized": "False" 196 | }, 197 | "Transpose": 198 | { 199 | "is_output_quantized": "False" 200 | } 201 | }, 202 | 203 | "supergroups": 204 | [ 205 | { 206 | "op_list": ["Add", "Relu"] 207 | }, 208 | { 209 | "op_list": ["Conv", "BatchNormalization","HardSwish"] 210 | }, 211 | { 212 | "op_list": ["Conv", "BatchNormalization","PRelu"] 213 | }, 214 | { 215 | 
"op_list": ["Conv", "BatchNormalization", "Relu"] 216 | }, 217 | { 218 | "op_list": ["Conv", "Clip"] 219 | }, 220 | { 221 | "op_list": ["Conv", "HardSwish"] 222 | }, 223 | { 224 | "op_list": ["Conv", "PRelu"] 225 | }, 226 | { 227 | "op_list": ["Conv", "Relu"] 228 | }, 229 | { 230 | "op_list": ["ConvTranspose", "Relu"] 231 | }, 232 | { 233 | "op_list": ["Gemm", "Relu"] 234 | } 235 | ], 236 | 237 | "model_input": 238 | { 239 | "is_input_quantized": "True" 240 | }, 241 | 242 | "model_output": 243 | {} 244 | } 245 | -------------------------------------------------------------------------------- /quantizers/configs/qsim_config_per_channel_with_exceptions.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaults": 3 | { 4 | "hw_version": "V73", 5 | "ops": 6 | { 7 | "is_output_quantized": "True" 8 | }, 9 | "params": 10 | { 11 | "is_quantized": "True", 12 | "is_symmetric": "True" 13 | }, 14 | "per_channel_quantization": "False", 15 | "strict_symmetric": "False", 16 | "unsigned_symmetric": "False" 17 | }, 18 | 19 | "params": 20 | { 21 | "bias": 22 | { 23 | "is_quantized": "False" 24 | } 25 | }, 26 | 27 | "op_type": 28 | { 29 | "Squeeze": 30 | { 31 | "is_output_quantized": "False" 32 | }, 33 | "Pad": 34 | { 35 | "is_output_quantized": "False" 36 | }, 37 | "Reshape": 38 | { 39 | "is_output_quantized": "False" 40 | }, 41 | "ChannelShuffle": 42 | { 43 | "is_output_quantized": "False" 44 | }, 45 | "Tile": 46 | { 47 | "is_output_quantized": "False" 48 | }, 49 | "TopK": 50 | { 51 | "is_output_quantized": "False" 52 | }, 53 | "GatherND": 54 | { 55 | "is_output_quantized": "False" 56 | }, 57 | "ReduceMin": 58 | { 59 | "is_output_quantized": "False" 60 | }, 61 | "ReduceMax": 62 | { 63 | "is_output_quantized": "False" 64 | }, 65 | "Slice": 66 | { 67 | "is_output_quantized": "False" 68 | }, 69 | "NonZero": 70 | { 71 | "is_output_quantized": "False" 72 | }, 73 | "DepthToSpace": 74 | { 75 | "is_output_quantized": "False" 76 | }, 77 | 
"MaxPool": 78 | { 79 | "is_output_quantized": "False" 80 | }, 81 | "Split": 82 | { 83 | "is_output_quantized": "False" 84 | }, 85 | "Mean": 86 | { 87 | "is_output_quantized": "False" 88 | }, 89 | "Conv": 90 | { 91 | "per_channel_quantization": "True" 92 | }, 93 | "Gemm": 94 | { 95 | "per_channel_quantization": "True" 96 | }, 97 | "Cast": 98 | { 99 | "is_output_quantized": "False" 100 | }, 101 | "LayerNorm": 102 | { 103 | "is_output_quantized": "False" 104 | }, 105 | "Gather": 106 | { 107 | "is_output_quantized": "False" 108 | }, 109 | "Sigmoid": 110 | { 111 | "encoding_constraints": 112 | { 113 | "min": 0.0, 114 | "max": 1.0 115 | } 116 | }, 117 | "Softmax": 118 | { 119 | "encoding_constraints": 120 | { 121 | "min": 0.0, 122 | "max": 1.0 123 | } 124 | }, 125 | "Linear": 126 | { 127 | "per_channel_quantization": "True" 128 | }, 129 | "MatMul": 130 | { 131 | "per_channel_quantization": "True" 132 | } 133 | }, 134 | 135 | "supergroups": 136 | [ 137 | { 138 | "op_list": ["Conv", "Relu"] 139 | }, 140 | { 141 | "op_list": ["Conv", "Clip"] 142 | }, 143 | { 144 | "op_list": ["Conv", "BatchNormalization", "Relu"] 145 | }, 146 | { 147 | "op_list": ["ConvTranspose", "Relu"] 148 | }, 149 | { 150 | "op_list": ["Add", "Relu"] 151 | }, 152 | { 153 | "op_list": ["Gemm", "Relu"] 154 | }, 155 | { 156 | "op_list": ["Conv", "PRelu"] 157 | }, 158 | { 159 | "op_list": ["Conv", "BatchNormalization","PRelu"] 160 | }, 161 | { 162 | "op_list": ["Conv", "HardSwish"] 163 | }, 164 | { 165 | "op_list": ["Conv", "BatchNormalization","HardSwish"] 166 | } 167 | ], 168 | 169 | "model_input": 170 | { 171 | "is_input_quantized": "True" 172 | }, 173 | 174 | "model_output": 175 | {} 176 | } -------------------------------------------------------------------------------- /quantizers/configs/rwkv_gptq_exceptions.json: -------------------------------------------------------------------------------- 1 | { 2 | "module_list": 3 | [ 4 | { 5 | "module_name": "Embedding", 6 | "exception_stage": 
"pre-calibration", 7 | "exceptions": { 8 | "param_exceptions": { 9 | "asymmetric": "True", 10 | "bitwidth": "32" 11 | }, 12 | "input_exceptions": "None", 13 | "output_exceptions": "None" 14 | } 15 | }, 16 | { 17 | "module_name": "LayerNorm", 18 | "exception_stage": "pre-calibration", 19 | "exceptions": { 20 | "param_exceptions": { 21 | "asymmetric": "True", 22 | "bitwidth": "32" 23 | }, 24 | "input_exceptions": "None", 25 | "output_exceptions": "None" 26 | } 27 | } 28 | ], 29 | "name_list": 30 | [ 31 | { 32 | "module_name": "head", 33 | "exception_stage": "pre-calibration", 34 | "exceptions": { 35 | "param_exceptions": { 36 | "bitwidth": "32" 37 | }, 38 | "input_exceptions": "None", 39 | "output_exceptions": "None" 40 | } 41 | }, 42 | { 43 | "module_name": "matmul_time_maa_w1", 44 | "exception_stage": "pre-calibration", 45 | "exceptions": { 46 | "param_exceptions": { 47 | "bitwidth": "8" 48 | }, 49 | "input_exceptions": "None", 50 | "output_exceptions": "None" 51 | } 52 | } 53 | ] 54 | } 55 | -------------------------------------------------------------------------------- /rwkv_src/elemwise_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from typing import Any 3 | 4 | class Add(torch.nn.Module): 5 | # pylint:disable=arguments-differ 6 | @staticmethod 7 | def forward(x: Any, y: Any) -> Any: 8 | if isinstance(x, torch.Tensor) or isinstance(y, torch.Tensor): 9 | out = torch.add(x, y) 10 | else: 11 | out = x + y 12 | return out 13 | 14 | class Subtract(torch.nn.Module): 15 | # pylint:disable=arguments-differ 16 | @staticmethod 17 | def forward(x: Any, y: Any) -> Any: 18 | if isinstance(x, torch.Tensor) or isinstance(y, torch.Tensor): 19 | out = torch.sub(x, y) 20 | else: 21 | out = x - y 22 | return out 23 | 24 | class Neg(torch.nn.Module): 25 | # pylint:disable=arguments-differ 26 | @staticmethod 27 | def forward(x: Any) -> Any: 28 | out = torch.neg(x) 29 | return out 30 | 31 | class 
Multiply(torch.nn.Module): 32 | # pylint:disable=arguments-differ 33 | @staticmethod 34 | def forward(x: Any, y: Any) -> Any: 35 | if isinstance(x, torch.Tensor) or isinstance(y, torch.Tensor): 36 | out = torch.mul(x, y) 37 | else: 38 | out = x * y 39 | return out 40 | 41 | class Tanh(torch.nn.Module): 42 | # pylint:disable=arguments-differ 43 | @staticmethod 44 | def forward(x: torch.Tensor) -> torch.Tensor: 45 | out = torch.tanh(x) 46 | return out 47 | 48 | class SiLU(torch.nn.Module): 49 | def __init__(self): 50 | super().__init__() 51 | self.sigmoid = torch.nn.Sigmoid() 52 | self.mul = Multiply() 53 | 54 | def forward(self, x: torch.Tensor) -> Any: 55 | return self.mul(x, self.sigmoid(x)) 56 | 57 | class Exponential(torch.nn.Module): 58 | def __init__(self): 59 | super().__init__() 60 | 61 | def forward(self, x: torch.Tensor) -> torch.Tensor: 62 | return torch.exp(x) 63 | 64 | class MatMul(torch.nn.Module): 65 | def __init__(self): 66 | super().__init__() 67 | 68 | def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: 69 | return torch.matmul(x, y) 70 | 71 | class Bmm(torch.nn.Module): 72 | def __init__(self): 73 | super().__init__() 74 | 75 | def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: 76 | return torch.bmm(x, y) 77 | 78 | class Split(torch.nn.Module): 79 | def __init__(self): 80 | super().__init__() 81 | 82 | def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor: 83 | return torch.split(x, *args, **kwargs) 84 | 85 | class ReLU(torch.nn.Module): 86 | def __init__(self): 87 | super().__init__() 88 | 89 | def forward(self, x: torch.Tensor) -> torch.Tensor: 90 | return torch.relu(x) 91 | 92 | class Pow(torch.nn.Module): 93 | def __init__(self): 94 | super().__init__() 95 | 96 | def forward(self, x: torch.Tensor, y: int) -> torch.Tensor: 97 | return torch.pow(x, y) -------------------------------------------------------------------------------- /rwkv_src/rwkv_tokenizer.py: 
import ast
from typing import List,Set,Dict


class ABCTokenizer():
    """Character-level tokenizer that maps each character to its code point."""

    def __init__(self):
        self.pad_token_id = 0
        self.bos_token_id = 2
        self.eos_token_id = 3

    def encode(self, text):
        """Return ``ord(c)`` for every character of *text*."""
        return [ord(c) for c in text]

    def decode(self, ids):
        """Join the characters for *ids*, dropping every id <= ``eos_token_id``."""
        return ''.join(chr(idx) for idx in ids if idx > self.eos_token_id)


class RWKV_TOKENIZER():
    """Byte-level tokenizer driven by a vocabulary file.

    Every vocabulary line has the form ``<idx> <python str/bytes literal>
    <byte length>``. Matching candidates are tried in reverse file order, so
    the file must already be ordered such that longer tokens come later.
    """

    # table[b0][b1]: multi-byte tokens starting with bytes b0, b1
    table: List[List[List[bytes]]]
    # good[b0]: second bytes for which table[b0][b1] is non-empty
    good: List[Set[int]]
    # wlen[b0]: length of the longest token starting with byte b0
    wlen: List[int]

    def __init__(self, file_name):
        """Load the vocabulary from *file_name* and build the lookup tables.

        Raises:
            AssertionError: if an entry is not str/bytes or its declared
                length does not match its encoded length.
            ValueError / SyntaxError: if a line cannot be parsed.
        """
        self.idx2token = {}
        tokens_in_order = []  # must be already sorted
        with open(file_name, "r", encoding="utf-8") as vocab_file:
            for line in vocab_file:
                if not line.strip():
                    continue  # tolerate blank lines (e.g. a trailing newline)
                idx_text, _, rest = line.partition(' ')
                literal, _, length_text = rest.rpartition(' ')
                idx = int(idx_text)
                # literal_eval only accepts Python literals, so a malformed or
                # hostile vocabulary file cannot execute arbitrary code.
                token = ast.literal_eval(literal.strip())
                if isinstance(token, str):
                    token = token.encode("utf-8")
                assert isinstance(token, bytes), f"token {idx} is not str/bytes"
                assert len(token) == int(length_text), f"token {idx} has wrong length"
                tokens_in_order.append(token)
                self.idx2token[idx] = token

        self.token2idx = {token: int(idx) for idx, token in self.idx2token.items()}

        # precompute some tables for fast matching
        self.table = [[[] for _ in range(256)] for _ in range(256)]
        self.good = [set() for _ in range(256)]
        self.wlen = [0] * 256

        # reverse order - match longer tokens first
        for token in reversed(tokens_in_order):
            if len(token) < 2:
                continue
            s0, s1 = token[0], token[1]
            self.table[s0][s1].append(token)
            self.wlen[s0] = max(self.wlen[s0], len(token))
            self.good[s0].add(s1)

    def encodeBytes(self, src: bytes) -> List[int]:
        """Tokenize *src*, preferring the first multi-byte candidate that matches.

        Raises:
            KeyError: if a byte that has no multi-byte match is not itself a
                vocabulary token.
        """
        src_len: int = len(src)
        tokens: List[int] = []
        i: int = 0
        while i < src_len:
            s: bytes = src[i : i + 1]  # single-byte fallback

            if i < src_len - 1:
                s0: int = src[i]
                s1: int = src[i + 1]
                if s1 in self.good[s0]:
                    window: bytes = src[i : i + self.wlen[s0]]
                    # keep the single byte when no candidate is a prefix
                    s = next(filter(window.startswith, self.table[s0][s1]), s)
            tokens.append(self.token2idx[s])
            i += len(s)

        return tokens

    def decodeBytes(self, tokens):
        """Concatenate the byte strings of *tokens*."""
        return b''.join(self.idx2token[i] for i in tokens)

    def encode(self, src: str):
        """Tokenize the UTF-8 encoding of *src*."""
        return self.encodeBytes(src.encode("utf-8"))

    def decode(self, tokens):
        """Decode *tokens* to text; raises ``UnicodeDecodeError`` on invalid UTF-8."""
        return self.decodeBytes(tokens).decode('utf-8')

    def printTokens(self, tokens):
        """Print ``repr(token)`` immediately followed by its id for each token."""
        for i in tokens:
            s = self.idx2token[i]
            try:
                s = s.decode('utf-8')
            except UnicodeDecodeError:
                pass  # not valid UTF-8 on its own: show the raw bytes
            print(f'{repr(s)}{i}', end=' ')
        print()
import json

# Supported SoCs. The "dsp_arch" and "soc_id" values are copied verbatim into
# the "devices" section written by dump_htp_config().
htp_devices = {
    "SM8750": {
        "dsp_arch": "v79",
        "soc_id": 69,
    },
    "SM8650": {
        "dsp_arch": "v75",
        "soc_id": 57,
    },
    "SM8550": {
        "dsp_arch": "v73",
        "soc_id": 43,
    },
    "SC8380": {
        "dsp_arch": "v73",
        "soc_id": 60,
    },
    "SM8475": {
        "dsp_arch": "v69",
        "soc_id": 42,
    },
    "SM8635": {
        "dsp_arch": "v73",
        "soc_id": 68,
    },
    "SM7325": {
        "dsp_arch": "v68",
        "soc_id": 35,
    }
}

def dump_htp_config(soc_name: str, graph_names: list, output_path: str, old_qnn = False, weights_sharing=False):
    """Write the HTP backend configuration JSON for *soc_name* to *output_path*.

    Args:
        soc_name: key of ``htp_devices``.
        graph_names: non-empty sequence of graph names. Each name has "lib"
            removed and "-" replaced by "_" before being written. The
            caller's sequence is left unmodified.
        output_path: destination file, opened for writing.
        old_qnn: when true, "graphs" is written as a single object; otherwise
            it is wrapped in a one-element list.
        weights_sharing: when true, adds
            ``"context": {"weight_sharing_enabled": True}``.

    Raises:
        ValueError: if *soc_name* is unknown or *graph_names* is empty/None.
    """
    if soc_name not in htp_devices:
        raise ValueError(f"Invalid SoC name: {soc_name}")
    if not graph_names:
        raise ValueError("Invalid graph names")

    # Build a new list instead of rewriting the caller's list in place.
    # NOTE(review): replace("lib", "") removes every "lib" substring, not only
    # a leading prefix — confirm that is intended for names that contain "lib"
    # elsewhere.
    normalized_names = [
        name.replace("lib", "").replace("-", "_") for name in graph_names
    ]
    device = htp_devices[soc_name]

    config = {
        "graphs": {
            "vtcm_mb": 0,
            "O": 3,
            "graph_names": normalized_names,
        },
        "devices": [{
            "dsp_arch": device["dsp_arch"],
            "device_id": 0,
            "soc_id": device["soc_id"],
            "pd_session": "unsigned",
            "cores": [{
                "core_id": 0,
                "perf_profile": "burst"
            }]
        }],
        "memory": {
            "mem_type": "shared_buffer"
        }
    }

    # Every SoC except these two gets relaxed fp16 precision enabled.
    if soc_name not in ("SM8635", "SM7325"):
        config["graphs"]["fp16_relaxed_precision"] = 1

    if not old_qnn:
        config["graphs"] = [config["graphs"]]

    if weights_sharing:
        config["context"] = {"weight_sharing_enabled": True}

    with open(output_path, "w") as f:
        json.dump(config, f, indent=4)

def dump_htp_link_config(output_path: str, qnn_sdk_root_path: str):
    """Write the backend-extensions link JSON to *output_path*.

    The referenced config file path is *output_path* with "link.json"
    replaced by "config.json"; the extensions library path is built under
    ``<qnn_sdk_root_path>/lib/x86_64-linux-clang``.
    """
    config_file_path = output_path.replace("link.json", "config.json")
    link = {
        "backend_extensions":
        {
            "shared_library_path": f"{qnn_sdk_root_path}/lib/x86_64-linux-clang/libQnnHtpNetRunExtensions.so",
            "config_file_path": config_file_path
        }
    }
    with open(output_path, "w") as f:
        json.dump(link, f, indent=4)