├── .gitattributes
├── .gitignore
├── .gitmodules
├── README.md
├── archive
├── compute_linear_param_encodings.py
├── convert_model.py
├── export_quantized_model.py
└── make_calibration_samples.py
├── assets
├── b_rwkv_vocab_v20230424.txt
├── lambada_test.txt
├── mmlu_dev_dataset
│ ├── data-00000-of-00001.arrow
│ ├── dataset_info.json
│ └── state.json
├── mmlu_test_dataset.json
├── mmlu_test_dataset
│ ├── data-00000-of-00001.arrow
│ ├── dataset_info.json
│ └── state.json
├── rwkv_vocab_v20230424.txt
└── rwkv_vocab_v20230424_tts.txt
├── build_hexagon_wkv_kernel.sh
├── compute_quant_encodings_experimental.py
├── convert_model.py
├── convert_model_dlc.py
├── convert_vocab.py
├── docs
├── optrace.md
└── xelite_npu_rwkv.png
├── hexagon
├── CPU
│ └── RwkvWkvOpPackage
│ │ ├── Makefile
│ │ ├── config
│ │ └── RwkvWkvOpPackageCPU.xml
│ │ ├── makefiles
│ │ ├── Android.mk
│ │ ├── Application.mk
│ │ └── Makefile.linux-x86_64
│ │ └── src
│ │ ├── CpuCustomOpPackage.cpp
│ │ ├── RwkvWkvOpPackageInterface.cpp
│ │ ├── ops
│ │ ├── wkv6.cpp
│ │ ├── wkv7_output.cpp
│ │ └── wkv7_state.cpp
│ │ └── utils
│ │ ├── BackendUtils.hpp
│ │ ├── CPU
│ │ ├── CpuBackendUtils.cpp
│ │ └── CpuBackendUtils.hpp
│ │ └── CustomOpUtils.hpp
├── HTP
│ ├── RwkvWkvOpPackage
│ │ ├── Makefile
│ │ ├── config
│ │ │ └── RwkvWkvOpPackageHTP.xml
│ │ └── src
│ │ │ ├── RwkvWkvOpPackageInterface.cpp
│ │ │ └── ops
│ │ │ ├── wkv6.cpp
│ │ │ ├── wkv7.cpp.old
│ │ │ ├── wkv7_output.cpp
│ │ │ └── wkv7_state.cpp
│ └── prebuilt
│ │ ├── libQnnRwkvWkvOpPackageV68.so
│ │ ├── libQnnRwkvWkvOpPackageV69.so
│ │ ├── libQnnRwkvWkvOpPackageV73.so
│ │ ├── libQnnRwkvWkvOpPackageV75.so
│ │ └── libQnnRwkvWkvOpPackageV79.so
└── test
│ ├── test_qnn_wkv_kernel.py
│ └── wkv_custom.py
├── librwkv-qualcomm
├── CMakeLists.txt
├── Makefile
├── make
│ ├── Android-demo.mk
│ ├── Android-eval.mk
│ ├── Android-mmlu.mk
│ ├── Android.mk
│ ├── Application.mk
│ ├── Makefile.linux-x86_64
│ ├── Makefile.oe-linux-aarch64-gcc11.2
│ ├── Makefile.oe-linux-aarch64-gcc8.2
│ ├── Makefile.oe-linux-aarch64-gcc9.3
│ └── Makefile.ubuntu-aarch64-gcc9.4
└── src
│ ├── CMakeLists.txt
│ ├── Interfaces.hpp
│ ├── Log
│ ├── LogUtils.cpp
│ ├── LogUtils.hpp
│ ├── Logger.cpp
│ └── Logger.hpp
│ ├── PAL
│ ├── include
│ │ └── PAL
│ │ │ ├── Debug.hpp
│ │ │ ├── Directory.hpp
│ │ │ ├── DynamicLoading.hpp
│ │ │ ├── FileOp.hpp
│ │ │ ├── Path.hpp
│ │ │ └── StringOp.hpp
│ └── src
│ │ ├── common
│ │ └── StringOp.cpp
│ │ ├── linux
│ │ ├── Directory.cpp
│ │ ├── DynamicLoading.cpp
│ │ ├── FileOp.cpp
│ │ └── Path.cpp
│ │ └── windows
│ │ ├── Common.cpp
│ │ ├── Common.hpp
│ │ ├── Directory.cpp
│ │ ├── DynamicLoading.cpp
│ │ ├── FileOp.cpp
│ │ └── Path.cpp
│ ├── QnnTypeDef.hpp
│ ├── QnnTypeMacros.hpp
│ ├── Utils
│ ├── BuildId.hpp
│ ├── ClientBuffer.cpp
│ ├── ClientBuffer.hpp
│ ├── DataUtil.cpp
│ ├── DataUtil.hpp
│ ├── DmaBufAllocator.cpp
│ ├── DmaBufAllocator.hpp
│ ├── DynamicLoadUtil.cpp
│ ├── DynamicLoadUtil.hpp
│ ├── IBufferAlloc.hpp
│ ├── IOTensor.cpp
│ ├── IOTensor.hpp
│ ├── RpcMem.cpp
│ ├── RpcMem.hpp
│ ├── Utils.cpp
│ ├── Utils.hpp
│ ├── dlwrap.cpp
│ └── dlwrap.hpp
│ ├── WrapperUtils
│ ├── QnnWrapperUtils.cpp
│ └── QnnWrapperUtils.hpp
│ ├── eval_text.cpp
│ ├── half.hpp
│ ├── json.hpp
│ ├── librwkv-qualcomm-app.cpp
│ ├── librwkv-qualcomm-app.hpp
│ ├── librwkv-qualcomm.cpp
│ ├── librwkv-qualcomm.h
│ ├── main.cpp
│ ├── mmlu.cpp
│ ├── soc_detect.cpp
│ ├── soc_detect.h
│ ├── tokenizer.cpp
│ ├── tokenizer.h
│ └── trie.hpp
├── make_context_cache_binary.py
├── make_context_cache_binary_dlc.py
├── quant_encodings
└── README.md
├── quantizers
├── advanced_ptq
│ └── actmse_quantizer.py
├── base_quantizer.py
├── configs
│ ├── backend_aware_htp_quantsim_config_v75.json
│ ├── default_per_channel_config.json
│ ├── htp_quantsim_config_v75.json
│ ├── htp_quantsim_config_v75_per_channel.json
│ ├── qsim_config_per_channel_with_exceptions.json
│ ├── rwkv_activation_exceptions.json
│ └── rwkv_gptq_exceptions.json
└── exceptions.py
├── quantsim_eval_lambada.py
├── quantsim_eval_mmlu.py
├── rwkv_src
├── elemwise_ops.py
├── rwkv_model.py
├── rwkv_tokenizer.py
├── rwkv_v5_modules.py
├── rwkv_v6_modules.py
├── rwkv_v7_modules.py
├── rwkv_v7_modules_conv.py
└── wkv_custom.py
└── utils
├── dataset_builder.py
├── htp_devices_config.py
├── indexed_dataset.py
├── model_preparer.py
├── model_utils.py
└── split_onnx.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.encodings filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | onnx/
2 | libs/
3 | lib/
4 | build/
5 | output/
6 | .pkl_memoize_py3/
7 | gmon.out
8 | qacc_temp/
9 | obj/
10 | bin/
11 | *.pyc
12 | __pycache__
13 | .vscode/
14 | tmp/
15 | samples*/
16 | input_list*
17 | dataset_cache/
18 | quant_export/
19 | trace_output/
20 | test_wkv*
21 | test_data*
22 | QNN/
23 | v7_*_quant/
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/.gitmodules
--------------------------------------------------------------------------------
/archive/compute_linear_param_encodings.py:
--------------------------------------------------------------------------------
1 | # from rwkv_src.modeling_rwkv6 import Rwkv6ForCausalLM
2 | from rwkv_src.rwkv_model import RWKV_RNN
3 | from transformers import AutoConfig, AutoTokenizer
4 | import types
5 | import torch
6 | import torch.nn as nn
7 | from transformers.tokenization_utils_base import BatchEncoding
8 |
9 | from utils.model_utils import get_dummy_input_for_rwkv_causal_llm
10 | from quantizers.advanced_ptq.actmse_quantizer import ActMSEQuantizer
11 | from utils.dataset_builder import DatasetBuilder
12 |
13 | import argparse
14 | from pathlib import Path
15 |
16 | parser = argparse.ArgumentParser(description='Compute param encodings for linear modules')
17 | parser.add_argument('model', type=Path, help='Path to RWKV pth file')
18 | parser.add_argument('--weights_bitwidth', type=int, default=4, help='Weights bitwidth')
19 | parser.add_argument('--use_cuda', action='store_true', default=True, help='Use CUDA')
20 | parser.add_argument('--strategy', type=str, choices=['symqt', 'symfp', 'asym'], default='asym', help='Quantization strategy')
21 | args_parser = parser.parse_args()
22 |
23 | args = types.SimpleNamespace()
24 | ##############################
25 | args.quant_scheme = "tf"
26 | args.activation_bit_width = 32
27 | args.parameter_bit_width = args_parser.weights_bitwidth
28 | args.in_place_quantsim = False
29 | args.config_file = "quantizers/configs/default_per_channel_config.json"
30 | args.num_cands = 20
31 | args.export_dir = "quant_export"
32 | args.output_dir = "quant_export"
33 | args.model_name = str(args_parser.model).replace(".pth", "").split("/")[-1]
34 | args.input_symmetry = args_parser.strategy
35 | args.exceptions_file = "quantizers/configs/rwkv_gptq_exceptions.json"
36 | args.act_mse_loss_type = "mse"
37 | args.parameter_encoding_file = None
38 | args.encoding_path = None
39 | args.do_actmse = True
40 | args.disable_act_quantizers = True
41 | args.fp16 = False
42 | args.do_train = False
43 | args.clip_activation = None
44 | args.load_sim_checkpoint = False
45 | args.save_sim_checkpoint = False
46 | ##############################
47 | args.calib_dataset_name = "wikitext"
48 | args.calib_dataset_config_name = "wikitext-2-raw-v1"
49 | args.dataset_cache_dir = "./dataset_cache"
50 | args.calib_dataset_split = None
51 | args.calib_dataset_preprocessor = "gpt2"
52 | args.eval_dataset_name = "wikitext"
53 | args.eval_dataset_config_name = "wikitext-103-raw-v1"
54 | args.eval_dataset_split = "test"
55 | args.eval_dataset_preprocessor = "gptq"
56 | args.num_calibration_batches = 20
57 | args.per_device_calib_batch_size = 1
58 | args.per_device_eval_batch_size = 1
59 | args.block_size = 1024
60 | args.seed = 1234
61 | ##############################
62 |
63 | device = torch.device("cuda") if args_parser.use_cuda and torch.cuda.is_available() else torch.device("cpu")
64 | args.device = device
65 |
66 | model_args = types.SimpleNamespace()
67 | model_args.USE_CUDA = args_parser.use_cuda
68 | model_args.fp16 = False
69 | model_args.wkv_customop = False
70 | model_args.USE_EMBEDDING = True
71 | model_args.MODEL_NAME = str(args_parser.model)
72 | model_args.RESCALE_LAYER = 0
73 | model_args.eos_token_id = 0
74 | model = RWKV_RNN(model_args)
75 |
76 | tokenizer = AutoTokenizer.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True)
77 | tokenizer.model_max_length = 1024
78 |
79 | dummy_input = get_dummy_input_for_rwkv_causal_llm(1, 1, device, model_cfg=model.args)
80 |
81 | dataset_builder = DatasetBuilder(args)
82 | dataset_builder.make_dataset(tokenizer=tokenizer, args=args, column_name="text", shuffle=True)
83 |
84 | quantizer = ActMSEQuantizer(model, args, model.args)
85 | quantizer.orig_model = model
86 | quantizer.prepare_quantsim(dummy_input, args, dataset_builder.train_dataloader, tokenizer)
87 |
--------------------------------------------------------------------------------
/archive/make_calibration_samples.py:
--------------------------------------------------------------------------------
1 | from rwkv_src.rwkv_tokenizer import RWKV_TOKENIZER
2 | from rwkv_src.rwkv_model import RWKV_RNN, make_chunks, run_prompt
3 | import types
4 | import os, sys
5 | import torch
6 | import argparse
7 | from pathlib import Path
8 |
9 | from torchvision import datasets
10 | from datasets import load_dataset
11 |
12 | def main():
13 | parser = argparse.ArgumentParser(description='Make calibration sample files')
14 | parser.add_argument('model', type=Path, help='Path to RWKV pth file')
15 | parser.add_argument('output', type=Path, help='Path to output folder')
16 | parser.add_argument('chunks', type=int, help='Number of chunks')
17 | parser.add_argument('--ext_embedding', action='store_true', default=False, help='Use external embedding')
18 | parser.add_argument('--prefill', action='store_true', default=False, help='Prefill model')
19 | args = parser.parse_args()
20 |
21 | seq_length = 32 if args.prefill else 1
22 |
23 | model_args = types.SimpleNamespace()
24 | model_args.USE_CUDA = torch.cuda.is_available()
25 | model_args.fp16 = False
26 | model_args.USE_EMBEDDING = False if args.ext_embedding else True
27 | model_args.RESCALE_LAYER = 0
28 | model_args.wkv_customop = False
29 |
30 | model_args.MODEL_NAME = str(args.model)
31 |
32 | tokenizer = RWKV_TOKENIZER("./assets/rwkv_vocab_v20230424.txt")
33 |
34 | model = make_chunks(args.chunks, model_args) if args.chunks > 1 else RWKV_RNN(model_args)
35 |
36 | dataset = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')
37 | print("dataset len:", len(dataset['text']))
38 | for i in range(20):
39 | run_prompt(model, dataset['text'][i], tokenizer=tokenizer, length=0, seq_length=seq_length, generate_samples=True, samples_output=str(args.output))
40 |
41 | if __name__ == '__main__':
42 | main()
--------------------------------------------------------------------------------
/assets/mmlu_dev_dataset/data-00000-of-00001.arrow:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/assets/mmlu_dev_dataset/data-00000-of-00001.arrow
--------------------------------------------------------------------------------
/assets/mmlu_dev_dataset/dataset_info.json:
--------------------------------------------------------------------------------
1 | {
2 | "builder_name": "parquet",
3 | "citation": "",
4 | "config_name": "all",
5 | "dataset_name": "mmlu",
6 | "dataset_size": 168871380,
7 | "description": "",
8 | "download_checksums": {
9 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/test-00000-of-00001.parquet": {
10 | "num_bytes": 3504718,
11 | "checksum": null
12 | },
13 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/validation-00000-of-00001.parquet": {
14 | "num_bytes": 408449,
15 | "checksum": null
16 | },
17 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/dev-00000-of-00001.parquet": {
18 | "num_bytes": 76504,
19 | "checksum": null
20 | },
21 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/auxiliary_train-00000-of-00001.parquet": {
22 | "num_bytes": 47513731,
23 | "checksum": null
24 | }
25 | },
26 | "download_size": 51503402,
27 | "features": {
28 | "question": {
29 | "dtype": "string",
30 | "_type": "Value"
31 | },
32 | "subject": {
33 | "dtype": "string",
34 | "_type": "Value"
35 | },
36 | "choices": {
37 | "feature": {
38 | "dtype": "string",
39 | "_type": "Value"
40 | },
41 | "_type": "Sequence"
42 | },
43 | "answer": {
44 | "names": [
45 | "A",
46 | "B",
47 | "C",
48 | "D"
49 | ],
50 | "_type": "ClassLabel"
51 | }
52 | },
53 | "homepage": "",
54 | "license": "",
55 | "size_in_bytes": 220374782,
56 | "splits": {
57 | "test": {
58 | "name": "test",
59 | "num_bytes": 6969209,
60 | "num_examples": 14042,
61 | "dataset_name": "mmlu"
62 | },
63 | "validation": {
64 | "name": "validation",
65 | "num_bytes": 763676,
66 | "num_examples": 1531,
67 | "dataset_name": "mmlu"
68 | },
69 | "dev": {
70 | "name": "dev",
71 | "num_bytes": 125389,
72 | "num_examples": 285,
73 | "dataset_name": "mmlu"
74 | },
75 | "auxiliary_train": {
76 | "name": "auxiliary_train",
77 | "num_bytes": 161013106,
78 | "num_examples": 99842,
79 | "dataset_name": "mmlu"
80 | }
81 | },
82 | "version": {
83 | "version_str": "0.0.0",
84 | "major": 0,
85 | "minor": 0,
86 | "patch": 0
87 | }
88 | }
--------------------------------------------------------------------------------
/assets/mmlu_dev_dataset/state.json:
--------------------------------------------------------------------------------
1 | {
2 | "_data_files": [
3 | {
4 | "filename": "data-00000-of-00001.arrow"
5 | }
6 | ],
7 | "_fingerprint": "ca7a71e4c243f30b",
8 | "_format_columns": null,
9 | "_format_kwargs": {},
10 | "_format_type": null,
11 | "_output_all_columns": false,
12 | "_split": "dev"
13 | }
--------------------------------------------------------------------------------
/assets/mmlu_test_dataset/data-00000-of-00001.arrow:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/assets/mmlu_test_dataset/data-00000-of-00001.arrow
--------------------------------------------------------------------------------
/assets/mmlu_test_dataset/dataset_info.json:
--------------------------------------------------------------------------------
1 | {
2 | "builder_name": "parquet",
3 | "citation": "",
4 | "config_name": "all",
5 | "dataset_name": "mmlu",
6 | "dataset_size": 168871380,
7 | "description": "",
8 | "download_checksums": {
9 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/test-00000-of-00001.parquet": {
10 | "num_bytes": 3504718,
11 | "checksum": null
12 | },
13 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/validation-00000-of-00001.parquet": {
14 | "num_bytes": 408449,
15 | "checksum": null
16 | },
17 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/dev-00000-of-00001.parquet": {
18 | "num_bytes": 76504,
19 | "checksum": null
20 | },
21 | "hf://datasets/cais/mmlu@c30699e8356da336a370243923dbaf21066bb9fe/all/auxiliary_train-00000-of-00001.parquet": {
22 | "num_bytes": 47513731,
23 | "checksum": null
24 | }
25 | },
26 | "download_size": 51503402,
27 | "features": {
28 | "question": {
29 | "dtype": "string",
30 | "_type": "Value"
31 | },
32 | "subject": {
33 | "dtype": "string",
34 | "_type": "Value"
35 | },
36 | "choices": {
37 | "feature": {
38 | "dtype": "string",
39 | "_type": "Value"
40 | },
41 | "_type": "Sequence"
42 | },
43 | "answer": {
44 | "names": [
45 | "A",
46 | "B",
47 | "C",
48 | "D"
49 | ],
50 | "_type": "ClassLabel"
51 | }
52 | },
53 | "homepage": "",
54 | "license": "",
55 | "size_in_bytes": 220374782,
56 | "splits": {
57 | "test": {
58 | "name": "test",
59 | "num_bytes": 6969209,
60 | "num_examples": 14042,
61 | "dataset_name": "mmlu"
62 | },
63 | "validation": {
64 | "name": "validation",
65 | "num_bytes": 763676,
66 | "num_examples": 1531,
67 | "dataset_name": "mmlu"
68 | },
69 | "dev": {
70 | "name": "dev",
71 | "num_bytes": 125389,
72 | "num_examples": 285,
73 | "dataset_name": "mmlu"
74 | },
75 | "auxiliary_train": {
76 | "name": "auxiliary_train",
77 | "num_bytes": 161013106,
78 | "num_examples": 99842,
79 | "dataset_name": "mmlu"
80 | }
81 | },
82 | "version": {
83 | "version_str": "0.0.0",
84 | "major": 0,
85 | "minor": 0,
86 | "patch": 0
87 | }
88 | }
--------------------------------------------------------------------------------
/assets/mmlu_test_dataset/state.json:
--------------------------------------------------------------------------------
1 | {
2 | "_data_files": [
3 | {
4 | "filename": "data-00000-of-00001.arrow"
5 | }
6 | ],
7 | "_fingerprint": "436299c1c09696bb",
8 | "_format_columns": null,
9 | "_format_kwargs": {},
10 | "_format_type": null,
11 | "_output_all_columns": false,
12 | "_split": "test"
13 | }
--------------------------------------------------------------------------------
/build_hexagon_wkv_kernel.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | rm -rf hexagon/HTP/RwkvWkvOpPackage/build
4 | make -C hexagon/HTP/RwkvWkvOpPackage/ htp_x86 htp_v68 htp_v69 htp_v73 htp_v75 htp_v79 -j4
5 |
6 | make -C hexagon/CPU/RwkvWkvOpPackage/ -j4
7 |
8 | rm -rf hexagon/HTP/prebuilt
9 | mkdir -p hexagon/HTP/prebuilt
10 |
11 | cp hexagon/HTP/RwkvWkvOpPackage/build/hexagon-v68/libQnnRwkvWkvOpPackage.so hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV68.so
12 | cp hexagon/HTP/RwkvWkvOpPackage/build/hexagon-v69/libQnnRwkvWkvOpPackage.so hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV69.so
13 | cp hexagon/HTP/RwkvWkvOpPackage/build/hexagon-v73/libQnnRwkvWkvOpPackage.so hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV73.so
14 | cp hexagon/HTP/RwkvWkvOpPackage/build/hexagon-v75/libQnnRwkvWkvOpPackage.so hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV75.so
15 | cp hexagon/HTP/RwkvWkvOpPackage/build/hexagon-v79/libQnnRwkvWkvOpPackage.so hexagon/HTP/prebuilt/libQnnRwkvWkvOpPackageV79.so
--------------------------------------------------------------------------------
/convert_vocab.py:
--------------------------------------------------------------------------------
1 | import sys, ast
2 |
3 | vocab_file = sys.argv[1]
4 | vocab = None
5 | with open(vocab_file, 'r') as f:
6 | vocab = f.readlines()
7 |
8 | vocab_new = []
9 | for line in vocab:
10 | parts = line.split(' ')
11 | assert len(parts) >= 3
12 | idx, token, token_len = int(parts[0]), ast.literal_eval(' '.join(parts[1:-1])), int(parts[-1])
13 | token = token.encode("utf-8") if isinstance(token, str) else token
14 | token_raw = "b'"
15 | for byte in token:
16 | token_raw += '\\x' + hex(byte)[2:].zfill(2)
17 | token_raw += "'"
18 | vocab_new.append(f"{idx} {token_raw} {token_len}\n")
19 |
20 | with open("b_" + vocab_file, 'w') as f:
21 | f.writelines(vocab_new)
--------------------------------------------------------------------------------
/docs/optrace.md:
--------------------------------------------------------------------------------
1 | ```
2 | rm -rf trace_output
3 | ./qnn-net-run --profiling_level detailed --profiling_option optrace --output_data_type float_and_native --retrieve_context RWKV-x070-World-1.5B-v3-20250127-ctx4096.bin --backend libQnnHtp.so --input_list ./input_list.txt --output_dir ./trace_output --log_level info --perf_profile burst --io_tensor_mem_handle_type=ion
4 | # or with customop:
5 | ./qnn-net-run --profiling_level detailed --profiling_option optrace --output_data_type float_and_native --retrieve_context RWKV-x070-World-1.5B-v3-20250127-ctx4096.bin --backend libQnnHtp.so --input_list ./input_list.txt --output_dir ./trace_output --log_level info --perf_profile burst --io_tensor_mem_handle_type=ion --op_packages libQnnRwkvWkvOpPackage.so:RwkvWkvOpPackageInterfaceProvider
6 | ```
7 |
8 | ```
9 | adb pull /data/local/tmp/rwkv/trace_output
10 | qnn-profile-viewer --reader $QNN_SDK_ROOT/lib/x86_64-linux-clang/libQnnHtpOptraceProfilingReader.so --input_log ./trace_output/qnn-profiling-data_0.log --schematic ./RWKV-x070-World-1.5B-v3-20250127-ctx4096_schematic.bin --output ./chrometrace.json
11 | ```
--------------------------------------------------------------------------------
/docs/xelite_npu_rwkv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MollySophia/rwkv-qualcomm/fd403b7c9f3b6c4ac5a810f334a0a51c8693fb42/docs/xelite_npu_rwkv.png
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
3 | # All rights reserved.
4 | # Confidential and Proprietary - Qualcomm Technologies, Inc.
5 | #
6 |
7 | # define default
8 | default: all
9 |
10 | # define package name
11 | export PACKAGE_NAME := $(notdir $(shell pwd))
12 |
13 | # define library prerequisites list
14 | lib_cpu := src
15 | make_dir := makefiles
14 | LIB_SOURCES = $(lib_cpu)
17 |
18 | # define target_architecture
19 | export TARGET_AARCH_VARS:= -march=x86-64
20 |
21 | # define target name
22 | export TARGET = linux-x86_64
23 |
24 | # specify compiler
25 | export CXX ?= clang++-9
26 |
27 | # define default Android ABI
28 | PLATFORM ?= arm64-v8a
29 |
30 | .PHONY: all $(LIB_SOURCES) all_android all_x86 cpu cpu_x86 cpu_android
31 | all: $(LIB_SOURCES) all_x86 all_android
32 |
33 | # Combined Targets
34 | cpu: cpu_x86 cpu_android
35 | clean: clean_x86 clean_android clean_qnx clean_qos
36 |
37 | # x86 Targets
38 | all_x86: cpu_x86
39 |
40 | cpu_x86:
41 | $(call build_if_exists,$(lib_cpu),-$(MAKE) -f $(make_dir)/Makefile.linux-x86_64)
42 |
43 | clean_x86:
44 | @rm -rf libs obj
45 |
46 | # qnx Targets
47 | all_qnx: cpu_qnx cpu_qos
48 |
49 | cpu_qnx: check_qnx
50 | $(call build_if_exists,$(lib_cpu),-$(MAKE) -f $(make_dir)/Makefile.qnx-aarch64)
51 |
52 | clean_qnx:
53 | @rm -rf libs obj
54 |
55 | cpu_qos: check_qnx
56 | $(call build_if_exists,$(lib_cpu),-$(MAKE) -f $(make_dir)/Makefile.qos224-aarch64)
57 |
58 | clean_qos:
59 | @rm -rf libs obj
60 |
61 | # Android Targets
62 |
63 | all_android: cpu_android
64 |
65 | cpu_android: cpu_aarch64-android
66 |
67 | cpu_aarch64-android: check_ndk clean_aarch64-android
68 | $(call build_if_exists,$(lib_cpu),$(ANDROID_NDK_ROOT)/ndk-build APP_ALLOW_MISSING_DEPS=true APP_ABI="arm64-v8a" NDK_PROJECT_PATH=./ NDK_APPLICATION_MK=$(make_dir)/Application.mk APP_BUILD_SCRIPT=$(make_dir)/Android.mk)
69 | @$(rename_target_dirs)
70 |
71 | clean_android: check_ndk clean_aarch64-android
72 |
73 | clean_aarch64-android:
74 | @rm -rf libs/aarch64-android
75 | @rm -rf obj/local/aarch64-android
76 |
77 | # utilities
78 | # Syntax: $(call build_if_exists,<src_dir>,<cmd>)
79 | build_if_exists = $(if $(wildcard $(1)),$(2),$(warning WARNING: $(1) does not exist. Skipping Compilation))
80 | rename_target_dirs = find . -type d -execdir rename 's/arm64-v8a/aarch64-android/' '{}' \+ \
81 |
82 | check_ndk:
83 | ifeq ($(ANDROID_NDK_ROOT),)
84 | $(error ERROR: ANDROID_NDK_ROOT not set, skipping compilation for Android platform(s).)
85 | endif
86 |
87 | check_qnx:
88 | ifeq ($(QNX_HOST),)
89 | $(error ERROR: QNX_HOST not set, skipping compilation for QNX platform.)
90 | endif
91 | ifeq ($(QNX_TARGET),)
92 | $(error ERROR: QNX_TARGET not set, skipping compilation for QNX platform.)
93 | endif
94 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/makefiles/Android.mk:
--------------------------------------------------------------------------------
1 | # ==============================================================================
2 | #
3 | # Copyright (c) 2020, 2023-2024 Qualcomm Technologies, Inc.
4 | # All Rights Reserved.
5 | # Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | #
7 | # ===============================================================
8 |
9 | LOCAL_PATH := $(call my-dir)
10 | SUPPORTED_TARGET_ABI := arm64-v8a x86 x86_64
11 |
12 | #============================ Verify Target Info and Application Variables =========================================
13 | ifneq ($(filter $(TARGET_ARCH_ABI),$(SUPPORTED_TARGET_ABI)),)
14 | ifneq ($(APP_STL), c++_shared)
15 | $(error Unsupported APP_STL: "$(APP_STL)")
16 | endif
17 | else
18 | $(error Unsupported TARGET_ARCH_ABI: '$(TARGET_ARCH_ABI)')
19 | endif
20 |
21 | #============================ Define Common Variables ===============================================================
22 | # Include paths
23 | UTIL_SRC_DIR := $(LOCAL_PATH)/../src/utils
24 | # QNN_SDK_ROOT should be set and points to the SDK path, it will be used.
25 | ifdef QNN_SDK_ROOT
26 | # define directories
27 | CUSTOM_OP_DIR :=$(QNN_SDK_ROOT)/share/QNN/OpPackageGenerator/CustomOp
28 |
29 | # setup include paths
30 | PACKAGE_C_INCLUDES += -I $(QNN_SDK_ROOT)/include/QNN -I $(QNN_SDK_ROOT)/include/QNN/CPU -I $(LOCAL_PATH)/../include/ -I $(UTIL_SRC_DIR) -I $(UTIL_SRC_DIR)/CPU -I $(CUSTOM_OP_DIR)
31 | # copy source files from SDK if not present
32 | $(info Copying custom op source files from SDK)
33 | COPYFILES := $(shell find $(CUSTOM_OP_DIR)/CPU -name "*.cpp" -exec cp -rf {} $(LOCAL_PATH)/../src 2>/dev/null \;)
34 | else
35 | $(error QNN_SDK_ROOT: Please set QNN_SDK_ROOT)
36 | endif
37 |
38 | #========================== Define OpPackage Library Build Variables =============================================
39 | include $(CLEAR_VARS)
40 | LOCAL_C_INCLUDES := $(PACKAGE_C_INCLUDES)
41 | MY_SRC_FILES = $(wildcard $(LOCAL_PATH)/../src/*.cpp) $(wildcard $(LOCAL_PATH)/../src/utils/*.cpp) $(wildcard $(LOCAL_PATH)/../src/utils/CPU/*.cpp) $(wildcard $(LOCAL_PATH)/../src/ops/*.cpp)
42 | LOCAL_MODULE := RwkvWkvOpPackage
43 | LOCAL_SRC_FILES := $(subst makefiles/,,$(MY_SRC_FILES))
44 | LOCAL_LDLIBS := -lGLESv2 -lEGL
45 | include $(BUILD_SHARED_LIBRARY)
46 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/makefiles/Application.mk:
--------------------------------------------------------------------------------
1 | # ==============================================================================
2 | #
3 | # Copyright (c) 2020, 2023 Qualcomm Technologies, Inc.
4 | # All Rights Reserved.
5 | # Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | #
7 | # ===============================================================
8 |
9 | APP_ABI := arm64-v8a
10 | APP_STL := c++_shared
11 | APP_PLATFORM := android-21
12 | APP_CPPFLAGS += -std=c++11 -O3 -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))"
13 | APP_LDFLAGS += -lc -lm -ldl
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/makefiles/Makefile.linux-x86_64:
--------------------------------------------------------------------------------
1 | # ==============================================================================
2 | #
3 | # Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
4 | # All rights reserved.
5 | # Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | #
7 | # ==============================================================================
8 |
9 | # define relevant directories
10 | SRC_DIR := src
11 | SRC_DIR_OPS := src/ops
12 | SRC_DIR_UTILS := src/utils/CPU
13 |
14 | # Checking if clang++-9 is present. If not switch to clang++
15 | ifeq ($(shell $(CXX) -v 2>&1 | grep -c "clang version"), 0)
16 | CXX := clang++
17 | endif
18 |
19 | # define library name and corresponding directory
20 | QNN_TARGET ?= x86_64-linux-clang
21 | export LIB_DIR := ./libs/$(QNN_TARGET)
22 |
23 | ifdef PACKAGE_NAME
24 | library := $(LIB_DIR)/lib$(PACKAGE_NAME).so
25 | else
26 | library :=$(LIB_DIR)/libCpuCustomPackage.so
27 | endif
28 |
29 | # define target architecture if not previously defined, default is x86
30 | ifndef TARGET_AARCH_VARS
31 | TARGET_AARCH_VARS:= -march=x86-64
32 | endif
33 |
34 | # Include paths
35 | # QNN_SDK_ROOT should be set and points to the SDK path, it will be used.
36 | ifdef QNN_SDK_ROOT
37 | # setup custom op directory path
38 | CUSTOM_OP_DIR :=$(QNN_SDK_ROOT)/share/QNN/OpPackageGenerator/CustomOp
39 |
40 | # setup include paths
41 |
42 | INCLUDES += -I$(QNN_SDK_ROOT)/include/QNN -I include -I$(QNN_SDK_ROOT)/include/QNN/CPU -I $(CUSTOM_OP_DIR)
43 | INCLUDES += -I $(SRC_DIR)/utils -I $(SRC_DIR)/utils/CPU
44 |
45 | # copy source files from custom op directory
46 | $(info Copying custom op source files from SDK)
47 | COPYFILES := $(shell find $(CUSTOM_OP_DIR)/CPU -name "*.cpp" -exec cp -rf {} $(SRC_DIR) 2>/dev/null \;)
48 | else
49 | $(error QNN_SDK_ROOT: Please set QNN_SDK_ROOT)
50 | endif
51 |
52 | # set compiler flags
53 | COMMON_CXXFLAGS = -std=c++11 -fno-exceptions -fPIC -pg $(INCLUDES)
54 | COMMON_LDFLAGS = -shared -s -fPIC
55 |
56 | ifdef QNN_DEBUG_ENABLE
57 | CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O0 -g -DQNN_API=""
58 | LDFLAGS += $(COMMON_LDFLAGS)
59 | else
60 | CXXFLAGS += $(COMMON_CXXFLAGS) -march=x86-64 -O3 -Wno-write-strings -fvisibility=hidden -DQNN_API="__attribute__((visibility(\"default\")))"
61 | LDFLAGS += $(COMMON_LDFLAGS) -fvisibility=hidden -flto
62 | endif
63 |
64 | # define library sources
65 | SOURCES := $(wildcard $(SRC_DIR)/*.cpp)
66 | SOURCES_OPS := $(wildcard $(SRC_DIR_OPS)/*.cpp)
67 | SOURCE_UTILS := $(wildcard $(SRC_DIR_UTILS)/*.cpp)
68 |
69 | # define object directories
70 | OBJ_DIR := obj/$(QNN_TARGET)
71 | OBJ_DIR_OPS := obj/$(QNN_TARGET)/ops
72 | OBJ_DIR_UTILS := obj/$(QNN_TARGET)/utils
73 |
74 | # setup object files in object directory
75 | OBJECTS := $(patsubst %.cpp,$(OBJ_DIR)/%.o,$(foreach x,$(SOURCES),$(notdir $(x))))
76 | OBJECTS_OPS := $(patsubst %.cpp,$(OBJ_DIR_OPS)/%.o,$(foreach x,$(SOURCES_OPS),$(notdir $(x))))
77 | OBJECTS_UTILS := $(patsubst %.cpp,$(OBJ_DIR_UTILS)/%.o,$(foreach x,$(SOURCE_UTILS),$(notdir $(x))))
78 |
79 | # Rule to make library
80 | .PHONY: library
81 | library: $(library)
82 |
83 | # Implicit rule to compile and link object files
84 | $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp
85 | $(CXX) $(CXXFLAGS) -c $^ -o $@
86 |
87 | $(OBJ_DIR_UTILS)/%.o: $(SRC_DIR_UTILS)/%.cpp
88 | $(CXX) $(CXXFLAGS) -c $^ -o $@
89 |
90 | # set up resources
91 | directories := $(LIB_DIR) $(OBJ_DIR) $(OBJ_DIR_OPS) $(OBJ_DIR_UTILS)
92 |
93 | # Compile
94 | $(library): $(OBJECTS) $(OBJECTS_OPS) $(OBJECTS_UTILS) | $(directories)
95 | $(CXX) $(CXXFLAGS) $(LINKFLAGS) -shared $^ -o $@
96 |
97 | # rule for object directory resource
98 | $(OBJECTS): | $(OBJ_DIR) $(COPYFILES)
99 | $(OBJECTS_OPS): | $(OBJ_DIR_OPS)
100 | $(OBJECTS_UTILS): | $(OBJ_DIR_UTILS)
101 |
102 | # rule to create directories
103 | $(directories):
104 | mkdir -p $@
105 |
106 | .PHONY: clean
107 | clean:
108 | rm -rf $(OBJ_DIR) $(LIB_DIR)
109 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/CpuCustomOpPackage.cpp:
--------------------------------------------------------------------------------
1 | //=============================================================================
2 | //
3 | // Copyright (c) 2020-2022 Qualcomm Technologies, Inc.
4 | // All Rights Reserved.
5 | // Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | //
7 | //=============================================================================
8 |
9 | #include "CPU/QnnCpuOpPackage.h"
10 | #include "CustomBEMacros.hpp"
11 | #include "CustomOpPackage.hpp"
12 | #include "QnnSdkBuildId.h"
13 |
14 | using namespace qnn::custom;
15 | using namespace qnn::custom::utils;
16 |
17 | static Qnn_ErrorHandle_t QnnOpPackage_execute(void* opPkgNodeData) {
18 | auto opPkg = CustomOpPackage::getInstance();
19 | std::shared_ptr op;
20 |
21 | opPkg->getOpResolver()->getCustomOp((opHandle)opPkgNodeData, op);
22 | auto opRegistration = opPkg->getOpRegistration(op->m_typeName);
23 |
24 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_GENERAL);
25 | QNN_CUSTOM_BE_ENSURE_STATUS(opRegistration->execute(op.get()));
26 |
27 | return QNN_SUCCESS;
28 | }
29 |
30 | std::mutex CustomOpPackage::s_mtx;
31 | std::shared_ptr CustomOpPackage ::s_opPackageInstance;
32 | bool CustomOpPackage::s_isInitialized;
33 |
34 | Qnn_ErrorHandle_t CustomOpPackage::getPackageInfo(const QnnOpPackage_Info_t** info) {
35 | QNN_CUSTOM_BE_ENSURE(info, QNN_OP_PACKAGE_ERROR_INVALID_INFO)
36 |
37 | for (auto op : m_registered_ops) {
38 | m_operationNames.push_back(op.first.c_str());
39 | }
40 |
41 | m_sdkApiVersion = QNN_CPU_API_VERSION_INIT;
42 | m_packageInfo = QNN_OP_PACKAGE_INFO_INIT;
43 | m_packageInfo.packageName = m_packageName;
44 | m_packageInfo.operationNames = m_operationNames.data();
45 | m_packageInfo.numOperations = static_cast(m_operationNames.size());
46 | m_packageInfo.sdkBuildId = QNN_SDK_BUILD_ID;
47 | m_packageInfo.sdkApiVersion = &m_sdkApiVersion;
48 | *info = &m_packageInfo;
49 |
50 | return QNN_SUCCESS;
51 | }
52 |
53 | Qnn_ErrorHandle_t CustomOpPackage::createOpImpl(
54 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure,
55 | QnnOpPackage_Node_t node,
56 | QnnOpPackage_OpImpl_t* opImplPtr) {
57 | // initialize op resolver if not already set
58 | if (!m_opResolver) {
59 | m_opResolver.reset(new CustomOpResolver());
60 | }
61 | auto cpuNode = reinterpret_cast(node);
62 | auto customOp = std::shared_ptr(new CustomOp(cpuNode->name, cpuNode->typeName));
63 | const auto opRegistration = m_registered_ops[cpuNode->typeName];
64 |
65 | // Get op from op factory
66 | QNN_CUSTOM_BE_ENSURE_STATUS(
67 | opRegistration->initialize(node, graphInfrastructure, customOp.get()));
68 |
69 | // Update op reference
70 | auto opImpl = std::make_shared();
71 | opImpl->opImplFn = QnnOpPackage_execute;
72 | opImpl->userData = (void*)m_opResolver->registerCustomOp(std::move(customOp));
73 |
74 | // update out kernel param
75 | auto cpuImpl = reinterpret_cast(opImplPtr);
76 | *cpuImpl = opImpl.get();
77 |
78 | // update opImpl list
79 | m_OpImplList.emplace_back(opImpl);
80 |
81 | return QNN_SUCCESS;
82 | }
83 |
84 | Qnn_ErrorHandle_t CustomOpPackage::freeOpImpl(QnnOpPackage_OpImpl_t opImpl) {
85 | QNN_CUSTOM_BE_ENSURE(opImpl, QNN_OP_PACKAGE_ERROR_GENERAL);
86 |
87 | auto op = std::shared_ptr(new CustomOp());
88 |
89 | auto cpuOpImpl = reinterpret_cast(opImpl);
90 | m_opResolver->getCustomOp((opHandle)cpuOpImpl->userData, op);
91 |
92 | auto opRegistration = m_registered_ops[op->m_typeName];
93 | QNN_CUSTOM_BE_ENSURE_STATUS(m_opResolver->removeCustomOp((opHandle)cpuOpImpl->userData));
94 |
95 | if (opRegistration->free) {
96 | opRegistration->free(*op);
97 | }
98 |
99 | return QNN_SUCCESS;
100 | }
101 |
102 | std::shared_ptr CustomOpPackage::getInstance() noexcept {
103 | std::lock_guard locker(s_mtx);
104 | if (!s_opPackageInstance) {
105 | s_opPackageInstance.reset(new (std::nothrow) CustomOpPackage());
106 | }
107 | return s_opPackageInstance;
108 | }
109 |
110 | void CustomOpPackage::setIsInitialized(bool isInitialized) {
111 | std::lock_guard locker(s_mtx);
112 | s_isInitialized = isInitialized;
113 | }
114 |
115 | bool CustomOpPackage::getIsInitialized() {
116 | std::lock_guard locker(s_mtx);
117 | return s_isInitialized;
118 | }
119 |
120 | void CustomOpPackage::destroyInstance() {
121 | if (s_opPackageInstance && s_isInitialized) s_opPackageInstance.reset();
122 | s_isInitialized = false;
123 | }
124 |
125 | void CustomOpPackage::freeResolver() {
126 | if (m_opResolver) m_opResolver.reset();
127 | }
128 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/RwkvWkvOpPackageInterface.cpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | // Auto Generated Code for RwkvWkvOpPackage
3 | //==============================================================================
4 | #include "QnnCpuOpPackage.h"
5 | #include "CustomOpPackage.hpp"
6 |
7 | using namespace qnn::custom;
8 | using namespace qnn::custom::macros;
9 |
// One-time package initialization: creates the package singleton and registers
// the three RWKV kernels (wkv6, wkv7_state, wkv7_output).  Re-initialization
// without an intervening terminate is rejected.
static Qnn_ErrorHandle_t RwkvWkvOpPackageInitialize(
    QnnOpPackage_GlobalInfrastructure_t globalInfrastructure) {

  // Refuse double-initialization; the backend must call terminate first.
  QNN_CUSTOM_BE_ENSURE(!(CustomOpPackage::getIsInitialized()),QNN_OP_PACKAGE_ERROR_LIBRARY_ALREADY_INITIALIZED);

  // Macros from CustomOpPackage.hpp: set up the package instance ...
  INIT_BE_OP_PACKAGE(RwkvWkvOpPackage)

  // ... and register each op's CustomOpRegistration_t table (see the
  // register_* functions in src/ops/*.cpp).
  REGISTER_PACKAGE_OP(wkv6)
  REGISTER_PACKAGE_OP(wkv7_state)
  REGISTER_PACKAGE_OP(wkv7_output)

  // INIT_BE_PACKAGE_OPTIMIZATIONS();

  CustomOpPackage::setIsInitialized(true);

  return QNN_SUCCESS;
}
27 |
28 | static Qnn_ErrorHandle_t RwkvWkvOpPackageGetInfo(const QnnOpPackage_Info_t** info) {
29 | auto opPkg = CustomOpPackage::getInstance();
30 |
31 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED);
32 |
33 | QNN_CUSTOM_BE_ENSURE_STATUS(opPkg->getPackageInfo(info));
34 |
35 | return QNN_SUCCESS;
36 | }
37 |
38 | static Qnn_ErrorHandle_t RwkvWkvOpPackageValidateOpConfig(Qnn_OpConfig_t opConfig) {
39 | auto opPkg = CustomOpPackage::getInstance();
40 |
41 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED);
42 |
43 | auto opRegistration = opPkg->getOpRegistration(opConfig.v1.typeName);
44 |
45 | QNN_CUSTOM_BE_ENSURE(opRegistration, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
46 |
47 | QNN_CUSTOM_BE_ENSURE_STATUS(opRegistration->validateOpConfig(opConfig));
48 |
49 | return QNN_SUCCESS;
50 | }
51 |
52 | static Qnn_ErrorHandle_t RwkvWkvOpPackageCreateOpImpl(
53 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure,
54 | QnnOpPackage_Node_t node,
55 | QnnOpPackage_OpImpl_t* opImpl) {
56 | auto opPkg = CustomOpPackage::getInstance();
57 |
58 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED);
59 |
60 | QNN_CUSTOM_BE_ENSURE_STATUS(
61 | opPkg->createOpImpl(graphInfrastructure, node, opImpl));
62 |
63 | return QNN_SUCCESS;
64 | }
65 |
66 | static Qnn_ErrorHandle_t RwkvWkvOpPackageFreeOpImpl(
67 | QnnCpuOpPackage_OpImpl_t* opImpl) {
68 | auto opPkg = CustomOpPackage::getInstance();
69 |
70 | QNN_CUSTOM_BE_ENSURE(opPkg, QNN_OP_PACKAGE_ERROR_LIBRARY_NOT_INITIALIZED);
71 |
72 | QNN_CUSTOM_BE_ENSURE_STATUS(opPkg->freeOpImpl(opImpl));
73 |
74 | return QNN_SUCCESS;
75 | }
76 |
// Package teardown.  The local shared_ptr taken here keeps the object alive
// across destroyInstance(), so the subsequent freeResolver() call is made on
// a still-valid object even after the static singleton pointer is reset.
static Qnn_ErrorHandle_t RwkvWkvOpPackageTerminate() {
  auto opPkg = CustomOpPackage::getInstance();

  CustomOpPackage::destroyInstance();
  opPkg->freeResolver();

  return QNN_SUCCESS;
}
85 |
86 | static Qnn_ErrorHandle_t RwkvWkvOpPackageLogInitialize(
87 | QnnLog_Callback_t callback, QnnLog_Level_t maxLogLevel) {
88 | // function should be used if at least two backends support it
89 | // USER SHOULD NOTE THIS FUNCTION IS UNUSED BY BE
90 |
91 | return QNN_SUCCESS;
92 | }
93 |
94 | static Qnn_ErrorHandle_t RwkvWkvOpPackageLogSetLevel(
95 | QnnLog_Level_t maxLogLevel) {
96 | // USER SHOULD NOTE THIS FUNCTION IS UNUSED BY CPU BE
97 |
98 | return QNN_SUCCESS;
99 | }
100 |
101 | static Qnn_ErrorHandle_t RwkvWkvOpPackageLogTerminate() {
102 | // USER SHOULD NOTE THIS FUNCTION IS UNUSED BY CPU BE
103 |
104 | return QNN_SUCCESS;
105 | }
106 |
107 |
108 | extern "C" QNN_API Qnn_ErrorHandle_t RwkvWkvOpPackageInterfaceProvider(
109 | QnnOpPackage_Interface_t* interface) {
110 | interface->interfaceVersion.major = 1;
111 | interface->interfaceVersion.minor = 4;
112 | interface->interfaceVersion.patch = 0;
113 | interface->v1_4.init = RwkvWkvOpPackageInitialize;
114 | interface->v1_4.terminate = RwkvWkvOpPackageTerminate;
115 | interface->v1_4.getInfo = RwkvWkvOpPackageGetInfo;
116 | interface->v1_4.validateOpConfig = RwkvWkvOpPackageValidateOpConfig;
117 | interface->v1_4.createOpImpl = RwkvWkvOpPackageCreateOpImpl;
118 | interface->v1_4.freeOpImpl = RwkvWkvOpPackageFreeOpImpl;
119 | interface->v1_4.logInitialize = RwkvWkvOpPackageLogInitialize;
120 | interface->v1_4.logSetLevel = RwkvWkvOpPackageLogSetLevel;
121 | interface->v1_4.logTerminate = RwkvWkvOpPackageLogTerminate;
122 | return QNN_SUCCESS;
123 | }
124 |
125 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/ops/wkv6.cpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | // Auto Generated Code for RwkvWkvOpPackage
3 | //==============================================================================
4 | #include
5 | #include
6 |
7 | #include "CpuBackendUtils.hpp"
8 | #include "CustomOpPackage.hpp"
9 |
10 | using namespace qnn::custom;
11 | using namespace qnn::custom::utils;
12 |
13 | namespace wkv6 {
14 |
// RWKV v6 WKV recurrence over a flattened [seq_length * num_heads, head_size]
// token stream.  Inputs: k, v, r, state_in, tf, td (tf/td presumably
// time_first/time_decay per RWKV naming -- confirm against the exporter);
// outputs: per-token mix and the final recurrent state.
Qnn_ErrorHandle_t execute(CustomOp* operation) {
  /*
   * To have good performance and stability, it is required to avoid heap memory
   * allocation in this function. The heap memory allocation includes but not
   * limited to calling malloc, operator new, constructing STL container objects
   * like std::vector with default allocator, and adding items like calling
   * std::vector::push_back to STL container objects with default allocator.
   *
   * Please check in SDK documentation for more information.
   */

  float* k = (float*)operation->getInput(0)->data;
  float* v = (float*)operation->getInput(1)->data;
  float* r = (float*)operation->getInput(2)->data;
  float* state_in = (float*)operation->getInput(3)->data;
  float* tf = (float*)operation->getInput(4)->data;       // per-head, not per-token
  float* td = (float*)operation->getInput(5)->data;
  float* output = (float*)operation->getOutput(0)->data;
  float* state_out = (float*)operation->getOutput(1)->data;

  // State is laid out [num_heads, head_size, head_size]; input 0's leading
  // dimension is seq_length * num_heads, hence the division.
  int num_heads = operation->getInput(3)->currentDimensions[0];
  int head_size = operation->getInput(3)->currentDimensions[1];
  int seq_length = operation->getInput(0)->currentDimensions[0] / num_heads;

  // Output is accumulated with += below, so it must start zeroed.
  memset(output, 0, seq_length * num_heads * head_size * sizeof(float));
  for (int t = 0; t < seq_length; t++) {
    // From the second token on, read the state written at t-1: the recurrence
    // chains through state_out in place.
    if (t > 0) state_in = state_out;
    for (int h = 0; h < num_heads; h++) {
      for (int i = 0; i < head_size; i++) {
        auto k_val = k[t * num_heads * head_size + h * head_size + i];
        auto r_val = r[t * num_heads * head_size + h * head_size + i];
        auto td_val = td[t * num_heads * head_size + h * head_size + i];
        auto tf_val = tf[h * head_size + i];
        for (int j = 0; j < head_size; j++) {
          auto v_val = v[t * num_heads * head_size + h * head_size + j];
          auto kv_val = k_val * v_val;
          auto prev_state_val = state_in[h * head_size * head_size + i * head_size + j];
          // out[j] += r[i] * (k[i]*v[j]*tf[i] + S[i][j])  (sum over i)
          output[t * num_heads * head_size + h * head_size + j] += r_val * (kv_val * tf_val + prev_state_val);
          // S[i][j] = S[i][j]*td[i] + k[i]*v[j]
          state_out[h * head_size * head_size + i * head_size + j] = prev_state_val * td_val + kv_val;
        }
      }
    }
  }

  return QNN_SUCCESS;
}
61 |
62 | Qnn_ErrorHandle_t finalize(const CustomOp* operation) {
63 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numInput(), 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
64 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numOutput(), 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
65 |
66 | /**
67 | * Add code here
68 | **/
69 |
70 | return QNN_SUCCESS;
71 | }
72 |
73 | Qnn_ErrorHandle_t free(CustomOp& operation) {
74 |
75 | /**
76 | * Add code here
77 | **/
78 |
79 | return QNN_SUCCESS;
80 | }
81 |
82 | Qnn_ErrorHandle_t populateFromNode(const QnnOpPackage_Node_t node,
83 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure,
84 | CustomOp* operation) {
85 | // Add input
86 | for (uint32_t i = 0; i < numInputs(node); i++) {
87 | operation->addInput(getInput(node, i));
88 | }
89 |
90 | // Add output
91 | for (uint32_t i = 0; i < numOutputs(node); i++) {
92 | operation->addOutput(getOutput(node, i));
93 | }
94 |
95 |
96 | return QNN_SUCCESS;
97 | }
98 |
99 | Qnn_ErrorHandle_t validateOpConfig(Qnn_OpConfig_t opConfig) {
100 | QNN_CUSTOM_BE_ENSURE_EQ(
101 | strcmp(opConfig.v1.typeName, "wkv6"), 0, QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT)
102 |
103 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfInputs, 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
104 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfOutputs, 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
105 |
106 | return QNN_SUCCESS;
107 | }
108 | } // namespace wkv6
109 |
110 | CustomOpRegistration_t* register_Wkv6CustomOp() {
111 | using namespace wkv6;
112 | static CustomOpRegistration_t WkvRegister = {execute, finalize, free, validateOpConfig, populateFromNode};
113 | return &WkvRegister;
114 | }
115 |
116 | REGISTER_OP(wkv6, register_Wkv6CustomOp);
117 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/ops/wkv7_output.cpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | // Auto Generated Code for RwkvWkvOpPackage
3 | //==============================================================================
4 | #include
5 | #include
6 |
7 | #include "CpuBackendUtils.hpp"
8 | #include "CustomOpPackage.hpp"
9 |
10 | using namespace qnn::custom;
11 | using namespace qnn::custom::utils;
12 |
13 | namespace wkv7_output {
14 |
// Intentionally a no-op on the CPU backend: wkv7_state::execute in this
// package already writes the per-token output together with the updated
// state.  NOTE(review): presumably this op exists so graphs exported for the
// HTP backend (where state and output are separate kernels) also load here --
// confirm against the HTP op package.
Qnn_ErrorHandle_t execute(CustomOp* operation) {
  /*
   * To have good performance and stability, it is required to avoid heap memory
   * allocation in this function. The heap memory allocation includes but not
   * limited to calling malloc, operator new, constructing STL container objects
   * like std::vector with default allocator, and adding items like calling
   * std::vector::push_back to STL container objects with default allocator.
   *
   * Please check in SDK documentation for more information.
   */


  return QNN_SUCCESS;
}
29 |
30 | Qnn_ErrorHandle_t finalize(const CustomOp* operation) {
31 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numInput(), 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
32 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numOutput(), 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
33 |
34 | /**
35 | * Add code here
36 | **/
37 |
38 | return QNN_SUCCESS;
39 | }
40 |
41 | Qnn_ErrorHandle_t free(CustomOp& operation) {
42 |
43 | /**
44 | * Add code here
45 | **/
46 |
47 | return QNN_SUCCESS;
48 | }
49 |
50 | Qnn_ErrorHandle_t populateFromNode(const QnnOpPackage_Node_t node,
51 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure,
52 | CustomOp* operation) {
53 | // Add input
54 | for (uint32_t i = 0; i < numInputs(node); i++) {
55 | operation->addInput(getInput(node, i));
56 | }
57 |
58 | // Add output
59 | for (uint32_t i = 0; i < numOutputs(node); i++) {
60 | operation->addOutput(getOutput(node, i));
61 | }
62 |
63 |
64 | return QNN_SUCCESS;
65 | }
66 |
67 | Qnn_ErrorHandle_t validateOpConfig(Qnn_OpConfig_t opConfig) {
68 | QNN_CUSTOM_BE_ENSURE_EQ(
69 | strcmp(opConfig.v1.typeName, "wkv7_output"), 0, QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT)
70 |
71 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfInputs, 2, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
72 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfOutputs, 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
73 |
74 | return QNN_SUCCESS;
75 | }
76 | } // namespace wkv7_output
77 |
78 | CustomOpRegistration_t* register_Wkv7OutputCustomOp() {
79 | using namespace wkv7_output;
80 | static CustomOpRegistration_t WkvRegister = {execute, finalize, free, validateOpConfig, populateFromNode};
81 | return &WkvRegister;
82 | }
83 |
84 | REGISTER_OP(wkv7_output, register_Wkv7OutputCustomOp);
85 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/ops/wkv7_state.cpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | // Auto Generated Code for RwkvWkvOpPackage
3 | //==============================================================================
4 | #include
5 | #include
6 |
7 | #include "CpuBackendUtils.hpp"
8 | #include "CustomOpPackage.hpp"
9 |
10 | using namespace qnn::custom;
11 | using namespace qnn::custom::utils;
12 |
13 | namespace wkv7_state {
14 |
// RWKV v7 recurrence over a flattened [seq_length * num_heads, head_size]
// token stream.  Seven inputs: r, w, k, v, a, b, state_in; two outputs: the
// per-token output and the updated recurrent state.
Qnn_ErrorHandle_t execute(CustomOp* operation) {
  /*
   * To have good performance and stability, it is required to avoid heap memory
   * allocation in this function. The heap memory allocation includes but not
   * limited to calling malloc, operator new, constructing STL container objects
   * like std::vector with default allocator, and adding items like calling
   * std::vector::push_back to STL container objects with default allocator.
   *
   * Please check in SDK documentation for more information.
   */

  float* r = (float*)operation->getInput(0)->data;
  float* w = (float*)operation->getInput(1)->data;
  float* k = (float*)operation->getInput(2)->data;
  float* v = (float*)operation->getInput(3)->data;
  float* a = (float*)operation->getInput(4)->data;
  float* b = (float*)operation->getInput(5)->data;
  float* state_in = (float*)operation->getInput(6)->data;
  float* output = (float*)operation->getOutput(0)->data;
  float* state_out = (float*)operation->getOutput(1)->data;

  // State is laid out [num_heads, head_size, head_size]; input 0's leading
  // dimension is seq_length * num_heads, hence the division.
  int num_heads = operation->getInput(6)->currentDimensions[0];
  int head_size = operation->getInput(6)->currentDimensions[1];
  // int seq_length = operation->getInput(0)->currentDimensions[0];
  int seq_length = operation->getInput(0)->currentDimensions[0] / num_heads;

  for (int t = 0; t < seq_length; t++) {
    // From the second token on, read the state written at t-1: the recurrence
    // chains through state_out in place.
    if (t > 0) state_in = state_out;
    for (int h = 0; h < num_heads; h++) {
      for (int i = 0; i < head_size; i++) {
        auto v_val = v[t * num_heads * head_size + h * head_size + i];

        // sa = dot(a[t,h,:], S[h][i][:])
        float sa = 0, result = 0;
        for (int j = 0; j < head_size; j++) {
          sa += a[t * num_heads * head_size + h * head_size + j] * state_in[h * head_size * head_size + i * head_size + j];
        }

        for (int j = 0; j < head_size; j++) {
          auto r_val = r[t * num_heads * head_size + h * head_size + j];
          auto w_val = w[t * num_heads * head_size + h * head_size + j];
          auto k_val = k[t * num_heads * head_size + h * head_size + j];
          auto b_val = b[t * num_heads * head_size + h * head_size + j];
          auto kv_val = k_val * v_val;
          // S[i][j] = S[i][j]*w[j] + k[j]*v[i] + sa*b[j]
          auto state_val = state_in[h * head_size * head_size + i * head_size + j] * w_val + kv_val + sa * b_val;
          // out[i] = sum_j S[i][j] * r[j]
          result += state_val * r_val;
          state_out[h * head_size * head_size + i * head_size + j] = state_val;
        }
        output[t * num_heads * head_size + h * head_size + i] = result;
      }
    }
  }

  return QNN_SUCCESS;
}
69 |
70 | Qnn_ErrorHandle_t finalize(const CustomOp* operation) {
71 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numInput(), 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
72 | QNN_CUSTOM_BE_ENSURE_EQ(operation->numOutput(), 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
73 |
74 | /**
75 | * Add code here
76 | **/
77 |
78 | return QNN_SUCCESS;
79 | }
80 |
81 | Qnn_ErrorHandle_t free(CustomOp& operation) {
82 |
83 | /**
84 | * Add code here
85 | **/
86 |
87 | return QNN_SUCCESS;
88 | }
89 |
90 | Qnn_ErrorHandle_t populateFromNode(const QnnOpPackage_Node_t node,
91 | QnnOpPackage_GraphInfrastructure_t graphInfrastructure,
92 | CustomOp* operation) {
93 | // Add input
94 | for (uint32_t i = 0; i < numInputs(node); i++) {
95 | operation->addInput(getInput(node, i));
96 | }
97 |
98 | // Add output
99 | for (uint32_t i = 0; i < numOutputs(node); i++) {
100 | operation->addOutput(getOutput(node, i));
101 | }
102 |
103 |
104 | return QNN_SUCCESS;
105 | }
106 |
107 | Qnn_ErrorHandle_t validateOpConfig(Qnn_OpConfig_t opConfig) {
108 | QNN_CUSTOM_BE_ENSURE_EQ(
109 | strcmp(opConfig.v1.typeName, "wkv7_state"), 0, QNN_OP_PACKAGE_ERROR_INVALID_ARGUMENT)
110 |
111 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfInputs, 6, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
112 | QNN_CUSTOM_BE_ENSURE_EQ(opConfig.v1.numOfOutputs, 1, QNN_OP_PACKAGE_ERROR_VALIDATION_FAILURE)
113 |
114 | return QNN_SUCCESS;
115 | }
116 | } // namespace wkv7_state
117 |
118 | CustomOpRegistration_t* register_Wkv7StateCustomOp() {
119 | using namespace wkv7_state;
120 | static CustomOpRegistration_t WkvRegister = {execute, finalize, free, validateOpConfig, populateFromNode};
121 | return &WkvRegister;
122 | }
123 |
124 | REGISTER_OP(wkv7_state, register_Wkv7StateCustomOp);
125 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/utils/BackendUtils.hpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | //
3 | // Copyright (c) 2020-2023 Qualcomm Technologies, Inc.
4 | // All Rights Reserved.
5 | // Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | //
7 | //==============================================================================
8 |
9 | #pragma once
10 | #include
11 | #include
12 |
13 | #include
14 | #include
15 |
16 | #include "QnnOpPackage.h"
17 | #include "QnnTypes.h"
18 |
19 | //============================================================================
20 | // Backend Defined Behavior
21 | //=============================================================================
22 | // A required backend defined tensor object which designates an input or output tensor
23 | typedef struct CustomOpTensor* CustomOpTensorPtr_t;
24 |
25 | // A required backend defined parameter object which designates scalar, tensor and string parameters
26 | typedef struct CustomOpParam* CustomOpParamPtr_t;
27 |
28 | // A backend defined object which contains additional info about an operation such as connectivity,
29 | // buffers etc
30 | typedef struct CustomOpContext* CustomOpContextPtr_t;
31 |
32 | // A backend defined object which contains information about a kernel such as its string path, its
33 | // buffers, assigned memory, local dimensions etc.
34 | typedef struct CustomOpKernelContext* CustomOpKernelContextPtr_t;
35 |
36 | namespace qnn {
37 |
38 | namespace custom {
39 |
40 | namespace utils {
41 |
42 | // Each backend is expected to define these utilities to aid users in accessing basic info about
43 | // an operation package node.
44 | const CustomOpTensorPtr_t* getInput(QnnOpPackage_Node_t node);
45 |
46 | const CustomOpTensorPtr_t* getOutput(QnnOpPackage_Node_t node);
47 |
48 | const CustomOpParamPtr_t* getParam(QnnOpPackage_Node_t node);
49 |
50 | const CustomOpTensorPtr_t getInput(QnnOpPackage_Node_t node, size_t idx);
51 |
52 | CustomOpTensorPtr_t getOutput(QnnOpPackage_Node_t node, size_t idx);
53 |
54 | const std::pair getParam(QnnOpPackage_Node_t node,
55 | const std::string& paramName);
56 |
57 | uint32_t numInputs(QnnOpPackage_Node_t node);
58 |
59 | uint32_t numOutputs(QnnOpPackage_Node_t node);
60 |
61 | uint32_t numDimensions(CustomOpTensorPtr_t tensor);
62 |
63 | const uint32_t* getTensorShape(CustomOpTensorPtr_t tensor);
64 |
65 | void* getTensorData(CustomOpTensorPtr_t tensor);
66 |
67 | uint32_t numTensorSize(CustomOpTensorPtr_t tensor);
68 | // Additional backend utilities should be included under this namespace
69 | namespace backend_utils {}
70 | } // namespace utils
71 | } // namespace custom
72 | } // namespace qnn
73 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/utils/CPU/CpuBackendUtils.cpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | //
3 | // Copyright (c) 2020, 2023 Qualcomm Technologies, Inc.
4 | // All Rights Reserved.
5 | // Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | //
7 | //==============================================================================
8 |
9 | #include
10 |
11 | #include
12 |
13 | #include "CpuBackendUtils.hpp"
14 |
15 | namespace qnn {
16 |
17 | namespace custom {
18 |
19 | namespace utils {
20 |
21 | // Each backend is expected to define these utilities to aid users in accessing basic info about
22 | // an operation package node.
23 | const CustomOpTensorPtr_t* getInput(QnnOpPackage_Node_t node) {
24 | return (CustomOpTensorPtr_t*)reinterpret_cast(node)->inputs;
25 | }
26 |
27 | const CustomOpTensorPtr_t* getOutput(QnnOpPackage_Node_t node) {
28 | return (CustomOpTensorPtr_t*)reinterpret_cast(node)->outputs;
29 | }
30 |
31 | const CustomOpParamPtr_t* getParam(QnnOpPackage_Node_t node) {
32 | return (CustomOpParamPtr_t*)reinterpret_cast(node)->params;
33 | }
34 |
35 | const std::pair getParam(QnnOpPackage_Node_t node,
36 | const std::string& name) {
37 | auto cpuNode = reinterpret_cast(node);
38 | auto params = (CustomOpParamPtr_t*)cpuNode->params;
39 |
40 | for (uint32_t idx = 0; idx < cpuNode->numOfParams; idx++) {
41 | auto paramName = params[idx]->name;
42 |
43 | if (strcmp(paramName, name.c_str()) == 0) {
44 | return {true, params[idx]};
45 | }
46 | }
47 |
48 | return {false, nullptr};
49 | }
50 |
51 | const CustomOpTensorPtr_t getInput(QnnOpPackage_Node_t node, size_t idx) {
52 | return (CustomOpTensorPtr_t) reinterpret_cast(node)->inputs[idx];
53 | }
54 |
55 | CustomOpTensorPtr_t getOutput(QnnOpPackage_Node_t node, size_t idx) {
56 | return (CustomOpTensorPtr_t) reinterpret_cast(node)->outputs[idx];
57 | }
58 |
59 | uint32_t numInputs(QnnOpPackage_Node_t node) {
60 | return reinterpret_cast(node)->numOfInputs;
61 | }
62 |
63 | uint32_t numOutputs(QnnOpPackage_Node_t node) {
64 | return reinterpret_cast(node)->numOfOutputs;
65 | }
66 |
67 | uint32_t numDimensions(CustomOpTensorPtr_t tensor) {
68 | return reinterpret_cast(tensor)->rank;
69 | }
70 |
71 | uint32_t numTensorSize(CustomOpTensorPtr_t tensor) {
72 | uint32_t size = 1;
73 | auto cpuTensor = reinterpret_cast(tensor);
74 |
75 | for (uint32_t i = 0; i < cpuTensor->rank; i++) {
76 | size *= cpuTensor->currentDimensions[i];
77 | }
78 | return size;
79 | }
80 |
81 | const uint32_t* getTensorShape(CustomOpTensorPtr_t tensor) {
82 | return reinterpret_cast(tensor)->currentDimensions;
83 | }
84 |
85 | template
86 | const T* getTensorData(CustomOpTensorPtr_t tensor) {
87 | auto tempTensor = reinterpret_cast(tensor);
88 | auto dataRef = reinterpret_cast(tempTensor->data);
89 | return const_cast(dataRef);
90 | }
91 |
92 | template
93 | T& getTensorDataRef(CustomOpTensorPtr_t tensor) {
94 | auto tempTensor = reinterpret_cast(tensor);
95 | auto dataRef = reinterpret_cast(tempTensor->data);
96 | return &dataRef;
97 | }
98 |
99 | namespace backend_utils {
100 |
101 | const double getScalarParam(const CustomOpParamPtr_t param) {
102 | auto cpuParam = reinterpret_cast(param);
103 | return cpuParam->scalarParam;
104 | }
105 |
106 | const CustomOpTensorPtr_t getTensorParam(const CustomOpParamPtr_t param) {
107 | auto cpuParam = reinterpret_cast(param);
108 | return (CustomOpTensorPtr_t)cpuParam->tensorParam;
109 | }
110 |
111 | } // namespace backend_utils
112 | } // namespace utils
113 | } // namespace custom
114 | } // namespace qnn
115 |
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/utils/CPU/CpuBackendUtils.hpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | //
3 | // Copyright (c) 2020 Qualcomm Technologies, Inc.
4 | // All Rights Reserved.
5 | // Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | //
7 | //==============================================================================
8 |
9 | #pragma once
10 |
11 | #include "BackendUtils.hpp"
12 | #include "QnnCpuOpPackage.h"
13 |
14 | // Tensor and parameter definitions
// Concrete CPU-backend tensor type: inherits the QNN CPU tensor layout
// unchanged, so pointers are freely reinterpret-castable between the two.
struct CustomOpTensor : public QnnCpuOpPackage_Tensor_t {};

// Concrete CPU-backend parameter type: inherits the QNN CPU param layout.
struct CustomOpParam : public QnnCpuOpPackage_Param_t {};

namespace qnn {
namespace custom {
namespace utils {
namespace backend_utils {

// Scalar value carried by a parameter (defined in CpuBackendUtils.cpp).
const double getScalarParam(const CustomOpParamPtr_t param);

// Tensor payload carried by a parameter (defined in CpuBackendUtils.cpp).
const CustomOpTensorPtr_t getTensorParam(const CustomOpParamPtr_t param);
} // namespace backend_utils
} // namespace utils
} // namespace custom
} // namespace qnn
--------------------------------------------------------------------------------
/hexagon/CPU/RwkvWkvOpPackage/src/utils/CustomOpUtils.hpp:
--------------------------------------------------------------------------------
1 | //==============================================================================
2 | //
3 | // Copyright (c) 2020 Qualcomm Technologies, Inc.
4 | // All Rights Reserved.
5 | // Confidential and Proprietary - Qualcomm Technologies, Inc.
6 | //
7 | //==============================================================================
8 |
9 | #pragma once
10 |
11 | #include