├── wasm-module1 ├── Cargo.toml └── src │ └── lib.rs ├── wasm-module2 ├── Cargo.toml └── src │ └── lib.rs ├── wasm-app ├── Cargo.toml └── src │ └── main.rs ├── .gitignore ├── .github └── workflows │ └── build.yml ├── LICENSE-Apache-2.0 ├── LICENSE-EUPL-1.2 └── README.md /wasm-module1/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "wasm-module1" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [lib] 9 | crate-type = ['cdylib'] 10 | 11 | [dependencies] 12 | 13 | [profile.dev] 14 | panic = "abort" 15 | 16 | [profile.release] 17 | panic = "abort" -------------------------------------------------------------------------------- /wasm-module2/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "wasm-module2" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [lib] 9 | crate-type = ['cdylib'] 10 | 11 | [dependencies] 12 | arrow = { version = "57.0.0", default-features = false, features = ["ipc"] } 13 | time = {version = "0.3.44", features = ["macros"]} -------------------------------------------------------------------------------- /wasm-app/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "wasm-app" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = {version = "1.0.100"} 10 | arrow = { version = "57.0.0", default-features = false, features = ["ipc","prettyprint"] } 11 | time = {version = "0.3.44", features = ["macros"]} 12 | wasmtime = { version = "38.0.3"} 13 | wasmtime-wasi = { version = "38.0.3", features = ["p1"]} 14 | 
wasi-common = { version = "38.0.3"} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | 13 | # IDE 14 | ## VSCodium / VS Code 15 | .vscode/* 16 | !.vscode/settings.json 17 | !.vscode/tasks.json 18 | !.vscode/launch.json 19 | !.vscode/extensions.json 20 | *.code-workspace 21 | 22 | # Local History for Visual Studio Code 23 | .history/ -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: rust-wasm-study 2 | on: [push] 3 | env: 4 | RUSTUP_TOOLCHAIN: "1.91.0" 5 | RUSTUP_HOME: "./rust/rustup" 6 | CARGO_HOME: "./rust/cargo" 7 | 8 | jobs: 9 | build-rust-wasm-study: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v4 14 | with: 15 | persist-credentials: false 16 | - name: Cache Rust 17 | id: cache-rust 18 | uses: actions/cache@v4 19 | with: 20 | path: rust 21 | key: ${{ runner.os }}-rustup-${{env.RUSTUP_TOOLCHAIN}} 22 | - name: Install rust 23 | if: steps.cache-rust.outputs.cache-hit != 'true' 24 | run: | 25 | mkdir -p $RUSTUP_HOME 26 | mkdir -p $CARGO_HOME 27 | curl https://sh.rustup.rs -sSf | sh -s -- -y 28 | rustup install $RUSTUP_TOOLCHAIN 29 | rustup target add wasm32-wasip1 --toolchain $RUSTUP_TOOLCHAIN 30 | rustup target add wasm32-unknown-unknown --toolchain $RUSTUP_TOOLCHAIN 31 | - name: Configure Rust and display version 32 | run: | 33 | echo "PATH=$(dirname $(rustup which 
cargo)):$PATH" >> $GITHUB_ENV 34 | rustc --version 35 | cargo --version 36 | - name: Build Rust Wasm Study 37 | run: | 38 | cd wasm-module1 39 | cargo build --release --target wasm32-wasip1 40 | cd .. 41 | cd wasm-module2 42 | cargo build --release --target wasm32-wasip1 43 | cd .. 44 | cd wasm-app 45 | cargo build --release -------------------------------------------------------------------------------- /wasm-module1/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::ffi::CString; 2 | 3 | use std::cell::Cell; 4 | use std::cell::RefCell; 5 | use std::collections::HashMap; 6 | use std::ffi::CStr; 7 | use std::mem::ManuallyDrop; 8 | use std::ptr; 9 | 10 | /// A simple function returning a number, as this is the simplest native data type supported by WASM 11 | /// Returns the constant 42 12 | #[unsafe(no_mangle)] 13 | pub extern "C" fn answer() -> i32 { 14 | return 42; 15 | } 16 | 17 | // Global variable to keep track of allocated memory 18 | // Note: This is really an exception, as allocation by the app into the module should happen only for parameters; 19 | // otherwise it would be really bad for performance. 
20 | thread_local!( 21 | static MEMORY_AREAS: RefCell>)>> = 22 | RefCell::new(HashMap::new()); 23 | ); 24 | 25 | enum MemoryAreasReturnCode { 26 | Success = 0, 27 | ErrorMemmoryNotAllocated = -1, 28 | } 29 | 30 | /// Allocate some memory for the application to write data for the module 31 | /// Note: It is up to the application (and not the WASM module) to provide enough pages, so the module does not run out of memory 32 | /// # Arguments 33 | /// * `size` - size of memory to allocaten 34 | /// returns a pointer to the allocated memory area 35 | #[unsafe(no_mangle)] 36 | pub extern "C" fn wasm_allocate(size: u32) -> *const u8 { 37 | // create a Box with empty memory 38 | let alloc_box = ManuallyDrop::new(vec![0u8; size as usize].into_boxed_slice()); 39 | return allocate(size as usize, alloc_box); 40 | } 41 | 42 | /// Deallocates existing memory for the purpose of the application 43 | /// # Arguments 44 | /// * `ptr` - mutuable pointer to the memory to deallocate 45 | /// returns a code if it was successful or not 46 | #[unsafe(no_mangle)] 47 | pub extern "C" fn wasm_deallocate(ptr: *const u8) -> i32 { 48 | // check if the ptr exists 49 | let cell: Cell>)>> = Cell::new(None); 50 | MEMORY_AREAS.with(|mem_map| cell.set(mem_map.borrow_mut().remove(&ptr))); 51 | let memory_area: Option<(usize, ManuallyDrop>)> = cell.into_inner(); 52 | match memory_area { 53 | Some(x) => ManuallyDrop::into_inner(x.1), // will then be deleted after function returns 54 | None => return MemoryAreasReturnCode::ErrorMemmoryNotAllocated as i32, 55 | }; 56 | // return success 57 | return MemoryAreasReturnCode::Success as i32; 58 | } 59 | 60 | /// A hello world function that takes as input a pointer to a C string in the WASM module memory and outputs a pointer to a C string in the WASM module memory containing a greeting 61 | /// # Arguments 62 | /// * `name` - pointer to a c string containing a name to greet 63 | /// Returns a pointer to a C string. 
Note: The calling application must signal to the module that the memory can be fred by calling deallocate on the returned pointer 64 | /// 65 | #[unsafe(no_mangle)] 66 | pub extern "C" fn wasm_memory_c_format_hello_world(name: *const i8) -> *const u8 { 67 | // validate pointer 68 | let expected_size: usize = validate_pointer(name as *const u8); 69 | if expected_size == 0 { 70 | return ptr::null(); 71 | }; // return if no valid allocated memory was provided 72 | // convert parameter to Rust 73 | let c_str: &CStr = unsafe { CStr::from_ptr(name) }; 74 | // check valid memory representation 75 | if c_str.to_bytes_with_nul().len() != expected_size { 76 | return ptr::null(); 77 | }; // return if allocated memory does not match expected memory 78 | let name_str: &str = c_str.to_str().unwrap(); 79 | // execute the real native function 80 | let result_str: String = format_hello_world(&name_str); 81 | 82 | // convert result to C and allocate the memory so the application can release it after reading 83 | let result_cstring: Box<[u8]> = CString::new(result_str) 84 | .unwrap() 85 | .into_bytes_with_nul() 86 | .into_boxed_slice(); 87 | let result_cstring_len: usize = result_cstring.len(); 88 | let allocated_cstring: ManuallyDrop> = ManuallyDrop::new(result_cstring); 89 | 90 | let return_u8: *const u8 = allocate(result_cstring_len, allocated_cstring); 91 | return return_u8; 92 | } 93 | 94 | /// A hello world function that takes as input a pointer (offset, length) in the WASM module memory containing the name (in Rust str format) 95 | /// # Arguments 96 | /// * `offset` - position of the start of the Rust str 97 | /// * `length` - length of the Rust str 98 | /// Returns an offset in the WASM module memory where an offset and length of the result greeting (a Rust str) are stored 99 | #[unsafe(no_mangle)] 100 | pub extern "C" fn wasm_memory_rust_format_hello_world(offset: *mut u32, length: u32) -> u32 { 101 | // validate pointer 102 | let expected_size_param: usize = 
validate_pointer(offset as *const u8); 103 | if (expected_size_param == 0) | (expected_size_param != length as usize) { 104 | return 0; 105 | }; // return if no valid allocated memory was provided 106 | 107 | // fetch from WASM module memory 108 | let mut input_vec: Vec = Vec::new(); 109 | unsafe { 110 | Vec::extend_from_slice( 111 | &mut input_vec, 112 | std::slice::from_raw_parts(offset as *mut u8, length as usize), 113 | ) 114 | }; 115 | 116 | let name_str: String = unsafe { String::from_utf8_unchecked(input_vec) }; 117 | // execute the real native function ad make sure that String is not dropped 118 | let result_string: Box<[u8]> = format_hello_world(&name_str) 119 | .into_bytes() 120 | .into_boxed_slice(); 121 | let result_string_len: usize = result_string.len(); 122 | let allocated_result_string: ManuallyDrop> = ManuallyDrop::new(result_string); 123 | // return position of WASM memory where we can find a offset, length pair 124 | let string_ptr = allocate(result_string_len, allocated_result_string); 125 | // prepare metadata 126 | let mut vec_meta: Vec = Vec::new(); 127 | let string_ptr_array: [u8; (usize::BITS / 8) as usize] = (string_ptr as usize).to_le_bytes(); 128 | let length_array: [u8; (usize::BITS / 8) as usize] = result_string_len.to_le_bytes(); 129 | for byte in string_ptr_array { 130 | vec_meta.push(byte); 131 | } 132 | for byte in length_array { 133 | vec_meta.push(byte); 134 | } 135 | let str_meta: Box<[u8]> = vec_meta.into_boxed_slice(); 136 | let str_meta_len: usize = str_meta.len(); 137 | let str_meta_ptr = allocate(str_meta_len, ManuallyDrop::new(str_meta)); 138 | // the reason is that Rust only support one return value. Although it can be a tuple, this is translated by wasm to one return type and not multi-value 139 | return str_meta_ptr as u32; 140 | } 141 | 142 | /// Validates if a pointer has been properly allocated in this module 143 | /// # Arguments 144 | /// * `ptr` - pointer 145 | /// returns the size of the allocated memory area. 
It is 0 if the pointer is invalid 146 | pub fn validate_pointer(ptr: *const u8) -> usize { 147 | let cell: Cell = Cell::new(0); 148 | MEMORY_AREAS.with(|mem_map| match mem_map.borrow().get(&ptr) { 149 | Some(x) => cell.set(x.0), 150 | None => cell.set(0), 151 | }); 152 | return cell.get(); 153 | } 154 | 155 | /// Allocate some memory for the application to write data for the module 156 | /// Note: It is up to the application (and not the WASM module) to provide enough pages, so the module does not run out of memory 157 | /// This function can also be used internally by the WASM module to return data to the calling application of the module 158 | /// # Arguments 159 | /// * `size` - size of memory to allocaten 160 | /// returns a pointer to the allocated memory area 161 | pub fn allocate(size: usize, alloc_box: ManuallyDrop>) -> *const u8 { 162 | let result_ptr: *const u8 = alloc_box.as_ptr(); 163 | // save allocated memory to avoid it is cleaned up after function exits 164 | MEMORY_AREAS.with(|mem_map| mem_map.borrow_mut().insert(result_ptr, (size, alloc_box))); 165 | return result_ptr; 166 | } 167 | 168 | /// The native hello_world function in rust 169 | /// # Arguments 170 | /// * `name` - a str containing the name to greet 171 | /// Returns a string with the greeting 172 | fn format_hello_world(name: &str) -> String { 173 | return format!("Hello World, {name}!"); 174 | } 175 | -------------------------------------------------------------------------------- /LICENSE-Apache-2.0: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /wasm-module2/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::cell::Cell; 2 | use std::cell::RefCell; 3 | use std::collections::HashMap; 4 | use std::mem::ManuallyDrop; 5 | use std::sync::Arc; 6 | 7 | use arrow::array::{StringArray, UInt64Array}; 8 | use arrow::datatypes::{ 9 | DataType, Field, Float64Type, Schema, TimeUnit, TimestampSecondType, UInt64Type, 10 | }; 11 | use arrow::ipc::reader::StreamReader; 12 | use arrow::ipc::writer::StreamWriter; 13 | use arrow::record_batch::RecordBatch; 14 | 15 | use time::macros::datetime; 16 | 17 | // Global variable to keep track of allocated memory 18 | // Note: This is really an exception, as allocation by the app into the module should happen only for parameters; 19 | // otherwise it would be really bad for performance. 
20 | thread_local!( 21 | static MEMORY_AREAS: RefCell>)>> = 22 | RefCell::new(HashMap::new()); 23 | ); 24 | 25 | enum MemoryAreasReturnCode { 26 | Success = 0, 27 | ErrorMemmoryNotAllocated = -1, 28 | } 29 | 30 | /// Allocate some memory for the application to write data for the module 31 | /// Note: It is up to the application (and not the WASM module) to provide enough pages, so the module does not run out of memory 32 | /// # Arguments 33 | /// * `size` - size of memory to allocaten 34 | /// returns a pointer to the allocated memory area 35 | #[unsafe(no_mangle)] 36 | pub extern "C" fn wasm_allocate(size: u32) -> *const u8 { 37 | // create a Box with empty memory 38 | let alloc_box = ManuallyDrop::new(vec![0u8; size as usize].into_boxed_slice()); 39 | return allocate(size as usize, alloc_box); 40 | } 41 | 42 | /// Deallocates existing memory for the purpose of the application 43 | /// # Arguments 44 | /// * `ptr` - mutuable pointer to the memory to deallocate 45 | /// returns a code if it was successful or not 46 | #[unsafe(no_mangle)] 47 | pub extern "C" fn wasm_deallocate(ptr: *const u8) -> i32 { 48 | // check if the ptr exists 49 | let cell: Cell>)>> = Cell::new(None); 50 | MEMORY_AREAS.with(|mem_map| cell.set(mem_map.borrow_mut().remove(&ptr))); 51 | let memory_area: Option<(usize, ManuallyDrop>)> = cell.into_inner(); 52 | match memory_area { 53 | Some(x) => ManuallyDrop::into_inner(x.1), // will then be deleted after function returns 54 | None => return MemoryAreasReturnCode::ErrorMemmoryNotAllocated as i32, 55 | }; 56 | // return success 57 | return MemoryAreasReturnCode::Success as i32; 58 | } 59 | 60 | /// A simple example function that processes data in Arrow IPC format from the WASM module memory 61 | /// # Arguments 62 | /// * `meta_data_offset` - position of the start of the meta data ("command") in Arrow IPC format 63 | /// * `meta_data_size` - size of the meta data in Arrow IPC format 64 | /// * `data_offset` - position of the start of the data 
("data") in Arrow IPC format 65 | /// * `data_size` - size of the data in Arrow IPC format 66 | /// Returns an offset in the WASM module memory where an offset and size of the result data in Arrow IPC format are stored 67 | #[unsafe(no_mangle)] 68 | pub extern "C" fn wasm_memory_process_data_arrow( 69 | meta_data_offset: *mut u32, 70 | meta_data_size: u32, 71 | data_offset: *mut u32, 72 | data_size: u32, 73 | ) -> u32 { 74 | // validate meta data pointer 75 | let expected_size_meta_data: usize = validate_pointer(meta_data_offset as *const u8); 76 | if (expected_size_meta_data == 0) | (expected_size_meta_data != meta_data_size as usize) { 77 | return 0; 78 | }; // return if no valid allocated memory was provided 79 | // validate data pointer 80 | let expected_size_data: usize = validate_pointer(data_offset as *const u8); 81 | if (expected_size_data == 0) | (expected_size_data != data_size as usize) { 82 | return 0; 83 | }; // return if no valid allocated memory was provided 84 | // fetch from WASM module memory - meta data 85 | let mut input_vec_meta_data: Vec = Vec::new(); 86 | unsafe { 87 | Vec::extend_from_slice( 88 | &mut input_vec_meta_data, 89 | std::slice::from_raw_parts(meta_data_offset as *mut u8, meta_data_size as usize), 90 | ) 91 | }; 92 | // fetch from WASM module memory - / data 93 | let mut input_vec_data: Vec = Vec::new(); 94 | unsafe { 95 | Vec::extend_from_slice( 96 | &mut input_vec_data, 97 | std::slice::from_raw_parts(data_offset as *mut u8, data_size as usize), 98 | ) 99 | }; 100 | // check the meta data and data 101 | // deserialize the meta data 102 | let stream_reader_meta_data = 103 | StreamReader::try_new(input_vec_meta_data.as_slice(), None).unwrap(); 104 | // check if the meta data content is as expected (ie hardcoded in app) 105 | for item in stream_reader_meta_data { 106 | let arrow_record_batch = item.unwrap(); 107 | // validate schema 108 | assert_eq!(arrow_record_batch.schema().field(0).name(), "command"); 109 | assert_eq!( 110 | 
arrow_record_batch.schema().field(0).data_type(), 111 | &DataType::Utf8 112 | ); 113 | assert_eq!(arrow_record_batch.schema().field(1).name(), "config"); 114 | assert_eq!( 115 | arrow_record_batch.schema().field(1).data_type(), 116 | &DataType::Struct(arrow::datatypes::Fields::from(vec![Field::new("filename", DataType::Utf8, false)])) 117 | ); 118 | 119 | // validate meta_data 120 | assert_eq!(arrow_record_batch.num_rows(), 1); 121 | let first_row_command = 122 | arrow::array::as_string_array(arrow_record_batch.column(0)).value(0); 123 | assert_eq!(first_row_command, "test"); 124 | let first_row_config = 125 | arrow::array::as_struct_array(arrow_record_batch.column(1)).column(0); 126 | let first_row_config_filename = arrow::array::as_string_array(first_row_config).value(0); 127 | assert_eq!(first_row_config_filename, "test.txt"); 128 | } 129 | 130 | // deserialize the data 131 | let stream_reader_data = StreamReader::try_new(input_vec_data.as_slice(), None).unwrap(); 132 | // check if the data content is as expected (ie hardcoded in app) 133 | for item in stream_reader_data { 134 | let arrow_record_batch = item.unwrap(); 135 | // validate schema 136 | assert_eq!(arrow_record_batch.schema().field(0).name(), "id"); 137 | assert_eq!( 138 | arrow_record_batch.schema().field(0).data_type(), 139 | &DataType::UInt64 140 | ); 141 | assert_eq!(arrow_record_batch.schema().field(1).name(), "content"); 142 | assert_eq!( 143 | arrow_record_batch.schema().field(1).data_type(), 144 | &DataType::Utf8 145 | ); 146 | assert_eq!(arrow_record_batch.schema().field(2).name(), "title"); 147 | assert_eq!( 148 | arrow_record_batch.schema().field(2).data_type(), 149 | &DataType::Utf8 150 | ); 151 | assert_eq!(arrow_record_batch.schema().field(3).name(), "date"); 152 | assert_eq!( 153 | arrow_record_batch.schema().field(3).data_type(), 154 | &DataType::Timestamp(TimeUnit::Second, Some("+00:00".to_string().into())) 155 | ); 156 | assert_eq!(arrow_record_batch.schema().field(4).name(), 
"score"); 157 | assert_eq!( 158 | arrow_record_batch.schema().field(4).data_type(), 159 | &DataType::Float64 160 | ); 161 | // validate data 162 | assert_eq!(arrow_record_batch.num_rows(), 1); 163 | let first_row_id = 164 | arrow::array::as_primitive_array::(arrow_record_batch.column(0)).value(0); 165 | assert_eq!(first_row_id, 1); 166 | let first_row_content = 167 | arrow::array::as_string_array(arrow_record_batch.column(1)).value(0); 168 | assert_eq!(first_row_content, "this is a test"); 169 | let first_row_title = arrow::array::as_string_array(arrow_record_batch.column(2)).value(0); 170 | assert_eq!(first_row_title, "test"); 171 | let first_row_date = 172 | arrow::array::as_primitive_array::(arrow_record_batch.column(3)) 173 | .value(0); 174 | assert_eq!( 175 | first_row_date, 176 | datetime!(2022-01-01 12:00:00 UTC).unix_timestamp() 177 | ); 178 | let first_row_score = 179 | arrow::array::as_primitive_array::(arrow_record_batch.column(4)).value(0); 180 | assert_eq!(first_row_score, 1.123456f64); 181 | } 182 | // lets generate a return answer to the processing request modifying the field content of document with id 1 183 | // define schema 184 | let schema = Schema::new(vec![ 185 | Field::new("id", DataType::UInt64, false), 186 | Field::new("content", DataType::Utf8, false), 187 | ]); 188 | let ids = UInt64Array::from(vec![1]); 189 | let contents = StringArray::from(vec!["this is a test2"]); 190 | 191 | // build a record batch 192 | let result_batch = RecordBatch::try_new( 193 | Arc::new(schema.clone()), 194 | vec![Arc::new(ids), Arc::new(contents)], 195 | ) 196 | .unwrap(); 197 | // serialize it 198 | let buffer: Vec = Vec::new(); 199 | 200 | let mut stream_writer = StreamWriter::try_new(buffer, &schema).unwrap(); 201 | stream_writer.write(&result_batch).unwrap(); 202 | 203 | let serialized_result_batch: Vec = stream_writer.into_inner().unwrap(); 204 | // allocate memory for the answer 205 | let serialized_result_batch_alloc: ManuallyDrop> = 206 | 
ManuallyDrop::new(serialized_result_batch.into_boxed_slice()); 207 | let serialized_result_batch_alloc_len: usize = serialized_result_batch_alloc.len(); 208 | 209 | let serialized_result_batch_ptr = allocate( 210 | serialized_result_batch_alloc_len, 211 | serialized_result_batch_alloc, 212 | ); 213 | // return position of WASM memory where we can find a offset, length pair 214 | let mut vec_meta: Vec = Vec::new(); 215 | let serialized_result_batch_ptr_array: [u8; (usize::BITS / 8) as usize] = 216 | (serialized_result_batch_ptr as usize).to_le_bytes(); 217 | let serialized_result_batch_alloc_len: [u8; (usize::BITS / 8) as usize] = 218 | serialized_result_batch_alloc_len.to_le_bytes(); 219 | for byte in serialized_result_batch_ptr_array { 220 | vec_meta.push(byte); 221 | } 222 | for byte in serialized_result_batch_alloc_len { 223 | vec_meta.push(byte); 224 | } 225 | let serialized_result_batch_meta: Box<[u8]> = vec_meta.into_boxed_slice(); 226 | let serialized_result_batch_meta_len: usize = serialized_result_batch_meta.len(); 227 | let serialized_result_batch_meta_ptr = allocate( 228 | serialized_result_batch_meta_len, 229 | ManuallyDrop::new(serialized_result_batch_meta), 230 | ); 231 | 232 | return serialized_result_batch_meta_ptr as u32; 233 | } 234 | 235 | /// Validates if a pointer has been properly allocated in this module 236 | /// # Arguments 237 | /// * `ptr` - pointer 238 | /// returns the size of the allocated memory area. 
It is 0 if the pointer is invalid 239 | pub fn validate_pointer(ptr: *const u8) -> usize { 240 | let cell: Cell = Cell::new(0); 241 | MEMORY_AREAS.with(|mem_map| match mem_map.borrow().get(&ptr) { 242 | Some(x) => cell.set(x.0), 243 | None => cell.set(0), 244 | }); 245 | return cell.get(); 246 | } 247 | 248 | /// Allocate some memory for the application to write data for the module 249 | /// Note: It is up to the application (and not the WASM module) to provide enough pages, so the module does not run out of memory 250 | /// This function can also be used internally by the WASM module to return data to the calling application of the module 251 | /// # Arguments 252 | /// * `size` - size of the memory area to allocate; `alloc_box` - the boxed buffer whose start address is returned; both are recorded in MEMORY_AREAS under that address 253 | /// returns a pointer to the allocated memory area 254 | pub fn allocate(size: usize, alloc_box: ManuallyDrop>) -> *const u8 { 255 | let result_ptr: *const u8 = alloc_box.as_ptr(); 256 | // keep the allocation registered in MEMORY_AREAS so that it is not cleaned up after the function exits 257 | MEMORY_AREAS.with(|mem_map| mem_map.borrow_mut().insert(result_ptr, (size, alloc_box))); 258 | return result_ptr; 259 | } 260 | -------------------------------------------------------------------------------- /LICENSE-EUPL-1.2: -------------------------------------------------------------------------------- 1 | EUROPEAN UNION PUBLIC LICENCE v. 1.2 2 | EUPL © the European Union 2007, 2016 3 | 4 | This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined 5 | below) which is provided under the terms of this Licence. Any use of the Work, 6 | other than as authorised under this Licence is prohibited (to the extent such 7 | use is covered by a right of the copyright holder of the Work).
8 | 9 | The Work is provided under the terms of this Licence when the Licensor (as 10 | defined below) has placed the following notice immediately following the 11 | copyright notice for the Work: 12 | 13 | Licensed under the EUPL 14 | 15 | or has expressed by any other means his willingness to license under the EUPL. 16 | 17 | 1. Definitions 18 | 19 | In this Licence, the following terms have the following meaning: 20 | 21 | - ‘The Licence’: this Licence. 22 | 23 | - ‘The Original Work’: the work or software distributed or communicated by the 24 | Licensor under this Licence, available as Source Code and also as Executable 25 | Code as the case may be. 26 | 27 | - ‘Derivative Works’: the works or software that could be created by the 28 | Licensee, based upon the Original Work or modifications thereof. This Licence 29 | does not define the extent of modification or dependence on the Original Work 30 | required in order to classify a work as a Derivative Work; this extent is 31 | determined by copyright law applicable in the country mentioned in Article 15. 32 | 33 | - ‘The Work’: the Original Work or its Derivative Works. 34 | 35 | - ‘The Source Code’: the human-readable form of the Work which is the most 36 | convenient for people to study and modify. 37 | 38 | - ‘The Executable Code’: any code which has generally been compiled and which is 39 | meant to be interpreted by a computer as a program. 40 | 41 | - ‘The Licensor’: the natural or legal person that distributes or communicates 42 | the Work under the Licence. 43 | 44 | - ‘Contributor(s)’: any natural or legal person who modifies the Work under the 45 | Licence, or otherwise contributes to the creation of a Derivative Work. 46 | 47 | - ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of 48 | the Work under the terms of the Licence. 
49 | 50 | - ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, 51 | renting, distributing, communicating, transmitting, or otherwise making 52 | available, online or offline, copies of the Work or providing access to its 53 | essential functionalities at the disposal of any other natural or legal 54 | person. 55 | 56 | 2. Scope of the rights granted by the Licence 57 | 58 | The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, 59 | sublicensable licence to do the following, for the duration of copyright vested 60 | in the Original Work: 61 | 62 | - use the Work in any circumstance and for all usage, 63 | - reproduce the Work, 64 | - modify the Work, and make Derivative Works based upon the Work, 65 | - communicate to the public, including the right to make available or display 66 | the Work or copies thereof to the public and perform publicly, as the case may 67 | be, the Work, 68 | - distribute the Work or copies thereof, 69 | - lend and rent the Work or copies thereof, 70 | - sublicense rights in the Work or copies thereof. 71 | 72 | Those rights can be exercised on any media, supports and formats, whether now 73 | known or later invented, as far as the applicable law permits so. 74 | 75 | In the countries where moral rights apply, the Licensor waives his right to 76 | exercise his moral right to the extent allowed by law in order to make effective 77 | the licence of the economic rights here above listed. 78 | 79 | The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to 80 | any patents held by the Licensor, to the extent necessary to make use of the 81 | rights granted on the Work under this Licence. 82 | 83 | 3. Communication of the Source Code 84 | 85 | The Licensor may provide the Work either in its Source Code form, or as 86 | Executable Code. 
If the Work is provided as Executable Code, the Licensor 87 | provides in addition a machine-readable copy of the Source Code of the Work 88 | along with each copy of the Work that the Licensor distributes or indicates, in 89 | a notice following the copyright notice attached to the Work, a repository where 90 | the Source Code is easily and freely accessible for as long as the Licensor 91 | continues to distribute or communicate the Work. 92 | 93 | 4. Limitations on copyright 94 | 95 | Nothing in this Licence is intended to deprive the Licensee of the benefits from 96 | any exception or limitation to the exclusive rights of the rights owners in the 97 | Work, of the exhaustion of those rights or of other applicable limitations 98 | thereto. 99 | 100 | 5. Obligations of the Licensee 101 | 102 | The grant of the rights mentioned above is subject to some restrictions and 103 | obligations imposed on the Licensee. Those obligations are the following: 104 | 105 | Attribution right: The Licensee shall keep intact all copyright, patent or 106 | trademarks notices and all notices that refer to the Licence and to the 107 | disclaimer of warranties. The Licensee must include a copy of such notices and a 108 | copy of the Licence with every copy of the Work he/she distributes or 109 | communicates. The Licensee must cause any Derivative Work to carry prominent 110 | notices stating that the Work has been modified and the date of modification. 111 | 112 | Copyleft clause: If the Licensee distributes or communicates copies of the 113 | Original Works or Derivative Works, this Distribution or Communication will be 114 | done under the terms of this Licence or of a later version of this Licence 115 | unless the Original Work is expressly distributed only under this version of the 116 | Licence — for example by communicating ‘EUPL v. 1.2 only’. 
The Licensee 117 | (becoming Licensor) cannot offer or impose any additional terms or conditions on 118 | the Work or Derivative Work that alter or restrict the terms of the Licence. 119 | 120 | Compatibility clause: If the Licensee Distributes or Communicates Derivative 121 | Works or copies thereof based upon both the Work and another work licensed under 122 | a Compatible Licence, this Distribution or Communication can be done under the 123 | terms of this Compatible Licence. For the sake of this clause, ‘Compatible 124 | Licence’ refers to the licences listed in the appendix attached to this Licence. 125 | Should the Licensee's obligations under the Compatible Licence conflict with 126 | his/her obligations under this Licence, the obligations of the Compatible 127 | Licence shall prevail. 128 | 129 | Provision of Source Code: When distributing or communicating copies of the Work, 130 | the Licensee will provide a machine-readable copy of the Source Code or indicate 131 | a repository where this Source will be easily and freely available for as long 132 | as the Licensee continues to distribute or communicate the Work. 133 | 134 | Legal Protection: This Licence does not grant permission to use the trade names, 135 | trademarks, service marks, or names of the Licensor, except as required for 136 | reasonable and customary use in describing the origin of the Work and 137 | reproducing the content of the copyright notice. 138 | 139 | 6. Chain of Authorship 140 | 141 | The original Licensor warrants that the copyright in the Original Work granted 142 | hereunder is owned by him/her or licensed to him/her and that he/she has the 143 | power and authority to grant the Licence. 144 | 145 | Each Contributor warrants that the copyright in the modifications he/she brings 146 | to the Work are owned by him/her or licensed to him/her and that he/she has the 147 | power and authority to grant the Licence. 
148 | 149 | Each time You accept the Licence, the original Licensor and subsequent 150 | Contributors grant You a licence to their contributions to the Work, under the 151 | terms of this Licence. 152 | 153 | 7. Disclaimer of Warranty 154 | 155 | The Work is a work in progress, which is continuously improved by numerous 156 | Contributors. It is not a finished work and may therefore contain defects or 157 | ‘bugs’ inherent to this type of development. 158 | 159 | For the above reason, the Work is provided under the Licence on an ‘as is’ basis 160 | and without warranties of any kind concerning the Work, including without 161 | limitation merchantability, fitness for a particular purpose, absence of defects 162 | or errors, accuracy, non-infringement of intellectual property rights other than 163 | copyright as stated in Article 6 of this Licence. 164 | 165 | This disclaimer of warranty is an essential part of the Licence and a condition 166 | for the grant of any rights to the Work. 167 | 168 | 8. Disclaimer of Liability 169 | 170 | Except in the cases of wilful misconduct or damages directly caused to natural 171 | persons, the Licensor will in no event be liable for any direct or indirect, 172 | material or moral, damages of any kind, arising out of the Licence or of the use 173 | of the Work, including without limitation, damages for loss of goodwill, work 174 | stoppage, computer failure or malfunction, loss of data or any commercial 175 | damage, even if the Licensor has been advised of the possibility of such damage. 176 | However, the Licensor will be liable under statutory product liability laws as 177 | far such laws apply to the Work. 178 | 179 | 9. Additional agreements 180 | 181 | While distributing the Work, You may choose to conclude an additional agreement, 182 | defining obligations or services consistent with this Licence. 
However, if 183 | accepting obligations, You may act only on your own behalf and on your sole 184 | responsibility, not on behalf of the original Licensor or any other Contributor, 185 | and only if You agree to indemnify, defend, and hold each Contributor harmless 186 | for any liability incurred by, or claims asserted against such Contributor by 187 | the fact You have accepted any warranty or additional liability. 188 | 189 | 10. Acceptance of the Licence 190 | 191 | The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ 192 | placed under the bottom of a window displaying the text of this Licence or by 193 | affirming consent in any other similar way, in accordance with the rules of 194 | applicable law. Clicking on that icon indicates your clear and irrevocable 195 | acceptance of this Licence and all of its terms and conditions. 196 | 197 | Similarly, you irrevocably accept this Licence and all of its terms and 198 | conditions by exercising any rights granted to You by Article 2 of this Licence, 199 | such as the use of the Work, the creation by You of a Derivative Work or the 200 | Distribution or Communication by You of the Work or copies thereof. 201 | 202 | 11. Information to the public 203 | 204 | In case of any Distribution or Communication of the Work by means of electronic 205 | communication by You (for example, by offering to download the Work from a 206 | remote location) the distribution channel or media (for example, a website) must 207 | at least provide to the public the information requested by the applicable law 208 | regarding the Licensor, the Licence and the way it may be accessible, concluded, 209 | stored and reproduced by the Licensee. 210 | 211 | 12. Termination of the Licence 212 | 213 | The Licence and the rights granted hereunder will terminate automatically upon 214 | any breach by the Licensee of the terms of the Licence. 
215 | 216 | Such a termination will not terminate the licences of any person who has 217 | received the Work from the Licensee under the Licence, provided such persons 218 | remain in full compliance with the Licence. 219 | 220 | 13. Miscellaneous 221 | 222 | Without prejudice of Article 9 above, the Licence represents the complete 223 | agreement between the Parties as to the Work. 224 | 225 | If any provision of the Licence is invalid or unenforceable under applicable 226 | law, this will not affect the validity or enforceability of the Licence as a 227 | whole. Such provision will be construed or reformed so as necessary to make it 228 | valid and enforceable. 229 | 230 | The European Commission may publish other linguistic versions or new versions of 231 | this Licence or updated versions of the Appendix, so far this is required and 232 | reasonable, without reducing the scope of the rights granted by the Licence. New 233 | versions of the Licence will be published with a unique version number. 234 | 235 | All linguistic versions of this Licence, approved by the European Commission, 236 | have identical value. Parties can take advantage of the linguistic version of 237 | their choice. 238 | 239 | 14. Jurisdiction 240 | 241 | Without prejudice to specific agreement between parties, 242 | 243 | - any litigation resulting from the interpretation of this License, arising 244 | between the European Union institutions, bodies, offices or agencies, as a 245 | Licensor, and any Licensee, will be subject to the jurisdiction of the Court 246 | of Justice of the European Union, as laid down in article 272 of the Treaty on 247 | the Functioning of the European Union, 248 | 249 | - any litigation arising between other parties and resulting from the 250 | interpretation of this License, will be subject to the exclusive jurisdiction 251 | of the competent court where the Licensor resides or conducts its primary 252 | business. 253 | 254 | 15. 
Applicable Law 255 | 256 | Without prejudice to specific agreement between parties, 257 | 258 | - this Licence shall be governed by the law of the European Union Member State 259 | where the Licensor has his seat, resides or has his registered office, 260 | 261 | - this licence shall be governed by Belgian law if the Licensor has no seat, 262 | residence or registered office inside a European Union Member State. 263 | 264 | Appendix 265 | 266 | ‘Compatible Licences’ according to Article 5 EUPL are: 267 | 268 | - GNU General Public License (GPL) v. 2, v. 3 269 | - GNU Affero General Public License (AGPL) v. 3 270 | - Open Software License (OSL) v. 2.1, v. 3.0 271 | - Eclipse Public License (EPL) v. 1.0 272 | - CeCILL v. 2.0, v. 2.1 273 | - Mozilla Public Licence (MPL) v. 2 274 | - GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 275 | - Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for 276 | works other than software 277 | - European Union Public Licence (EUPL) v. 1.1, v. 1.2 278 | - Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong 279 | Reciprocity (LiLiQ-R+). 280 | 281 | The European Commission may update this Appendix to later versions of the above 282 | licences without producing a new version of the EUPL, as long as they provide 283 | the rights granted in Article 2 of this Licence and protect the covered Source 284 | Code from exclusive appropriation. 285 | 286 | All other changes or additions to this Appendix require the production of a new 287 | EUPL version. 288 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | State: 16.07.2022 3 | 4 | **NOTE: THIS EXAMPLE IS FOR DEMONSTRATION ONLY - DO NOT USE FOR PRODUCTION APPLICATIONS** 5 | 6 | This is a small example to use Rust for an application to load dynamically another application module targetting wasm32-wasi. 
This target enables compilation to [WebAssembly](https://webassembly.org/) (WASM). Originally WASM was a framework for secure near-native speed browser applications, but it has since been extended to serve the needs of all types of cross-platform secure near-native applications, such as server-based or serverless ones. See also this [blog post](https://zuinnote.eu/blog/?p=1567) to find out more on the WASM ecosystem. 7 | 8 | The study contributes to [Zu Stateful Data Pipelines (ZuStDPipe)](https://codeberg.org/ZuInnoTe/zustdpipe) - a highly modular secure stateful data pipeline engine for all types of platforms. Since modularity is a core concept of this engine, it is crucial to assess the different possibilities to realize modularity (see also [here](https://zuinnote.eu/blog/?p=540)). The outcomes should be interpreted towards the use case of ZuSearch and not be generalized. 9 | 10 | Rust was selected as the language to write the highly modular search engine and most of its modules. However, third-party modules could also be written in other languages. 11 | 12 | Furthermore, the core engine is extremely lightweight - ideally it loads only modules in a secure way, but all other aspects that the user/developer needs are loaded dynamically from modules. 13 | 14 | All those aspects can be summarized as follows: 15 | * Core engine written in Rust that can be extended with modules written in (nearly) any language 16 | * Cross-platform support 17 | * Modules can be loaded dynamically on an as-needed basis 18 | * Modules can be loaded securely - this means all interactions happen through precise interfaces, clear memory separation (no rogue module can bring down the whole application) and we can define additional policies on what a module is allowed to do and what not (e.g.
access to filesystem) 19 | * There are potentially large data volumes to be exchanged between modules - it is not only about parameters and some return values 20 | ## Blog 21 | Find additional considerations in [this blog post](https://zuinnote.eu/blog/post.php?id=dd58f5f9-6517-40da-8341-410851c79fab). 22 | ## Code 23 | The code is available under: 24 | * Codeberg (a non-commercial European-hosted Git for Open Source): https://codeberg.org/ZuInnoTe/rust-wasm-dynamic-module-study 25 | * GitHub (a US-hosted commercial Git platform): https://github.com/ZuInnoTe/rust-wasm-dynamic-module-study 26 | 27 | ## License 28 | You can choose to use either the [EUPL-1.2](./LICENSE-EUPL-1.2) ([Web](https://spdx.org/licenses/EUPL-1.2.html)) or the [Apache-2.0](./LICENSE-Apache-2.0) ([Web](https://spdx.org/licenses/Apache-2.0.html)) license. 29 | 30 | # Technology Choices 31 | This is just a brief overview of the potential technology choices - it is not complete (raise an issue if you think some consideration is missing!). I will link a more detailed justification later. The following choices were considered: 32 | * Use Rust dylib: Precise interfaces are possible, but the future/current state of dylib is [rather unclear](https://rust-lang.github.io/rfcs/1510-cdylib.html) - I found hardly any application (beyond examples) that uses it (for this reason). Furthermore, there is no standardized Rust Application Binary Interface (ABI) (see [here](https://github.com/slightknack/rust-abi-wiki) for a Wiki summarizing the issues) - this means there is no support for other languages. Additionally, even within Rust it can lead to incompatibilities between different Rust versions. Cross-platform is possible, but one has to create dedicated binaries for each platform making it more complicated to ensure all modules for a given installation belong to the same platform, one has to ask module providers to compile for different targets etc. Since multiple platforms (e.g.
x86_64, arm64, specific processors for embedded etc.) are nowadays the reality, this is challenging. It might not be so efficient for massive data exchange between modules of different languages. 33 | * Use [Rust cdylib](https://doc.rust-lang.org/nomicon/ffi.html): This is the most common way to load dynamic libraries. Libraries can be loaded through the well-established C ABI (there is NO standard for the C ABI - it somehow evolved over the years into something standard-like due to adoption), which is supported in virtually any programming language. Cross-platform is possible, but with the same issues as for Rust dylib. It might not be so efficient for massive data exchange between modules written in different languages. 34 | * Use [WebAssembly](https://en.wikipedia.org/wiki/WebAssembly) (WASM): This is a newer way gaining a lot of popularity. All modules are compiled to WASM and can be dynamically loaded. It is an [established standard](https://webassembly.org/) supported in popular browsers and many different operating systems. Many programming languages can compile to WASM. Most of the runtimes also provide permission management (e.g. on the filesystem and network level). One does not need different binaries for each platform - it is the same binary across all platforms. There is a clear separation of memory between application and modules. Most of the WASM runtimes have a capabilities/permission model through WASI (see [here](https://github.com/bytecodealliance/wasmtime/blob/main/docs/WASI-capabilities.md)) that makes it possible to define security policies around modules. For instance, one can define a policy that module A is not allowed to access the network, but local storage. Module B is allowed to access the network, but not local storage. Complex datatypes, such as strings, arrays or lists are not standardized, but one can use Apache Arrow to serialize data that can be analyzed by modules written in different languages very efficiently.
35 | 36 | 37 | Note: the [C++ ABI](https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html) has a similar issue to the Rust ABI - it is not widely used and, similar to the C ABI, also not standardized. 38 | 39 | 40 | While WASM is the final choice, because it fits all criteria of the use case, there are still some aspects yet missing to fit the use case of ZuSearch: 41 | * 64-bit memory - large-scale search applications - especially with a lot of documents and/or machine learning augmented search - require a lot of memory. Currently, only 32-bit memory is supported, but the standardization of 64-bit memory is on its way (see [here](https://github.com/WebAssembly/memory64/blob/main/proposals/memory64/Overview.md)). Nevertheless, since each module can have up to 4 GB and each thread can instantiate its own module this might not be as limiting as one might think, but it still limits flexibility/practicality. 42 | * Threads (see [here](https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md) for the proposal) - This is mainly for the main application that will create threads and instantiate from those threads individual WASM modules. However, also within one module one may want to have multiple threads, e.g. for machine learning applications. 43 | * Multi-Memory (see [here](https://github.com/WebAssembly/multi-memory/blob/main/proposals/multi-memory/Overview.md)) provides multiple independent memory areas for a WASM module. This is especially relevant for our use case to increase safety as well as security for the data exchange between application and module. Currently, if the application or the module contains a bug, the application could accidentally write into the wrong part of the memory leading to malfunctioning of the module. 44 | * Component model - especially WASM Interface Types (WIT). At the moment, an application can only share with a module a common memory ("Store") or simple integer parameters.
However, ZuSearch likely will involve complex interfaces with different basic types (e.g. strings) or complex structures (e.g. arrays, structs etc.). While they can be handed over in memory, there is no standard for what a string or structure looks like and every programming language represents them differently. A standard can help here to reduce the development efforts especially in different programming languages. Given the use case of ZuSearch of large data, [Arrow](https://github.com/apache/arrow-rs) might be a more suitable alternative to WIT. See [here](https://github.com/WebAssembly/component-model) for the standardization and [here](https://radu-matei.com/blog/intro-wasm-components/) for a blog describing more practical aspects. 45 | * Exception handling - exceptions do not currently interrupt execution of the application and/or module. See [here](https://github.com/WebAssembly/exception-handling/blob/master/proposals/exception-handling/Exceptions.md) for the standardization 46 | * Module repository. Many programming languages have module repositories (e.g. Maven Central for Java, PyPI for Python, NPM for JavaScript/TypeScript etc.) that include (binary/transpiled etc.) versions of the modules to be loaded by an application (usually at compile time, but also at runtime). The way to access them is standardized. WASM has no standard for module repositories, but there are multiple competing ones (see e.g. this [blog post](https://zuinnote.eu/blog/?p=1567)). The usage of module repositories in WASM is still in its infancy. 47 | * Permission/Capability model in WASI is still work-in-progress and expected to be significantly enhanced. 48 | 49 | Fortunately, those are addressed or being addressed (see also [WebAssembly Roadmap](https://webassembly.org/roadmap/) or [active WebAssembly Proposals](https://github.com/WebAssembly/proposals)). They do not block the further development of ZuSearch if not yet fully available.
Nevertheless, the core assumption is that they become available eventually to realize all benefits and requirements of the use case. 50 | 51 | # Runtime choices 52 | 53 | There are a lot of runtimes (see e.g. this [blog post](https://zuinnote.eu/blog/?p=1567)). We use in this example [wasmtime](https://wasmtime.dev/), but any of the other ones supporting Rust (e.g. [Wasmer](https://wasmer.io/)) would have worked as well. 54 | 55 | We integrate the runtime as a dependency in the application. The application itself is currently compiled to a native platform (e.g. x86_64). It is unclear whether the application can also be compiled to WASM in the future. However, since it will have only minimal functionality anyway and most of the functionality exists in the WASM modules, this is less critical. 56 | 57 | 58 | # Exchange of data between application and modules 59 | A crucial part is how the application and the modules exchange data via the shared memory as the WASM component model is currently not standardized. We look at the following aspects: 60 | * Exchange via C types (via the established C ABI). Usually many programming languages have support for this, but it requires a lot of "boilerplate" code from the developer. Furthermore, it is slow especially due to the frequent (de-)serialization of data when moving data between modules of different programming languages. 61 | * Exchange via Rust types. While other programming languages can read any data, the processing of Rust datatypes in other programming languages needs dedicated implementations, which do not exist out of the box. Similarly to C types a lot of boilerplate code is required. Furthermore, it is slow especially due to the frequent (de-)serialization of data when moving data between modules of different programming languages. 62 | * Exchange via [Apache Arrow](https://arrow.apache.org/overview/).
Apache Arrow is an in-memory analytics format that can be read in many different programming languages and is mostly for tabular data. While this is less useful for standard module integration (functions with different parameters, returning value(s)), it can be very useful for the case of ZuSearch where different modules potentially written in different programming languages need to process data. Additionally, frequent (de-)serialization is not needed, because all programming languages can work directly on the data in Arrow format. In a later stage, ZuSearch may also involve working in a distributed cluster, which could be facilitated by using [arrow-flight](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/). Hence, it is the mechanism of choice when working with different modules. It is probably the better choice compared to the upcoming WebAssembly Types for this specific use case of massive data processing. 63 | * Note: Rust arrow needs to use to_raw() so other programming languages can use the Arrow data. 64 | 65 | 66 | # Study 67 | The study here is a very simple application written in Rust that dynamically loads a module written in Rust compiled to WASM: 68 | * [wasm-app](./wasm-app/) - the main application that 69 | * dynamically loads the functions in module1 with a parameter string and gets a string in return 70 | * dynamically loads the function in module2 with data in Arrow IPC serialization format and gets some data in Arrow IPC serialization format back 71 | * [wasm-module1](./wasm-module1/) - an example module that has one function with a parameter name that returns the string "Hello World, Name!". 72 | * Covers exchange via C ABI types and Rust ABI types 73 | * C ABI is with a parameter pointer to a CString in the WASM module memory containing the name.
Return is a pointer in the WASM module memory containing the greeting as a CString 74 | * Rust ABI is with two parameters: A pointer to the Rust String in the WASM module memory containing the name AND the length of the string. Return is a pointer in the WASM module memory containing another pointer and length of the string. Reason is that contrary to C strings, Rust strings are not terminated by \0. 75 | * [wasm-module2](./wasm-module2/) - an example module that has a function with two parameters: a pointer to serialized data in Arrow IPC format and the size of the serialized data. Return is a pointer in the WASM module memory to the pointer and size of the data processed by the function, serialized in Arrow IPC format. We can implement in Arrow mandatory attributes of a document (e.g. id etc.) and also more flexible dictionaries by having an Array of the struct(key,value), e.g. [{key: "category",value:"news"}] 76 | 77 | 78 | We compile in Rust the module to the target "wasm32-wasip1" (see [here](https://dev-doc.rust-lang.org/stable/rustc/platform-support/wasm32-wasip1.html)). 79 | 80 | Note: We also link the WASI modules dynamically into the module. However, WASI is still experimental and its standardization is not finalized. We could also circumvent the use of WASI (e.g. by not relying on std etc.), but since WASI will anyway be needed for the use case of ZuStDPipe (e.g. file, network access as well as corresponding permissions) we included it also in the study. 81 | 82 | ## Flow to exchange data between the application and the modules 83 | Currently, each WASM module is loaded into a memory. The application calling the module can write to this memory to exchange data with the module. The problem is that the application does not know where it can write the data. Hence, each module - as in our examples - needs to provide a function to the application ("allocate") that returns an area where it can safely write data.
Additionally, the module needs also to provide a function that frees the data ("deallocate") after the application has finished processing the results of a function call. 84 | 85 | Note: Once Multi-Memory is supported by WASM you should exchange data via dedicated memories for parameter exchange: one for parameters from the application to the module and another one from the module to the application. You will still need allocation functions (that are application internal and not module internal), so that in case of multi-threading the threads do not overwrite each other's data. 86 | 87 | Find here an illustration of the current situation (only one memory for the module is possible and it is shared with the application) using the steps as pseudocode. 88 | 89 | ```mermaid 90 | sequenceDiagram 91 | Application->>+Application: name="test" 92 | Application->>+Module_1: ptr=allocate(len(name)) 93 | Module_1->>+Module_1:MEMORY_AREAS.insert(ptr, len(name)) 94 | Module_1->>+Application: memory_offset_parameter 95 | Application->>+Application: write_to_memory_module_1(name, memory_offset_parameter) 96 | Application->>+Module_1:call("wasm_memory_rust_format_hello_world", memory_offset_parameter, len(name)) 97 | Module_1->>+Module_1: validate_pointer(MEMORY_AREAS.get(memory_offset_parameter)) 98 | Module_1->>+Module_1: answer="Hello World, test!" 99 | Module_1->>+Module_1: result_ptr=allocate(len(answer)) 100 | Module_1->>+Application: result_ptr 101 | Application->>+Application: result=read_from_memory_module_1(result_ptr) 102 | Application->>+Module_1: deallocate(ptr) 103 | Application->>+Module_1: deallocate(result_ptr) 104 | Application->>+Application: println!("{}",result) 105 | ``` 106 | 107 | This is a lot of code for simply calling a function of the module with a name ("test") that returns a greeting as string ("Hello World, test!") that is then printed to the console by the application. 
Some important aspects: 108 | * The application needs to request from the module a free memory area in the shared memory to store the parameter (e.g. "test") 109 | * The application needs to write the string in a format (see discussion above on ABIs and Arrow serialization) that the module understands to the allocated shared memory areas 110 | * The Module also needs to reserve an area in the memory to store its answer and send the pointer to this answer in the shared memory to the application 111 | * The application needs to read the answer from the shared memory 112 | * The application needs to explicitly call the deallocate function of the module to release the memory for the parameter and the result - otherwise the shared memory area will be filled with data that is not needed anymore 113 | * The application prints out the result to the console 114 | 115 | The concept works for the exchange of Arrow serialized data similarly. In this case module_2 processes the data in Arrow IPC format provided by the application. 116 | # Build and run 117 | You can build the modules by executing the following command in their folder: 118 | ``` 119 | cargo build --release --target wasm32-wasip1 120 | ``` 121 | 122 | The reason for building a release is that otherwise the module2 containing a wasi runtime and the Arrow library becomes very large and loading it in the application takes ages. 123 | 124 | You can build the application by running the following command: 125 | ``` 126 | cargo build 127 | ``` 128 | 129 | You can then run the application by executing target/debug/wasm-app 130 | 131 | Note: The application itself is not compiled to WASM. This is at the moment not possible (e.g. lack of thread support in WASM etc.), but is of lesser relevance for now for the study and also because it will have minimal functionality itself and all the functionality is implemented by modules. 
132 | 133 | 134 | # Observations 135 | 136 | * The application cannot be compiled to wasm32-wasip1 yet (although desired for the future to leverage WASM also in the ZuSearch core), because wasmtime or more specifically WASI does not seem to support this yet. 137 | * The modules can be compiled to wasm32-wasip1 and loaded by the application. 138 | * We use cdylib annotation to create the WASM, but cdylib itself is not needed - this is needed because the WASM Interface Type (WIT), ie the WASM component model is not yet fully specified and implemented 139 | * Including the wasmtime runtime in the application leads to a larger single binary (ca. 14 MB wasmtime 0.38.1 compiled as release). While the size is not very large, it limits the type of embedded device where to deploy it. However, the use case of ZuSearch also does not justify to embed it on any device (e.g. Arduino would be out of scope). 140 | * While there is a WASM Component Example (cf. [here](https://github.com/radu-matei/wasm-components-example)) - it only describes static linking at compile time and it is unclear yet if this works at runtime as well. As mentioned, for our use case probably exchanging data via Arrow is the better choice in any case and we can do it already now. 141 | * One needs to exchange data via a shared memory with the module. While this is possible, the module needs to validate the data in the [shared memory](https://docs.rs/wasmtime/0.38.1/wasmtime/struct.Memory.html) for correctness and the application needs to also check that returned data is correct. Especially one needs to make sure that no memory out-of-bound is accessed, shared memory is very short-lived, memory operations are atomic and that multiple threads do not overwrite each other's data. For the latter case it might be best that each thread instantiate its own module. 
Finally, one needs to take care that the memory is converted to programming-language specific datatypes, which will be cumbersome to support, if one does not use Apache Arrow or similar. 142 | * We covered in the study different ways on exchanging data between the app and memory 143 | * It is not clear if the WASM component model interface types bring any advantage over using Apache Arrow for exchange as the WASM component model interface types is merely for calling functions with parameters and return values. Especially since for the case of ZuSearch, we do not have necessarily complex module interfaces, but the main focus is (large) data processing. 144 | * Each module will have to provide an allocate function, so that one does not write arbitrarily in the modules working memory 145 | * Apache Arrow seems to increase the module size 2x in release mode (4 instead of 2 MB) 146 | * You need to compile the modules and the app with the flag --release to have the right performance 147 | * If we exchange a lot of data between modules, one should avoid to return a copy of the data with some modification as this always implies at least double the memory size. For example let us assume you have the data ```{"doc1": {title: "test", content:"this is a test"}, "doc2": {title: "spare thing", content:"this is a spare part"}}``` and you have a function that replaces all occurrences of "part" with "piece" then the function would still return the full data, ie including doc1 and the title of doc2, although they are not modified at all. Here one may better replace the data in-place (maybe with some indicator if it has changed and the new size of the data). 148 | * The shared memory might need to grow and this is not automatic - it has to be initiated, ie one needs to check if the data to be written still fits into the memory - a simple and clear function needs to be written for this. 
While this is not an issue with rather standard WASM modules, it will be for the use case of ZuSearch as often the default page size might not be sufficient. The default page seems to be somehow standardized (?) around 64 KB (see [here](https://docs.rs/wasmtime-environ/latest/wasmtime_environ/constant.WASM_PAGE_SIZE.html) for wasmtime), but may be different in certain scenarios. 149 | * There is some inherent memory safety of the shared memory, e.g. one cannot read more than what is available and with a clear separation of memory of different modules (and/or different threads of the same module) one can avoid that they read/write in each memory. Also the modules cannot write into the application memory (*unless a callback is imported by the application that allows to write into memory in an unsafe way) 150 | * One needs to check the number of modules etc. to avoid that unnecessary memory is allocated to them, ie do not load everything in memory and then process, but work more lightweight (load chunks into memory and process) 151 | * Modules might need to request memory themselves from the application calling them to store potentially additional data due to the transformations they do on the input data (e.g. in case of zusearch replacing in a text > 64 KB certain values). Again, this should be avoided as much as possible, a maximum memory limit / module / instance should be configurable. The system itself should be designed to stream data through modules, avoid conversion steps and reducing memory usage. -------------------------------------------------------------------------------- /wasm-app/src/main.rs: -------------------------------------------------------------------------------- 1 | //! 
mostly adapted from: https://docs.rs/wasmtime/latest/wasmtime/ 2 | use anyhow; 3 | use wasmtime::AsContextMut; 4 | use wasmtime::Engine; 5 | use wasmtime::Instance; 6 | use wasmtime::Linker; 7 | use wasmtime::Module; 8 | use wasmtime::Store; 9 | use wasi_common::sync::WasiCtxBuilder; 10 | use wasi_common::WasiCtx; 11 | 12 | use std::ffi::CStr; 13 | use std::ffi::CString; 14 | use std::sync::Arc; 15 | 16 | use arrow::array::{ 17 | ArrayRef, Float64Array, StringArray, StructArray, TimestampSecondArray, UInt64Array, 18 | }; 19 | use arrow::datatypes::{DataType, Field, Schema, TimeUnit}; 20 | use arrow::ipc::reader::StreamReader; 21 | use arrow::ipc::writer::StreamWriter; 22 | use arrow::record_batch::RecordBatch; 23 | use arrow::util::pretty::print_batches; 24 | 25 | use time::macros::datetime; 26 | 27 | struct MyState { 28 | wasi: WasiCtx, 29 | } 30 | 31 | /// Main function that loads a WASM module 32 | fn main() { 33 | println!("Initializing WASM engine..."); 34 | let engine: Engine = init_wasm_engine().unwrap(); 35 | println!("Loading WASM module 1..."); 36 | let module: Module = init_wasm_module_1(&engine).unwrap(); 37 | println!("Module1: Running WASM function answer..."); 38 | let result_answer = wrapper_answer(&engine, &module).unwrap(); 39 | println!("Result from WASM function \"answer\": {}", result_answer); 40 | println!("Module 1: Running WASM function c_format_hello_world..."); 41 | let result_c_format_hello_world = 42 | wrapper_wasm_c_format_hello_world(&engine, &module, "Rust (C ABI)").unwrap(); 43 | println!( 44 | "Result from WASM function \"c_format_hello_world\": {}", 45 | result_c_format_hello_world 46 | ); 47 | println!("Module 1: Running WASM function rust_format_hello_world..."); 48 | let result_rust_format_hello_world = 49 | wrapper_wasm_rust_format_hello_world(&engine, &module, "Rust (Rust ABI)".to_string()) 50 | .unwrap(); 51 | println!( 52 | "Result from WASM function \"rust_format_hello_world\": {}", 53 | result_rust_format_hello_world 54 | 
); 55 | println!("Loading WASM module 2..."); 56 | let module: Module = init_wasm_module_2(&engine).unwrap(); 57 | println!("Module 2: Running WASM function arrow_process_document..."); 58 | wrapper_wasm_process_data_arrow(&engine, &module).unwrap(); 59 | } 60 | 61 | /// Init the WASM Engine 62 | /// returns the WASM engine 63 | fn init_wasm_engine() -> anyhow::Result { 64 | // Create an "Engine" to run wasm modules 65 | let engine = Engine::default(); 66 | Ok(engine) 67 | } 68 | 69 | /// Initialize WASM module 1 70 | /// # Arguments 71 | /// * `engine` - wasmtime engine to use for the store 72 | /// * `store` - in-memory store to use to exchange data with the function 73 | /// returns the module 74 | fn init_wasm_module_1(engine: &Engine) -> anyhow::Result { 75 | // load WASM module 76 | let module = Module::from_file( 77 | &engine, 78 | "../../../wasm-module1/target/wasm32-wasip1/release/wasm_module1.wasm", 79 | )?; 80 | Ok(module) 81 | } 82 | 83 | /// Initialize WASM module 2 84 | /// # Arguments 85 | /// * `engine` - wasmtime engine to use for the store 86 | /// * `store` - in-memory store to use to exchange data with the function 87 | /// returns the module 88 | fn init_wasm_module_2(engine: &Engine) -> anyhow::Result { 89 | // load WASM module 90 | let module = Module::from_file( 91 | &engine, 92 | "../../../wasm-module2/target/wasm32-wasip1/release/wasm_module2.wasm", 93 | )?; 94 | Ok(module) 95 | } 96 | 97 | /// Wrapper around the function answer of the WASM Module. This is needed as the standardization of the componennt model and webassembly interface types is still work-in-progress 98 | /// # Arguments (note the function `answer` of the WASM module itself has no parameters. 
The parameters are just to initialize the runtime environment) 99 | /// * `engine` - wasmtime engine to use for the store 100 | /// * `module` - module containing the WASM function 101 | /// returns the result of the function `answer` 102 | fn wrapper_answer(engine: &Engine, module: &Module) -> anyhow::Result { 103 | // Load function an instantiate it 104 | let mut linker = Linker::new(&engine); 105 | wasi_common::sync::add_to_linker(&mut linker, |state: &mut MyState| &mut state.wasi)?; 106 | // store to exchange data with the WASM module 107 | let wasi = WasiCtxBuilder::new() 108 | .inherit_stdio() 109 | .inherit_args()? 110 | .build(); 111 | let mut store = Store::new(&engine, MyState { wasi: wasi }); 112 | // instantiate module 113 | // let instance = Instance::new(&mut store, &module, &[])?; 114 | linker.module(&mut store, "", &module)?; 115 | let instance: Instance = linker.instantiate(&mut store, &module).unwrap(); 116 | // get the function 117 | let func_def = instance 118 | .get_func(&mut store, "answer") 119 | .expect("`answer` was not an exported function"); 120 | // validate that it corresponds to the parameters and return types we need 121 | let func_validated = func_def.typed::<(), i32>(&store)?; 122 | // call function 123 | let result = func_validated.call(&mut store, ())?; 124 | Ok(result) 125 | } 126 | 127 | /// Wrapper around the function format_hello_world (C ABI) of the WASM Module. This is needed as the standardization of the component model and webassembly interface types is still work-in-progress 128 | /// # Arguments (note the function `format_hello_world` of the WASM module itself has just one parameter: `func_name`. 
The pther parameters are just to initialize the runtime environment) 129 | /// * `engine` - wasmtime engine to use for the store 130 | /// * `module` - module containing the WASM function 131 | /// * `func_name` - Parameter `name` for the function 132 | /// returns the result of the function `format_hello_world` 133 | fn wrapper_wasm_c_format_hello_world( 134 | engine: &Engine, 135 | module: &Module, 136 | func_name: &str, 137 | ) -> anyhow::Result { 138 | // convert param to CString 139 | let param_name_str = func_name; 140 | let param_name_cstring: CString = CString::new(param_name_str).unwrap(); 141 | let param_name_cstring_as_bytes: &[u8] = param_name_cstring.to_bytes_with_nul(); 142 | // Load function an instantiate it 143 | let mut linker = Linker::new(&engine); 144 | wasi_common::sync::add_to_linker(&mut linker, |state: &mut MyState| &mut state.wasi)?; 145 | // store to exchange data with the WASM module 146 | let wasi = WasiCtxBuilder::new() 147 | .inherit_stdio() 148 | .inherit_args()? 
149 | .build(); 150 | let mut store = Store::new(&engine, MyState { wasi: wasi }); 151 | // instantiate module 152 | // let instance = Instance::new(&mut store, &module, &[])?; 153 | linker.module(&mut store, "", &module)?; 154 | let instance: Instance = linker.instantiate(&mut store, &module).unwrap(); 155 | // allocate shared memory for the parameter 156 | // allocate some memory within the WASM module 157 | let offset: u32 = wrapper_wasm_allocate( 158 | instance, 159 | &mut store, 160 | param_name_cstring_as_bytes.len() as u32, 161 | ) 162 | .unwrap() as u32; 163 | 164 | // get the function 165 | let func_def = instance 166 | .get_func(&mut store, "wasm_memory_c_format_hello_world") 167 | .expect("`wasm_memory_c_format_hello_world` was not an exported function"); 168 | // validate that it corresponds to the parameters and return types we need 169 | let func_validated = func_def.typed::(&store)?; 170 | 171 | // prepare handing over CString as input 172 | // instantiate memory 173 | let memory = instance 174 | .get_memory(&mut store, "memory") 175 | .ok_or(anyhow::format_err!("failed to find `memory` export"))?; 176 | memory 177 | .write( 178 | &mut store, 179 | offset.try_into().unwrap(), 180 | param_name_cstring_as_bytes, 181 | ) 182 | .unwrap(); 183 | // call function answer 184 | let result_offset = func_validated.call(&mut store, offset)?; 185 | if result_offset == 0 { 186 | anyhow::bail!("Error: No valid answer received from function") 187 | } else { 188 | let mut result_offset_position = result_offset; 189 | // read answer 190 | let mut buffer = [1u8; 1]; 191 | let mut result_v_u8: Vec = Vec::new(); 192 | while buffer[0] != 0u8 { 193 | memory.read( 194 | &store, 195 | result_offset_position.try_into().unwrap(), 196 | &mut buffer, 197 | )?; 198 | result_v_u8.push(buffer[0]); 199 | result_offset_position += 1; 200 | } 201 | // deallocate shared WASM Module memory 202 | let dealloc_param_code: i32 = 203 | wrapper_wasm_deallocate(instance, &mut store, offset as 
*const u8).unwrap(); 204 | if dealloc_param_code != 0 { 205 | println!("Error: Could not deallocate shared WASM module memory for parameter"); 206 | } 207 | let dealloc_return_code: i32 = 208 | wrapper_wasm_deallocate(instance, &mut store, result_offset as *const u8).unwrap(); 209 | if dealloc_return_code != 0 { 210 | println!("Error: Could not deallocate shared WASM module memory for result"); 211 | } 212 | // convert answer 213 | let c_str: &CStr = unsafe { CStr::from_ptr(result_v_u8.as_ptr() as *const i8) }; 214 | let result_str: &str = c_str.to_str().unwrap(); 215 | Ok(result_str.to_string()) 216 | } 217 | } 218 | 219 | /// Wrapper around the function format_hello_world (Rust ABI) of the WASM Module. This is needed as the standardization of the component model and webassembly interface types is still work-in-progress 220 | /// # Arguments (note the function `format_hello_world` of the WASM module itself has just one parameter: `func_name`. The other parameters are just to initialize the runtime environment) 221 | /// * `engine` - wasmtime engine to use for the store 222 | /// * `module` - module containing the WASM function 223 | /// * `func_name` - Parameter `name` for the function 224 | /// returns the result of the function `format_hello_world` 225 | fn wrapper_wasm_rust_format_hello_world( 226 | engine: &Engine, 227 | module: &Module, 228 | func_name: String, 229 | ) -> anyhow::Result { 230 | // Load function an instantiate it 231 | let mut linker = Linker::new(&engine); 232 | wasi_common::sync::add_to_linker(&mut linker, |state: &mut MyState| &mut state.wasi)?; 233 | // store to exchange data with the WASM module 234 | let wasi = WasiCtxBuilder::new() 235 | .inherit_stdio() 236 | .inherit_args()? 
237 | .build(); 238 | let mut store = Store::new(&engine, MyState { wasi: wasi }); 239 | // instantiate module 240 | // let instance = Instance::new(&mut store, &module, &[])?; 241 | linker.module(&mut store, "", &module)?; 242 | let instance: Instance = linker.instantiate(&mut store, &module).unwrap(); 243 | // get the function 244 | let func_def = instance 245 | .get_func(&mut store, "wasm_memory_rust_format_hello_world") 246 | .expect("`wasm_memory_rust_format_hello_world` was not an exported function"); 247 | // validate that it corresponds to the parameters and return types we need 248 | let func_validated = func_def.typed::<(u32, u32), u32>(&store)?; 249 | 250 | // prepare handing over Rust as input 251 | // instantiate memory 252 | let memory = instance 253 | .get_memory(&mut store, "memory") 254 | .ok_or(anyhow::format_err!("failed to find `memory` export"))?; 255 | let param_name_str: String = func_name.to_string(); 256 | let param_name_string_as_bytes: &[u8] = param_name_str.as_bytes(); 257 | // allocate some memory within the WASM module 258 | let offset: u32 = wrapper_wasm_allocate( 259 | instance, 260 | &mut store, 261 | param_name_string_as_bytes.len() as u32, 262 | ) 263 | .unwrap() as u32; 264 | let length: u32 = param_name_str.len() as u32; 265 | memory 266 | .write( 267 | &mut store, 268 | offset.try_into().unwrap(), 269 | param_name_string_as_bytes, 270 | ) 271 | .unwrap(); 272 | // call function answer 273 | let result_offset = func_validated.call(&mut store, (offset, length))?; 274 | if result_offset == 0 { 275 | anyhow::bail!("Error: No valid answer received from function") 276 | } else { 277 | let mut result_offset_position = result_offset; 278 | // read answer from memory: these are two values: offset and length of the return string 279 | // read metadata (offset and length of the sring) 280 | // note: WebAssembly is by default 32 bit 281 | let mut ptr_buffer = [0u8; (u32::BITS / 8) as usize]; 282 | let mut len_buffer = [0u8; (u32::BITS / 8) 
as usize]; 283 | memory.read( 284 | &store, 285 | result_offset_position.try_into().unwrap(), 286 | &mut ptr_buffer, 287 | )?; 288 | result_offset_position += (u32::BITS / 8) as u32; 289 | memory.read( 290 | &store, 291 | result_offset_position.try_into().unwrap(), 292 | &mut len_buffer, 293 | )?; 294 | let result_ptr = u32::from_le_bytes(ptr_buffer); 295 | let result_len = u32::from_le_bytes(len_buffer); 296 | // read the string 297 | let mut result_vec: Vec = vec![0; result_len as usize]; 298 | let mut result_str_buffer = result_vec.as_mut_slice(); 299 | memory.read( 300 | &store, 301 | result_ptr.try_into().unwrap(), 302 | &mut result_str_buffer, 303 | )?; 304 | // deallocate shared WASM Module memory 305 | let dealloc_param_code: i32 = 306 | wrapper_wasm_deallocate(instance, &mut store, offset as *const u8).unwrap(); 307 | if dealloc_param_code != 0 { 308 | println!("Error: Could not deallocate shared WASM module memory for parameter"); 309 | } 310 | let dealloc_return_meta_code: i32 = 311 | wrapper_wasm_deallocate(instance, &mut store, result_offset as *const u8).unwrap(); 312 | if dealloc_return_meta_code != 0 { 313 | println!("Error: Could not deallocate shared WASM module memory for return metadata"); 314 | } 315 | let dealloc_return_data_code: i32 = 316 | wrapper_wasm_deallocate(instance, &mut store, result_ptr as *const u8).unwrap(); 317 | if dealloc_return_data_code != 0 { 318 | println!("Error: Could not deallocate shared WASM module memory for return data"); 319 | } 320 | let result_str: String = String::from_utf8_lossy(&result_str_buffer).into_owned(); 321 | Ok(result_str.to_string()) 322 | } 323 | } 324 | 325 | /// Wrapper around the function process_data_arrow (Use Arrow for cross-programming language data serialization) of the WASM Module. 326 | /// # Arguments (note the function `process_data_arrow` of the WASM module itself expects to have the Arrow data exchanged in the module memory. 
The Arrow data is generated in this application through the functions create_arrow_example_meta_data (instructing the function what to do with the data) and create_arrow_example_data (containing the data to be processed) 327 | /// * `engine` - wasmtime engine to use for the store 328 | /// * `module` - module containing the WASM function 329 | /// returns the result of the function `format_hello_world` 330 | fn wrapper_wasm_process_data_arrow(engine: &Engine, module: &Module) -> anyhow::Result { 331 | // Load function an instantiate it 332 | let mut linker = Linker::new(&engine); 333 | wasi_common::sync::add_to_linker(&mut linker, |state: &mut MyState| &mut state.wasi)?; 334 | // store to exchange data with the WASM module 335 | let wasi = WasiCtxBuilder::new() 336 | .inherit_stdio() 337 | .inherit_args()? 338 | .build(); 339 | let mut store = Store::new(&engine, MyState { wasi: wasi }); 340 | // instantiate module 341 | // let instance = Instance::new(&mut store, &module, &[])?; 342 | linker.module(&mut store, "", &module)?; 343 | let instance: Instance = linker.instantiate(&mut store, &module).unwrap(); 344 | // get the function 345 | let func_def = instance 346 | .get_func(&mut store, "wasm_memory_process_data_arrow") 347 | .expect("`wasm_memory_process_data_arrow` was not an exported function"); 348 | // validate that it corresponds to the parameters and return types we need 349 | let func_validated = func_def.typed::<(u32, u32, u32, u32), u32>(&store)?; 350 | 351 | // prepare handing Arrow data 352 | let serialized_meta_data = create_arrow_example_meta_data(); 353 | let serialized_meta_data_size = serialized_meta_data.len(); 354 | let serialized_data = create_arrow_example_data(); 355 | let serialized_data_size = serialized_data.len(); 356 | 357 | // instantiate memory 358 | let memory = instance 359 | .get_memory(&mut store, "memory") 360 | .ok_or(anyhow::format_err!("failed to find `memory` export"))?; 361 | 362 | // allocate some memory within the WASM 
module for metadata 363 | let offset_meta_data: u32 = 364 | wrapper_wasm_allocate(instance, &mut store, serialized_meta_data_size as u32).unwrap() 365 | as u32; 366 | memory 367 | .write( 368 | &mut store, 369 | offset_meta_data.try_into().unwrap(), 370 | serialized_meta_data.as_slice(), 371 | ) 372 | .unwrap(); 373 | // allocate some memory within the WASM module for data 374 | let offset_data: u32 = 375 | wrapper_wasm_allocate(instance, &mut store, serialized_data_size as u32).unwrap() as u32; 376 | memory 377 | .write( 378 | &mut store, 379 | offset_data.try_into().unwrap(), 380 | serialized_data.as_slice(), 381 | ) 382 | .unwrap(); 383 | // call function answer 384 | let result_offset = func_validated.call( 385 | &mut store, 386 | ( 387 | offset_meta_data, 388 | serialized_meta_data_size as u32, 389 | offset_data, 390 | serialized_data_size as u32, 391 | ), 392 | )?; 393 | // deallocate shared WASM Module memory 394 | let dealloc_meta_data_code: i32 = 395 | wrapper_wasm_deallocate(instance, &mut store, offset_meta_data as *const u8).unwrap(); 396 | if dealloc_meta_data_code != 0 { 397 | println!("Error: Could not deallocate shared WASM module memory for meta data"); 398 | } 399 | let dealloc_data_code: i32 = 400 | wrapper_wasm_deallocate(instance, &mut store, offset_data as *const u8).unwrap(); 401 | if dealloc_data_code != 0 { 402 | println!("Error: Could not deallocate shared WASM module memory for data"); 403 | } 404 | if result_offset == 0 { 405 | anyhow::bail!("Error: No valid answer received from function") 406 | } else { 407 | let mut result_offset_position = result_offset; 408 | // read answer from memory: these are two values: offset of the processed data and size of the processed data in Arrow IPC format 409 | // read metadata (offset and size of the Arrow IPC data) 410 | // note: WebAssembly is by default 32 bit 411 | let mut ptr_buffer = [0u8; (u32::BITS / 8) as usize]; 412 | let mut len_buffer = [0u8; (u32::BITS / 8) as usize]; 413 | memory.read( 
414 | &store, 415 | result_offset_position.try_into().unwrap(), 416 | &mut ptr_buffer, 417 | )?; 418 | result_offset_position += (u32::BITS / 8) as u32; 419 | memory.read( 420 | &store, 421 | result_offset_position.try_into().unwrap(), 422 | &mut len_buffer, 423 | )?; 424 | let result_ptr = u32::from_le_bytes(ptr_buffer); 425 | let result_len = u32::from_le_bytes(len_buffer); 426 | // read the Arrow IPC data 427 | let mut result_arrow_ipc: Vec = vec![0; result_len as usize]; 428 | let mut result_arrow_ipc_buffer = result_arrow_ipc.as_mut_slice(); 429 | memory.read( 430 | &store, 431 | result_ptr.try_into().unwrap(), 432 | &mut result_arrow_ipc_buffer, 433 | )?; 434 | let dealloc_return_meta_code: i32 = 435 | wrapper_wasm_deallocate(instance, &mut store, result_offset as *const u8).unwrap(); 436 | if dealloc_return_meta_code != 0 { 437 | println!("Error: Could not deallocate shared WASM module memory for return metadata"); 438 | } 439 | let dealloc_return_data_code: i32 = 440 | wrapper_wasm_deallocate(instance, &mut store, result_ptr as *const u8).unwrap(); 441 | if dealloc_return_data_code != 0 { 442 | println!("Error: Could not deallocate shared WASM module memory for return data"); 443 | } 444 | // check correctness of returned Arrow IPC data 445 | println!("Displaying Arrow answer from Module"); 446 | let stream_reader = StreamReader::try_new(result_arrow_ipc.as_slice(), None).unwrap(); 447 | 448 | for item in stream_reader { 449 | print_batches(&[item.unwrap()]).unwrap(); 450 | } 451 | } 452 | Ok("".to_string()) 453 | } 454 | 455 | /// Wrapper around the allocate function of the WASM module to allocate shared WASM memory. 
Allocate some memory for the application to write data for the module 456 | /// Note: It is up to the application (and not the WASM module) to provide enough pages, so the module does not run out of memory 457 | /// # Arguments 458 | /// * `size` - size of memory to allocaten 459 | /// returns a pointer to the allocated memory area 460 | fn wrapper_wasm_allocate( 461 | instance: Instance, 462 | mut store: impl AsContextMut, 463 | size: u32, 464 | ) -> anyhow::Result<*const u8> { 465 | // Load function an instantiate it 466 | 467 | // get the function 468 | let func_def = instance 469 | .get_func(&mut store, "wasm_allocate") 470 | .expect("`wasm_allocate` was not an exported function"); 471 | // validate that it corresponds to the parameters and return types we need 472 | let func_validated = func_def.typed::(&store)?; 473 | // call function 474 | let result = func_validated.call(&mut store, size)?; 475 | Ok(result as *const u8) 476 | } 477 | 478 | /// Wrapper around the deallocate function of the WASM module to deallocate shared WASM memory. 
Deallocates existing memory for the purpose of the application 479 | /// # Arguments 480 | /// * `ptr` - mutuable pointer to the memory to deallocate 481 | /// returns a code if it was successful or not 482 | fn wrapper_wasm_deallocate( 483 | instance: Instance, 484 | mut store: impl AsContextMut, 485 | ptr: *const u8, 486 | ) -> anyhow::Result { 487 | // get the function 488 | let func_def = instance 489 | .get_func(&mut store, "wasm_deallocate") 490 | .expect("`wasm_deallocate` was not an exported function"); 491 | // validate that it corresponds to the parameters and return types we need 492 | let func_validated = func_def.typed::(&store)?; 493 | // call function 494 | let result = func_validated.call(&mut store, ptr as u32)?; 495 | Ok(result) 496 | } 497 | 498 | /// Create example data 499 | /// {id: 1, content: "this is a test", title: "test",date:"2022-01-01T12:00:00Z", score: 1.77} 500 | /// returns a binary representation of the data in Arrow IPC format 501 | fn create_arrow_example_data() -> Vec { 502 | // define schema 503 | let schema = Schema::new(vec![ 504 | Field::new("id", DataType::UInt64, false), 505 | Field::new("content", DataType::Utf8, false), 506 | Field::new("title", DataType::Utf8, false), 507 | Field::new( 508 | "date", 509 | DataType::Timestamp(TimeUnit::Second, Some("+00:00".to_string().into())), 510 | false, 511 | ), 512 | Field::new("score", DataType::Float64, false), 513 | ]); 514 | let ids = UInt64Array::from(vec![1]); 515 | let contents = StringArray::from(vec!["this is a test"]); 516 | let titles = StringArray::from(vec!["test"]); 517 | let dates = 518 | TimestampSecondArray::from(vec![datetime!(2022-01-01 12:00:00 UTC).unix_timestamp()]) 519 | .with_timezone("+00:00".to_string()); 520 | 521 | let scores = Float64Array::from(vec![1.123456f64]); 522 | 523 | // build a record batch 524 | let batch = RecordBatch::try_new( 525 | Arc::new(schema.clone()), 526 | vec![ 527 | Arc::new(ids), 528 | Arc::new(contents), 529 | Arc::new(titles), 
530 | Arc::new(dates), 531 | Arc::new(scores), 532 | ], 533 | ) 534 | .unwrap(); 535 | // serialize it 536 | let buffer: Vec = Vec::new(); 537 | 538 | let mut stream_writer = StreamWriter::try_new(buffer, &schema).unwrap(); 539 | stream_writer.write(&batch).unwrap(); 540 | 541 | let serialized_batch = stream_writer.into_inner().unwrap(); 542 | return serialized_batch; 543 | } 544 | 545 | /// Create example meta-data, ie commands for the module on what to do with the data 546 | /// A simple commmand structure {command: "test", config: {filename: "test.txt"}} 547 | /// returns a binary representation of the data in Arrow IPC format 548 | fn create_arrow_example_meta_data() -> Vec { 549 | // define schema 550 | let schema = Schema::new(vec![ 551 | Field::new("command", DataType::Utf8, false), 552 | Field::new( 553 | "config", 554 | DataType::Struct(arrow::datatypes::Fields::from(vec![Field::new( 555 | "filename", 556 | DataType::Utf8, 557 | false, 558 | )])), 559 | false, 560 | ), 561 | ]); 562 | // define one data item 563 | let command = StringArray::from(vec!["test"]); 564 | 565 | let config = StructArray::from(vec![( 566 | Arc::new(Field::new("filename", DataType::Utf8, false)), 567 | Arc::new(StringArray::from(vec!["test.txt"])) as ArrayRef, 568 | )]); 569 | // build a record batch 570 | let batch = RecordBatch::try_new( 571 | Arc::new(schema.clone()), 572 | vec![Arc::new(command), Arc::new(config)], 573 | ) 574 | .unwrap(); 575 | // serialize it 576 | let buffer: Vec = Vec::new(); 577 | 578 | let mut stream_writer = StreamWriter::try_new(buffer, &schema).unwrap(); 579 | stream_writer.write(&batch).unwrap(); 580 | 581 | let serialized_batch = stream_writer.into_inner().unwrap(); 582 | return serialized_batch; 583 | } 584 | --------------------------------------------------------------------------------