├── modules.lst ├── tests ├── all_tests.bat ├── slowfastdiff.png ├── make8.bat ├── test_indexing_algorithms.bat ├── output.dot ├── allocator_test.jai ├── indexing_test.py ├── run_all_tests.bat ├── strings_bug.jai ├── compile_all_tests.bat ├── test.jai ├── test7.jai ├── test10.jai ├── test5.jai ├── test6.jai ├── test11.jai ├── test9.jai ├── test8.jai ├── simd_test.jai ├── indexing_test.jai └── output.txt ├── .gitignore ├── CONTRIBUTING.md ├── tools ├── build_index_profile.jai └── index_profile.jai ├── Strings ├── module.jai ├── knuth_morris_pratt.jai └── splitting.jai ├── CHANGELOG.md ├── Scratch ├── Scratch.jai └── module.jai └── README.md /modules.lst: -------------------------------------------------------------------------------- 1 | Scratch 2 | Strings 3 | -------------------------------------------------------------------------------- /tests/all_tests.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | normal_test.exe 3 | 4 | test_indexing_algorithms.bat 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .build/ 2 | */.build/ 3 | *.exe 4 | *.pdb 5 | tests/data 6 | /tools/index_profile 7 | -------------------------------------------------------------------------------- /tests/slowfastdiff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/onelivesleft/jai-string/HEAD/tests/slowfastdiff.png -------------------------------------------------------------------------------- /tests/make8.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo Set to false 3 | pause 4 | jai test8.jai -release 5 | copy test8.exe test8slow.exe /y 6 | echo Set to true 7 | pause 8 | jai test8.jai -release 9 | copy test8.exe test8fast.exe /y 10 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This is currently just a personal repo, so I won't be merging any PRs. If you think there's a missing core string function it should include, or have other suggestions (or bug reports!), feel free to make an issue. 2 | -------------------------------------------------------------------------------- /tests/test_indexing_algorithms.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | ..\tools\index_profile.exe -t data\shakespeare.jai "/*" 3 | if NOT ["%errorlevel%"]==["0"] goto end 4 | 5 | ..\tools\index_profile.exe -t data\shakespeare.jai "__WILLIAM\n" 6 | if NOT ["%errorlevel%"]==["0"] goto end 7 | 8 | 9 | 10 | :end 11 | -------------------------------------------------------------------------------- /tests/output.dot: -------------------------------------------------------------------------------- 1 | digraph { 2 | 3 | s30064779389 [label="LAMBDA_BODY #70000207d\nnormal_test.jai:70",shape=ellipse] 4 | s30064779494 [label="LAMBDA_BODY #7000020e6\nnormal_test.jai:102",shape=ellipse] 5 | s30064779389 [label="LAMBDA_BODY #70000207d\nnormal_test.jai:70",shape=ellipse] 6 | s12884909899 [label="RUN #300001f4b\n.added_strings_w3.jai:11",shape=ellipse] 7 | s12884909901 [label="LAMBDA_BODY #300001f4d\n.added_strings_w3.jai:11",shape=ellipse] 8 | s30064779494 [label="LAMBDA_BODY #7000020e6\nnormal_test.jai:102",shape=ellipse] 9 | 10 | } 11 | -------------------------------------------------------------------------------- /tests/allocator_test.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Scratch"; 3 | #import "Strings"; 4 | 5 | #load "data/shakespeare.jai"; 6 | 7 | 8 | main :: () { 9 | foo := copy_string(shakespeare,, scratch); 10 | 11 | for count: 1 .. 
1000 { 12 | foo = replace(foo, "z", "#",, scratch); 13 | for < #char "y" .. #char "a" { 14 | from : u8 = cast(u8)it; 15 | next : u8 = cast(u8)it + 1; 16 | foo = replace(foo, char_as_string(*from), char_as_string(*next),, scratch); 17 | } 18 | foo = replace(foo, "#", "a",, scratch); 19 | } 20 | 21 | print(foo); 22 | } 23 | -------------------------------------------------------------------------------- /tests/indexing_test.py: -------------------------------------------------------------------------------- 1 | import time, sys 2 | 3 | haystack = open("data/shakespeare.jai").read().split("__WILLIAM\n")[1] 4 | needle = " and " 5 | 6 | times = 10 7 | 8 | index = haystack.find(needle) 9 | while index >= 0: 10 | index = haystack.find(needle, index + 1) 11 | 12 | 13 | total_checksum = 0 14 | t = time.time() 15 | for x in range(times): 16 | index = haystack.find(needle) 17 | checksum = 0 18 | while index >= 0: 19 | checksum = checksum ^ index 20 | index = haystack.find(needle, index + 1) 21 | total_checksum += checksum 22 | delta = time.time() - t 23 | print(total_checksum, delta) 24 | -------------------------------------------------------------------------------- /tools/build_index_profile.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Compiler"; 3 | 4 | filepath :: "index_profile.jai"; 5 | 6 | #run { 7 | build_options := get_build_options(); 8 | set_build_options_dc(.{do_output = false}); 9 | 10 | workspace := compiler_create_workspace(); 11 | build_options.output_executable_name = "index_profile"; 12 | set_optimization(*build_options, .VERY_OPTIMIZED, false); 13 | set_build_options(build_options, workspace); 14 | 15 | compiler_begin_intercept(workspace); 16 | 17 | add_build_file(filepath, workspace); 18 | 19 | while true { 20 | message := compiler_wait_for_message(); 21 | if !message continue; 22 | if message.kind == .COMPLETE break; 23 | } 24 | 25 | compiler_end_intercept(workspace); 26 | } 27 | 
-------------------------------------------------------------------------------- /tests/run_all_tests.bat: -------------------------------------------------------------------------------- 1 | allocator_test.exe 2 | @if NOT ["%errorlevel%"]==["0"] goto end 3 | 4 | indexing_test.exe 5 | @if NOT ["%errorlevel%"]==["0"] goto end 6 | 7 | normal_test.exe 8 | @if NOT ["%errorlevel%"]==["0"] goto end 9 | 10 | simd_test.exe 11 | @if NOT ["%errorlevel%"]==["0"] goto end 12 | 13 | strings_bug.exe 14 | @if NOT ["%errorlevel%"]==["0"] goto end 15 | 16 | test.exe 17 | @if NOT ["%errorlevel%"]==["0"] goto end 18 | 19 | test10.exe 20 | @if NOT ["%errorlevel%"]==["0"] goto end 21 | 22 | test11.exe 23 | @if NOT ["%errorlevel%"]==["0"] goto end 24 | 25 | test5.exe 26 | @if NOT ["%errorlevel%"]==["0"] goto end 27 | 28 | test6.exe 29 | @if NOT ["%errorlevel%"]==["0"] goto end 30 | 31 | test7.exe 32 | @if NOT ["%errorlevel%"]==["0"] goto end 33 | 34 | test8.exe 35 | @if NOT ["%errorlevel%"]==["0"] goto end 36 | 37 | test9.exe 38 | @if NOT ["%errorlevel%"]==["0"] goto end 39 | 40 | :end -------------------------------------------------------------------------------- /tests/strings_bug.jai: -------------------------------------------------------------------------------- 1 | main :: () { 2 | files := #string __ 3 | README.txt - C:\\Users\\farzher\\Downloads\\4coder\\README.txt 4 | changes.txt - C:\\Users\\farzher\\Downloads\\4coder\\changes.txt 5 | __ 6 | 7 | context.allocator = __temporary_allocator; 8 | 9 | 10 | for i: 1 .. 
3 { 11 | reset_temporary_storage(); 12 | print("loop: %\n", i); 13 | filesstr: string; 14 | for line: line_split(files) { 15 | //filename, path := split_into_two(line, " - "); 16 | //print("%\n%\n", filename, path); 17 | //filesstr = strings_alloc.join(filesstr, path, "\0"); 18 | filesstr = join(filesstr, substring(line, first_index(line, " - ")+" - ".count), "\0"); 19 | //filesstr = strings_alloc.join(filesstr, substring(line, String.find_index_from_left(line, " - ")+" - ".count), "\0"); 20 | } 21 | print("%\n", filesstr); 22 | print("%\n", get_temporary_storage_mark()); 23 | } 24 | } 25 | 26 | String :: #import "String"; 27 | #import "Strings"; 28 | #import "Basic"; 29 | -------------------------------------------------------------------------------- /tests/compile_all_tests.bat: -------------------------------------------------------------------------------- 1 | jai -quiet -import_dir .. allocator_test.jai 2 | @if NOT ["%errorlevel%"]==["0"] goto end 3 | 4 | jai -quiet -import_dir .. indexing_test.jai 5 | @if NOT ["%errorlevel%"]==["0"] goto end 6 | 7 | jai -quiet -import_dir .. normal_test.jai 8 | @if NOT ["%errorlevel%"]==["0"] goto end 9 | 10 | jai -quiet -import_dir .. simd_test.jai 11 | @if NOT ["%errorlevel%"]==["0"] goto end 12 | 13 | jai -quiet -import_dir .. strings_bug.jai 14 | @if NOT ["%errorlevel%"]==["0"] goto end 15 | 16 | jai -quiet -import_dir .. test.jai 17 | @if NOT ["%errorlevel%"]==["0"] goto end 18 | 19 | jai -quiet -import_dir .. test10.jai 20 | @if NOT ["%errorlevel%"]==["0"] goto end 21 | 22 | jai -quiet -import_dir .. test11.jai 23 | @if NOT ["%errorlevel%"]==["0"] goto end 24 | 25 | jai -quiet -import_dir .. test5.jai 26 | @if NOT ["%errorlevel%"]==["0"] goto end 27 | 28 | jai -quiet -import_dir .. test6.jai 29 | @if NOT ["%errorlevel%"]==["0"] goto end 30 | 31 | jai -quiet -import_dir .. test7.jai 32 | @if NOT ["%errorlevel%"]==["0"] goto end 33 | 34 | jai -quiet -import_dir .. 
test8.jai 35 | @if NOT ["%errorlevel%"]==["0"] goto end 36 | 37 | jai -quiet -import_dir .. test9.jai 38 | @if NOT ["%errorlevel%"]==["0"] goto end 39 | 40 | :end -------------------------------------------------------------------------------- /tests/test.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Strings"; 3 | jai_string :: #import "String"; 4 | 5 | #load "data/shakespeare.jai"; 6 | 7 | only_problems :: false; 8 | 9 | main :: () { 10 | defer { 11 | if errors.count != 0 { 12 | print("\nErrors:\n%\n", errors); 13 | exit(1); 14 | } 15 | else { 16 | print("\nAll OK!\n"); 17 | } 18 | } 19 | 20 | haystack :: "hell yeah hell yeweeeeeeeeah"; 21 | set_simd_mode(.SSE2); 22 | last_index_of_z := jai_string.find_index_from_right(haystack, "a"); 23 | test(1, last_index(haystack, "a"), last_index_of_z); 24 | } 25 | 26 | 27 | errors : [..] int; 28 | 29 | test :: (test_id: int, value: $T, expected: T) { 30 | if value != expected { 31 | print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value); 32 | array_add(*errors, test_id); 33 | } 34 | else if !only_problems { 35 | print("[%] OK\n", test_id); 36 | } 37 | } 38 | 39 | test :: (test_id: int, value: [] string, expected: [] string) { 40 | if !array_equals(value, expected) { 41 | print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value); 42 | array_add(*errors, test_id); 43 | } 44 | else if !only_problems { 45 | print("[%] OK\n", test_id); 46 | } 47 | } 48 | 49 | array_equals :: (a: [] $T, b: [] T) -> bool { 50 | if a.count != b.count return false; 51 | for i: 0..a.count-1 if a[i] != b[i] return false; 52 | return true; 53 | } 54 | 55 | 56 | xor :: (a: u64, b: u64) -> u64 { 57 | result := a; 58 | #asm { 59 | x : gpr; 60 | mov.q x, result; 61 | xor.q x, b; 62 | mov.q result, x; 63 | } 64 | return result; 65 | } 66 | -------------------------------------------------------------------------------- /tests/test7.jai: 
-------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Strings"; 3 | 4 | #load "data/shakespeare.jai"; 5 | 6 | main :: () { 7 | t : float64; 8 | 9 | format_float := *context.print_style.default_format_float; 10 | format_float.zero_removal = .NO; 11 | 12 | total := 0; 13 | for 0 .. shakespeare.count - 1 { 14 | total += < Scalar: %x\n", scalar_delta / sse_delta); 64 | print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta); 65 | print("AVX2 > SSE2: %x\n", sse_delta / avx2_delta); 66 | } 67 | 68 | 69 | xor :: (a: u64, b: u64) -> u64 { 70 | result := a; 71 | #asm { 72 | x : gpr; 73 | mov.q x, result; 74 | xor.q x, b; 75 | mov.q result, x; 76 | } 77 | return result; 78 | } 79 | -------------------------------------------------------------------------------- /Strings/module.jai: -------------------------------------------------------------------------------- 1 | #module_parameters ( 2 | // Default compare function used to check for character equality. 3 | CHARACTER_COMPARE : enum { CASE_SENSITIVE; IGNORE_CASE; } = .CASE_SENSITIVE, 4 | 5 | // Index algorithm used to find instance of string in other string. 
6 | // Can be overridden with `set_index_algorithm` 7 | INDEX_ALGORITHM : enum { 8 | SUPER_SIMPLE; 9 | SIMPLE; SIMPLE_SSE2; SIMPLE_AVX2; SIMPLE_UNSAFE; 10 | BOYER_MOORE; BOYER_MOORE_SSE2; BOYER_MOORE_AVX2; 11 | KNUTH_MORRIS_PRATT; 12 | } = .BOYER_MOORE 13 | ); 14 | 15 | 16 | #if CHARACTER_COMPARE == .CASE_SENSITIVE 17 | default_character_compare :: case_sensitive; 18 | else #if CHARACTER_COMPARE == .IGNORE_CASE 19 | default_character_compare :: ignore_case; 20 | 21 | 22 | #if INDEX_ALGORITHM == .SUPER_SIMPLE { 23 | default_first_index :: super_simple_first_index; 24 | default_last_index :: super_simple_last_index; 25 | } 26 | else #if INDEX_ALGORITHM == .SIMPLE { 27 | default_first_index :: simple_first_index; 28 | default_last_index :: simple_last_index; 29 | } 30 | else #if INDEX_ALGORITHM == .SIMPLE_SSE2 { 31 | default_first_index :: simple_sse2_first_index; 32 | default_last_index :: simple_sse2_last_index; 33 | } 34 | else #if INDEX_ALGORITHM == .SIMPLE_AVX2 { 35 | default_first_index :: simple_avx2_first_index; 36 | default_last_index :: simple_avx2_last_index; 37 | } 38 | else #if INDEX_ALGORITHM == .SIMPLE_UNSAFE { 39 | default_first_index :: unsafe_simple_first_index; 40 | default_last_index :: unsafe_simple_last_index; 41 | } 42 | else #if INDEX_ALGORITHM == .BOYER_MOORE { 43 | default_first_index :: boyer_moore_first_index; 44 | default_last_index :: boyer_moore_last_index; 45 | } 46 | else #if INDEX_ALGORITHM == .BOYER_MOORE_SSE2 { 47 | default_first_index :: boyer_moore_sse2_first_index; 48 | default_last_index :: boyer_moore_sse2_last_index; 49 | } 50 | else #if INDEX_ALGORITHM == .BOYER_MOORE_AVX2 { 51 | default_first_index :: boyer_moore_avx2_first_index; 52 | default_last_index :: boyer_moore_avx2_last_index; 53 | } 54 | else #if INDEX_ALGORITHM == .KNUTH_MORRIS_PRATT { 55 | default_first_index :: knuth_morris_pratt_first_index; 56 | default_last_index :: knuth_morris_pratt_last_index; 57 | } 58 | 59 | #load "Strings.jai"; 60 | #load "indexing.jai"; 
61 | #load "splitting.jai"; 62 | #load "boyer_moore.jai"; 63 | #load "knuth_morris_pratt.jai"; 64 | -------------------------------------------------------------------------------- /tests/test10.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Strings"; 3 | jai_string :: #import "String"; 4 | 5 | #load "data/shakespeare.jai"; 6 | //#load "data/dna.jai"; 7 | 8 | main :: () { 9 | data := shakespeare; 10 | times :: 10; 11 | 12 | t : float64; 13 | 14 | format_float := *context.print_style.default_format_float; 15 | format_float.zero_removal = .NO; 16 | format_float.width = 6; 17 | format_float.trailing_width = 3; 18 | 19 | valid := copy_string(data); 20 | jai_string.to_upper_in_place(valid); 21 | 22 | warm :: (str: string) { 23 | total := 0; 24 | for 0 .. str.count - 1 { 25 | total += < Scalar: %x\n", scalar_delta / sse2_delta); 79 | print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta); 80 | print("AVX2 > SSE2: %x\n", sse2_delta / avx2_delta); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /tests/test5.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | 3 | #load "data/shakespeare.jai"; 4 | 5 | main :: () { 6 | t : float64; 7 | 8 | cased := copy_string(shakespeare); 9 | uncased := copy_string(shakespeare); 10 | to_lower(uncased); 11 | 12 | 13 | format_float := *context.print_style.default_format_float; 14 | format_float.zero_removal = .NO; 15 | 16 | total := 0; 17 | for 0 .. 
cased.count - 1 { 18 | if < bool { 51 | ptr_a := *_a; 52 | ptr_b := *_b; 53 | result : u8 = 0; // true 54 | ptr_result := *result; 55 | #asm { 56 | a : gpr; 57 | b : gpr; 58 | low : gpr; 59 | high : gpr; 60 | mov.b a, [ptr_a]; 61 | mov.b b, [ptr_b]; 62 | or.b a, 32; 63 | or.b b, 32; 64 | xor.b a, b; 65 | cmp.b b, 97; // a 66 | setl low; 67 | cmp.b b, 122; // z 68 | setg high; 69 | or.b a, low; 70 | or.b a, high; 71 | mov.b [ptr_result], a; 72 | } 73 | return result == 0; 74 | } 75 | 76 | 77 | jai :: (a: u8, b: u8) -> bool { 78 | lower_a : int = ifx a >= #char "A" && a <= #char "Z" then a + #char "a" - #char "A" else a; 79 | lower_b : int = ifx b >= #char "A" && b <= #char "Z" then b + #char "a" - #char "A" else b; 80 | return lower_a == lower_b; 81 | } 82 | 83 | to_lower :: (str: string) { 84 | for 0 .. str.count - 1 { 85 | if str[it] >= #char "A" && str[it] <= #char "Z" { 86 | c : int = str[it] + #char "a" - #char "A"; 87 | str[it] = cast(u8) c; 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [2.0.2] - 2025-08-23 4 | * Fixed first_index bug where it ignored compare_character. 5 | 6 | ## [2.0.1] - 2025-08-22 7 | `into_array` -> `to_array` 8 | 9 | ## [2.0.0] - 2024-11-28 10 | ### Module structure 11 | Now simply a single module called `Strings`; no longer split into separate non-allocating/allocating modules (we have `,, allocator)` now) 12 | 13 | ### Mutating in place vs allocating 14 | Any procedure which mutates a string in-place will take a pointer to string, rather than merely a string. 15 | ``` 16 | bar := to_upper(foo); // returns a newly allocated string. 17 | to_upper(*foo); // converts foo to uppercase in-place. 18 | ``` 19 | 20 | ### Splitters 21 | Splitters are now all simply iterators (i.e. for-expansions). 
You can use `to_array` to generate an expandable array from a splitter, or `into_array` to expand a splitter into an existing array. 22 | 23 | ### Misc 24 | * Fixed threading 25 | * Fixed Scratch allocator 26 | 27 | ### Renamed 28 | `unsafe_slice` -> `raw_slice` 29 | `unsafe_substring` -> `raw_substring` 30 | `trim_into` -> `trim_to` 31 | `trim_start_past` -> `trim_start_through` 32 | `trim_end_from` -> `trim_end_through` 33 | `trim_end_after` -> `trim_end_to` 34 | `advance_past` -> `advance_through` 35 | `pad_center` -> `pad` 36 | 37 | ## [1.0.9] - 2022-12-23 38 | * Removed copy_string in Shared (Basic copy_string is now identical) 39 | * Updated for latest compiler version 40 | 41 | ## [1.0.8] - 2021-12-24 42 | * Renamed `char_split` to `split` 43 | * Updated references to `String_Builder.occupied` to `String_Builder.count` 44 | * Updated references to `String_Builder.data.data` to `get_buffer_data(String_Builder)` 45 | * Added `modules.lst` 46 | 47 | ## [1.0.7] - 2021-12-09 48 | * Updated all built-in index algorithms so that they use a character index when the needle has length 1. 49 | * Renamed `copy` to `copy_string` 50 | * Added `char_split` 51 | 52 | ## [1.0.6] - 2021-11-27 53 | * Updated to work with new `Allocator` style. 54 | * Added some thread-unsafe indexing procs. 55 | 56 | ## [1.0.5] - 2021-10-24 57 | * Fixed array-write version of `split` when used on empty strings. 58 | * Fixed boyer-moore first index returning false for equal haystack/needle. 59 | 60 | ## [1.0.4] - 2021-10-12 61 | * Fixed indexing algorithms erroneously allocating with context.allocator 62 | * Fixed `null_terminate` in `join`. 63 | 64 | ## [1.0.3] - 2021-10-12 65 | * Renamed `trim_to`, `trim_past` -> `trim_into`, `trim_through`. 66 | * Fixed `trim_through` behaviour when only one needle present. 67 | * Updated to work with compiler v86. 68 | 69 | ## [1.0.2] - 2021-09-22 70 | * Fixed SIMD `last_index` procs. 
71 | 72 | ## [1.0.1] - 2021-09-21 73 | * `Strings_Alloc.add_convenience_functions` now defaults to `true` (as module should be namespaced anyway). 74 | * Fixed `first_index`, `last_index` not handling empty haystacks correctly. 75 | * Now checks for valid `max_results` in splitters. 76 | 77 | ## [1.0.0] - 2021-09-19 78 | * First release. 79 | -------------------------------------------------------------------------------- /Scratch/Scratch.jai: -------------------------------------------------------------------------------- 1 | scratch :: Allocator.{scratch_allocator_proc, null}; 2 | 3 | scratch_allocator_proc :: (mode: Allocator_Mode, requested_size: s64, old_size: s64, old_memory_pointer: *void, allocator_data: *void) -> *void { 4 | scratch_allocator_data := cast(*Scratch_Allocator_Data) allocator_data; 5 | if !scratch_allocator_data { 6 | if !context.scratch_allocator_data 7 | context.scratch_allocator_data = context.default_allocator.proc(.ALLOCATE, size_of(Scratch_Allocator_Data), 0, null, null); 8 | scratch_allocator_data = context.scratch_allocator_data; 9 | } 10 | using scratch_allocator_data; 11 | 12 | if #complete mode == { 13 | case .RESIZE; 14 | assert(false, "Cannot resize scratch buffer: just allocate instead."); 15 | return null; 16 | 17 | 18 | case .ALLOCATE; 19 | current_buffer_index += 1; 20 | current_buffer_index %= buffer_count; 21 | 22 | if sizes[current_buffer_index] < requested_size { 23 | if buffers[current_buffer_index] free(buffers[current_buffer_index]); 24 | sizes[current_buffer_index] = cast(s64)(requested_size * buffer_size_factor + 1); 25 | buffers[current_buffer_index] = context.default_allocator.proc(.ALLOCATE, sizes[current_buffer_index], 0, null, null); 26 | } 27 | 28 | return buffers[current_buffer_index]; 29 | 30 | case .FREE; #through; 31 | case .STARTUP; #through; 32 | case .SHUTDOWN; #through; 33 | case .THREAD_START; #through; 34 | case .THREAD_STOP; 35 | return null; 36 | 37 | case .CREATE_HEAP; #through; 38 | case 
.DESTROY_HEAP; 39 | context.handling_assertion_failure = true; 40 | context.assertion_failed(#location(), "This allocator does not support multiple heaps.\n"); 41 | context.handling_assertion_failure = false; 42 | return null; 43 | 44 | 45 | case .IS_THIS_YOURS; 46 | context.handling_assertion_failure = true; 47 | context.assertion_failed(#location(), "This allocator does not support IS_THIS_YOURS.\n"); 48 | context.handling_assertion_failure = false; 49 | return null; 50 | 51 | case .CAPS; 52 | if old_memory_pointer { <= 0 { 26 | index += offset; 27 | jai_total = xor(jai_total, xx index); 28 | offset = index + 1; 29 | s.data = shakespeare.data + offset; 30 | s.count = shakespeare.count - offset; 31 | index = jai_string.find_index_from_left(s, #char "z"); 32 | } 33 | jai_delta = seconds_since_init() - t; 34 | } 35 | 36 | compare :: case_sensitive; 37 | 38 | set_simd_mode(.SSE2); 39 | sse_delta : float64; 40 | sse_total : u64; 41 | { 42 | t = seconds_since_init(); 43 | index, found := first_index(shakespeare, #char "z", compare); 44 | while found { 45 | sse_total = xor(sse_total, xx index); 46 | index, found = first_index(shakespeare, #char "z", index + 1, compare); 47 | } 48 | sse_delta = seconds_since_init() - t; 49 | } 50 | 51 | set_simd_mode(.AVX2); 52 | avx2_delta : float64; 53 | avx2_total : u64; 54 | { 55 | t = seconds_since_init(); 56 | index, found := first_index(shakespeare, #char "z", compare); 57 | while found { 58 | avx2_total = xor(avx2_total, xx index); 59 | index, found = first_index(shakespeare, #char "z", index + 1, compare); 60 | } 61 | avx2_delta = seconds_since_init() - t; 62 | } 63 | 64 | 65 | set_simd_mode(.OFF); 66 | scalar_delta : float64; 67 | scalar_total : u64; 68 | { 69 | t = seconds_since_init(); 70 | index, found := first_index(shakespeare, #char "z", compare); 71 | while found { 72 | scalar_total = xor(scalar_total, xx index); 73 | index, found = first_index(shakespeare, #char "z", index + 1, compare); 74 | } 75 | scalar_delta = 
seconds_since_init() - t; 76 | } 77 | 78 | print("scalar: (%) %\n", scalar_total, scalar_delta); 79 | print("jai: (%) %\n", jai_total, jai_delta); 80 | print("sse: (%) %\n", sse_total, sse_delta); 81 | print("avx2: (%) %\n", avx2_total, avx2_delta); 82 | print("\n"); 83 | print("SSE2 > Scalar: %x\n", scalar_delta / sse_delta); 84 | print("SSE2 > Jai: %x\n", jai_delta / sse_delta); 85 | print("\n"); 86 | print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta); 87 | print("AVX2 > Jai: %x\n", jai_delta / avx2_delta); 88 | print("AVX2 > SSE2: %x\n", sse_delta / avx2_delta); 89 | } 90 | 91 | 92 | xor :: (a: u64, b: u64) -> u64 { 93 | result := a; 94 | #asm { 95 | x : gpr; 96 | mov.q x, result; 97 | xor.q x, b; 98 | mov.q result, x; 99 | } 100 | return result; 101 | } 102 | -------------------------------------------------------------------------------- /tests/test11.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Strings"; 3 | #import "Scratch"; 4 | jai_string :: #import "String"; 5 | 6 | #load "data/shakespeare.jai"; 7 | //#load "data/dna.jai"; 8 | 9 | main :: () { 10 | data := shakespeare; 11 | //data := "abcdefghijklm0123456789nopqrstuvwxyzABCDEFGHIJKLM0123456789NOPQRSTUVWXYZ"; 12 | times :: 10;//1000; 13 | 14 | print_output :: false; 15 | 16 | t : float64; 17 | 18 | format_float := *context.print_style.default_format_float; 19 | format_float.zero_removal = .NO; 20 | format_float.width = 6; 21 | format_float.trailing_width = 3; 22 | 23 | valid := copy_string(data); 24 | low_index := 0; 25 | high_index := valid.count - 1; 26 | while low_index < high_index { 27 | c := valid[low_index]; 28 | valid[low_index] = valid[high_index]; 29 | valid[high_index] = c; 30 | low_index += 1; 31 | high_index -= 1; 32 | } 33 | #if print_output print("valid: %\n", valid); 34 | 35 | 36 | warm :: (str: string) { 37 | total := 0; 38 | for 0 .. 
str.count - 1 { 39 | total += < Scalar: %x\n", scalar_delta / sse2_delta); 99 | print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta); 100 | print("AVX2 > SSE2: %x\n", sse2_delta / avx2_delta); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /Scratch/module.jai: -------------------------------------------------------------------------------- 1 | VERSION_STRING :: "Scratch Allocator v1.0"; 2 | 3 | #module_parameters ()( // program parameters 4 | // Number of buffers the allocator will cycle through. 5 | // Usually you only need 2; one to read from and one to write to. 6 | buffer_count := 2, 7 | 8 | // Amount of space allocated when a buffer needs to increase in size 9 | // to accommodate an allocation request. The amount requested times this 10 | // number will be allocated. Overallocating one request means you do 11 | // not need to allocate again on a slightly-bigger following request. 12 | buffer_size_factor := 1.25 13 | ); 14 | 15 | #assert buffer_count >= 2; 16 | #assert buffer_size_factor >= 1.0; 17 | 18 | /** # Scratch Allocator 19 | You will often want to perform a series of string operations, one after 20 | another. 21 | For example, replacing HTML character markers with the actual characters: 22 | 23 | *Example 1* 24 | ```jai 25 | text = replace(text, "&lt;", "<"); 26 | text = replace(text, "&gt;", ">"); 27 | text = replace(text, "&amp;", "&"); 28 | text = replace(text, "&nbsp;", Chars.NBSP); 29 | ``` 30 | 31 | The above code leaks because each call to replace allocates another copy of 32 | the string. 33 | To fix it with the default allocator you would need to free after every call: 34 | 35 | *Example 2* 36 | ```jai 37 | text1 := replace(text0, "&lt;", "<"); 38 | free(text0); 39 | text2 := replace(text1, "&gt;", ">"); 40 | free(text1); 41 | text3 := replace(text2, "&amp;", "&"); 42 | free(text2); 43 | text4 := replace(text3, "&nbsp;", Chars.NBSP); 44 | ``` 45 | 46 | This is obviously cumbersome and error-prone. 
Alternatively you could use 47 | temporary storage: 48 | 49 | *Example 3* 50 | ```jai 51 | text = replace(text, "&lt;", "<",, temp); 52 | text = replace(text, "&gt;", ">",, temp); 53 | text = replace(text, "&amp;", "&",, temp); 54 | text = replace(text, "&nbsp;", Chars.NBSP); 55 | reset_temporary_storage(); 56 | ``` 57 | 58 | This is much nicer, but it still allocates more data on every call: if you are doing 59 | a very long sequence of operations then you will allocate a very large amount of 60 | memory. 61 | 62 | However, notice in `Example 2` that we did not need to make all those variables: we 63 | free `text0` before creating `text2`, so we could have written the code like this: 64 | 65 | *Example 4* 66 | ```jai 67 | text1 := replace(text0, "&lt;", "<"); 68 | free(text0); 69 | text0 = replace(text1, "&gt;", ">"); 70 | free(text1); 71 | text1 = replace(text0, "&amp;", "&"); 72 | free(text0); 73 | text0 = replace(text1, "&nbsp;", Chars.NBSP); 74 | free(text1); 75 | ``` 76 | 77 | This is because we are only ever dealing with two pieces of memory: the area we are 78 | reading from and the area we are writing to. 79 | 80 | The scratch allocator applies this reasoning to memory allocation: it will only ever allocate 81 | two* pieces of memory: the buffer we are reading from and the buffer we are writing to. 82 | 83 | Thus our code becomes: 84 | 85 | *Example 5* 86 | ```jai 87 | text = replace(text, "&lt;", "<",, scratch); // writes to scratch buffer 0 88 | text = replace(text, "&gt;", ">",, scratch); // writes to scratch buffer 1 89 | text = replace(text, "&amp;", "&",, scratch); // writes to scratch buffer 0 90 | text = replace(text, "&nbsp;", Chars.NBSP); 91 | ``` 92 | 93 | We never want to use the scratch allocator to hold data we wish to utilize elsewhere (as 94 | it will be overwritten as soon as the scratch allocator is used anywhere else) so the final 95 | call to replace uses the heap allocator instead. 
96 | 97 | * *Though see the `buffer_count` module parameter if you require more than two* 98 | */ 99 | 100 | 101 | #load "Scratch.jai"; 102 | -------------------------------------------------------------------------------- /Strings/knuth_morris_pratt.jai: -------------------------------------------------------------------------------- 1 | // @TODO last_index variants reverse indexing so they can use the mechanics of first_index variants 2 | // (using `get` etc.) - this makes logic simple but is slower than if it was coded directly, 3 | // so... code it directly! 4 | 5 | 6 | 7 | knuth_morris_pratt_first_index :: (haystack: string, needle: string, start_index: int, character_compare: Character_Compare) -> index: int, found: bool { 8 | #insert,scope() first_index_header; 9 | using context.knuth_morris_pratt_thread_data; 10 | if !context.knuth_morris_pratt_thread_data context.knuth_morris_pratt_thread_data = New(Knuth_Morris_Pratt_Thread_Data); 11 | 12 | if needle != last_used_needle || character_compare != last_used_compare || last_search_was_reversed { 13 | last_search_was_reversed = false; 14 | last_used_compare = character_compare; 15 | if last_used_needle free(last_used_needle); 16 | last_used_needle = copy_string(needle,, context.default_allocator); 17 | make_needle_table(needle, character_compare, false); 18 | } 19 | 20 | j := 0; 21 | needle_end := needle.count - 1; 22 | end := cast(u64)haystack.data + cast(u64)haystack.count - 1; 23 | for i: cast(u64)haystack.data + cast(u64)start_index .. 
end { 24 | while j >= 0 && !character_compare.compare(< index: int, found: bool { 35 | #insert,scope() last_index_header; 36 | using context.knuth_morris_pratt_thread_data; 37 | if !context.knuth_morris_pratt_thread_data context.knuth_morris_pratt_thread_data = New(Knuth_Morris_Pratt_Thread_Data); 38 | 39 | start_index = haystack.count - start_index; 40 | 41 | if needle != last_used_needle || !last_search_was_reversed || character_compare != last_used_compare { 42 | last_search_was_reversed = true; 43 | last_used_compare = character_compare; 44 | if last_used_needle free(last_used_needle); 45 | last_used_needle = copy_string(needle,, context.default_allocator); 46 | make_needle_table(needle, character_compare, true); 47 | } 48 | 49 | j := 0; 50 | needle_end := needle.count - 1; 51 | for i: start_index .. haystack.count - 1 { 52 | while j >= 0 && !character_compare.compare(get(haystack, i, true), get(needle, j, true)) 53 | j = needle_table[j]; 54 | if j == needle_end return haystack.count - needle.count - (i - j), true; 55 | j += 1; 56 | } 57 | 58 | return -1, false; 59 | } 60 | 61 | 62 | #scope_file 63 | 64 | 65 | #add_context knuth_morris_pratt_thread_data: *Knuth_Morris_Pratt_Thread_Data; 66 | 67 | Knuth_Morris_Pratt_Thread_Data :: struct { 68 | last_used_needle : string; 69 | last_used_compare : Character_Compare; 70 | last_search_was_reversed : bool; 71 | needle_table : [..] int; 72 | }; 73 | 74 | 75 | get :: inline (str: string, index: int, $reversed: bool) -> u8 { 76 | #if reversed return str[str.count - 1 - index]; 77 | else return str[index]; 78 | } 79 | 80 | 81 | make_needle_table :: (needle: string, character_compare: Character_Compare, $reversed: bool) { 82 | using context.knuth_morris_pratt_thread_data; 83 | 84 | push_allocator(context.default_allocator); 85 | 86 | if needle_table.count <= needle.count 87 | array_resize(*needle_table, needle.count + 1, false); 88 | 89 | needle_table.data[0] = -1; 90 | 91 | for i: 0 .. 
needle.count - 1 { 92 | j := i + 1; 93 | needle_table[j] = needle_table[i] + 1; 94 | while needle_table[j] > 0 && !character_compare.compare(get(needle, i, reversed), get(needle, needle_table[j] - 1, reversed)) 95 | needle_table[j] = needle_table[needle_table[j] - 1] + 1; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /tests/test9.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Strings"; 3 | 4 | #load "data/shakespeare.jai"; 5 | //#load "data/dna.jai"; 6 | 7 | /* Timings as one asm block 8 | CASE SENSITIVE: 9 | scalar: (24427000) 0.705 10 | sse2: (24427000) 0.044 11 | avx2: (24427000) 0.032 12 | 13 | SSE2 > Scalar: 15.983x 14 | AVX2 > Scalar: 22.271x 15 | AVX2 > SSE2: 1.393x 16 | 17 | 18 | IGNORE CASE: 19 | scalar: (28859400) 2.564 20 | sse2: (28859400) 0.057 21 | avx2: (28859400) 0.033 22 | 23 | SSE2 > Scalar: 45.147x 24 | AVX2 > Scalar: 78.874x 25 | AVX2 > SSE2: 1.747x 26 | 27 | 28 | After 2xASM 29 | 30 | CASE SENSITIVE: 31 | scalar: (24427000) 0.706 32 | sse2: (24427000) 0.059 33 | avx2: (24427000) 0.023 34 | 35 | SSE2 > Scalar: 12.044x 36 | AVX2 > Scalar: 30.381x 37 | AVX2 > SSE2: 2.522x 38 | 39 | 40 | IGNORE CASE: 41 | scalar: (28859400) 2.565 42 | sse2: (28859400) 0.053 43 | avx2: (28859400) 0.029 44 | 45 | SSE2 > Scalar: 48.216x 46 | AVX2 > Scalar: 87.188x 47 | AVX2 > SSE2: 1.808x 48 | 49 | */ 50 | 51 | main :: () { 52 | haystack := shakespeare; 53 | needle : u8 = #char "a"; 54 | times :: 1; 55 | 56 | t : float64; 57 | 58 | format_float := *context.print_style.default_format_float; 59 | format_float.zero_removal = .NO; 60 | format_float.width = 6; 61 | format_float.trailing_width = 3; 62 | 63 | total := 0; 64 | for 0 .. 
haystack.count - 1 { 65 | total += < Scalar: %x\n", scalar_delta / sse_delta); 104 | print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta); 105 | print("AVX2 > SSE2: %x\n", sse_delta / avx2_delta); 106 | } 107 | 108 | { 109 | set_simd_mode(.SSE2); 110 | sse_delta : float64; 111 | sse_total : int; 112 | { 113 | t = seconds_since_init(); 114 | for 1..times sse_total += count(haystack, needle, ignore_case); 115 | sse_delta = seconds_since_init() - t; 116 | } 117 | 118 | 119 | set_simd_mode(.AVX2); 120 | avx2_delta : float64; 121 | avx2_total : int; 122 | { 123 | t = seconds_since_init(); 124 | for 1..times avx2_total += count(haystack, needle, ignore_case); 125 | avx2_delta = seconds_since_init() - t; 126 | } 127 | 128 | 129 | set_simd_mode(.OFF); 130 | scalar_delta : float64; 131 | scalar_total : int; 132 | { 133 | t = seconds_since_init(); 134 | for 1..times scalar_total += count(haystack, needle, ignore_case); 135 | scalar_delta = seconds_since_init() - t; 136 | } 137 | 138 | print("\n\nIGNORE CASE:\n"); 139 | print("scalar: (%) %\n", scalar_total, scalar_delta); 140 | print("sse2: (%) %\n", sse_total, sse_delta); 141 | print("avx2: (%) %\n", avx2_total, avx2_delta); 142 | print("\n"); 143 | print("SSE2 > Scalar: %x\n", scalar_delta / sse_delta); 144 | print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta); 145 | print("AVX2 > SSE2: %x\n", sse_delta / avx2_delta); 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /tests/test8.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Sort"; 3 | #import "Strings"(INDEX_ALGORITHM=.SIMPLE); 4 | jai_string :: #import "String"; 5 | 6 | 7 | #load "data/shakespeare.jai"; 8 | #load "data/degenerate.jai"; 9 | 10 | main :: () { 11 | haystack :: shakespeare;// degenerate; 12 | needle :: " and "; //"ffffffffffffffffffffffffffffffffffffffffn"; 13 | times :: 10; 14 | //needle :: "zealous"; 15 | 16 | jai :: true; 
17 | boyer_moore :: true; 18 | kmp :: true; 19 | simd_off :: true; 20 | sse :: true; 21 | avx2 :: true; 22 | 23 | 24 | format_float := *context.print_style.default_format_float; 25 | format_float.zero_removal = .NO; 26 | format_float.trailing_width = 3; 27 | 28 | checksum : u64; 29 | { 30 | s := haystack; 31 | offset := 0; 32 | index := jai_string.find_index_from_left(s, needle); 33 | while index >= 0 { 34 | index += offset; 35 | checksum = xor(checksum, xx index); 36 | offset = index + 1; 37 | s.data = haystack.data + offset; 38 | s.count = haystack.count - offset; 39 | index = jai_string.find_index_from_left(s, needle); 40 | } 41 | checksum *= times; 42 | } 43 | 44 | t : float64; 45 | 46 | 47 | 48 | jai_delta : float64; 49 | jai_total : u64; 50 | kmp_delta : float64; 51 | kmp_total : u64; 52 | simd_off_delta : float64; 53 | simd_off_total : u64; 54 | sse_delta : float64; 55 | sse_total : u64; 56 | avx2_delta : float64; 57 | avx2_total : u64; 58 | boyer_moore_delta : float64; 59 | boyer_moore_total : u64; 60 | 61 | 62 | for 1 .. 
times { 63 | print("%", it % 10); 64 | #if jai 65 | {{ 66 | t = seconds_since_init(); 67 | s := haystack; 68 | offset := 0; 69 | index := jai_string.find_index_from_left(haystack, needle); 70 | total : u64 = 0; 71 | while index >= 0 { 72 | index += offset; 73 | total = xor(total, xx index); 74 | offset = index + 1; 75 | s.data = haystack.data + offset; 76 | s.count = haystack.count - offset; 77 | index = jai_string.find_index_from_left(s, needle); 78 | } 79 | jai_total += total; 80 | jai_delta += seconds_since_init() - t; 81 | }} 82 | 83 | #if boyer_moore 84 | {{ 85 | set_index_algorithm(boyer_moore_first_index); 86 | t = seconds_since_init(); 87 | index, found := first_index(haystack, needle); 88 | total : u64 = 0; 89 | while found { 90 | total = xor(total, xx index); 91 | index, found = first_index(haystack, needle, index + 1); 92 | } 93 | boyer_moore_total += total; 94 | boyer_moore_delta += seconds_since_init() - t; 95 | }} 96 | 97 | #if kmp 98 | {{ 99 | set_index_algorithm(knuth_morris_pratt_first_index); 100 | t = seconds_since_init(); 101 | index, found := first_index(haystack, needle); 102 | total : u64 = 0; 103 | while found { 104 | total = xor(total, xx index); 105 | index, found = first_index(haystack, needle, index + 1); 106 | } 107 | kmp_total += total; 108 | kmp_delta += seconds_since_init() - t; 109 | }} 110 | 111 | #if simd_off 112 | {{ 113 | set_index_algorithm(simple_first_index); 114 | set_simd_mode(.OFF); 115 | t = seconds_since_init(); 116 | index, found := first_index(haystack, needle); 117 | total : u64 = 0; 118 | while found { 119 | total = xor(total, xx index); 120 | index, found = first_index(haystack, needle, index + 1); 121 | } 122 | simd_off_total += total; 123 | simd_off_delta += seconds_since_init() - t; 124 | }} 125 | 126 | #if sse 127 | {{ 128 | set_index_algorithm(simple_sse2_first_index); 129 | set_simd_mode(.SSE2); 130 | t = seconds_since_init(); 131 | index, found := first_index(haystack, needle); 132 | total : u64 = 0; 133 | 
while found { 134 | total = xor(total, xx index); 135 | index, found = first_index(haystack, needle, index + 1); 136 | } 137 | sse_total += total; 138 | sse_delta += seconds_since_init() - t; 139 | }} 140 | 141 | #if avx2 142 | {{ 143 | set_index_algorithm(simple_avx2_first_index); 144 | set_simd_mode(.AVX2); 145 | t = seconds_since_init(); 146 | index, found := first_index(haystack, needle); 147 | total : u64 = 0; 148 | while found { 149 | total = xor(total, xx index); 150 | index, found = first_index(haystack, needle, index + 1); 151 | } 152 | avx2_total += total; 153 | avx2_delta += seconds_since_init() - t; 154 | }} 155 | } 156 | 157 | 158 | Entry :: struct { 159 | name : string; 160 | time : float64; 161 | checksum : u64; 162 | } 163 | 164 | entry :: (name: string, time: float64, checksum: u64) -> Entry { 165 | result : Entry = ---; 166 | result.name = name; 167 | result.time = time; 168 | result.checksum = checksum; 169 | return result; 170 | } 171 | 172 | entries : [..] Entry; 173 | 174 | #if jai array_add(*entries, entry("Jai", jai_delta, jai_total)); 175 | #if kmp array_add(*entries, entry("Knuth-Morris-Pratt", kmp_delta, kmp_total)); 176 | #if boyer_moore array_add(*entries, entry("Boyer-Moore", boyer_moore_delta, boyer_moore_total)); 177 | #if simd_off array_add(*entries, entry("Naive", simd_off_delta, simd_off_total)); 178 | #if sse array_add(*entries, entry("Naive + SSE2", sse_delta, sse_total)); 179 | #if avx2 array_add(*entries, entry("Naive + AVX2", avx2_delta, avx2_total)); 180 | 181 | quick_sort(entries, (a: Entry) -> float64 { return a.time; }); 182 | 183 | print("\n\n"); 184 | 185 | for entries { 186 | print("%", pad_end(it.name, 20)); 187 | time := sprint("%", it.time); 188 | if it.checksum != checksum { 189 | print("BAD CHECKSUM: % != %\n", it.checksum, checksum); 190 | continue; 191 | } 192 | 193 | #if jai { 194 | factor := sprint("%", jai_delta / it.time); 195 | print("% = %x\n", pad_start(time, 7), pad_start(factor, 7)); 196 | } 197 | else 
{ 198 | print("%\n", pad_end(time, 7)); 199 | } 200 | if !it_index print("\n"); 201 | } 202 | } 203 | 204 | 205 | xor :: (a: u64, b: u64) -> u64 { 206 | result := a; 207 | #asm { 208 | x : gpr; 209 | mov.q x, result; 210 | xor.q x, b; 211 | mov.q result, x; 212 | } 213 | return result; 214 | } 215 | -------------------------------------------------------------------------------- /tests/simd_test.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Strings"; 3 | jai_string :: #import "String"; 4 | 5 | #load "data/shakespeare.jai"; 6 | 7 | only_problems :: true; 8 | 9 | main :: () { 10 | defer { 11 | if errors.count != 0 12 | print("\nErrors:\n%\n", errors); 13 | else 14 | print("\nAll OK!\n"); 15 | } 16 | 17 | // calculate_checksums_from_jai_string(); 18 | index_case_sensitive_checksum :: 5029604; 19 | index_ignore_case_checksum :: 2957927; 20 | 21 | haystack :: " Hello world in string form can you give me a hell yeah hell yeah"; 22 | 23 | for simd: 0 .. 
3 { 24 | set_simd_mode(xx simd); 25 | 26 | x := (simd + 1) * 100; 27 | 28 | test( 1 + x, first_index(haystack, #char "H", case_sensitive), 3); 29 | test( 2 + x, first_index(haystack, #char "h", case_sensitive), 48); 30 | test( 3 + x, first_index(haystack, #char "E", case_sensitive), -1); 31 | test( 4 + x, first_index(haystack, #char "e", case_sensitive), 4); 32 | test( 5 + x, first_index(haystack, #char "L", case_sensitive), -1); 33 | test( 6 + x, first_index(haystack, #char "l", case_sensitive), 5); 34 | test( 7 + x, first_index(haystack, #char "O", case_sensitive), -1); 35 | test( 8 + x, first_index(haystack, #char "o", case_sensitive), 7); 36 | test( 9 + x, first_index(haystack, #char " ", case_sensitive), 0); 37 | test(10 + x, first_index(haystack, #char "W", case_sensitive), -1); 38 | test(11 + x, first_index(haystack, #char "w", case_sensitive), 9); 39 | test(12 + x, first_index(haystack, #char "R", case_sensitive), -1); 40 | test(13 + x, first_index(haystack, #char "r", case_sensitive), 11); 41 | test(14 + x, first_index(haystack, #char "D", case_sensitive), -1); 42 | test(15 + x, first_index(haystack, #char "d", case_sensitive), 13); 43 | test(16 + x, first_index(haystack, #char "m", case_sensitive), 28); 44 | test(17 + x, first_index(haystack, #char "A", case_sensitive), -1); 45 | test(18 + x, first_index(haystack, #char "a", case_sensitive), 31); 46 | test(19 + x, first_index(haystack, #char "Z", case_sensitive), -1); 47 | test(20 + x, first_index(haystack, #char "z", case_sensitive), -1); 48 | 49 | test(21 + x, first_index(haystack, #char "H", ignore_case), 3); 50 | test(22 + x, first_index(haystack, #char "h", ignore_case), 3); 51 | test(23 + x, first_index(haystack, #char "E", ignore_case), 4); 52 | test(24 + x, first_index(haystack, #char "e", ignore_case), 4); 53 | test(25 + x, first_index(haystack, #char "L", ignore_case), 5); 54 | test(26 + x, first_index(haystack, #char "l", ignore_case), 5); 55 | test(27 + x, first_index(haystack, #char "O", 
ignore_case), 7); 56 | test(28 + x, first_index(haystack, #char "o", ignore_case), 7); 57 | test(29 + x, first_index(haystack, #char " ", ignore_case), 0); 58 | test(30 + x, first_index(haystack, #char "W", ignore_case), 9); 59 | test(31 + x, first_index(haystack, #char "w", ignore_case), 9); 60 | test(32 + x, first_index(haystack, #char "R", ignore_case), 11); 61 | test(33 + x, first_index(haystack, #char "r", ignore_case), 11); 62 | test(34 + x, first_index(haystack, #char "D", ignore_case), 13); 63 | test(35 + x, first_index(haystack, #char "d", ignore_case), 13); 64 | test(36 + x, first_index(haystack, #char "m", ignore_case), 28); 65 | test(37 + x, first_index(haystack, #char "A", ignore_case), 31); 66 | test(38 + x, first_index(haystack, #char "a", ignore_case), 31); 67 | test(39 + x, first_index(haystack, #char "Z", ignore_case), -1); 68 | test(40 + x, first_index(haystack, #char "z", ignore_case), -1); 69 | 70 | test(41 + x, last_index(haystack, #char "H", case_sensitive), 3); 71 | test(42 + x, last_index(haystack, #char "h", case_sensitive), 66); 72 | test(43 + x, last_index(haystack, #char "E", case_sensitive), -1); 73 | test(44 + x, last_index(haystack, #char "e", case_sensitive), 64); 74 | test(45 + x, last_index(haystack, #char "L", case_sensitive), -1); 75 | test(46 + x, last_index(haystack, #char "l", case_sensitive), 61); 76 | test(47 + x, last_index(haystack, #char "O", case_sensitive), -1); 77 | test(48 + x, last_index(haystack, #char "o", case_sensitive), 35); 78 | test(49 + x, last_index(haystack, #char " ", case_sensitive), 62); 79 | test(50 + x, last_index(haystack, #char "W", case_sensitive), -1); 80 | test(51 + x, last_index(haystack, #char "w", case_sensitive), 9); 81 | test(52 + x, last_index(haystack, #char "R", case_sensitive), -1); 82 | test(53 + x, last_index(haystack, #char "r", case_sensitive), 27); 83 | test(54 + x, last_index(haystack, #char "D", case_sensitive), -1); 84 | test(55 + x, last_index(haystack, #char "d", 
case_sensitive), 13); 85 | test(56 + x, last_index(haystack, #char "m", case_sensitive), 43); 86 | test(57 + x, last_index(haystack, #char "A", case_sensitive), -1); 87 | test(58 + x, last_index(haystack, #char "a", case_sensitive), 65); 88 | test(59 + x, last_index(haystack, #char "Z", case_sensitive), -1); 89 | test(60 + x, last_index(haystack, #char "z", case_sensitive), -1); 90 | 91 | test(61 + x, last_index(haystack, #char "H", ignore_case), 66); 92 | test(62 + x, last_index(haystack, #char "h", ignore_case), 66); 93 | test(63 + x, last_index(haystack, #char "E", ignore_case), 64); 94 | test(64 + x, last_index(haystack, #char "e", ignore_case), 64); 95 | test(65 + x, last_index(haystack, #char "L", ignore_case), 61); 96 | test(66 + x, last_index(haystack, #char "l", ignore_case), 61); 97 | test(67 + x, last_index(haystack, #char "O", ignore_case), 35); 98 | test(68 + x, last_index(haystack, #char "o", ignore_case), 35); 99 | test(69 + x, last_index(haystack, #char " ", ignore_case), 62); 100 | test(70 + x, last_index(haystack, #char "W", ignore_case), 9); 101 | test(71 + x, last_index(haystack, #char "w", ignore_case), 9); 102 | test(72 + x, last_index(haystack, #char "R", ignore_case), 27); 103 | test(73 + x, last_index(haystack, #char "r", ignore_case), 27); 104 | test(74 + x, last_index(haystack, #char "D", ignore_case), 13); 105 | test(75 + x, last_index(haystack, #char "d", ignore_case), 13); 106 | test(76 + x, last_index(haystack, #char "m", ignore_case), 43); 107 | test(77 + x, last_index(haystack, #char "A", ignore_case), 65); 108 | test(78 + x, last_index(haystack, #char "a", ignore_case), 65); 109 | test(79 + x, last_index(haystack, #char "Z", ignore_case), -1); 110 | test(80 + x, last_index(haystack, #char "z", ignore_case), -1); 111 | 112 | checksum : u64; 113 | index, found := first_index(shakespeare, #char "z", case_sensitive); 114 | while found { 115 | checksum = xor(checksum, xx index); 116 | index, found = first_index(shakespeare, #char "z", 
index + 1, case_sensitive); 117 | } 118 | test(81 + x, checksum, index_case_sensitive_checksum); 119 | 120 | checksum = 0; 121 | index, found = last_index(shakespeare, #char "z", case_sensitive); 122 | while found { 123 | checksum = xor(checksum, xx index); 124 | index, found = last_index(shakespeare, #char "z", index, case_sensitive); 125 | } 126 | test(82 + x, checksum, index_case_sensitive_checksum); 127 | 128 | 129 | checksum = 0; 130 | index, found = first_index(shakespeare, #char "z", ignore_case); 131 | while found { 132 | checksum = xor(checksum, xx index); 133 | index, found = first_index(shakespeare, #char "z", index + 1, ignore_case); 134 | } 135 | test(83 + x, checksum, index_ignore_case_checksum); 136 | 137 | 138 | checksum = 0; 139 | index, found = last_index(shakespeare, #char "z", ignore_case); 140 | while found { 141 | checksum = xor(checksum, xx index); 142 | index, found = last_index(shakespeare, #char "z", index, ignore_case); 143 | } 144 | test(84 + x, checksum, index_ignore_case_checksum); 145 | } 146 | } 147 | 148 | 149 | 150 | errors : [..] 
int; 151 | 152 | test :: (test_id: int, value: $T, expected: T) { 153 | if value != expected { 154 | print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value); 155 | array_add(*errors, test_id); 156 | } 157 | else if !only_problems { 158 | print("[%] OK\n", test_id); 159 | } 160 | } 161 | 162 | test :: (test_id: int, value: [] string, expected: [] string) { 163 | if !array_equals(value, expected) { 164 | print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value); 165 | array_add(*errors, test_id); 166 | } 167 | else if !only_problems { 168 | print("[%] OK\n", test_id); 169 | } 170 | } 171 | 172 | array_equals :: (a: [] $T, b: [] T) -> bool { 173 | if a.count != b.count return false; 174 | for i: 0..a.count-1 if a[i] != b[i] return false; 175 | return true; 176 | } 177 | 178 | 179 | xor :: (a: u64, b: u64) -> u64 { 180 | result := a; 181 | #asm { 182 | x : gpr; 183 | mov.q x, result; 184 | xor.q x, b; 185 | mov.q result, x; 186 | } 187 | return result; 188 | } 189 | 190 | 191 | calculate_checksums_from_jai_string :: () { 192 | s := shakespeare; 193 | offset := 0; 194 | first_index_case_sensitive_checksum : u64; 195 | index := jai_string.find_index_from_left(shakespeare, #char "z"); 196 | while index >= 0 { 197 | index += offset; 198 | first_index_case_sensitive_checksum = xor(first_index_case_sensitive_checksum, xx index); 199 | offset = index + 1; 200 | s.data = shakespeare.data + offset; 201 | s.count = shakespeare.count - offset; 202 | index = jai_string.find_index_from_left(s, #char "z"); 203 | } 204 | 205 | s = shakespeare; 206 | offset = 0; 207 | first_index_ignore_case_checksum : u64; 208 | index = jai_string.find_index_of_any_from_left(shakespeare, "zZ"); 209 | while index >= 0 { 210 | index += offset; 211 | first_index_ignore_case_checksum = xor(first_index_ignore_case_checksum, xx index); 212 | offset = index + 1; 213 | s.data = shakespeare.data + offset; 214 | s.count = shakespeare.count - offset; 215 | index = 
jai_string.find_index_of_any_from_left(s, "zZ"); 216 | } 217 | 218 | s = shakespeare; 219 | last_index_case_sensitive_checksum : u64; 220 | index = jai_string.find_index_from_right(shakespeare, #char "z"); 221 | while index >= 0 { 222 | last_index_case_sensitive_checksum = xor(last_index_case_sensitive_checksum, xx index); 223 | s.count = index; 224 | index = jai_string.find_index_from_right(s, #char "z"); 225 | } 226 | 227 | s = shakespeare; 228 | last_index_ignore_case_checksum : u64; 229 | index = jai_string.find_index_of_any_from_right(shakespeare, "zZ"); 230 | while index >= 0 { 231 | last_index_ignore_case_checksum = xor(last_index_ignore_case_checksum, xx index); 232 | s.count = index; 233 | index = jai_string.find_index_of_any_from_right(s, "zZ"); 234 | } 235 | 236 | print("first_index_case_sensitive_checksum: %\n", first_index_case_sensitive_checksum); 237 | print("first_index_ignore_case_checksum: %\n", first_index_ignore_case_checksum); 238 | print("last_index_case_sensitive_checksum: %\n", last_index_case_sensitive_checksum); 239 | print("last_index_ignore_case_checksum: %\n", last_index_ignore_case_checksum); 240 | } 241 | -------------------------------------------------------------------------------- /tests/indexing_test.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "Hash_Table"; 3 | #import "Sort"; 4 | 5 | jai_string :: #import "String"; 6 | 7 | 8 | #load "data/shakespeare.jai"; 9 | #load "data/degenerate.jai"; 10 | 11 | no_case :: false; 12 | #if no_case { 13 | #import "Strings"(CHARACTER_COMPARE=.IGNORE_CASE); 14 | } 15 | else { 16 | #import "Strings"(CHARACTER_COMPARE=.CASE_SENSITIVE); 17 | } 18 | 19 | main :: () { 20 | haystack :: shakespeare;// degenerate; 21 | needle :: " and "; 22 | //needle :: "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffN"; 23 | //needle :: "nffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"; 24 | //"THE end\n";//" and "; 
//"ffffffffffffffffffffffffffffffffffffffffn"; "zealous"; 25 | times :: 10; 26 | debug :: false; 27 | 28 | x :: true; 29 | _ :: false; 30 | simple :: x; 31 | jai :: x; 32 | simple_sse2 :: x; 33 | simple_avx2 :: x; 34 | kmp :: x; 35 | boyer_moore :: x; 36 | boyer_moore_sse2 :: x; 37 | boyer_moore_avx2 :: x; 38 | 39 | 40 | format_float := *context.print_style.default_format_float; 41 | format_float.zero_removal = .NO; 42 | format_float.trailing_width = 3; 43 | 44 | #if no_case { 45 | find_index_from_left :: find_index_from_left_nocase; 46 | find_index_from_right :: find_index_from_right_nocase; 47 | } 48 | else { 49 | find_index_from_left :: jai_string.find_index_from_left; 50 | find_index_from_right :: jai_string.find_index_from_right; 51 | } 52 | 53 | first_checksum : u64; 54 | last_checksum : u64; 55 | 56 | { 57 | s := haystack; 58 | offset := 0; 59 | total := 0; 60 | index := find_index_from_left(s, needle); 61 | while index >= 0 { 62 | index += offset; 63 | total += 1; 64 | #if debug print("%\n", index); 65 | first_checksum = xor(first_checksum, xx index); 66 | offset = index + 1; 67 | s.data = haystack.data + offset; 68 | s.count = haystack.count - offset; 69 | index = find_index_from_left(s, needle); 70 | } 71 | #if debug print("\n\n"); 72 | //print("INDEX: %\n", total); 73 | 74 | s = haystack; 75 | index = find_index_from_right(haystack, needle); 76 | while index >= 0 { 77 | last_checksum = xor(last_checksum, xx index); 78 | s.count = index; 79 | index = find_index_from_right(s, needle); 80 | } 81 | 82 | first_checksum *= times; 83 | last_checksum *= times; 84 | } 85 | 86 | 87 | t : float64; 88 | 89 | Entry :: struct { 90 | name : string; 91 | first_time : float64 = 0; 92 | last_time : float64 = 0; 93 | first_checksum : u64 = 0; 94 | last_checksum : u64 = 0; 95 | } 96 | 97 | entry :: (name: string) -> Entry { 98 | result : Entry; 99 | result.name = name; 100 | return result; 101 | } 102 | 103 | #if jai jai_entry := entry("Jai"); 104 | #if boyer_moore 
boyer_moore_entry := entry("Boyer-Moore"); 105 | #if boyer_moore_sse2 boyer_moore_sse2_entry := entry("Boyer-Moore SSE2"); 106 | #if boyer_moore_avx2 boyer_moore_avx2_entry := entry("Boyer-Moore AVX2"); 107 | #if kmp kmp_entry := entry("Knuth-Morris-Pratt"); 108 | #if simple simple_entry := entry("Simple"); 109 | #if simple_sse2 simple_sse2_entry := entry("Simple SSE2"); 110 | #if simple_avx2 simple_avx2_entry := entry("Simple AVX2"); 111 | 112 | prev_marker := -1; 113 | 114 | for 1 .. times { 115 | #if times >= 10 { 116 | marker := it / (times / 10); 117 | if marker != prev_marker { 118 | prev_marker = marker; 119 | if marker < 10 120 | print("%", marker); 121 | } 122 | } 123 | 124 | #if jai {{ 125 | t = seconds_since_init(); 126 | s := haystack; 127 | offset := 0; 128 | index := find_index_from_left(haystack, needle); 129 | checksum : u64 = 0; 130 | while index >= 0 { 131 | index += offset; 132 | checksum = xor(checksum, xx index); 133 | offset = index + 1; 134 | s.data = haystack.data + offset; 135 | s.count = haystack.count - offset; 136 | index = find_index_from_left(s, needle); 137 | } 138 | jai_entry.first_checksum += checksum; 139 | jai_entry.first_time += seconds_since_init() - t; 140 | 141 | t = seconds_since_init(); 142 | s = haystack; 143 | index = find_index_from_right(haystack, needle); 144 | checksum = 0; 145 | while index >= 0 { 146 | checksum = xor(checksum, xx index); 147 | s.count = index; 148 | index = find_index_from_right(s, needle); 149 | } 150 | jai_entry.last_checksum += checksum; 151 | jai_entry.last_time += seconds_since_init() - t; 152 | }} 153 | 154 | #if boyer_moore {{ 155 | set_index_algorithm(boyer_moore_first_index,boyer_moore_last_index); 156 | 157 | t = seconds_since_init(); 158 | index, found := first_index(haystack, needle); 159 | checksum : u64 = 0; 160 | while found { 161 | checksum = xor(checksum, xx index); 162 | index, found = first_index(haystack, needle, index + 1); 163 | } 164 | boyer_moore_entry.first_checksum += 
checksum; 165 | boyer_moore_entry.first_time += seconds_since_init() - t; 166 | 167 | t = seconds_since_init(); 168 | index, found = last_index(haystack, needle); 169 | checksum = 0; 170 | while found { 171 | checksum = xor(checksum, xx index); 172 | index, found = last_index(haystack, needle, index); 173 | } 174 | boyer_moore_entry.last_checksum += checksum; 175 | boyer_moore_entry.last_time += seconds_since_init() - t; 176 | }} 177 | 178 | #if boyer_moore_sse2 {{ 179 | set_index_algorithm(boyer_moore_sse2_first_index,boyer_moore_sse2_last_index); 180 | 181 | t = seconds_since_init(); 182 | index, found := first_index(haystack, needle); 183 | checksum : u64 = 0; 184 | total := 0; 185 | while found { 186 | #if debug print("%\n", index); 187 | total += 1; 188 | checksum = xor(checksum, xx index); 189 | index, found = first_index(haystack, needle, index + 1); 190 | } 191 | //print("BM: %\n", total); 192 | boyer_moore_sse2_entry.first_checksum += checksum; 193 | boyer_moore_sse2_entry.first_time += seconds_since_init() - t; 194 | 195 | t = seconds_since_init(); 196 | index, found = last_index(haystack, needle); 197 | checksum = 0; 198 | while found { 199 | checksum = xor(checksum, xx index); 200 | index, found = last_index(haystack, needle, index); 201 | } 202 | boyer_moore_sse2_entry.last_checksum += checksum; 203 | boyer_moore_sse2_entry.last_time += seconds_since_init() - t; 204 | }} 205 | 206 | #if boyer_moore_avx2 {{ 207 | set_index_algorithm(boyer_moore_avx2_first_index,boyer_moore_avx2_last_index); 208 | 209 | t = seconds_since_init(); 210 | index, found := first_index(haystack, needle); 211 | checksum : u64 = 0; 212 | total := 0; 213 | while found { 214 | #if debug print("%\n", index); 215 | total += 1; 216 | checksum = xor(checksum, xx index); 217 | index, found = first_index(haystack, needle, index + 1); 218 | } 219 | //print("BM: %\n", total); 220 | boyer_moore_avx2_entry.first_checksum += checksum; 221 | boyer_moore_avx2_entry.first_time += 
seconds_since_init() - t; 222 | 223 | t = seconds_since_init(); 224 | index, found = last_index(haystack, needle); 225 | checksum = 0; 226 | while found { 227 | checksum = xor(checksum, xx index); 228 | index, found = last_index(haystack, needle, index); 229 | } 230 | boyer_moore_avx2_entry.last_checksum += checksum; 231 | boyer_moore_avx2_entry.last_time += seconds_since_init() - t; 232 | }} 233 | 234 | #if kmp {{ 235 | set_index_algorithm(knuth_morris_pratt_first_index,knuth_morris_pratt_last_index); 236 | 237 | t = seconds_since_init(); 238 | index, found := first_index(haystack, needle); 239 | checksum : u64 = 0; 240 | while found { 241 | checksum = xor(checksum, xx index); 242 | index, found = first_index(haystack, needle, index + 1); 243 | } 244 | kmp_entry.first_checksum += checksum; 245 | kmp_entry.first_time += seconds_since_init() - t; 246 | 247 | t = seconds_since_init(); 248 | index, found = last_index(haystack, needle); 249 | checksum = 0; 250 | while found { 251 | checksum = xor(checksum, xx index); 252 | index, found = last_index(haystack, needle, index); 253 | } 254 | kmp_entry.last_checksum += checksum; 255 | kmp_entry.last_time += seconds_since_init() - t; 256 | }} 257 | 258 | #if simple {{ 259 | set_index_algorithm(simple_first_index, simple_last_index); 260 | 261 | t = seconds_since_init(); 262 | index, found := first_index(haystack, needle); 263 | checksum : u64 = 0; 264 | while found { 265 | checksum = xor(checksum, xx index); 266 | index, found = first_index(haystack, needle, index + 1); 267 | } 268 | simple_entry.first_checksum += checksum; 269 | simple_entry.first_time += seconds_since_init() - t; 270 | 271 | t = seconds_since_init(); 272 | index, found = last_index(haystack, needle); 273 | checksum = 0; 274 | while found { 275 | checksum = xor(checksum, xx index); 276 | index, found = last_index(haystack, needle, index); 277 | } 278 | simple_entry.last_checksum += checksum; 279 | simple_entry.last_time += seconds_since_init() - t; 280 | }} 
281 | 282 | #if simple_sse2 {{ 283 | set_index_algorithm(simple_sse2_first_index, simple_sse2_last_index); 284 | 285 | t = seconds_since_init(); 286 | index, found := first_index(haystack, needle); 287 | checksum : u64 = 0; 288 | while found { 289 | checksum = xor(checksum, xx index); 290 | index, found = first_index(haystack, needle, index + 1); 291 | } 292 | simple_sse2_entry.first_checksum += checksum; 293 | simple_sse2_entry.first_time += seconds_since_init() - t; 294 | 295 | t = seconds_since_init(); 296 | index, found = last_index(haystack, needle); 297 | checksum = 0; 298 | while found { 299 | checksum = xor(checksum, xx index); 300 | index, found = last_index(haystack, needle, index); 301 | } 302 | simple_sse2_entry.last_checksum += checksum; 303 | simple_sse2_entry.last_time += seconds_since_init() - t; 304 | }} 305 | 306 | #if simple_avx2 {{ 307 | set_index_algorithm(simple_avx2_first_index, simple_avx2_last_index); 308 | 309 | t = seconds_since_init(); 310 | index, found := first_index(haystack, needle); 311 | checksum : u64 = 0; 312 | while found { 313 | checksum = xor(checksum, xx index); 314 | index, found = first_index(haystack, needle, index + 1); 315 | } 316 | simple_avx2_entry.first_checksum += checksum; 317 | simple_avx2_entry.first_time += seconds_since_init() - t; 318 | 319 | t = seconds_since_init(); 320 | index, found = last_index(haystack, needle); 321 | checksum = 0; 322 | while found { 323 | checksum = xor(checksum, xx index); 324 | index, found = last_index(haystack, needle, index); 325 | } 326 | simple_avx2_entry.last_checksum += checksum; 327 | simple_avx2_entry.last_time += seconds_since_init() - t; 328 | }} 329 | } 330 | 331 | 332 | 333 | entries : [..] 
Entry; 334 | 335 | #if jai array_add(*entries, jai_entry); 336 | #if kmp array_add(*entries, kmp_entry); 337 | #if boyer_moore array_add(*entries, boyer_moore_entry); 338 | #if boyer_moore_sse2 array_add(*entries, boyer_moore_sse2_entry); 339 | #if boyer_moore_avx2 array_add(*entries, boyer_moore_avx2_entry); 340 | #if simple array_add(*entries, simple_entry); 341 | #if simple_sse2 array_add(*entries, simple_sse2_entry); 342 | #if simple_avx2 array_add(*entries, simple_avx2_entry); 343 | 344 | quick_sort(entries, (a: Entry) -> float64 { return a.first_time; }); 345 | 346 | print("\n\n"); 347 | 348 | for entries { 349 | print("%", pad_end(it.name, 20)); 350 | time := sprint("% / %", it.first_time, it.last_time); 351 | if it.first_checksum != first_checksum { 352 | print("BAD FIRST CHECKSUM: % != %\n", it.first_checksum, first_checksum); 353 | if it.last_checksum != last_checksum 354 | print(" BAD LAST CHECKSUM: % != %\n", it.last_checksum, last_checksum); 355 | continue; 356 | } 357 | else if it.last_checksum != last_checksum { 358 | print("BAD LAST CHECKSUM: % != %\n", it.last_checksum, last_checksum); 359 | continue; 360 | } 361 | 362 | #if jai { 363 | first_factor := sprint("%", jai_entry.first_time / it.first_time); 364 | last_factor := sprint("%", jai_entry.last_time / it.last_time); 365 | print("% = %x / %x\n", pad_start(time, 7), pad_start(first_factor, 7), pad_start(last_factor, 7)); 366 | } 367 | else { 368 | print("% %\n", it.first_checksum, pad_end(time, 7)); 369 | } 370 | if !it_index print("\n"); 371 | } 372 | } 373 | 374 | 375 | xor :: (a: u64, b: u64) -> u64 { 376 | result := a; 377 | #asm { 378 | x : gpr; 379 | mov.q x, result; 380 | xor.q x, b; 381 | mov.q result, x; 382 | } 383 | return result; 384 | } 385 | 386 | 387 | find_index_from_left_nocase :: (haystack: string, needle: string) -> int { 388 | if haystack.count < needle.count return -1; 389 | 390 | for 0 .. 
haystack.count - needle.count { 391 | t := jai_string.slice(haystack, it, needle.count); 392 | if jai_string.equal_nocase(t, needle) return it; 393 | } 394 | 395 | return -1; 396 | } 397 | 398 | 399 | find_index_from_right_nocase :: (haystack: string, needle: string) -> int { 400 | if haystack.count < needle.count return -1; 401 | 402 | for < haystack.count - needle.count .. 0 { 403 | t := jai_string.slice(haystack, it, needle.count); 404 | if jai_string.equal_nocase(t, needle) return it; 405 | } 406 | 407 | return -1; 408 | } 409 | -------------------------------------------------------------------------------- /tools/index_profile.jai: -------------------------------------------------------------------------------- 1 | #import "Basic"; 2 | #import "File"; 3 | #import "File_Utilities"; 4 | #import "Sort"; 5 | #import "Strings"; 6 | jai_string :: #import "String"; 7 | 8 | 9 | main :: () { 10 | args := get_command_line_arguments(); 11 | haystack_file := ""; 12 | needle := ""; 13 | no_case := false; 14 | times : s64 = 100; 15 | i := 1; 16 | test_only := false; 17 | valid_args := true; 18 | while i < args.count { 19 | arg := args[i]; 20 | if arg == "-i" { 21 | no_case = true; 22 | } 23 | else if arg == "-t" { 24 | test_only = true; 25 | } 26 | else if arg == "-c" { 27 | i += 1; 28 | if i >= args.count { 29 | valid_args = false; 30 | break; 31 | } 32 | n, ok := jai_string.parse_int(*args[i]); 33 | if !ok || n < 1 { 34 | valid_args = false; 35 | break; 36 | } 37 | times = n; 38 | } 39 | else if haystack_file == "" { 40 | haystack_file = arg; 41 | } 42 | else if needle == "" { 43 | needle = apply_backslash(arg); 44 | } 45 | else { 46 | valid_args = false; 47 | break; 48 | } 49 | 50 | i += 1; 51 | } 52 | 53 | if !(haystack_file && needle && valid_args) { 54 | print("\nUSAGE: index_profile.exe [-i] [-c ] \n\n"); 55 | print(" = path of file containing haystack text\n"); 56 | print(" = text to search for\n"); 57 | print(" -i = ignore case\n"); 58 | print(" -c = perform N 
iterations, default is 100\n"); 59 | exit(1); 60 | } 61 | 62 | if !file_exists(haystack_file) { 63 | print("\nFile not found: %\n", haystack_file); 64 | exit(2); 65 | } 66 | 67 | if test_only times = 1; 68 | 69 | haystack := read_entire_file(haystack_file); 70 | 71 | jai :: true; 72 | simple :: true; 73 | super_simple :: false; 74 | simple_sse2 :: true; 75 | simple_avx2 :: true; 76 | simple_unsafe :: true; 77 | kmp :: true; 78 | boyer_moore :: true; 79 | boyer_moore_sse2 :: true; 80 | boyer_moore_avx2 :: true; 81 | 82 | #if kmp { 83 | #load "../Strings/knuth_morris_pratt.jai"; 84 | } 85 | 86 | format_float := *context.print_style.default_format_float; 87 | format_float.zero_removal = .NO; 88 | format_float.width = 6; 89 | format_float.trailing_width = 3; 90 | 91 | find_index_from_left := jai_find_index_from_left; 92 | find_index_from_right := jai_find_index_from_right; 93 | 94 | if no_case { 95 | find_index_from_left = jai_find_index_from_left_nocase; 96 | find_index_from_right = jai_find_index_from_right_nocase; 97 | } 98 | else { 99 | } 100 | compare := ifx no_case then ignore_case else case_sensitive; 101 | 102 | expected_first_checksum : u64; 103 | expected_last_checksum : u64; 104 | expected_total := 0; 105 | 106 | { 107 | s := haystack; 108 | offset := 0; 109 | index := find_index_from_left(s, needle); 110 | while index >= 0 { 111 | index += offset; 112 | expected_first_checksum = xor(expected_first_checksum, xx index); 113 | expected_total += 1; 114 | offset = index + 1; 115 | s.data = haystack.data + offset; 116 | s.count = haystack.count - offset; 117 | index = find_index_from_left(s, needle); 118 | } 119 | 120 | s = haystack; 121 | index = find_index_from_right(haystack, needle); 122 | while index >= 0 { 123 | expected_last_checksum = xor(expected_last_checksum, xx index); 124 | s.count = index; 125 | index = find_index_from_right(s, needle); 126 | } 127 | 128 | expected_first_checksum *= cast(u64)times; 129 | expected_last_checksum *= cast(u64)times; 130 
| expected_total *= times; 131 | } 132 | 133 | 134 | Entry :: struct { 135 | name : string; 136 | id : string; 137 | prefix : string; 138 | first_time : float64 = 0; 139 | last_time : float64 = 0; 140 | first_total := 0; 141 | first_checksum : u64 = 0; 142 | last_checksum : u64 = 0; 143 | last_total := 0; 144 | } 145 | 146 | entry :: (id: string, name: string, prefix: string) -> Entry { 147 | result : Entry; 148 | result.id = id; 149 | result.name = name; 150 | result.prefix = prefix; 151 | return result; 152 | } 153 | 154 | #if jai jai_entry := entry("?", "Jai", "jai"); 155 | #if boyer_moore boyer_moore_entry := entry("BOYER_MOORE", "Boyer-Moore", "boyer_moore"); 156 | #if boyer_moore_sse2 boyer_moore_sse2_entry := entry("BOYER_MOORE_SSE2", "Boyer-Moore SSE2", "boyer_moore_sse2"); 157 | #if boyer_moore_avx2 boyer_moore_avx2_entry := entry("BOYER_MOORE_AVX2", "Boyer-Moore AVX2", "boyer_moore_avx2"); 158 | #if kmp kmp_entry := entry("KNUTH_MORRIS_PRATT", "Knuth-Morris-Pratt", "knuth_morris_pratt"); 159 | #if simple simple_entry := entry("SIMPLE", "Simple", "simple"); 160 | #if simple_unsafe simple_unsafe_entry := entry("SIMPLE_UNSAFE", "Simple Unsafe", "unsafe_simple"); 161 | #if simple_sse2 simple_sse2_entry := entry("SIMPLE_SSE2", "Simple SSE2", "simple_sse2"); 162 | #if simple_avx2 simple_avx2_entry := entry("SIMPLE_AVX2", "Simple AVX2", "simple_avx2"); 163 | #if super_simple super_simple_entry := entry("SUPER_SIMPLE", "Super Simple", "super_simple"); 164 | 165 | prev_marker := -1; 166 | 167 | for 1 .. 
times { 168 | if times >= 10 { 169 | marker : s64 = it / (times / 10); 170 | if marker != prev_marker { 171 | prev_marker = marker; 172 | if marker < 10 173 | print("%", marker); 174 | } 175 | } 176 | 177 | #if jai {{ 178 | t := seconds_since_init(); 179 | s := haystack; 180 | offset := 0; 181 | index := find_index_from_left(haystack, needle); 182 | checksum : u64 = 0; 183 | total := 0; 184 | while index >= 0 { 185 | index += offset; 186 | checksum = xor(checksum, xx index); 187 | total += 1; 188 | offset = index + 1; 189 | s.data = haystack.data + offset; 190 | s.count = haystack.count - offset; 191 | index = find_index_from_left(s, needle); 192 | } 193 | jai_entry.first_checksum += checksum; 194 | jai_entry.first_total += total; 195 | jai_entry.first_time += seconds_since_init() - t; 196 | 197 | t = seconds_since_init(); 198 | s = haystack; 199 | index = find_index_from_right(haystack, needle); 200 | checksum = 0; 201 | total = 0; 202 | while index >= 0 { 203 | checksum = xor(checksum, xx index); 204 | total += 1; 205 | s.count = index; 206 | index = find_index_from_right(s, needle); 207 | } 208 | jai_entry.last_checksum += checksum; 209 | jai_entry.last_total += total; 210 | jai_entry.last_time += seconds_since_init() - t; 211 | }} 212 | 213 | test :: (first_index_proc: type_of(simple_first_index), last_index_proc: type_of(simple_last_index), entry: *Entry) #expand { 214 | set_index_algorithm(first_index_proc, last_index_proc); 215 | 216 | t := seconds_since_init(); 217 | index, found := first_index(`haystack, `needle, compare); 218 | checksum : u64 = 0; 219 | total := 0; 220 | while found { 221 | checksum = xor(checksum, xx index); 222 | total += 1; 223 | index, found = first_index(`haystack, `needle, index + 1, compare); 224 | } 225 | entry.first_checksum += checksum; 226 | entry.first_total += total; 227 | entry.first_time += seconds_since_init() - t; 228 | 229 | t = seconds_since_init(); 230 | index, found = last_index(`haystack, `needle, compare); 231 | 
checksum = 0; 232 | total = 0; 233 | while found { 234 | checksum = xor(checksum, xx index); 235 | total += 1; 236 | index, found = last_index(`haystack, `needle, index, compare); 237 | } 238 | entry.last_checksum += checksum; 239 | entry.last_total += total; 240 | entry.last_time += seconds_since_init() - t; 241 | } 242 | 243 | #if boyer_moore test(boyer_moore_first_index, boyer_moore_last_index, *boyer_moore_entry); 244 | #if boyer_moore_sse2 test(boyer_moore_sse2_first_index, boyer_moore_sse2_last_index, *boyer_moore_sse2_entry); 245 | #if boyer_moore_avx2 test(boyer_moore_avx2_first_index, boyer_moore_avx2_last_index, *boyer_moore_avx2_entry); 246 | #if kmp test(knuth_morris_pratt_first_index, knuth_morris_pratt_last_index, *kmp_entry); 247 | #if simple test(simple_first_index, simple_last_index, *simple_entry); 248 | #if simple_unsafe test(unsafe_simple_first_index, unsafe_simple_last_index, *simple_unsafe_entry); 249 | #if simple_sse2 test(simple_sse2_first_index, simple_sse2_last_index, *simple_sse2_entry); 250 | #if simple_avx2 test(simple_avx2_first_index, simple_avx2_last_index, *simple_avx2_entry); 251 | #if super_simple test(super_simple_first_index, super_simple_last_index, *super_simple_entry); 252 | } 253 | 254 | entries : [..] 
Entry; 255 | 256 | #if jai array_add(*entries, jai_entry); 257 | #if kmp array_add(*entries, kmp_entry); 258 | #if boyer_moore array_add(*entries, boyer_moore_entry); 259 | #if boyer_moore_sse2 array_add(*entries, boyer_moore_sse2_entry); 260 | #if boyer_moore_avx2 array_add(*entries, boyer_moore_avx2_entry); 261 | #if simple array_add(*entries, simple_entry); 262 | #if simple_unsafe array_add(*entries, simple_unsafe_entry); 263 | #if simple_sse2 array_add(*entries, simple_sse2_entry); 264 | #if simple_avx2 array_add(*entries, simple_avx2_entry); 265 | #if super_simple array_add(*entries, super_simple_entry); 266 | 267 | quick_sort(entries, (a: Entry) -> float64 { return a.first_time; }); 268 | 269 | fastest_last_time := entries[0].last_time; 270 | for entries if it.last_time < fastest_last_time fastest_last_time = it.last_time; 271 | 272 | if test_only { 273 | all_ok := true; 274 | print("Testing: %\nNeedle: %\nFound: %\n", haystack_file, needle, expected_total); 275 | for entries { 276 | print("%", pad_end(it.name, 20)); 277 | 278 | valid := true; 279 | if it.first_checksum != expected_first_checksum { 280 | print("BAD FIRST CHECKSUM: Expected [%] Got [%]\n", expected_first_checksum, it.first_checksum); 281 | valid = false; 282 | } 283 | if it.last_checksum != expected_last_checksum { 284 | if !valid print(" "); 285 | print("BAD LAST CHECKSUM: Expected [%] Got [%]\n", expected_last_checksum, it.last_checksum); 286 | valid = false; 287 | } 288 | if it.first_total != expected_total { 289 | if !valid print(" "); 290 | print("BAD FIRST TOTAL: Expected [%] Got [%]\n", expected_total, it.first_total); 291 | valid = false; 292 | } 293 | if it.last_total != expected_total { // compare match counts here; the last checksum is already checked above 294 | if !valid print(" "); 295 | print("BAD LAST TOTAL: Expected [%] Got [%]\n", expected_total, it.last_total); 296 | valid = false; 297 | } 298 | 299 | if valid print("OK\n"); else all_ok = false; // record the failure so the exit code below reflects it 300 | } 301 | print("\n"); 302 | if !all_ok exit(1); 303 | } 304 | else { 305 | print("\n\n"); 306 | 307 | for 
entries { 308 | if !it_index print("\e[0;32m"); 309 | print("%", pad_end(it.name, 20)); 310 | first_color := ""; 311 | if !it_index { 312 | print("\e[m"); 313 | first_color = "\e[0;33m"; 314 | } 315 | last_color := ""; 316 | if it.last_time == fastest_last_time 317 | last_color = "\e[0;33m"; 318 | time := sprint("%4%1%3 / %5%2%3", it.first_time, it.last_time, "\e[m", first_color, last_color); 319 | 320 | valid := true; 321 | if it.first_checksum != expected_first_checksum { 322 | print("BAD FIRST CHECKSUM: Expected [%] Got [%]\n", expected_first_checksum, it.first_checksum); 323 | valid = false; 324 | } 325 | if it.last_checksum != expected_last_checksum { 326 | if !valid print(" "); 327 | print("BAD LAST CHECKSUM: Expected [%] Got [%]\n", expected_last_checksum, it.last_checksum); 328 | valid = false; 329 | } 330 | if it.first_total != expected_total { 331 | if !valid print(" "); 332 | print("BAD FIRST TOTAL: Expected [%] Got [%]\n", expected_total, it.first_total); 333 | valid = false; 334 | } 335 | if it.last_total != expected_total { // compare match counts here; the last checksum is already checked above 336 | if !valid print(" "); 337 | print("BAD LAST TOTAL: Expected [%] Got [%]\n", expected_total, it.last_total); 338 | valid = false; 339 | } 340 | 341 | if !valid { 342 | print("\n Bad checksums/totals only happen if there's a bug in the\n Strings library, please report it!\n\n"); 343 | continue; 344 | } 345 | 346 | #if jai { 347 | format_float.width = 5; 348 | first_factor := sprint("%", jai_entry.first_time / it.first_time); 349 | last_factor := sprint("%", jai_entry.last_time / it.last_time); 350 | format_float.width = 6; 351 | print("%1 = %4%2x\e[m / %5%3x\e[m\n", pad_start(time, 7), pad_start(first_factor, 7), pad_start(last_factor, 7), first_color, last_color); 352 | } 353 | else { 354 | print("%\n", pad_end(time, 7)); 355 | } 356 | if !it_index print("\n"); 357 | } 358 | 359 | print(#string __info 360 | 361 | Results are listed for first_index / last_index, and sorted by first_index (as 362 | first_index is used 
internally so is more important). If you want to use a 363 | different algorithm for last_index than for first_index you can use the bottom 364 | call below with different arguments. 365 | 366 | The winning algorithm for this dataset is: %5%1%4 367 | 368 | To use it import with module parameters: 369 | 370 | %6#import "Strings"(index_algorithm = .%2);%4 371 | 372 | or call set_index_algorithm: 373 | 374 | %6set_index_algorithm(%3_first_index, %3_last_index);%4 375 | __info, entries[0].name, entries[0].id, entries[0].prefix, "\e[m", "\e[0;32m", "\e[0;33m"); 376 | } 377 | } 378 | 379 | 380 | xor :: (a: u64, b: u64) -> u64 { 381 | result := a; 382 | #asm { 383 | x : gpr; 384 | mov.q x, result; 385 | xor.q x, b; 386 | mov.q result, x; 387 | } 388 | return result; 389 | } 390 | 391 | 392 | jai_find_index_from_left_nocase :: (haystack: string, needle: string) -> int { 393 | if haystack.count < needle.count return -1; 394 | 395 | for 0 .. haystack.count - needle.count { 396 | t := jai_string.slice(haystack, it, needle.count); 397 | if jai_string.equal_nocase(t, needle) return it; 398 | } 399 | 400 | return -1; 401 | } 402 | 403 | 404 | jai_find_index_from_right_nocase :: (haystack: string, needle: string) -> int { 405 | if haystack.count < needle.count return -1; 406 | 407 | for < haystack.count - needle.count .. 0 { 408 | t := jai_string.slice(haystack, it, needle.count); 409 | if jai_string.equal_nocase(t, needle) return it; 410 | } 411 | 412 | return -1; 413 | } 414 | 415 | 416 | jai_find_index_from_left :: (s: string, substring: string) -> int { 417 | if !substring return -1; 418 | 419 | // This is SO SLOW! @Cleanup. 420 | for i: 0..s.count-substring.count { 421 | t := jai_string.slice(s, i, substring.count); 422 | if t == substring return i; 423 | } 424 | 425 | return -1; 426 | } 427 | 428 | jai_find_index_from_right :: (s: string, substring: string) -> int { 429 | if !substring return -1; 430 | 431 | // This is SO SLOW! @Cleanup. 
432 | for < i: s.count-substring.count..0 { 433 | t := jai_string.slice(s, i, substring.count); 434 | if t == substring return i; 435 | } 436 | 437 | return -1; 438 | } 439 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jai-string 2 | 3 | Modules present: 4 | 5 | * `Strings` Fairly performant and well reasoned api for working with strings. 6 | * `Scratch` A simple allocator for doing multiple operations in a row without grabbing more memory on each one. [Info](Scratch/module.jai) 7 | 8 | To use clone the repo then copy the `Strings` folder into your `jai/modules` folder, or symlink them: `mklink /d c:\jai\modules\Strings c:\repos\jai-string\Strings` 9 | Optionally do the same for the `Scratch` folder if you want to have access to the scratch allocator. 10 | 11 | 12 | ## Mechanics 13 | 14 | ### Mutating in-place vs returning result 15 | 16 | Any proc in this module which writes to the string's data will have a pointer to the string as its parameter instead of just a string. This gives a clear indicator of intent, and also delineates between different versions of a proc. For example: 17 | ```jai 18 | bar := to_upper(foo); // returns a copy of foo converted to uppercase (allocates!) 19 | to_upper(*foo); // mutates foo in-place, converting it to uppercase. 20 | ``` 21 | 22 | ### Generating strings 23 | 24 | Any proc which generates (allocates) a string will take an optional `null_terminate` parameter; setting this to true ensures the resulting string ends in `\0`. 25 | 26 | 27 | ### Character Comparison 28 | 29 | By default characters being compared between two strings are compared using the `case_sensitive` function (unless you override it with the module parameter). In this library any procedure which involves comparing strings will take a `character_compare` parameter in which you can specify a different procedure from the default. 
For example: 30 | 31 | ```jai 32 | assert( contains("Hello", "h") == false ); 33 | assert( contains("Hello", "h", ignore_case) == true ); 34 | ``` 35 | 36 | The comparator is a struct; you can make your own like this: 37 | 38 | ```jai 39 | are_numbers :: Character_Compare.{ 40 | .CUSTOM, 41 | (a: u8, b: u8) -> bool { 42 | return (a >= #char "0" && a <= #char "9") 43 | == (b >= #char "0" && b <= #char "9"); 44 | } 45 | }; 46 | 47 | share_case :: Character_Compare.{ 48 | .CUSTOM, 49 | (a: u8, b: u8) -> bool { 50 | a_is_alpha := is_alpha(a); 51 | if a_is_alpha != is_alpha(b) return false; 52 | if !a_is_alpha return true; 53 | return is_upper(a) == is_upper(b); 54 | } 55 | }; 56 | ``` 57 | 58 | The two comparators built into the module are `case_sensitive` and `ignore_case`. 59 | 60 | *(The other two options besides `.CUSTOM` are `.CASE_SENSITIVE` and `.IGNORE_CASE`: you may roll your own versions of those comparators if you wish, and by choosing the relevant identifier the correct SIMD optimisations will be invoked - however, there's not a lot of point in doing so...)* 61 | 62 | 63 | ### Tool types: u8 / [] u8 / string / Index_Proc 64 | 65 | In a string library it is often the case that you have a string which you are applying an operation to using a *tool* parameter. In this library there will generally be four versions of such procedures, the first three of which take a single tool parameter of type `u8`, `[] u8`, or `string`. As tools these types behave consistently across the library: 66 | 67 | * `u8`
68 | The single character specified will be used. 69 | 70 | * `[] u8`
71 | A match to any of the characters in the array will be used. 72 | 73 | * `string`
74 | The exact string will be used: i.e. the characters specified in the sequence specified. 75 | 76 | 77 | For example: 78 | ```jai 79 | assert( trim( " apple ", #char " " ) == "apple" ); 80 | assert( trim( "banana pear", cast([]u8) "ban" ) == " pear" ); 81 | assert( trim( "banana pear", "ban" ) == "ana pear" ); 82 | ``` 83 | 84 | Additionally, any time the tool is a `string` you may specify an `Index_Proc`. An `Index_Proc` is a procedure with the signature: 85 | 86 | `(haystack: string, needle: string, initial_index: int, reversed: bool) -> from_index: int, to_index: int, found: bool` 87 | 88 | This allows you to feed an arbitrarily complex pattern match into the procedure you are using. When using an `Index_Proc`, a character comparator is not used (as your own code is instead). 89 | 90 | For example: 91 | ```jai 92 | question_mark_index :: (haystack: string, needle: string, initial_index: int, $$reversed: bool) -> from_index: int, to_index: int, found: bool { 93 | if reversed { 94 | from_index, to_index, found := reverse_index_proc(question_mark_index, haystack, needle, initial_index); 95 | return from_index, to_index, found; 96 | } 97 | else { 98 | index := slice_index(haystack, initial_index); 99 | if index >= haystack.count return -1, -1, false; 100 | 101 | for haystack_index: index .. haystack.count - needle.count { 102 | for needle_index: 0 .. needle.count - 1 { 103 | c := needle[needle_index]; 104 | if c != #char "?" && c != haystack[haystack_index + needle_index] 105 | continue haystack_index; 106 | } 107 | 108 | return haystack_index, haystack_index + needle.count, true; 109 | } 110 | 111 | return -1, -1, false; 112 | } 113 | } 114 | 115 | assert( starts_with("Hello World", "He??o") == false ); 116 | assert( starts_with("Hello World", "He??o", question_mark_index) == true ); 117 | ``` 118 | 119 | Notice the use of `reverse_index_proc` to handle when the `reversed` parameter is set. 
This is a library procedure that you can use if you don't want to write out the reverse algorithm yourself, but note that it is extremely inefficient! 120 | 121 | In the docs below, any time a parameter of type `%Tool` is specified, it means there are four versions of the procedure, each corresponding to the behaviour described above (the fourth being that the %Tool is two parameters: `string`+`Index_Proc`). 122 | 123 |
124 | 125 | ### `#module_parameters` 126 | 127 | * `CHARACTER_COMPARE`
Default comparator used to check if two string characters are equal. One of: 128 | * `.CASE_SENSITIVE` 129 | * `.IGNORE_CASE` 130 | 131 | * `INDEX_ALGORITHM`
Determines the default string search algorithm to use (they can be changed later using `set_index_algorithm`). One of: 132 | * `.SIMPLE`, `.SIMPLE_SSE2`, `.SIMPLE_AVX2`, `.SIMPLE_UNSAFE`
Simplest algorithm, no memory overhead. 133 | * `.BOYER_MOORE`, `.BOYER_MOORE_SSE2`, `.BOYER_MOORE_AVX2`
[Boyer-Moore algorithm](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm). Fastest tested scalar algorithm overall, has a small memory footprint that increases with needle size. 134 | * `.KNUTH_MORRIS_PRATT`
[Knuth-Morris-Pratt algorithm](https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm). Another fast algorithm, with a similar memory footprint. 135 | 136 | #### A note on indexing algorithms 137 | 138 | The indexing algorithm set by `set_index_algorithm` is used internally in the module for most operations: any time you call things like `first_index`, `replace`, `split` it will be employed. 139 | Whereas other functions in the library will utilize SIMD features (SSE2 & AVX2) when told to with the `set_simd_mode` command, you must explicitly set an index algorithm to use them if that is what you wish*: the default indexing algorithm is scalar `Boyer-Moore`, because it is good on practically any dataset; a safe choice. Choosing a different indexing algorithm can provide impressive performance improvements, but this depends on the dataset you are working on (the specific strings and substrings you are searching with). SIMD algorithms can be orders of magnitude faster, but they can also be catastrophically slow when facing degenerate datasets. If you want to get the most performance out of the library then you should choose an appropriate indexing algorithm for your dataset. 140 | 141 | To help with this there is the `index_profile` tool (in the `tools/` folder): provide it with a file and a typical search string from your data and it will show you how each available algorithm performs with the data you are manipulating. 142 | 143 | * *(Though all the built-in indexing algorithms will detect if the needle is a single character long, and if so will use the relevant built-in character index algorithm, which will obey `set_simd_mode`)* 144 | 145 | 146 | ### Procedures 147 | 148 | 149 | #### Configuration 150 | 151 | 152 | * `set_index_algorithm (first_index_proc := default_first_index, last_index_proc := default_last_index)`
153 | Sets the index procedures used internally when searching through strings with strings (for `replace`, `split`, etc.) 154 | 155 | 156 | * `set_simd_mode (mode)`
Sets whether to use SIMD optimisations. One of: 157 | * `.OFF`
Disables all SIMD optimisations, utilizing scalar code only. 158 | * `.AUTO`
Uses the fastest SIMD instruction set available on the CPU. 159 | * `.SSE2`
Uses SSE2 (128bit) optimisations. This is the default. 160 | * `.AVX2`
Uses AVX2 (256bit) optimisations. 161 | 162 | 163 | #### Substrings 164 | 165 | 166 | * `slice (str: string, from_index: int, [to_index: int]) -> string, normalized_from_index: int, normalized_to_index: int`
167 | Returns the string inside `str`, between the specified indices. You may use a negative index to specify backwards from the end of the string. If you do not specify a `to_index` then it will include all characters up to the end of the string. The last two return parameters are the positive indexes the slice ends up using, after validation. 168 | 169 | 170 | * `substring (str: string, from_index: int, [count: int]) -> string, normalized_from_index: int, normalized_to_index: int`
171 | Same as `slice`, except instead of a `to_index` you specify a character count. If you do not specify a `count` then it will include all characters up to the end of the string. 172 | 173 | 174 | * `slice_index (str: string, index: int) -> normalized_index: int, well_formed: bool`
175 | Returns the validated and normalized index which would be used with the provided string, as well as whether the index was within the bounds of the string. 176 | 177 | 178 | * `raw_slice (str: string, from_index: int, [to_index: int]) -> string`
179 | As `slice`, but without any checking on the indices, and without being able to use negative indices (and thus faster). If you do not specify a `to_index` then it will include all characters up to the end of the string. Generally speaking, just use `slice` instead. 180 | 181 | 182 | * `raw_substring (str: string, from_index: int, count: int) -> string`
183 | As `substring`, but without any checking on the indices, and without being able to use negative indices (and thus faster). If you do not specify a `count` then it will include all characters up to the end of the string. Generally speaking, just use `substring` instead. 184 | 185 | 186 | * `trim (str: string) -> string`
187 | Returns the substring of `str` with all characters from the start and end which are <= `#char " "` removed (i.e. all whitespace and control codes). 188 | 189 | 190 | * `trim (str: string, tool: %Tool, character_compare := default_compare) -> string`
191 | Returns the substring of `str` with all characters matching tool removed from the start and end. 192 | 193 | 194 | * `trim_start (str: string, tool: %Tool, character_compare := default_compare) -> string`
195 | Returns the substring of `str` with all characters matching tool removed from the start. 196 | 197 | 198 | * `trim_end (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`
199 | Returns the substring of `str` with all characters matching tool removed from the end. 200 | 201 | 202 | * `trim_to (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`
203 | Returns the substring of `str` with all characters before the first instance and after the last instance of tool removed. If tool is not found then the entire string is returned. 204 | 205 | 206 | * `trim_start_to (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`
207 | Returns the substring of `str` with all characters before the first instance of tool removed from the start. If tool is not found then the entire string is returned. 208 | 209 | 210 | * `trim_end_to (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`
211 | Returns the substring of `str` with all characters after the last instance of tool removed from the end. If tool is not found then the entire string is returned. 212 | 213 | 214 | * `trim_through (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`
215 | Returns the substring of `str` with all characters before the first instance and after the last instance of tool, as well as the tool itself, removed. If tool is not found then the entire string is returned. 216 | 217 | 218 | * `trim_start_through (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`
219 | Returns the substring of `str` with all characters before the first instance of tool, and the tool, removed from the start. If tool is not found then the entire string is returned. 220 | 221 | 222 | * `trim_end_through (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`
223 | Returns the substring of `str` with all characters after the last instance of tool, and the tool, removed from the end. If tool is not found then the entire string is returned. 224 | 225 | 226 | #### Consuming 227 | 228 | 229 | * `advance_to (haystack: *string, needle: %Tool) -> characters_skipped: int, found: bool`
230 | Modifies `haystack` in-place, moving its start point forward until it hits `%Tool` (or empties). 231 | 232 | 233 | * `advance_through (haystack: *string, needle: %Tool) -> characters_skipped: int, found: bool`
234 | Modifies `haystack` in-place, moving its start point forward until it hits and reaches the end of `%Tool` (or empties). 235 | 236 | 237 | #### Splitting 238 | 239 | 240 | All split procedures return an iterator (a for-expansion). If you want the substrings to be in an array you can feed this iterator into `to_array`. 241 | 242 | 243 | * `split (text: string, separator: %Tool, skip_empty := false, max_results := 0, keep_separator := .NO, character_compare := default_compare)`
244 | Used to iterate over `text` in a `for` loop, splitting the string by the chosen tool. 245 | If `skip_empty` is set then your code will not be called with the empty string (i.e. when there are two consecutive `separator`s). 246 | If `max_results` is non-zero then `text` will only be split into at most that 247 | many pieces. 248 | If `keep_separator` is set to `.AS_PREFIX` or `.AS_POSTFIX` then the separator will be included in the strings, at the specified position. 249 | 250 | For example: 251 | ```jai 252 | for word, index: split(" aa bb cc dd ", #char " ", skip_empty = true, max_results = 3) { 253 | if index == { 254 | case 0; assert(word == "aa"); 255 | case 1; assert(word == "bb"); 256 | case 2; assert(word == "cc"); 257 | case 3; assert(false); 258 | } 259 | } 260 | 261 | for word, index: split("Hello, World.", ", ", keep_separator = .AS_POSTFIX) { 262 | if index == { 263 | case 0; assert(word == "Hello, "); 264 | case 1; assert(word == "World."); 265 | case 2; assert(false); 266 | } 267 | } 268 | ``` 269 | 270 | 271 | * `split_into_two (text: string, separator: %Tool, keep_separator := .NO, character_compare := default_compare) -> string, string`
272 | As split with max_results set to 2, but returns the two strings directly rather than an iterator. 273 | 274 | 275 | * `to_array (splitter: $T/Splitter, reversed := false) -> [..] string`
276 | Executes the splitter, generating an array. 277 | ```jai 278 | splitter := split("How about a nice game of chess?", #char " "); 279 | words := to_array(splitter,, temp); 280 | assert(words.count == 7); 281 | ``` 282 | 283 | 284 | * `to_array (array: *[] string, splitter: $T/Splitter, reversed := false, clear_unused := true) -> [] string`
285 | Executes the splitter and places its results into the array. Returns an array_view over the array with the used count. 286 | If `clear_unused` is set then any trailing slots in the array after the resulting count will be cleared. 287 | ```jai 288 | parts : [20] string; 289 | view := to_array(*parts, split("How about a nice game of chess?", #char " ")); 290 | assert(view.count == 7); 291 | ``` 292 | 293 | 294 | * `count_split (text: string, count: int, max_results := 0)`
295 | As `split`, except the string is split into sections with the specified `count`. 296 | 297 | 298 | * `index_split (text: string, indexes: .. int, skip_empty := false, max_results := 0)`
299 | As `split`, except the string is split at the specified indices. 300 | 301 | 302 | * `line_split (text: string, keep_end := false, skip_empty := false, max_results := 0, keep_separator := .NO)`
303 | As `split` using `#char "\n"` as the tool, but will automatically handle windows vs unix file formats (i.e. will take care of `"\r\n"`). 304 | 305 | 306 | #### Querying 307 | 308 | 309 | * `first_index (haystack: string, needle: %Tool, start_index := 0, character_compare := default_compare) -> index: int, found: bool, [to_index: int]`
310 | Returns the first index in `haystack` at which `needle` occurs, or `-1` if it does not occur. `found` will be true if `needle` was found. In the case when `%Tool` is an `Index_Proc`, `to_index` will be set to the index the pattern terminates at. 311 | 312 | 313 | * `last_index (haystack: string, needle: %Tool, start_index := 0, character_compare := default_compare) -> index: int, found: bool, [to_index: int]`
314 | As per `first_index`, but working backwards from the end of the `haystack`. 315 | 316 | 317 | * `contains (haystack: string, needle: %Tool, character_compare := default_compare) -> bool`
318 | Whether `needle` occurs within `haystack`. 319 | 320 | 321 | * `count (haystack: string, needle: %Tool, character_compare := default_compare) -> int`
322 | How many times `needle` occurs within `haystack` (non-overlapping). 323 | 324 | 325 | * `equal (a: string, b: string, character_compare := default_compare) -> bool`
326 | Returns whether the two strings are equal, using current or specified comparator. 327 | 328 | 329 | * `is_any (needle: u8, characters: [] u8, character_compare := default_compare) -> bool`
330 | Returns whether `needle` is equal to any of `characters`. 331 | 332 | 333 | * `is_lower (char: u8) -> bool`
334 | Whether `char` falls in the range `#char "a" - #char "z"`. 335 | 336 | 337 | * `is_upper (char: u8) -> bool`
338 | Whether `char` falls in the range `#char "A" - #char "Z"`. 339 | 340 | 341 | * `starts_with (haystack: string, needle: %Tool, character_compare := default_compare) -> bool`
342 | Returns whether `haystack` begins with `needle`. 343 | 344 | 345 | * `ends_with (haystack: string, needle: %Tool, character_compare := default_compare) -> bool`
346 | Returns whether `haystack` ends with `needle`. 347 | 348 | 349 | #### Mutating 350 | 351 | 352 | * `pad_start (str: string, desired_count: int, pad_with := " ", null_terminate := false) -> string`
353 | Returns a copy of `str` with `pad_with` repeated at the beginning such that the string length reaches the `desired_count`. 354 | Note that `pad_with` can be multiple characters long (and in fact the default value is actually multiple spaces, for performance). 355 | 356 | 357 | * `pad_start (str: string, desired_count: int, pad_with: u8, null_terminate := false) -> string`
358 | Returns a copy of `str` with `pad_with` repeated at the beginning such that the string length reaches the `desired_count`. 359 | 360 | 361 | * `pad_end (str: string, desired_count: int, pad_with := " ", null_terminate := false) -> string`
362 | Returns a copy of `str` with `pad_with` repeated from the end such that the string length reaches the `desired_count`. 363 | Note that `pad_with` can be multiple characters long (and in fact the default value is actually multiple spaces, for performance). 364 | 365 | 366 | * `pad_end (str: string, desired_count: int, pad_with: u8, null_terminate := false) -> string`
367 | Returns a copy of `str` with `pad_with` repeated from the end such that the string length reaches the `desired_count`. 368 | 369 | 370 | * `pad (str: string, desired_count: int, pad_with := " ", null_terminate := false) -> string`
371 | Returns a copy of `str` with `pad_with` repeated from the beginning *and* from the end such that the string length reaches the `desired_count`. 372 | Note that `pad_with` can be multiple characters long (and in fact the default value is actually multiple spaces, for performance). 373 | 374 | 375 | * `pad (str: string, desired_count: int, pad_with: u8, null_terminate := false) -> string`
376 | Returns a copy of `str` with `pad_with` repeated from the beginning *and* from the end such that the string length reaches the `desired_count`. 377 | 378 | 379 | * `repeat (str: string, times: int, null_terminate := false) -> string`
380 | Returns a string consisting of `str` repeated `times` times. 381 | 382 | 383 | * `replace (haystack: *string, needle: %Tool, replacement: u8, max_replacements := 0, null_terminate := false) -> change_count: int`
384 | Mutates the haystack in-place, replacing `needle` with the `replacement` character specified. 385 | 386 | 387 | * `replace (haystack: string, needle: %Tool, replacement: string, max_replacements := 0, character_compare := default_compare, null_terminate := false) -> string`
388 | Returns a copy of `haystack` with all (non-overlapping) instances of `needle` replaced with `replacement`. 389 | If `max_replacements` is non-zero then at most that many replacements will be made (starting at the beginning of the string). 390 | 391 | 392 | * `reverse (str: *string)`
393 | Reverses the characters in `str` in-place. 394 | 395 | 396 | * `reverse (str: string, null_terminate := false) -> string`
397 | Returns a copy of `str` with the characters in the reverse order. 398 | 399 | 400 | * `to_upper (str: *string)`
401 | Mutates `str` in-place, overwriting any lower-case characters with their upper-case equivalent. 402 | 403 | 404 | * `to_upper (str: string, null_terminate := false) -> string`
405 | Returns a copy of `str` with all lower-case characters converted to their upper-case equivalent. 406 | 407 | 408 | * `to_lower (str: *string)`
409 | Mutates `str` in-place, overwriting any upper-case characters with their lower-case equivalent. 410 | 411 | 412 | * `to_lower (str: string, null_terminate := false) -> string`
413 | Returns a copy of `str` with all upper-case characters converted to their lower-case equivalent. 414 | 415 | 416 | * `to_capitalized (str: *string, preserve_caps := true)`
417 | Sets the first letter of `str` to upper-case. If `preserve_caps` is set to false, will set all following letters to lower-case. 418 | 419 | 420 | * `to_capitalized (str: string, preserve_caps := true, null_terminate := false) -> string`
421 | Returns a copy of `str` with the first letter converted to upper-case. If `preserve_caps` is disabled then all subsequent letters will be converted to lower-case. 422 | 423 | 424 | * `camel_from_snake (str: string, preserve_caps := false, null_terminate := false) -> string`
425 | Returns a copy of underscore-separated `str`, changed into programmer CamelCase; i.e. with the leading letter, and every letter after an underscore, converted to upper-case, and with underscores removed. If `preserve_caps` is enabled then the underscore removal still happens, but the case is kept. 426 | 427 | For example: 428 | ```jai 429 | assert( camel_from_snake("play_RTS") == "playRts" ); 430 | assert( camel_from_snake("play_RTS", true) == "playRTS" ); 431 | ``` 432 | 433 | 434 | * `snake_from_camel (str: string, preserve_caps := false, null_terminate := false) -> string`
435 | Returns a copy of CamelCased `str`, changed into programmer snake case; i.e. converted to lower-case, but split by `_` at each formerly upper-case letter edge. If `preserve_caps` is enabled then the split still happens, but the case is kept. 436 | 437 | For example: 438 | ```jai 439 | assert( snake_from_camel("PlayRTS") == "play_rts" ); 440 | assert( snake_from_camel("PlayRTS", true) == "play_RTS" ); 441 | ``` 442 | 443 | 444 | #### Utilities 445 | 446 | 447 | * `char_as_string (char: *u8) -> string`
448 | Returns a string representation of the single character provided. 449 | 450 | 451 | * `copy_string (str: string, null_terminate: bool) -> string`
452 | Returns a copy of `str`. 453 | 454 | 455 | * `join (strings: .. string, null_terminate := false) -> string`
456 | Returns a single string created by concatenating all the provided strings together. 457 | 458 | 459 | * `join (strings: [] string, null_terminate := false) -> string`
460 | Returns a single string, the result of joining all the strings in the `strings` array together. 461 | 462 | 463 | * `join (strings: [] string, separator: string|u8, null_terminate := false) -> string`
464 | Returns a single string, the result of joining all the strings in the `strings` array together with `separator` between them. 465 | 466 | 467 | * `join (strings: $T/Splitter, null_terminate := false) -> string`
468 | Returns a single string, the result of joining all the strings in the `strings` iterator together. 469 | 470 | 471 | * `join (strings: $T/Splitter, separator: string|u8, null_terminate := false) -> string`
472 | Returns a single string, the result of joining all the strings in the `strings` iterator together with `separator` between them. 473 | 474 | 475 | * `apply_backslash (str: string, null_terminate := false) -> string, well_formed: bool`
476 | Converts legal jai backslash escape sequences (e.g. `\n`, `\t`, etc.) into their specified character. i.e. a two character string `"\n"` will yield a single character string with byte value `10`. 477 | `well_formed` will be true if all backslash characters in `str` are followed by an appropriate escape sequence. 478 | 479 | 480 | * `escape (str: string, null_terminate := false) -> string`
481 | Replaces the special characters which jai uses backslash escapes to represent with said backslash escape sequence. i.e. the single character string with byte value `10` will yield the two character string `"\n"` 482 | 483 | 484 | * `reverse_index_proc (index_proc: Index_Proc, haystack: string, needle: string, boundary_index: int) -> from_index: int, to_index: int, found: bool`
485 | Can be used to automatically make a reversed version of an `Index_Proc` (see `question_mark_index` example above). It does so in an extremely inefficient way; if you care about the performance of the reverse search then you should code it directly. 486 | -------------------------------------------------------------------------------- /Strings/splitting.jai: -------------------------------------------------------------------------------- 1 | Splitter :: struct { 2 | text : string; 3 | max_results : int; 4 | skip_empty : bool; 5 | keep_separator : Keep_Separator; 6 | character_compare : Character_Compare; 7 | } 8 | 9 | Split_By_String :: struct { 10 | #as using base: Splitter; 11 | separator : string; 12 | first_index : String_Index_Proc; 13 | last_index : String_Index_Proc; 14 | } 15 | 16 | Split_By_Chars :: struct { 17 | #as using base: Splitter; 18 | separator : [] u8; 19 | } 20 | 21 | Split_By_Char :: struct { 22 | #as using base: Splitter; 23 | separator : u8; 24 | } 25 | 26 | Split_By_Proc :: struct { 27 | #as using base: Splitter; 28 | separator : string; 29 | index_proc : Index_Proc; 30 | } 31 | 32 | Split_By_Index :: struct { 33 | #as using base: Splitter; 34 | indexes : [] int; 35 | } 36 | 37 | Split_By_Count :: struct { 38 | #as using base: Splitter; 39 | count : int; 40 | } 41 | 42 | Split_By_Line :: struct { 43 | #as using base: Splitter; 44 | } 45 | 46 | 47 | to_array :: (splitter: $T/Splitter, reversed := false) -> [..] string { 48 | result : [..] string; 49 | if reversed for < splitter array_add(*result, it); 50 | else for splitter array_add(*result, it); 51 | return result; 52 | } 53 | 54 | to_array :: (array: *[] string, splitter: $T/Splitter, reversed := false, clear_unused := true) -> [] string { 55 | // @Note We set max_results so we don't overflow the destination array: the last entry in the array may 56 | // therefor contain further potential splits. 
57 | 58 | result : [] string = .{0, array.data}; 59 | 60 | _splitter := splitter; 61 | if _splitter.max_results == 0 || _splitter.max_results > array.count 62 | _splitter.max_results = array.count; 63 | 64 | if reversed for < _splitter { result.count += 1; result[it_index] = it; } 65 | else for _splitter { result.count += 1; result[it_index] = it; } 66 | 67 | remainder := array.count - result.count; 68 | if clear_unused && remainder > 0 69 | memset(array.data + result.count, 0, remainder * size_of(string)); 70 | 71 | return result; 72 | } 73 | 74 | to_array :: inline (array: *[$N] string, splitter: $T/Splitter, reversed := false, clear_unused := true) -> [] string { 75 | array_view : [] string = array.*; 76 | return to_array(*array_view, splitter, reversed, clear_unused); 77 | } 78 | 79 | 80 | split_at_index :: (text: string, index: int) -> string, string { 81 | return slice(text, 0, index), slice(text, index); 82 | } 83 | 84 | 85 | split_into_two :: (text: string, separator: u8) -> string, string { 86 | index, found := first_index(text, separator); 87 | if !found return text, ""; 88 | return raw_slice(text, 0, index), raw_slice(text, index + 1); 89 | } 90 | 91 | split_into_two :: (text: string, separator: [] u8) -> string, string { 92 | index, found := first_index(text, separator); 93 | if !found return text, ""; 94 | return raw_slice(text, 0, index), raw_slice(text, index + 1); 95 | } 96 | 97 | split_into_two :: (text: string, separator: string) -> string, string { 98 | index, found := first_index(text, separator); 99 | if !found return text, ""; 100 | return raw_slice(text, 0, index), raw_slice(text, index + separator.count); 101 | } 102 | 103 | split_into_two :: (text: string, separator: string, $$separator_proc: Index_Proc) -> string, string { 104 | from_index, found, to_index := first_index(text, separator, separator_proc); 105 | if !found return text, ""; 106 | return raw_slice(text, 0, from_index), raw_slice(text, to_index); 107 | } 108 | 109 | 
split_into_two :: (text: string, separator: $T, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> string, string { 110 | parts : [2] string = ---; 111 | split(*parts, text, separator, keep_separator = keep_separator, character_compare = character_compare); 112 | return parts[0], parts[1]; 113 | } 114 | 115 | 116 | split :: inline (text: string, separator: string, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> Split_By_String { 117 | return Split_By_String.{.{text, max_results, skip_empty, keep_separator, character_compare}, separator, context.strings_thread_data.string_first_index, context.strings_thread_data.string_last_index}; 118 | } 119 | 120 | for_expansion :: (splitter: *Split_By_String, body: Code, flags: For_Flags) #expand { 121 | #assert(!(flags & .POINTER)); 122 | reverse := flags & .REVERSE; 123 | 124 | `it : string = ---; 125 | `it_index : int = -1; 126 | 127 | for escape: 1..1 { // so break avoids the cleanup #insert 128 | if splitter.max_results == 1 { 129 | it = splitter.text; 130 | it_index = 0; 131 | 132 | #insert body; 133 | } 134 | else { 135 | max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2; 136 | separator_offset_to_start, separator_offset_to_end, empty_count : int = ---; 137 | 138 | if reverse { 139 | end_index := splitter.text.count; 140 | index, found := last_index(splitter.text, splitter.separator, splitter.last_index, splitter.character_compare); 141 | 142 | if splitter.keep_separator == { 143 | case .NO; 144 | empty_count = 0; 145 | separator_offset_to_start = splitter.separator.count; 146 | separator_offset_to_end = 0; 147 | 148 | case .AS_PREFIX; 149 | empty_count = splitter.separator.count; 150 | separator_offset_to_start = 0; 151 | separator_offset_to_end = 0; 152 | 153 | case .AS_POSTFIX; 154 | empty_count = splitter.separator.count; 155 | separator_offset_to_start = 
splitter.separator.count; 156 | separator_offset_to_end = splitter.separator.count; 157 | } 158 | 159 | while found { 160 | defer index, found = last_index(splitter.text, splitter.separator, index, splitter.last_index, splitter.character_compare); 161 | 162 | it = raw_slice(splitter.text, index + separator_offset_to_start, end_index); 163 | end_index = index + separator_offset_to_end; 164 | 165 | if !splitter.skip_empty || it.count != empty_count { 166 | it_index += 1; 167 | defer if splitter.max_results != 0 && it_index >= max_it_index break; 168 | 169 | #insert(break = break escape) body; 170 | } 171 | 172 | } 173 | 174 | it = raw_slice(splitter.text, 0, end_index); 175 | } 176 | else { 177 | start_index := 0; 178 | index, found := first_index(splitter.text, splitter.separator, splitter.first_index, splitter.character_compare); 179 | 180 | if splitter.keep_separator == { 181 | case .NO; 182 | empty_count = 0; 183 | separator_offset_to_start = splitter.separator.count; 184 | separator_offset_to_end = 0; 185 | 186 | case .AS_PREFIX; 187 | empty_count = splitter.separator.count; 188 | separator_offset_to_start = 0; 189 | separator_offset_to_end = 0; 190 | 191 | case .AS_POSTFIX; 192 | empty_count = splitter.separator.count; 193 | separator_offset_to_start = splitter.separator.count; 194 | separator_offset_to_end = splitter.separator.count; 195 | } 196 | 197 | while found { 198 | defer index, found = first_index(splitter.text, splitter.separator, index + splitter.separator.count, splitter.first_index, splitter.character_compare); 199 | 200 | it = raw_slice(splitter.text, start_index, index + separator_offset_to_end); 201 | start_index = index + separator_offset_to_start; 202 | 203 | if !splitter.skip_empty || it.count > empty_count { 204 | it_index += 1; 205 | defer if splitter.max_results != 0 && it_index >= max_it_index break; 206 | 207 | #insert(break = break escape) body; 208 | } 209 | } 210 | 211 | it = raw_slice(splitter.text, start_index); 212 | } 213 | 214 | 
if !splitter.skip_empty || it.count != empty_count { 215 | it_index += 1; 216 | #insert body; 217 | } 218 | } 219 | } 220 | } 221 | 222 | 223 | split :: inline (text: string, separator: [] u8, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> Split_By_Chars { 224 | return Split_By_Chars.{.{text, max_results, skip_empty, keep_separator, character_compare}, separator}; 225 | } 226 | 227 | for_expansion :: (splitter: *Split_By_Chars, body: Code, flags: For_Flags) #expand { 228 | #assert(!(flags & .POINTER)); 229 | reverse := flags & .REVERSE; 230 | 231 | `it : string = ---; 232 | `it_index : int = -1; 233 | 234 | for escape: 1..1 { // so break avoids the cleanup #insert 235 | if splitter.max_results == 1 { 236 | it = splitter.text; 237 | it_index = 0; 238 | 239 | #insert body; 240 | } 241 | else { 242 | max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2; 243 | separator_offset_to_start, separator_offset_to_end, empty_count : int = ---; 244 | 245 | if reverse { 246 | end_index := splitter.text.count; 247 | index, found := last_index(splitter.text, splitter.separator, splitter.character_compare); 248 | if splitter.keep_separator == { 249 | case .NO; 250 | empty_count = 0; 251 | separator_offset_to_start = 1; 252 | separator_offset_to_end = 0; 253 | 254 | case .AS_PREFIX; 255 | empty_count = 1; 256 | separator_offset_to_start = 0; 257 | separator_offset_to_end = 0; 258 | 259 | case .AS_POSTFIX; 260 | empty_count = 1; 261 | separator_offset_to_start = 1; 262 | separator_offset_to_end = 1; 263 | } 264 | 265 | while found { 266 | defer index, found = last_index(splitter.text, splitter.separator, index, splitter.character_compare); 267 | 268 | it = raw_slice(splitter.text, index + separator_offset_to_start, end_index); 269 | end_index = index + separator_offset_to_end; 270 | 271 | if !splitter.skip_empty || it.count != empty_count { 272 | it_index += 1; 273 | defer if 
splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }

                }

                it = raw_slice(splitter.text, 0, end_index);
            }
            else {
                start_index := 0;
                index, found := first_index(splitter.text, splitter.separator, splitter.character_compare);
                if splitter.keep_separator == {
                    case .NO;
                        empty_count = 0;
                        separator_offset_to_start = 1;
                        separator_offset_to_end = 0;

                    case .AS_PREFIX;
                        empty_count = 1;
                        separator_offset_to_start = 0;
                        separator_offset_to_end = 0;

                    case .AS_POSTFIX;
                        empty_count = 1;
                        separator_offset_to_start = 1;
                        separator_offset_to_end = 1;
                }

                while found {
                    defer index, found = first_index(splitter.text, splitter.separator, index + 1, splitter.character_compare);

                    it = raw_slice(splitter.text, start_index, index + separator_offset_to_end);
                    start_index = index + separator_offset_to_start;

                    if !splitter.skip_empty || it.count != empty_count {
                        it_index += 1;
                        defer if splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }
                }

                it = raw_slice(splitter.text, start_index);
            }

            if !splitter.skip_empty || it.count != empty_count {
                it_index += 1;
                #insert body;
            }
        }
    }
}


// Describe a split of `text` on a single separator byte.
//
// Nothing is scanned here: the returned Split_By_Char only records the
// arguments. The pieces are produced by the matching for_expansion below
// when the value is used in a `for` loop.
//
//   max_results       0 means "no limit" (the limit check in the iterator is
//                     disabled when it is 0); 1 yields the whole text once.
//   skip_empty        when true, pieces the iterator classifies as empty are
//                     not passed to the loop body.
//   keep_separator    whether the separator is dropped (.NO) or kept at the
//                     start (.AS_PREFIX) / end (.AS_POSTFIX) of a piece.
//   character_compare forwarded to first_index / last_index.
split :: inline (text: string, separator: u8, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> Split_By_Char {
    return Split_By_Char.{.{text, max_results, skip_empty, keep_separator, character_compare}, separator};
}

// Iterator for Split_By_Char.
//
// Declares `it` (the current piece, produced with raw_slice over
// splitter.text) and `it_index` (0-based count of pieces handed to the body)
// in the caller's scope. Iterating by pointer is rejected at compile time;
// a reversed `for` walks the text from the end using last_index.
//
// Shape shared by all the splitters in this file:
//   * every separator hit inside the `while` loop yields one piece;
//   * once max_results - 1 pieces have been yielded the loop stops, and
//     whatever text is left is yielded as the final piece by the trailing
//     `#insert body`;
//   * a `break` written in the user's loop body is rewritten to
//     `break escape`, which leaves the one-iteration outer loop and so also
//     skips that trailing `#insert body`.
//
// NOTE(review): with keep_separator != .NO, empty_count is 1, so a
// one-character piece that carries no separator (e.g. the very first piece
// with .AS_PREFIX) compares equal to empty_count and is dropped by
// skip_empty, while a genuinely empty first piece (count 0) is kept.
// Confirm whether that is intended.
for_expansion :: (splitter: *Split_By_Char, body: Code, flags: For_Flags) #expand {
    #assert(!(flags & .POINTER));
    reverse := flags & .REVERSE;

    `it : string = ---;
    `it_index : int = -1;  // incremented before each piece is yielded, so the first piece is 0

    for escape: 1..1 { // so break avoids the cleanup #insert
        if splitter.max_results == 1 {
            // A single result is, by definition, the whole text.
            it = splitter.text;
            it_index = 0;

            #insert body;
        }
        else {
            // The loop may yield max_results - 1 pieces (indices 0 .. max_results - 2);
            // the last allowed piece is the remainder yielded after the loop.
            max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;
            separator_offset_to_start, separator_offset_to_end, empty_count : int = ---;

            if reverse {
                end_index := splitter.text.count;
                index, found := last_index(splitter.text, splitter.separator, splitter.character_compare);
                if splitter.keep_separator == {
                    case .NO;
                        empty_count = 0;
                        separator_offset_to_start = 1;
                        separator_offset_to_end = 0;

                    case .AS_PREFIX;
                        empty_count = 1;
                        separator_offset_to_start = 0;
                        separator_offset_to_end = 0;

                    case .AS_POSTFIX;
                        empty_count = 1;
                        separator_offset_to_start = 1;
                        separator_offset_to_end = 1;
                }

                while found {
                    // Runs at the end of every iteration, including ones that skip the body.
                    defer index, found = last_index(splitter.text, splitter.separator, index, splitter.character_compare);

                    it = raw_slice(splitter.text, index + separator_offset_to_start, end_index);
                    end_index = index + separator_offset_to_end;

                    if !splitter.skip_empty || it.count != empty_count {
                        it_index += 1;
                        // Deferred so the limit is checked after the body has run for this piece.
                        defer if splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }
                }

                // Remainder: everything before the last separator consumed.
                it = raw_slice(splitter.text, 0, end_index);
            }
            else {
                start_index := 0;
                index, found := first_index(splitter.text, splitter.separator, splitter.character_compare);
                if splitter.keep_separator == {
                    case .NO;
                        empty_count = 0;
                        separator_offset_to_start = 1;
                        separator_offset_to_end = 0;

                    case .AS_PREFIX;
                        empty_count = 1;
                        separator_offset_to_start = 0;
                        separator_offset_to_end = 0;

                    case .AS_POSTFIX;
                        empty_count = 1;
                        separator_offset_to_start = 1;
                        separator_offset_to_end = 1;
                }

                while found {
                    defer index, found = first_index(splitter.text, splitter.separator, index + 1, splitter.character_compare);

                    it = raw_slice(splitter.text, start_index, index + separator_offset_to_end);
                    start_index = index + separator_offset_to_start;

                    if !splitter.skip_empty || it.count != empty_count {
                        it_index += 1;
                        defer if splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }
                }

                // Remainder: everything after the last separator consumed.
                it = raw_slice(splitter.text, start_index);
            }

            if !splitter.skip_empty || it.count != empty_count {
                it_index += 1;
                #insert body;
            }
        }
    }
}


// Describe a split of `text` where separator positions are found by a
// caller-supplied procedure.
//
// `index` is stored in the returned Split_By_Proc together with `separator`
// and is called by the for_expansion below as
//     index_proc(text, separator, position, reverse) -> from_index, to_index, found
// The character_compare slot of the shared settings is left at its default
// (`.{}`); this splitter's iterator does not read it.
split :: inline (text: string, separator: string, $$index: Index_Proc, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO) -> Split_By_Proc {
    return Split_By_Proc.{.{text, max_results, skip_empty, keep_separator, .{}}, separator, index};
}

// Iterator for Split_By_Proc.
//
// Same overall shape as the Split_By_Char iterator (it / it_index declared
// in the caller, max_results handling, `break escape` remapping), but each
// match is a [from_index, to_index) range reported by splitter.index_proc,
// so the separator may be of any length. A piece counts as empty only when
// it is the empty string, regardless of keep_separator.
for_expansion :: (splitter: *Split_By_Proc, body: Code, flags: For_Flags) #expand {
    #assert(!(flags & .POINTER));
    reverse := flags & .REVERSE;

    `it : string = ---;
    `it_index : int = -1;

    for escape: 1..1 { // so break avoids the cleanup #insert
        if splitter.max_results == 1 {
            it = splitter.text;
            it_index = 0;

            #insert body;
        }
        else {
            max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;

            if reverse {
                end_index := splitter.text.count;
                from_index, to_index, found := splitter.index_proc(splitter.text, splitter.separator, splitter.text.count, true);
                while found {
                    // The next backwards search resumes from the start of this match.
                    defer from_index, to_index, found = splitter.index_proc(splitter.text, splitter.separator, from_index, true);

                    if #complete splitter.keep_separator == {
                        case .AS_PREFIX;
                            it = raw_slice(splitter.text, from_index, end_index);
                            end_index = from_index;

                        case .AS_POSTFIX;
                            it = raw_slice(splitter.text, to_index, end_index);
                            end_index = to_index;

                        case .NO;
                            it = raw_slice(splitter.text, to_index, end_index);
                            end_index = from_index;
                    }

                    if !splitter.skip_empty || it != "" {
                        it_index += 1;
                        defer if splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }
                }

                it = raw_slice(splitter.text, 0, end_index);
            }
            else {
                start_index := 0;
                from_index, to_index, found := splitter.index_proc(splitter.text, splitter.separator, 0, false);
                while found {
                    // The next forwards search resumes from the end of this match.
                    defer from_index, to_index, found = splitter.index_proc(splitter.text, splitter.separator, to_index, false);

                    if #complete splitter.keep_separator == {
                        case .AS_PREFIX;
                            it = raw_slice(splitter.text, start_index, from_index);
                            start_index = from_index;

                        case .AS_POSTFIX;
                            it = raw_slice(splitter.text, start_index, to_index);
                            start_index = to_index;

                        case .NO;
                            it = raw_slice(splitter.text, start_index, from_index);
                            start_index = to_index;
                    }

                    if !splitter.skip_empty || it != "" {
                        it_index += 1;
                        defer if splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }
                }

                it = raw_slice(splitter.text, start_index);
            }

            if !splitter.skip_empty || it != "" {
                it_index += 1;

                #insert body;
            }
        }
    }
}


// Describe a split of `text` at the given cut positions.
//
// `indexes` is variadic; the for_expansion below walks it in order (or
// backwards for a reversed `for`). keep_separator is stored but this
// splitter's iterator never reads it: there is no separator to keep.
index_split :: inline (text: string, indexes: .. int, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO) -> Split_By_Index {
    return Split_By_Index.{.{text, max_results, skip_empty, keep_separator, .{}}, indexes};
}

// Iterator for Split_By_Index.
//
// Each entry of splitter.indexes closes one piece and opens the next. Every
// position is passed through slice_index (defined elsewhere; presumably it
// normalises/clamps the index against the text - TODO confirm). A cut that
// would move backwards past the current position is pinned to it, which
// yields an empty piece instead of overlapping text.
for_expansion :: (splitter: *Split_By_Index, body: Code, flags: For_Flags) #expand {
    #assert(!(flags & .POINTER));
    reverse := flags & .REVERSE;

    `it : string = ---;
    `it_index : int = -1;

    for escape: 1..1 { // so break avoids the cleanup #insert
        if splitter.max_results == 1 {
            it = splitter.text;
            it_index = 0;

            #insert body;
        }
        else {
            max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;

            if reverse {
                end_index := splitter.text.count;
                for < index, splitter_index: splitter.indexes {
                    // Never start a piece after the current end.
                    start_index := slice_index(splitter.text, ifx index < end_index then index else end_index);
                    it = raw_slice(splitter.text, start_index, end_index);
                    end_index = start_index;

                    if splitter.skip_empty && it == "" continue;

                    it_index += 1;
                    defer if splitter.max_results != 0 && it_index >= max_it_index break;

                    #insert(break = break escape) body;
                }

                it = raw_slice(splitter.text, 0, end_index);
            }
            else {
                start_index := 0;
                for index, splitter_index: splitter.indexes {
                    start_index = slice_index(splitter.text, start_index);
                    // Never end a piece before the current start.
                    end_index := slice_index(splitter.text, ifx index > start_index then index else start_index);
                    it = raw_slice(splitter.text, start_index, end_index);
                    start_index = end_index;

                    if splitter.skip_empty && it == "" continue;

                    it_index += 1;
                    defer if splitter.max_results != 0 && it_index >= max_it_index break;

                    #insert(break = break escape) body;
                }

                it = raw_slice(splitter.text, start_index);
            }

            if !splitter.skip_empty || it != "" {
                it_index += 1;

                #insert body;
            }
        }
    }
}


// Describe a split of `text` into consecutive pieces of `count` bytes.
//
// skip_empty and keep_separator are fixed to false / .NO: there is no
// separator, and the for_expansion below never yields an empty remainder.
count_split :: inline (text: string, count: int, max_results := 0) -> Split_By_Count {
    return Split_By_Count.{.{text, max_results, false, .NO, .{}}, count};
}

// Iterator for Split_By_Count.
//
// Forward iteration yields pieces of splitter.count bytes from the start,
// with a shorter final piece if the text length is not a multiple of it.
// Reverse iteration yields them from the end, so the shorter piece (if any)
// is the one at the start of the text and comes last.
//
// A non-positive splitter.count cannot advance through the text: the loops
// below would never terminate, and a negative count would also hand
// negative offsets to raw_slice. In that case the whole text is yielded
// once if it is non-empty, and nothing is yielded if it is empty.
for_expansion :: (splitter: *Split_By_Count, body: Code, flags: For_Flags) #expand {
    #assert(!(flags & .POINTER));
    reverse := flags & .REVERSE;

    `it : string = ---;
    `it_index : int = -1;

    for escape: 1..1 { // so break avoids the cleanup #insert
        if splitter.max_results == 1 || (splitter.count <= 0 && splitter.text.count > 0) {
            it = splitter.text;
            it_index = 0;

            #insert body;
        }
        else if splitter.count > 0 {
            max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;

            if reverse {
                end_index := splitter.text.count;
                start_index := end_index - splitter.count;
                while start_index > 0 {
                    it = raw_slice(splitter.text, start_index, end_index);
                    end_index = start_index;
                    start_index -= splitter.count;

                    it_index += 1;
                    defer if splitter.max_results != 0 && it_index >= max_it_index break;

                    #insert(break = break escape) body;
                }

                it = raw_slice(splitter.text, 0, end_index);
            }
            else {
                start_index := 0;
                end_index := start_index + splitter.count;
                while end_index < splitter.text.count {
                    it = raw_slice(splitter.text, start_index, end_index);
                    start_index = end_index;
                    end_index += splitter.count;

                    it_index += 1;
                    defer if splitter.max_results != 0 && it_index >= max_it_index break;

                    #insert(break = break escape) body;
                }

                it = raw_slice(splitter.text, start_index);
            }

            // Only yield the remainder when there is one (e.g. not for empty text).
            if it {
                it_index += 1;
                #insert body;
            }
        }
    }
}





// Describe a split of `text` into lines.
//
// The for_expansion below breaks on LF and treats a CR directly before the
// LF as part of the same line break. The character comparison is fixed to
// `case_sensitive`.
line_split :: inline (text: string, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO) -> Split_By_Line {
    return .{.{text, max_results, skip_empty, keep_separator, case_sensitive}};
}

// Iterator for Split_By_Line.
//
// Differences from the other splitters in this file:
//   * empty text, or any non-zero max_results below 2 (including negative
//     values), yields the text once as a single piece;
//   * a line break at the very end of the text does not produce an extra
//     empty piece: the final remainder is only yielded when it is non-empty
//     (forward: start_index < text.count, reverse: end_index > 0);
//   * the line break is LF or CR LF; with .NO both bytes are dropped, with
//     .AS_PREFIX / .AS_POSTFIX both stay attached to the piece.
for_expansion :: (splitter: *Split_By_Line, body: Code, flags: For_Flags) #expand {
    #assert(!(flags & .POINTER));
    reverse := flags & .REVERSE;

    // True when the current piece holds nothing but an (optional) line break.
    // NOTE(review): when the separator is kept, any one-character piece is
    // treated as empty, including a single character that is not a line
    // break (e.g. a one-letter last line with no trailing newline).
    // Confirm whether that is intended.
    it_is_empty :: () -> bool #expand {
        if `splitter.keep_separator != .NO
            return `it == "" || `it.count == 1 || `it.count == 2 && `it[0] == Chars.CR;
        else
            return `it == "";
    }

    `it : string = ---;
    `it_index : int = -1;

    for escape: 1..1 { // so break avoids the cleanup #insert
        if splitter.text == "" || (splitter.max_results != 0 && splitter.max_results < 2) {
            it = splitter.text;
            it_index = 0;

            #insert body;
        }
        else {
            max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;

            if reverse {
                // The text is known to be non-empty here, so text[index - 1] is in range.
                index := splitter.text.count;
                end_index : int = ---;
                found := false;

                // Step over a line break that terminates the text so it does
                // not open an empty piece at the end.
                if #complete splitter.keep_separator == {
                    case .AS_PREFIX;
                        end_index = index;

                    case .AS_POSTFIX;
                        end_index = index;
                        if splitter.text[index - 1] == Chars.LF  index -= 1;

                    case .NO;
                        if splitter.text[index - 1] == Chars.LF  index -= 1;
                        if index > 0 && splitter.text[index - 1] == Chars.CR  index -= 1;
                        end_index = index;
                }

                index, found = last_index(splitter.text, Chars.LF, index, splitter.character_compare);

                while found {
                    defer index, found = last_index(splitter.text, Chars.LF, index, splitter.character_compare);

                    if #complete splitter.keep_separator == {
                        case .AS_PREFIX;
                            // Pull a preceding CR into the prefix.
                            if index > 0 && splitter.text[index - 1] == Chars.CR
                                index -= 1;
                            it = raw_slice(splitter.text, index, end_index);
                            end_index = index;

                        case .AS_POSTFIX;
                            it = raw_slice(splitter.text, index + 1, end_index);
                            end_index = index + 1;

                        case .NO;
                            it = raw_slice(splitter.text, index + 1, end_index);
                            // Drop a preceding CR from the next (earlier) piece.
                            if index > 0 && splitter.text[index - 1] == Chars.CR
                                end_index = index - 1;
                            else
                                end_index = index;
                    }

                    if !splitter.skip_empty || !it_is_empty() {
                        it_index += 1;
                        defer if splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }
                }

                if end_index > 0 {
                    it = raw_slice(splitter.text, 0, end_index);

                    if !splitter.skip_empty || !it_is_empty() {
                        it_index += 1;
                        #insert body;
                    }
                }
            }
            else {
                start_index := 0;
                index, found := first_index(splitter.text, Chars.LF, splitter.character_compare);

                while found {
                    defer index, found = first_index(splitter.text, Chars.LF, index + 1, splitter.character_compare);

                    if #complete splitter.keep_separator == {
                        case .AS_PREFIX;
                            end_index := index;
                            // Leave a preceding CR with the line break, i.e. in the next piece.
                            if end_index > 0 && splitter.text[end_index - 1] == Chars.CR
                                end_index -= 1;
                            it = raw_slice(splitter.text, start_index, end_index);
                            start_index = end_index;

                        case .AS_POSTFIX;
                            it = raw_slice(splitter.text, start_index, index + 1);
                            start_index = index + 1;

                        case .NO;
                            end_index := index;
                            // Drop a preceding CR together with the LF.
                            if end_index > 0 && splitter.text[end_index - 1] == Chars.CR
                                end_index -= 1;
                            it = raw_slice(splitter.text, start_index, end_index);
                            start_index = index + 1;
                    }

                    if !splitter.skip_empty || !it_is_empty() {
                        it_index += 1;
                        defer if splitter.max_results != 0 && it_index >= max_it_index break;

                        #insert(break = break escape) body;
                    }
                }

                if start_index < splitter.text.count {
                    it = raw_slice(splitter.text, start_index, splitter.text.count);

                    if !splitter.skip_empty || !it_is_empty() {
                        it_index += 1;
                        #insert body;
                    }
                }
            }
        }
    }
}
-------------------------------------------------------------------------------- /tests/output.txt: -------------------------------------------------------------------------------- 1 | 2 | Disassembly of 'count' at c:/jai/modules/Strings_Shared/Strings_Shared.jai:1009 3 | - Stack size 188 4 | 5 | -------- Basic Block 0 -------- defines v5-7 -------- 6 | 7 | (no dominating) 8 | 9 | 0| lea v6, stack+0 10 | 1| clear v6 {size 16} 11 | 2| string== v5, v1 == v6 12 | 3| jump 1 if v5 == 0 13 | 4| constant v7 = 0 14 | 5| return_value v7 -> 1 15 | 6| return 16 | 17 | -------- Basic Block 1 -------- defines v8-11 -------- 18 | 19 | ... dominating: block 0 instruction 3 20 | 21 | 7| mov v8, [v3] :1b 22 | 8| constant v9 = 1 23 | 9| jump 5 if v8 != v9 24 | 10| call is_upper (v2) -> v10 25 | 11| jump 2 if v10 == 0 26 | 12| lea v11, stack+10 27 | 13| memcpy v11, v3 {size 16} 28 | 14| jump 4 29 | 30 | -------- Basic Block 2 -------- defines v12-14 -------- 31 | 32 | ... dominating: block 1 instruction 11 33 | 34 | 15| call is_lower (v2) -> v12 35 | 16| jump 3 if v12 == 0 36 | 17| call to_upper (v2) -> v13 37 | 18| copy v2 = v13 38 | 19| lea v14, stack+10 39 | 20| memcpy v14, v3 {size 16} 40 | 21| jump 4 41 | 42 | -------- Basic Block 3 -------- defines v15-16 -------- 43 | 44 | ... dominating: block 2 instruction 16 45 | 46 | 22| lea v15, bss+40 47 | 23| lea v16, stack+10 48 | 24| memcpy v16, v15 {size 16} 49 | 50 | -------- Basic Block 4 -------- 51 | 52 | ... dominating: block 1 instruction 11 53 | 54 | 25| jump 6 55 | 56 | -------- Basic Block 5 -------- defines v17 -------- 57 | 58 | ... dominating: block 1 instruction 9 59 | 60 | 26| lea v17, stack+10 61 | 27| memcpy v17, v3 {size 16} 62 | 63 | -------- Basic Block 6 -------- defines v18-19 -------- 64 | 65 | ... dominating: block 1 instruction 9 66 | 67 | 28| constant v18 = 0 68 | 29| mov v19, [stack+10] :1b 69 | 70 | -------- Basic Block 7 -------- defines v20 -------- 71 | 72 | ... 
dominating: block 6 instruction 30 73 | 74 | 30| constant v20 = 0 75 | 31| jump 23 if v19 != v20 76 | 77 | -------- Basic Block 8 -------- defines v21-26 -------- 78 | 79 | ... dominating: block 7 instruction 31 80 | 81 | 32| lea v21, data+8048 82 | 33| mov v22, [v0] :4b 83 | 34| imul v23 = v22 * 8 84 | 35| add_extend v25 = v21 + v23 85 | 36| mov v26, [v25] :8b 86 | 87 | -------- Basic Block 9 -------- defines v27 -------- 88 | 89 | ... dominating: block 8 instruction 37 90 | 91 | 37| constant v27 = 0 92 | 38| jump 15 if v26 != v27 93 | 94 | -------- Basic Block 10 -------- defines v28-42 -------- 95 | 96 | ... dominating: block 9 instruction 38 97 | 98 | 39| mov v29, [v1+8] :8b 99 | 40| cast_number v31 (u64), v29 (*u8) 100 | 41| copy v28 = v31 101 | 42| mov v33, [v1+8] :8b 102 | 43| cast_number v35 (u64), v33 (*u8) 103 | 44| mov v36, [v1] :8b 104 | 45| cast_number v37 (u64), v36 (s64) 105 | 46| binop v32, v35 + v37 106 | 47| lea v38, stack+20 107 | 48| constant v39 = 0x10 108 | 49| memset v38, v2 {count v39} 109 | 50| copy v40 = v38 110 | 51| add_int v41 = v32 + -15 111 | 52| call is_debugger_present () -> v42 112 | 53| jump 11 if v42 == 0 113 | 54| call debug_break () 114 | 115 | -------- Basic Block 11 -------- defines v43-50 -------- 116 | 117 | ... dominating: block 10 instruction 53 118 | 119 | 55| asm_x86 (contents not shown) 120 | 56| copy v46 = v28 121 | 57| add_int v49 = v28 + 15 122 | 58| add_int v50 = v32 + -1 123 | 59| call min (v49, v50) -> v48 124 | 60| jump 14 if v46 > v48 125 | 126 | -------- Basic Block 12 -------- defines v51-54 -------- 127 | 128 | ... dominating: block 11 instruction 60 129 | 130 | 61| cast_number v52 (*u8), v46 (u64) 131 | 62| mov v53, [v52] :1b 132 | 63| jump 13 if v53 != v2 133 | 64| add_int v54 = v18 + 1 134 | 65| copy v18 = v54 135 | 136 | -------- Basic Block 13 -------- defines v55 -------- 137 | 138 | ... 
dominating: block 12 instruction 61 139 | 140 | 66| compare v55 = (v46 >= v48) 141 | 67| add_int v46 = v46 + 1 142 | 68| jump 12 if v55 == 0 143 | 144 | -------- Basic Block 14 -------- 145 | 146 | ... dominating: block 11 instruction 60 147 | 148 | 69| return_value v18 -> 1 149 | 70| return 150 | 151 | -------- Basic Block 15 -------- defines v56 -------- 152 | 153 | ... dominating: block 8 instruction 37 154 | 155 | 71| constant v56 = 3 156 | 72| jump 22 if v26 != v56 157 | 158 | -------- Basic Block 16 -------- defines v57-76 -------- 159 | 160 | ... dominating: block 15 instruction 72 161 | 162 | 73| mov v58, [v1+8] :8b 163 | 74| cast_number v60 (u64), v58 (*u8) 164 | 75| copy v57 = v60 165 | 76| mov v62, [v1+8] :8b 166 | 77| cast_number v64 (u64), v62 (*u8) 167 | 78| mov v65, [v1] :8b 168 | 79| cast_number v66 (u64), v65 (s64) 169 | 80| binop v61, v64 + v66 170 | 81| lea v67, stack+30 171 | 82| constant v68 = 0x20 172 | 83| memset v67, v2 {count v68} 173 | 84| copy v69 = v67 174 | 85| lea v71, stack+50 175 | 86| copy v70 = v71 176 | 87| add_int v72 = v61 + -31 177 | 88| asm_x86 (contents not shown) 178 | 179 | -------- Basic Block 17 -------- defines v77-86 -------- 180 | 181 | ... dominating: block 16 instruction 89 182 | 183 | 89| jump 18 if v57 >= v72 184 | 90| asm_x86 (contents not shown) 185 | 91| mov v84, [v71] :4b 186 | 92| cast_number v85 (s64), v84 (u32) 187 | 93| binop v83, v18 + v85 188 | 94| copy v18 = v83 189 | 95| add_int v86 = v57 + 32 190 | 96| copy v57 = v86 191 | 97| jump 17 192 | 193 | -------- Basic Block 18 -------- defines v87-91 -------- 194 | 195 | ... dominating: block 17 instruction 89 196 | 197 | 98| copy v87 = v57 198 | 99| add_int v90 = v57 + 31 199 | 100| add_int v91 = v61 + -1 200 | 101| call min (v90, v91) -> v89 201 | 102| jump 21 if v87 > v89 202 | 203 | -------- Basic Block 19 -------- defines v92-97 -------- 204 | 205 | ... 
dominating: block 18 instruction 102 206 | 207 | 103| mov v93, [stack+18] :8b 208 | 104| cast_number v95 (*u8), v87 (u64) 209 | 105| mov v96, [v95] :1b 210 | 106| call v93 (v96, v2) -> v92 211 | 107| jump 20 if v92 == 0 212 | 108| add_int v97 = v18 + 1 213 | 109| copy v18 = v97 214 | 215 | -------- Basic Block 20 -------- defines v98 -------- 216 | 217 | ... dominating: block 19 instruction 103 218 | 219 | 110| compare v98 = (v87 >= v89) 220 | 111| add_int v87 = v87 + 1 221 | 112| jump 19 if v98 == 0 222 | 223 | -------- Basic Block 21 -------- 224 | 225 | ... dominating: block 18 instruction 102 226 | 227 | 113| return_value v18 -> 1 228 | 114| return 229 | 230 | -------- Basic Block 22 -------- 231 | 232 | ... dominating: block 8 instruction 37 233 | 234 | 115| jump 40 235 | 236 | -------- Basic Block 23 -------- 237 | 238 | ... dominating: block 6 instruction 30 239 | 240 | 116| jump 40 if v19 != v9 241 | 242 | -------- Basic Block 24 -------- defines v99-104 -------- 243 | 244 | ... dominating: block 23 instruction 116 245 | 246 | 117| lea v99, data+8048 247 | 118| mov v100, [v0] :4b 248 | 119| imul v101 = v100 * 8 249 | 120| add_extend v103 = v99 + v101 250 | 121| mov v104, [v103] :8b 251 | 252 | -------- Basic Block 25 -------- defines v105 -------- 253 | 254 | ... dominating: block 24 instruction 122 255 | 256 | 122| constant v105 = 0 257 | 123| jump 32 if v104 != v105 258 | 259 | -------- Basic Block 26 -------- defines v106-128 -------- 260 | 261 | ... 
dominating: block 25 instruction 123 262 | 263 | 124| mov v107, [v1+8] :8b 264 | 125| cast_number v109 (u64), v107 (*u8) 265 | 126| copy v106 = v109 266 | 127| mov v111, [v1+8] :8b 267 | 128| cast_number v113 (u64), v111 (*u8) 268 | 129| mov v114, [v1] :8b 269 | 130| cast_number v115 (u64), v114 (s64) 270 | 131| binop v110, v113 + v115 271 | 132| lea v116, stack+54 272 | 133| constant v117 = 0x10 273 | 134| memset v116, v2 {count v117} 274 | 135| copy v118 = v116 275 | 136| lea v119, stack+64 276 | 137| call to_lower (v2) -> v120 277 | 138| memset v119, v120 {count v117} 278 | 139| copy v121 = v119 279 | 140| lea v123, stack+74 280 | 141| copy v122 = v123 281 | 142| add_int v124 = v110 + -15 282 | 143| asm_x86 (contents not shown) 283 | 284 | -------- Basic Block 27 -------- defines v129-138 -------- 285 | 286 | ... dominating: block 26 instruction 144 287 | 288 | 144| jump 28 if v106 >= v124 289 | 145| asm_x86 (contents not shown) 290 | 146| mov v136, [v123] :2b 291 | 147| cast_number v137 (s64), v136 (u16) 292 | 148| binop v135, v18 + v137 293 | 149| copy v18 = v135 294 | 150| add_int v138 = v106 + 16 295 | 151| copy v106 = v138 296 | 152| jump 27 297 | 298 | -------- Basic Block 28 -------- defines v139-143 -------- 299 | 300 | ... dominating: block 27 instruction 144 301 | 302 | 153| copy v139 = v106 303 | 154| add_int v142 = v106 + 15 304 | 155| add_int v143 = v110 + -1 305 | 156| call min (v142, v143) -> v141 306 | 157| jump 31 if v139 > v141 307 | 308 | -------- Basic Block 29 -------- defines v144-149 -------- 309 | 310 | ... dominating: block 28 instruction 157 311 | 312 | 158| mov v145, [stack+18] :8b 313 | 159| cast_number v147 (*u8), v139 (u64) 314 | 160| mov v148, [v147] :1b 315 | 161| call v145 (v148, v2) -> v144 316 | 162| jump 30 if v144 == 0 317 | 163| add_int v149 = v18 + 1 318 | 164| copy v18 = v149 319 | 320 | -------- Basic Block 30 -------- defines v150 -------- 321 | 322 | ... 
dominating: block 29 instruction 158 323 | 324 | 165| compare v150 = (v139 >= v141) 325 | 166| add_int v139 = v139 + 1 326 | 167| jump 29 if v150 == 0 327 | 328 | -------- Basic Block 31 -------- 329 | 330 | ... dominating: block 28 instruction 157 331 | 332 | 168| return_value v18 -> 1 333 | 169| return 334 | 335 | -------- Basic Block 32 -------- defines v151 -------- 336 | 337 | ... dominating: block 24 instruction 122 338 | 339 | 170| constant v151 = 3 340 | 171| jump 39 if v104 != v151 341 | 342 | -------- Basic Block 33 -------- defines v152-178 -------- 343 | 344 | ... dominating: block 32 instruction 171 345 | 346 | 172| mov v153, [v1+8] :8b 347 | 173| cast_number v155 (u64), v153 (*u8) 348 | 174| copy v152 = v155 349 | 175| mov v157, [v1+8] :8b 350 | 176| cast_number v159 (u64), v157 (*u8) 351 | 177| mov v160, [v1] :8b 352 | 178| cast_number v161 (u64), v160 (s64) 353 | 179| binop v156, v159 + v161 354 | 180| lea v162, stack+76 355 | 181| constant v163 = 0x20 356 | 182| memset v162, v2 {count v163} 357 | 183| copy v164 = v162 358 | 184| lea v165, stack+96 359 | 185| call to_lower (v2) -> v166 360 | 186| memset v165, v166 {count v163} 361 | 187| copy v167 = v165 362 | 188| lea v169, stack+b8 363 | 189| copy v168 = v169 364 | 190| add_int v170 = v156 + -31 365 | 191| asm_x86 (contents not shown) 366 | 367 | -------- Basic Block 34 -------- defines v179-196 -------- 368 | 369 | ... dominating: block 33 instruction 192 370 | 371 | 192| jump 35 if v152 >= v170 372 | 193| asm_x86 (contents not shown) 373 | 194| mov v194, [v169] :4b 374 | 195| cast_number v195 (s64), v194 (u32) 375 | 196| binop v193, v18 + v195 376 | 197| copy v18 = v193 377 | 198| add_int v196 = v152 + 32 378 | 199| copy v152 = v196 379 | 200| jump 34 380 | 381 | -------- Basic Block 35 -------- defines v197-201 -------- 382 | 383 | ... 
dominating: block 34 instruction 192 384 | 385 | 201| copy v197 = v152 386 | 202| add_int v200 = v152 + 31 387 | 203| add_int v201 = v156 + -1 388 | 204| call min (v200, v201) -> v199 389 | 205| jump 38 if v197 > v199 390 | 391 | -------- Basic Block 36 -------- defines v202-207 -------- 392 | 393 | ... dominating: block 35 instruction 205 394 | 395 | 206| mov v203, [stack+18] :8b 396 | 207| cast_number v205 (*u8), v197 (u64) 397 | 208| mov v206, [v205] :1b 398 | 209| call v203 (v206, v2) -> v202 399 | 210| jump 37 if v202 == 0 400 | 211| add_int v207 = v18 + 1 401 | 212| copy v18 = v207 402 | 403 | -------- Basic Block 37 -------- defines v208 -------- 404 | 405 | ... dominating: block 36 instruction 206 406 | 407 | 213| compare v208 = (v197 >= v199) 408 | 214| add_int v197 = v197 + 1 409 | 215| jump 36 if v208 == 0 410 | 411 | -------- Basic Block 38 -------- 412 | 413 | ... dominating: block 35 instruction 205 414 | 415 | 216| return_value v18 -> 1 416 | 217| return 417 | 418 | -------- Basic Block 39 -------- 419 | 420 | ... dominating: block 24 instruction 122 421 | 422 | 218| jump 40 423 | 424 | -------- Basic Block 40 -------- defines v209-213 -------- 425 | 426 | ... dominating: block 6 instruction 30 427 | 428 | 219| constant v209 = 0 429 | 220| copy v210 = v209 430 | 221| mov v212, [v1] :8b 431 | 222| add_int v213 = v212 + -1 432 | 223| jump 43 if v210 > v213 433 | 434 | -------- Basic Block 41 -------- defines v214-219 -------- 435 | 436 | ... dominating: block 40 instruction 223 437 | 438 | 224| mov v215, [stack+18] :8b 439 | 225| mov v216, [v1+8] :8b 440 | 226| add_extend v217 = v216 + v210 441 | 227| mov v218, [v217] :1b 442 | 228| call v215 (v218, v2) -> v214 443 | 229| jump 42 if v214 == 0 444 | 230| add_int v219 = v18 + 1 445 | 231| copy v18 = v219 446 | 447 | -------- Basic Block 42 -------- 448 | 449 | ... 
dominating: block 41 instruction 224 450 | 451 | 232| add_int v210 = v210 + 1 452 | 233| jump 41 if v210 <= v213 453 | 454 | -------- Basic Block 43 -------- 455 | 456 | ... dominating: block 40 instruction 223 457 | 458 | 234| return_value v18 -> 1 459 | 235| return 460 | Creating library C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib and object C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.exp 461 | Running linker: "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\Hostx64\x64\link.exe" /nologo C:/Repos/jai-modules/Strings_Modules/tests/.build/test9_2_0.obj /OUT:test9.exe /MACHINE:AMD64 /INCREMENTAL:NO /DEBUG /IMPLIB:C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib /libpath:"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\lib\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\um\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\ucrt\x64" -nodefaultlib libcmt.lib vcruntime.lib ucrt.lib kernel32.lib comdlg32.lib shlwapi.lib Advapi32.lib DbgHelp.lib Dwmapi.lib opengl32.lib Gdi32.lib msvcrt.lib shell32.lib user32.lib winmm.lib kernel32.lib Ntdll.lib c:/jai/modules/stb_sprintf/win/stb_sprintf.lib 462 | 463 | Stats for Workspace 2 ("Target Program"): 464 | Lexer lines processed: 135047 (141800 including blank lines, comments.) 465 | Front-end time: 0.068047 seconds. 466 | llvm time: 0.905029 seconds. 467 | 468 | Compiler time: 0.973076 seconds. 469 | Link time: 0.467812 seconds. 470 | Total time: 1.440889 seconds. 
471 | ------- 472 | 473 | Disassembly of 'count' at c:/jai/modules/Strings_Shared/Strings_Shared.jai:1009 474 | - Stack size 188 475 | 476 | -------- Basic Block 0 -------- defines v5-7 -------- 477 | 478 | (no dominating) 479 | 480 | 0| lea v6, stack+0 481 | 1| clear v6 {size 16} 482 | 2| string== v5, v1 == v6 483 | 3| jump 1 if v5 == 0 484 | 4| constant v7 = 0 485 | 5| return_value v7 -> 1 486 | 6| return 487 | 488 | -------- Basic Block 1 -------- defines v8-11 -------- 489 | 490 | ... dominating: block 0 instruction 3 491 | 492 | 7| mov v8, [v3] :1b 493 | 8| constant v9 = 1 494 | 9| jump 5 if v8 != v9 495 | 10| call is_upper (v2) -> v10 496 | 11| jump 2 if v10 == 0 497 | 12| lea v11, stack+10 498 | 13| memcpy v11, v3 {size 16} 499 | 14| jump 4 500 | 501 | -------- Basic Block 2 -------- defines v12-14 -------- 502 | 503 | ... dominating: block 1 instruction 11 504 | 505 | 15| call is_lower (v2) -> v12 506 | 16| jump 3 if v12 == 0 507 | 17| call to_upper (v2) -> v13 508 | 18| copy v2 = v13 509 | 19| lea v14, stack+10 510 | 20| memcpy v14, v3 {size 16} 511 | 21| jump 4 512 | 513 | -------- Basic Block 3 -------- defines v15-16 -------- 514 | 515 | ... dominating: block 2 instruction 16 516 | 517 | 22| lea v15, bss+40 518 | 23| lea v16, stack+10 519 | 24| memcpy v16, v15 {size 16} 520 | 521 | -------- Basic Block 4 -------- 522 | 523 | ... dominating: block 1 instruction 11 524 | 525 | 25| jump 6 526 | 527 | -------- Basic Block 5 -------- defines v17 -------- 528 | 529 | ... dominating: block 1 instruction 9 530 | 531 | 26| lea v17, stack+10 532 | 27| memcpy v17, v3 {size 16} 533 | 534 | -------- Basic Block 6 -------- defines v18-19 -------- 535 | 536 | ... dominating: block 1 instruction 9 537 | 538 | 28| constant v18 = 0 539 | 29| mov v19, [stack+10] :1b 540 | 541 | -------- Basic Block 7 -------- defines v20 -------- 542 | 543 | ... 
dominating: block 6 instruction 30 544 | 545 | 30| constant v20 = 0 546 | 31| jump 23 if v19 != v20 547 | 548 | -------- Basic Block 8 -------- defines v21-26 -------- 549 | 550 | ... dominating: block 7 instruction 31 551 | 552 | 32| lea v21, data+8048 553 | 33| mov v22, [v0] :4b 554 | 34| imul v23 = v22 * 8 555 | 35| add_extend v25 = v21 + v23 556 | 36| mov v26, [v25] :8b 557 | 558 | -------- Basic Block 9 -------- defines v27 -------- 559 | 560 | ... dominating: block 8 instruction 37 561 | 562 | 37| constant v27 = 0 563 | 38| jump 15 if v26 != v27 564 | 565 | -------- Basic Block 10 -------- defines v28-42 -------- 566 | 567 | ... dominating: block 9 instruction 38 568 | 569 | 39| mov v29, [v1+8] :8b 570 | 40| cast_number v31 (u64), v29 (*u8) 571 | 41| copy v28 = v31 572 | 42| mov v33, [v1+8] :8b 573 | 43| cast_number v35 (u64), v33 (*u8) 574 | 44| mov v36, [v1] :8b 575 | 45| cast_number v37 (u64), v36 (s64) 576 | 46| binop v32, v35 + v37 577 | 47| lea v38, stack+20 578 | 48| constant v39 = 0x10 579 | 49| memset v38, v2 {count v39} 580 | 50| copy v40 = v38 581 | 51| add_int v41 = v32 + -15 582 | 52| call is_debugger_present () -> v42 583 | 53| jump 11 if v42 == 0 584 | 54| call debug_break () 585 | 586 | -------- Basic Block 11 -------- defines v43-50 -------- 587 | 588 | ... dominating: block 10 instruction 53 589 | 590 | 55| asm_x86 (contents not shown) 591 | 56| asm_x86 (contents not shown) 592 | 57| copy v46 = v28 593 | 58| add_int v49 = v28 + 15 594 | 59| add_int v50 = v32 + -1 595 | 60| call min (v49, v50) -> v48 596 | 61| jump 14 if v46 > v48 597 | 598 | -------- Basic Block 12 -------- defines v51-54 -------- 599 | 600 | ... dominating: block 11 instruction 61 601 | 602 | 62| cast_number v52 (*u8), v46 (u64) 603 | 63| mov v53, [v52] :1b 604 | 64| jump 13 if v53 != v2 605 | 65| add_int v54 = v18 + 1 606 | 66| copy v18 = v54 607 | 608 | -------- Basic Block 13 -------- defines v55 -------- 609 | 610 | ... 
dominating: block 12 instruction 62 611 | 612 | 67| compare v55 = (v46 >= v48) 613 | 68| add_int v46 = v46 + 1 614 | 69| jump 12 if v55 == 0 615 | 616 | -------- Basic Block 14 -------- 617 | 618 | ... dominating: block 11 instruction 61 619 | 620 | 70| return_value v18 -> 1 621 | 71| return 622 | 623 | -------- Basic Block 15 -------- defines v56 -------- 624 | 625 | ... dominating: block 8 instruction 37 626 | 627 | 72| constant v56 = 3 628 | 73| jump 22 if v26 != v56 629 | 630 | -------- Basic Block 16 -------- defines v57-76 -------- 631 | 632 | ... dominating: block 15 instruction 73 633 | 634 | 74| mov v58, [v1+8] :8b 635 | 75| cast_number v60 (u64), v58 (*u8) 636 | 76| copy v57 = v60 637 | 77| mov v62, [v1+8] :8b 638 | 78| cast_number v64 (u64), v62 (*u8) 639 | 79| mov v65, [v1] :8b 640 | 80| cast_number v66 (u64), v65 (s64) 641 | 81| binop v61, v64 + v66 642 | 82| lea v67, stack+30 643 | 83| constant v68 = 0x20 644 | 84| memset v67, v2 {count v68} 645 | 85| copy v69 = v67 646 | 86| lea v71, stack+50 647 | 87| copy v70 = v71 648 | 88| add_int v72 = v61 + -31 649 | 89| asm_x86 (contents not shown) 650 | 651 | -------- Basic Block 17 -------- defines v77-86 -------- 652 | 653 | ... dominating: block 16 instruction 90 654 | 655 | 90| jump 18 if v57 >= v72 656 | 91| asm_x86 (contents not shown) 657 | 92| mov v84, [v71] :4b 658 | 93| cast_number v85 (s64), v84 (u32) 659 | 94| binop v83, v18 + v85 660 | 95| copy v18 = v83 661 | 96| add_int v86 = v57 + 32 662 | 97| copy v57 = v86 663 | 98| jump 17 664 | 665 | -------- Basic Block 18 -------- defines v87-91 -------- 666 | 667 | ... dominating: block 17 instruction 90 668 | 669 | 99| copy v87 = v57 670 | 100| add_int v90 = v57 + 31 671 | 101| add_int v91 = v61 + -1 672 | 102| call min (v90, v91) -> v89 673 | 103| jump 21 if v87 > v89 674 | 675 | -------- Basic Block 19 -------- defines v92-97 -------- 676 | 677 | ... 
dominating: block 18 instruction 103 678 | 679 | 104| mov v93, [stack+18] :8b 680 | 105| cast_number v95 (*u8), v87 (u64) 681 | 106| mov v96, [v95] :1b 682 | 107| call v93 (v96, v2) -> v92 683 | 108| jump 20 if v92 == 0 684 | 109| add_int v97 = v18 + 1 685 | 110| copy v18 = v97 686 | 687 | -------- Basic Block 20 -------- defines v98 -------- 688 | 689 | ... dominating: block 19 instruction 104 690 | 691 | 111| compare v98 = (v87 >= v89) 692 | 112| add_int v87 = v87 + 1 693 | 113| jump 19 if v98 == 0 694 | 695 | -------- Basic Block 21 -------- 696 | 697 | ... dominating: block 18 instruction 103 698 | 699 | 114| return_value v18 -> 1 700 | 115| return 701 | 702 | -------- Basic Block 22 -------- 703 | 704 | ... dominating: block 8 instruction 37 705 | 706 | 116| jump 40 707 | 708 | -------- Basic Block 23 -------- 709 | 710 | ... dominating: block 6 instruction 30 711 | 712 | 117| jump 40 if v19 != v9 713 | 714 | -------- Basic Block 24 -------- defines v99-104 -------- 715 | 716 | ... dominating: block 23 instruction 117 717 | 718 | 118| lea v99, data+8048 719 | 119| mov v100, [v0] :4b 720 | 120| imul v101 = v100 * 8 721 | 121| add_extend v103 = v99 + v101 722 | 122| mov v104, [v103] :8b 723 | 724 | -------- Basic Block 25 -------- defines v105 -------- 725 | 726 | ... dominating: block 24 instruction 123 727 | 728 | 123| constant v105 = 0 729 | 124| jump 32 if v104 != v105 730 | 731 | -------- Basic Block 26 -------- defines v106-128 -------- 732 | 733 | ... 
dominating: block 25 instruction 124 734 | 735 | 125| mov v107, [v1+8] :8b 736 | 126| cast_number v109 (u64), v107 (*u8) 737 | 127| copy v106 = v109 738 | 128| mov v111, [v1+8] :8b 739 | 129| cast_number v113 (u64), v111 (*u8) 740 | 130| mov v114, [v1] :8b 741 | 131| cast_number v115 (u64), v114 (s64) 742 | 132| binop v110, v113 + v115 743 | 133| lea v116, stack+54 744 | 134| constant v117 = 0x10 745 | 135| memset v116, v2 {count v117} 746 | 136| copy v118 = v116 747 | 137| lea v119, stack+64 748 | 138| call to_lower (v2) -> v120 749 | 139| memset v119, v120 {count v117} 750 | 140| copy v121 = v119 751 | 141| lea v123, stack+74 752 | 142| copy v122 = v123 753 | 143| add_int v124 = v110 + -15 754 | 144| asm_x86 (contents not shown) 755 | 756 | -------- Basic Block 27 -------- defines v129-138 -------- 757 | 758 | ... dominating: block 26 instruction 145 759 | 760 | 145| jump 28 if v106 >= v124 761 | 146| asm_x86 (contents not shown) 762 | 147| mov v136, [v123] :2b 763 | 148| cast_number v137 (s64), v136 (u16) 764 | 149| binop v135, v18 + v137 765 | 150| copy v18 = v135 766 | 151| add_int v138 = v106 + 16 767 | 152| copy v106 = v138 768 | 153| jump 27 769 | 770 | -------- Basic Block 28 -------- defines v139-143 -------- 771 | 772 | ... dominating: block 27 instruction 145 773 | 774 | 154| copy v139 = v106 775 | 155| add_int v142 = v106 + 15 776 | 156| add_int v143 = v110 + -1 777 | 157| call min (v142, v143) -> v141 778 | 158| jump 31 if v139 > v141 779 | 780 | -------- Basic Block 29 -------- defines v144-149 -------- 781 | 782 | ... dominating: block 28 instruction 158 783 | 784 | 159| mov v145, [stack+18] :8b 785 | 160| cast_number v147 (*u8), v139 (u64) 786 | 161| mov v148, [v147] :1b 787 | 162| call v145 (v148, v2) -> v144 788 | 163| jump 30 if v144 == 0 789 | 164| add_int v149 = v18 + 1 790 | 165| copy v18 = v149 791 | 792 | -------- Basic Block 30 -------- defines v150 -------- 793 | 794 | ... 
dominating: block 29 instruction 159 795 | 796 | 166| compare v150 = (v139 >= v141) 797 | 167| add_int v139 = v139 + 1 798 | 168| jump 29 if v150 == 0 799 | 800 | -------- Basic Block 31 -------- 801 | 802 | ... dominating: block 28 instruction 158 803 | 804 | 169| return_value v18 -> 1 805 | 170| return 806 | 807 | -------- Basic Block 32 -------- defines v151 -------- 808 | 809 | ... dominating: block 24 instruction 123 810 | 811 | 171| constant v151 = 3 812 | 172| jump 39 if v104 != v151 813 | 814 | -------- Basic Block 33 -------- defines v152-178 -------- 815 | 816 | ... dominating: block 32 instruction 172 817 | 818 | 173| mov v153, [v1+8] :8b 819 | 174| cast_number v155 (u64), v153 (*u8) 820 | 175| copy v152 = v155 821 | 176| mov v157, [v1+8] :8b 822 | 177| cast_number v159 (u64), v157 (*u8) 823 | 178| mov v160, [v1] :8b 824 | 179| cast_number v161 (u64), v160 (s64) 825 | 180| binop v156, v159 + v161 826 | 181| lea v162, stack+76 827 | 182| constant v163 = 0x20 828 | 183| memset v162, v2 {count v163} 829 | 184| copy v164 = v162 830 | 185| lea v165, stack+96 831 | 186| call to_lower (v2) -> v166 832 | 187| memset v165, v166 {count v163} 833 | 188| copy v167 = v165 834 | 189| lea v169, stack+b8 835 | 190| copy v168 = v169 836 | 191| add_int v170 = v156 + -31 837 | 192| asm_x86 (contents not shown) 838 | 839 | -------- Basic Block 34 -------- defines v179-196 -------- 840 | 841 | ... dominating: block 33 instruction 193 842 | 843 | 193| jump 35 if v152 >= v170 844 | 194| asm_x86 (contents not shown) 845 | 195| mov v194, [v169] :4b 846 | 196| cast_number v195 (s64), v194 (u32) 847 | 197| binop v193, v18 + v195 848 | 198| copy v18 = v193 849 | 199| add_int v196 = v152 + 32 850 | 200| copy v152 = v196 851 | 201| jump 34 852 | 853 | -------- Basic Block 35 -------- defines v197-201 -------- 854 | 855 | ... 
dominating: block 34 instruction 193 856 | 857 | 202| copy v197 = v152 858 | 203| add_int v200 = v152 + 31 859 | 204| add_int v201 = v156 + -1 860 | 205| call min (v200, v201) -> v199 861 | 206| jump 38 if v197 > v199 862 | 863 | -------- Basic Block 36 -------- defines v202-207 -------- 864 | 865 | ... dominating: block 35 instruction 206 866 | 867 | 207| mov v203, [stack+18] :8b 868 | 208| cast_number v205 (*u8), v197 (u64) 869 | 209| mov v206, [v205] :1b 870 | 210| call v203 (v206, v2) -> v202 871 | 211| jump 37 if v202 == 0 872 | 212| add_int v207 = v18 + 1 873 | 213| copy v18 = v207 874 | 875 | -------- Basic Block 37 -------- defines v208 -------- 876 | 877 | ... dominating: block 36 instruction 207 878 | 879 | 214| compare v208 = (v197 >= v199) 880 | 215| add_int v197 = v197 + 1 881 | 216| jump 36 if v208 == 0 882 | 883 | -------- Basic Block 38 -------- 884 | 885 | ... dominating: block 35 instruction 206 886 | 887 | 217| return_value v18 -> 1 888 | 218| return 889 | 890 | -------- Basic Block 39 -------- 891 | 892 | ... dominating: block 24 instruction 123 893 | 894 | 219| jump 40 895 | 896 | -------- Basic Block 40 -------- defines v209-213 -------- 897 | 898 | ... dominating: block 6 instruction 30 899 | 900 | 220| constant v209 = 0 901 | 221| copy v210 = v209 902 | 222| mov v212, [v1] :8b 903 | 223| add_int v213 = v212 + -1 904 | 224| jump 43 if v210 > v213 905 | 906 | -------- Basic Block 41 -------- defines v214-219 -------- 907 | 908 | ... dominating: block 40 instruction 224 909 | 910 | 225| mov v215, [stack+18] :8b 911 | 226| mov v216, [v1+8] :8b 912 | 227| add_extend v217 = v216 + v210 913 | 228| mov v218, [v217] :1b 914 | 229| call v215 (v218, v2) -> v214 915 | 230| jump 42 if v214 == 0 916 | 231| add_int v219 = v18 + 1 917 | 232| copy v18 = v219 918 | 919 | -------- Basic Block 42 -------- 920 | 921 | ... 
dominating: block 41 instruction 225 922 | 923 | 233| add_int v210 = v210 + 1 924 | 234| jump 41 if v210 <= v213 925 | 926 | -------- Basic Block 43 -------- 927 | 928 | ... dominating: block 40 instruction 224 929 | 930 | 235| return_value v18 -> 1 931 | 236| return 932 | Creating library C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib and object C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.exp 933 | Running linker: "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\Hostx64\x64\link.exe" /nologo C:/Repos/jai-modules/Strings_Modules/tests/.build/test9_2_0.obj /OUT:test9.exe /MACHINE:AMD64 /INCREMENTAL:NO /DEBUG /IMPLIB:C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib /libpath:"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\lib\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\um\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\ucrt\x64" -nodefaultlib libcmt.lib vcruntime.lib ucrt.lib kernel32.lib comdlg32.lib shlwapi.lib Advapi32.lib DbgHelp.lib Dwmapi.lib opengl32.lib Gdi32.lib msvcrt.lib shell32.lib user32.lib winmm.lib kernel32.lib Ntdll.lib c:/jai/modules/stb_sprintf/win/stb_sprintf.lib 934 | 935 | Stats for Workspace 2 ("Target Program"): 936 | Lexer lines processed: 135050 (141800 including blank lines, comments.) 937 | Front-end time: 0.070711 seconds. 938 | llvm time: 0.933580 seconds. 939 | 940 | Compiler time: 1.004290 seconds. 941 | Link time: 0.602430 seconds. 942 | Total time: 1.606720 seconds. 943 | --------------------------------------------------------------------------------