├── modules.lst
├── tests
    ├── all_tests.bat
    ├── slowfastdiff.png
    ├── make8.bat
    ├── test_indexing_algorithms.bat
    ├── output.dot
    ├── allocator_test.jai
    ├── indexing_test.py
    ├── run_all_tests.bat
    ├── strings_bug.jai
    ├── compile_all_tests.bat
    ├── test.jai
    ├── test7.jai
    ├── test10.jai
    ├── test5.jai
    ├── test6.jai
    ├── test11.jai
    ├── test9.jai
    ├── test8.jai
    ├── simd_test.jai
    ├── indexing_test.jai
    └── output.txt
├── .gitignore
├── CONTRIBUTING.md
├── tools
    ├── build_index_profile.jai
    └── index_profile.jai
├── Strings
    ├── module.jai
    ├── knuth_morris_pratt.jai
    └── splitting.jai
├── CHANGELOG.md
├── Scratch
    ├── Scratch.jai
    └── module.jai
└── README.md


/modules.lst:
--------------------------------------------------------------------------------
1 | Scratch
2 | Strings
3 | 


--------------------------------------------------------------------------------
/tests/all_tests.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | normal_test.exe
3 | 
4 | test_indexing_algorithms.bat
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .build/
2 | */.build/
3 | *.exe
4 | *.pdb
5 | tests/data
6 | /tools/index_profile
7 | 


--------------------------------------------------------------------------------
/tests/slowfastdiff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/onelivesleft/jai-string/HEAD/tests/slowfastdiff.png


--------------------------------------------------------------------------------
/tests/make8.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | echo Set to false
 3 | pause
 4 | jai test8.jai -release
 5 | copy test8.exe test8slow.exe /y
 6 | echo Set to true
 7 | pause
 8 | jai test8.jai -release
 9 | copy test8.exe test8fast.exe /y
10 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | This is currently just a personal repo, so I won't be merging any PRs.  If you think there's a missing core string function it should include, or have other suggestions (or bug reports!), feel free to make an issue.
2 | 


--------------------------------------------------------------------------------
/tests/test_indexing_algorithms.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | ..\tools\index_profile.exe -t data\shakespeare.jai "/*"
 3 | if NOT ["%errorlevel%"]==["0"] goto end
 4 | 
 5 | ..\tools\index_profile.exe -t data\shakespeare.jai "__WILLIAM\n"
 6 | if NOT ["%errorlevel%"]==["0"] goto end
 7 | 
 8 | 
 9 | 
10 | :end
11 | 


--------------------------------------------------------------------------------
/tests/output.dot:
--------------------------------------------------------------------------------
 1 | digraph {
 2 | 
 3 |     s30064779389 [label="LAMBDA_BODY #70000207d\nnormal_test.jai:70",shape=ellipse]
 4 |     s30064779494 [label="LAMBDA_BODY #7000020e6\nnormal_test.jai:102",shape=ellipse]
 5 |     s30064779389 [label="LAMBDA_BODY #70000207d\nnormal_test.jai:70",shape=ellipse]
 6 |     s12884909899 [label="RUN #300001f4b\n.added_strings_w3.jai:11",shape=ellipse]
 7 |     s12884909901 [label="LAMBDA_BODY #300001f4d\n.added_strings_w3.jai:11",shape=ellipse]
 8 |     s30064779494 [label="LAMBDA_BODY #7000020e6\nnormal_test.jai:102",shape=ellipse]
 9 | 
10 | }
11 | 


--------------------------------------------------------------------------------
/tests/allocator_test.jai:
--------------------------------------------------------------------------------
 1 | #import "Basic";
 2 | #import "Scratch";
 3 | #import "Strings";
 4 | 
 5 | #load "data/shakespeare.jai";
 6 | 
 7 | 
 8 | main :: () {
 9 |     foo := copy_string(shakespeare,, scratch);
10 | 
11 |     for count: 1 .. 1000 {
12 |         foo = replace(foo, "z", "#",, scratch);
13 |         for < #char "y" .. #char "a" {
14 |             from : u8 = cast(u8)it;
15 |             next : u8 = cast(u8)it + 1;
16 |             foo = replace(foo, char_as_string(*from), char_as_string(*next),, scratch);
17 |         }
18 |         foo = replace(foo, "#", "a",, scratch);
19 |     }
20 | 
21 |     print(foo);
22 | }
23 | 


--------------------------------------------------------------------------------
/tests/indexing_test.py:
--------------------------------------------------------------------------------
 1 | import time, sys
 2 | 
 3 | haystack = open("data/shakespeare.jai").read().split("__WILLIAM\n")[1]
 4 | needle = " and "
 5 | 
 6 | times = 10
 7 | 
 8 | index = haystack.find(needle)
 9 | while index >= 0:
10 |     index = haystack.find(needle, index + 1)
11 | 
12 | 
13 | total_checksum = 0
14 | t = time.time()
15 | for x in range(times):
16 |     index = haystack.find(needle)
17 |     checksum = 0
18 |     while index >= 0:
19 |         checksum = checksum ^ index
20 |         index = haystack.find(needle, index + 1)
21 |     total_checksum += checksum
22 | delta = time.time() - t
23 | print(total_checksum, delta)
24 | 


--------------------------------------------------------------------------------
/tools/build_index_profile.jai:
--------------------------------------------------------------------------------
 1 | #import "Basic";
 2 | #import "Compiler";
 3 | 
 4 | filepath :: "index_profile.jai";
 5 | 
 6 | #run {
 7 |     build_options := get_build_options();
 8 |     set_build_options_dc(.{do_output = false});
 9 | 
10 |     workspace := compiler_create_workspace();
11 |     build_options.output_executable_name = "index_profile";
12 |     set_optimization(*build_options, .VERY_OPTIMIZED, false);
13 |     set_build_options(build_options, workspace);
14 | 
15 |     compiler_begin_intercept(workspace);
16 | 
17 |     add_build_file(filepath, workspace);
18 | 
19 |     while true {
20 |         message := compiler_wait_for_message();
21 |         if !message continue;
22 |         if message.kind == .COMPLETE break;
23 |     }
24 | 
25 |     compiler_end_intercept(workspace);
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/run_all_tests.bat:
--------------------------------------------------------------------------------
 1 | allocator_test.exe
 2 | @if NOT ["%errorlevel%"]==["0"] goto end
 3 | 
 4 | indexing_test.exe
 5 | @if NOT ["%errorlevel%"]==["0"] goto end
 6 | 
 7 | normal_test.exe
 8 | @if NOT ["%errorlevel%"]==["0"] goto end
 9 | 
10 | simd_test.exe
11 | @if NOT ["%errorlevel%"]==["0"] goto end
12 | 
13 | strings_bug.exe
14 | @if NOT ["%errorlevel%"]==["0"] goto end
15 | 
16 | test.exe
17 | @if NOT ["%errorlevel%"]==["0"] goto end
18 | 
19 | test10.exe
20 | @if NOT ["%errorlevel%"]==["0"] goto end
21 | 
22 | test11.exe
23 | @if NOT ["%errorlevel%"]==["0"] goto end
24 | 
25 | test5.exe
26 | @if NOT ["%errorlevel%"]==["0"] goto end
27 | 
28 | test6.exe
29 | @if NOT ["%errorlevel%"]==["0"] goto end
30 | 
31 | test7.exe
32 | @if NOT ["%errorlevel%"]==["0"] goto end
33 | 
34 | test8.exe
35 | @if NOT ["%errorlevel%"]==["0"] goto end
36 | 
37 | test9.exe
38 | @if NOT ["%errorlevel%"]==["0"] goto end
39 | 
40 | :end


--------------------------------------------------------------------------------
/tests/strings_bug.jai:
--------------------------------------------------------------------------------
 1 | main :: () {
 2 |   files := #string __
 3 | README.txt - C:\\Users\\farzher\\Downloads\\4coder\\README.txt
 4 | changes.txt - C:\\Users\\farzher\\Downloads\\4coder\\changes.txt
 5 | __
 6 | 
 7 |   context.allocator = __temporary_allocator;
 8 | 
 9 | 
10 |   for i: 1 .. 3 {
11 |     reset_temporary_storage();
12 |     print("loop: %\n", i);
13 |     filesstr: string;
14 |     for line: line_split(files) {
15 |       //filename, path := split_into_two(line, " - ");
16 |       //print("%\n%\n", filename, path);
17 |       //filesstr = strings_alloc.join(filesstr, path, "\0");
18 |       filesstr = join(filesstr, substring(line, first_index(line, " - ")+" - ".count), "\0");
19 |       //filesstr = strings_alloc.join(filesstr, substring(line, String.find_index_from_left(line, " - ")+" - ".count), "\0");
20 |     }
21 |     print("%\n", filesstr);
22 |     print("%\n", get_temporary_storage_mark());
23 |   }
24 | }
25 | 
26 | String :: #import "String";
27 | #import "Strings";
28 | #import "Basic";
29 | 


--------------------------------------------------------------------------------
/tests/compile_all_tests.bat:
--------------------------------------------------------------------------------
 1 | jai -quiet -import_dir .. allocator_test.jai
 2 | @if NOT ["%errorlevel%"]==["0"] goto end
 3 | 
 4 | jai -quiet -import_dir .. indexing_test.jai
 5 | @if NOT ["%errorlevel%"]==["0"] goto end
 6 | 
 7 | jai -quiet -import_dir .. normal_test.jai
 8 | @if NOT ["%errorlevel%"]==["0"] goto end
 9 | 
10 | jai -quiet -import_dir .. simd_test.jai
11 | @if NOT ["%errorlevel%"]==["0"] goto end
12 | 
13 | jai -quiet -import_dir .. strings_bug.jai
14 | @if NOT ["%errorlevel%"]==["0"] goto end
15 | 
16 | jai -quiet -import_dir .. test.jai
17 | @if NOT ["%errorlevel%"]==["0"] goto end
18 | 
19 | jai -quiet -import_dir .. test10.jai
20 | @if NOT ["%errorlevel%"]==["0"] goto end
21 | 
22 | jai -quiet -import_dir .. test11.jai
23 | @if NOT ["%errorlevel%"]==["0"] goto end
24 | 
25 | jai -quiet -import_dir .. test5.jai
26 | @if NOT ["%errorlevel%"]==["0"] goto end
27 | 
28 | jai -quiet -import_dir .. test6.jai
29 | @if NOT ["%errorlevel%"]==["0"] goto end
30 | 
31 | jai -quiet -import_dir .. test7.jai
32 | @if NOT ["%errorlevel%"]==["0"] goto end
33 | 
34 | jai -quiet -import_dir .. test8.jai
35 | @if NOT ["%errorlevel%"]==["0"] goto end
36 | 
37 | jai -quiet -import_dir .. test9.jai
38 | @if NOT ["%errorlevel%"]==["0"] goto end
39 | 
40 | :end


--------------------------------------------------------------------------------
/tests/test.jai:
--------------------------------------------------------------------------------
 1 | #import "Basic";
 2 | #import "Strings";
 3 | jai_string :: #import "String";
 4 | 
 5 | #load "data/shakespeare.jai";
 6 | 
 7 | only_problems :: false;
 8 | 
 9 | main :: () {
10 |     defer {
11 |         if errors.count != 0 {
12 |             print("\nErrors:\n%\n", errors);
13 |             exit(1);
14 |         }
15 |         else {
16 |             print("\nAll OK!\n");
17 |         }
18 |     }
19 | 
20 |     haystack :: "hell yeah hell yeweeeeeeeeah";
21 |     set_simd_mode(.SSE2);
22 |     last_index_of_z := jai_string.find_index_from_right(haystack, "a");
23 |     test(1, last_index(haystack, "a"), last_index_of_z);
24 | }
25 | 
26 | 
27 | errors : [..] int;
28 | 
29 | test :: (test_id: int, value: $T, expected: T) {
30 |     if value != expected {
31 |         print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value);
32 |         array_add(*errors, test_id);
33 |     }
34 |     else if !only_problems {
35 |         print("[%] OK\n", test_id);
36 |     }
37 | }
38 | 
39 | test :: (test_id: int, value: [] string, expected: [] string) {
40 |     if !array_equals(value, expected) {
41 |         print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value);
42 |         array_add(*errors, test_id);
43 |     }
44 |     else if !only_problems {
45 |         print("[%] OK\n", test_id);
46 |     }
47 | }
48 | 
49 | array_equals :: (a: [] $T, b: [] T) -> bool {
50 |     if a.count != b.count  return false;  // Views of different length can never match.
51 |     for element, index: a  if element != b[index]  return false;  // Stop at the first differing pair.
52 |     return true;
53 | }
54 | 
55 | 
56 | xor :: (a: u64, b: u64) -> u64 {
57 |     result := a;
58 |     #asm {
59 |         x : gpr;
60 |         mov.q x, result;
61 |         xor.q x, b;
62 |         mov.q result, x;
63 |     }
64 |     return result;
65 | }
66 | 


--------------------------------------------------------------------------------
/tests/test7.jai:
--------------------------------------------------------------------------------
 1 | #import "Basic";
 2 | #import "Strings";
 3 | 
 4 | #load "data/shakespeare.jai";
 5 | 
 6 | main :: () {
 7 |     t : float64;
 8 | 
 9 |     format_float := *context.print_style.default_format_float;
10 |     format_float.zero_removal = .NO;
11 | 
12 |     total := 0;
13 |     for 0 .. shakespeare.count - 1 {
14 |         total += <<cast(*u8)(shakespeare.data + it);
15 |     }
16 | 
17 |     compare :: case_sensitive;
18 | 
19 |     set_simd_mode(.SSE2);
20 |     sse_delta : float64;
21 |     sse_total : u64;
22 |     {
23 |         t = seconds_since_init();
24 |         index, found := last_index(shakespeare, #char "z", compare);
25 |         while found {
26 |             sse_total = xor(sse_total, xx index);
27 |             index, found = last_index(shakespeare, #char "z", index, compare);
28 |         }
29 |         sse_delta = seconds_since_init() - t;
30 |     }
31 | 
32 |     set_simd_mode(.AVX2);
33 |     avx2_delta : float64;
34 |     avx2_total : u64;
35 |     {
36 |         t = seconds_since_init();
37 |         index, found := last_index(shakespeare, #char "z", compare);
38 |         while found {
39 |             avx2_total = xor(avx2_total, xx index);
40 |             index, found = last_index(shakespeare, #char "z", index, compare);
41 |         }
42 |         avx2_delta = seconds_since_init() - t;
43 |     }
44 | 
45 | 
46 |     set_simd_mode(.OFF);
47 |     scalar_delta : float64;
48 |     scalar_total : u64;
49 |     {
50 |         t = seconds_since_init();
51 |         index, found := last_index(shakespeare, #char "z", compare);
52 |         while found {
53 |             scalar_total = xor(scalar_total, xx index);
54 |             index, found = last_index(shakespeare, #char "z", index, compare);
55 |         }
56 |         scalar_delta = seconds_since_init() - t;
57 |     }
58 | 
59 |     print("scalar: (%) %\n", scalar_total, scalar_delta);
60 |     print("sse:    (%) %\n", sse_total, sse_delta);
61 |     print("avx2:   (%) %\n", avx2_total, avx2_delta);
62 |     print("\n");
63 |     print("SSE2  > Scalar: %x\n", scalar_delta / sse_delta);
64 |     print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta);
65 |     print("AVX2 > SSE2:    %x\n", sse_delta / avx2_delta);
66 | }
67 | 
68 | 
69 | xor :: (a: u64, b: u64) -> u64 {
70 |     result := a;
71 |     #asm {
72 |         x : gpr;
73 |         mov.q x, result;
74 |         xor.q x, b;
75 |         mov.q result, x;
76 |     }
77 |     return result;
78 | }
79 | 


--------------------------------------------------------------------------------
/Strings/module.jai:
--------------------------------------------------------------------------------
 1 | #module_parameters (
 2 |     // Default compare function used to check for character equality.
 3 |     CHARACTER_COMPARE : enum { CASE_SENSITIVE; IGNORE_CASE; } = .CASE_SENSITIVE,
 4 | 
 5 |     // Index algorithm used to find an instance of one string within another.
 6 |     // Can be overridden with `set_index_algorithm`.
 7 |     INDEX_ALGORITHM : enum {
 8 |         SUPER_SIMPLE;
 9 |         SIMPLE; SIMPLE_SSE2; SIMPLE_AVX2; SIMPLE_UNSAFE;
10 |         BOYER_MOORE; BOYER_MOORE_SSE2; BOYER_MOORE_AVX2;
11 |         KNUTH_MORRIS_PRATT;
12 |     } = .BOYER_MOORE
13 | );
14 | 
15 | 
16 | #if CHARACTER_COMPARE == .CASE_SENSITIVE
17 |     default_character_compare :: case_sensitive;
18 | else #if CHARACTER_COMPARE == .IGNORE_CASE
19 |     default_character_compare :: ignore_case;
20 | 
21 | 
22 | #if INDEX_ALGORITHM == .SUPER_SIMPLE {
23 |     default_first_index :: super_simple_first_index;
24 |     default_last_index  :: super_simple_last_index;
25 | }
26 | else #if INDEX_ALGORITHM == .SIMPLE {
27 |     default_first_index :: simple_first_index;
28 |     default_last_index  :: simple_last_index;
29 | }
30 | else #if INDEX_ALGORITHM == .SIMPLE_SSE2 {
31 |     default_first_index :: simple_sse2_first_index;
32 |     default_last_index  :: simple_sse2_last_index;
33 | }
34 | else #if INDEX_ALGORITHM == .SIMPLE_AVX2 {
35 |     default_first_index :: simple_avx2_first_index;
36 |     default_last_index  :: simple_avx2_last_index;
37 | }
38 | else #if INDEX_ALGORITHM == .SIMPLE_UNSAFE {
39 |     default_first_index :: unsafe_simple_first_index;
40 |     default_last_index  :: unsafe_simple_last_index;
41 | }
42 | else #if INDEX_ALGORITHM == .BOYER_MOORE {
43 |     default_first_index :: boyer_moore_first_index;
44 |     default_last_index  :: boyer_moore_last_index;
45 | }
46 | else #if INDEX_ALGORITHM == .BOYER_MOORE_SSE2 {
47 |     default_first_index :: boyer_moore_sse2_first_index;
48 |     default_last_index  :: boyer_moore_sse2_last_index;
49 | }
50 | else #if INDEX_ALGORITHM == .BOYER_MOORE_AVX2 {
51 |     default_first_index :: boyer_moore_avx2_first_index;
52 |     default_last_index  :: boyer_moore_avx2_last_index;
53 | }
54 | else #if INDEX_ALGORITHM == .KNUTH_MORRIS_PRATT {
55 |     default_first_index :: knuth_morris_pratt_first_index;
56 |     default_last_index  :: knuth_morris_pratt_last_index;
57 | }
58 | 
59 | #load "Strings.jai";
60 | #load "indexing.jai";
61 | #load "splitting.jai";
62 | #load "boyer_moore.jai";
63 | #load "knuth_morris_pratt.jai";
64 | 


--------------------------------------------------------------------------------
/tests/test10.jai:
--------------------------------------------------------------------------------
 1 | #import "Basic";
 2 | #import "Strings";
 3 | jai_string :: #import "String";
 4 | 
 5 | #load "data/shakespeare.jai";
 6 | //#load "data/dna.jai";
 7 | 
 8 | main :: () {
 9 |     data := shakespeare;
10 |     times :: 10;
11 | 
12 |     t : float64;
13 | 
14 |     format_float := *context.print_style.default_format_float;
15 |     format_float.zero_removal = .NO;
16 |     format_float.width = 6;
17 |     format_float.trailing_width = 3;
18 | 
19 |     valid := copy_string(data);
20 |     jai_string.to_upper_in_place(valid);
21 | 
22 |     warm :: (str: string) {
23 |         total := 0;
24 |         for 0 .. str.count - 1 {
25 |             total += <<cast(*u8)(str.data + it);
26 |         }
27 |     }
28 | 
29 |     {
30 |         set_simd_mode(.SSE2);
31 |         sse2_delta : float64;
32 |         sse2_valid := true;
33 |         {
34 |             for 1..times {
35 |                 s := copy_string(data);
36 |                 warm(s);
37 |                 t = seconds_since_init();
38 |                 to_upper(*s);
39 |                 sse2_delta += seconds_since_init() - t;
40 |                 if s != valid  sse2_valid = false;
41 |             }
42 |         }
43 | 
44 | 
45 |         set_simd_mode(.AVX2);
46 |         avx2_delta : float64;
47 |         avx2_valid := true;
48 |         {
49 |             for 1..times {
50 |                 s := copy_string(data);
51 |                 warm(s);
52 |                 t = seconds_since_init();
53 |                 to_upper(*s);
54 |                 avx2_delta += seconds_since_init() - t;
55 |                 if s != valid  avx2_valid = false;
56 |             }
57 |         }
58 | 
59 | 
60 |         set_simd_mode(.OFF);
61 |         scalar_delta : float64;
62 |         scalar_valid := true;
63 |         {
64 |             for 1..times {
65 |                 s := copy_string(data);
66 |                 warm(s);
67 |                 t = seconds_since_init();
68 |                 to_upper(*s);
69 |                 scalar_delta += seconds_since_init() - t;
70 |                 if s != valid  scalar_valid = false;
71 |             }
72 |         }
73 | 
74 |         print("scalar: (%) %\n", scalar_valid, scalar_delta);
75 |         print("sse2:   (%) %\n", sse2_valid, sse2_delta);
76 |         print("avx2:   (%) %\n", avx2_valid, avx2_delta);
77 |         print("\n");
78 |         print("SSE2 > Scalar: %x\n", scalar_delta / sse2_delta);
79 |         print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta);
80 |         print("AVX2 > SSE2:   %x\n", sse2_delta / avx2_delta);
81 |     }
82 | }
83 | 


--------------------------------------------------------------------------------
/tests/test5.jai:
--------------------------------------------------------------------------------
 1 | #import "Basic";
 2 | 
 3 | #load "data/shakespeare.jai";
 4 | 
 5 | main :: () {
 6 |     t : float64;
 7 | 
 8 |     cased := copy_string(shakespeare);
 9 |     uncased := copy_string(shakespeare);
10 |     to_lower(uncased);
11 | 
12 | 
13 |     format_float := *context.print_style.default_format_float;
14 |     format_float.zero_removal = .NO;
15 | 
16 |     total := 0;
17 |     for 0 .. cased.count - 1 {
18 |         if <<cast(*u8)(cased.data + it) == <<cast(*u8)(uncased.data + it)
19 |             total += 1;
20 |     }
21 | 
22 |     asm_delta : float64;
23 |     {
24 |         t = seconds_since_init();
25 |         total = 0;
26 |         for 0 .. cased.count - 1{
27 |             if asm(<<cast(*u8)(cased.data + it), <<cast(*u8)(uncased.data + it))
28 |                 total += 1;
29 |         }
30 |         asm_delta = seconds_since_init() - t;
31 |         print("asm: (%) %\n", total, asm_delta);
32 |     }
33 | 
34 |     jai_delta : float64;
35 |     {
36 |         t = seconds_since_init();
37 |         total = 0;
38 |         for 0 .. cased.count - 1{
39 |             if jai(<<cast(*u8)(cased.data + it), <<cast(*u8)(uncased.data + it))
40 |                 total += 1;
41 |         }
42 |         jai_delta = seconds_since_init() - t;
43 |         print("jai: (%) %\n", total, jai_delta);
44 |     }
45 | 
46 |     print("\nSpeedup: %x\n", jai_delta / asm_delta);
47 | }
48 | 
49 | 
50 | asm :: (_a: u8, _b: u8) -> bool {
51 |     ptr_a := *_a;
52 |     ptr_b := *_b;
53 |     result : u8 = 0; // true
54 |     ptr_result := *result;
55 |     #asm {
56 |         a : gpr;
57 |         b : gpr;
58 |         low : gpr;
59 |         high : gpr;
60 |         mov.b   a, [ptr_a];
61 |         mov.b   b, [ptr_b];
62 |         or.b    a, 32;
63 |         or.b    b, 32;
64 |         xor.b   a, b;
65 |         cmp.b   b, 97;  // a
66 |         setl    low;
67 |         cmp.b   b, 122; // z
68 |         setg    high;
69 |         or.b    a, low;
70 |         or.b    a, high;
71 |         mov.b   [ptr_result], a;
72 |     }
73 |     return result == 0;
74 | }
75 | 
76 | 
77 | jai :: (a: u8, b: u8) -> bool {
78 |     lower_a : int = ifx a >= #char "A" && a <= #char "Z" then a + #char "a" - #char "A" else a;
79 |     lower_b : int = ifx b >= #char "A" && b <= #char "Z" then b + #char "a" - #char "A" else b;
80 |     return lower_a == lower_b;
81 | }
82 | 
83 | to_lower :: (str: string) {
84 |     // Lowercases the ASCII letters A-Z in place; every other byte is left untouched.
85 |     for index: 0 .. str.count - 1 {
86 |         byte := str[index];
87 |         if byte < #char "A" || byte > #char "Z"  continue;
88 |         str[index] = cast(u8) (cast(int) byte + #char "a" - #char "A");
89 |     }
90 | }
91 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Change Log
 2 | 
 3 | ## [2.0.2] - 2025-08-23
 4 | * Fixed first_index bug where it ignored compare_character.
 5 | 
 6 | ## [2.0.1] - 2025-08-22
 7 | `into_array` -> `to_array`
 8 | 
 9 | ## [2.0.0] - 2024-11-28
10 | ### Module structure
11 | Now simply a single module called `Strings`; no longer split into separate non-allocating/allocating modules (we have `,, allocator)` now)
12 | 
13 | ### Mutating in place vs allocating
14 | Any procedure which mutates a string in-place will take a pointer to string, rather than merely a string.
15 | ```
16 | bar := to_upper(foo);  // returns a newly allocated string.
17 | to_upper(*foo);        // converts foo to uppercase in-place.
18 | ```
19 | 
20 | ### Splitters
21 | Splitters are now all simply iterators (i.e. for-expansions).  You can use `to_array` to generate an expandable array from a splitter, or `into_array` to expand a splitter into an existing array.
22 | 
23 | ### Misc
24 | * Fixed threading
25 | * Fixed Scratch allocator
26 | 
27 | ### Renamed
28 | `unsafe_slice` -> `raw_slice`
29 | `unsafe_substring` -> `raw_substring`
30 | `trim_into` -> `trim_to`
31 | `trim_start_past` -> `trim_start_through`
32 | `trim_end_from` -> `trim_end_through`
33 | `trim_end_after` -> `trim_end_to`
34 | `advance_past` -> `advance_through`
35 | `pad_center` -> `pad`
36 | 
37 | ## [1.0.9] - 2022-12-23
38 | * Removed copy_string in Shared (Basic copy_string is now identical)
39 | * Updated for latest compiler version
40 | 
41 | ## [1.0.8] - 2021-12-24
42 | * Renamed `char_split` to `split`
43 | * Updated references to `String_Builder.occupied` to `String_Builder.count`
44 | * Updated references to `String_Builder.data.data` to `get_buffer_data(String_Builder)`
45 | * Added `modules.lst`
46 | 
47 | ## [1.0.7] - 2021-12-09
48 | * Updated all built-in index algorithms so that they use a character index when the needle has length 1.
49 | * Renamed `copy` to `copy_string`
50 | * Added `char_split`
51 | 
52 | ## [1.0.6] - 2021-11-27
53 | * Updated to work with new `Allocator` style.
54 | * Added some thread-unsafe indexing procs.
55 | 
56 | ## [1.0.5] - 2021-10-24
57 | * Fixed array-write version of `split` when used on empty strings.
58 | * Fixed boyer-moore first index returning false for equal haystack/needle.
59 | 
60 | ## [1.0.4] - 2021-10-12
61 | * Fixed indexing algorithms erroneously allocating with context.allocator
62 | * Fixed `null_terminate` in `join`.
63 | 
64 | ## [1.0.3] - 2021-10-12
65 | * Renamed `trim_to`, `trim_past` -> `trim_into`, `trim_through`.
66 | * Fixed `trim_through` behaviour when only one needle present.
67 | * Updated to work with compiler v86.
68 | 
69 | ## [1.0.2] - 2021-09-22
70 | * Fixed SIMD `last_index` procs.
71 | 
72 | ## [1.0.1] - 2021-09-21
73 | * `Strings_Alloc.add_convenience_functions` now defaults to `true` (as module should be namespaced anyway).
74 | * Fixed `first_index`, `last_index` not handling empty haystacks correctly.
75 | * Now checks for valid `max_results` in splitters.
76 | 
77 | ## [1.0.0] - 2021-09-19
78 | * First release.
79 | 


--------------------------------------------------------------------------------
/Scratch/Scratch.jai:
--------------------------------------------------------------------------------
 1 | scratch :: Allocator.{scratch_allocator_proc, null};
 2 | 
 3 | scratch_allocator_proc :: (mode: Allocator_Mode, requested_size: s64, old_size: s64, old_memory_pointer: *void, allocator_data: *void) -> *void {
 4 |     // Allocator procedure behind `scratch`. Every ALLOCATE steps to the next of `buffer_count` buffers, grows it if it is too small, and returns its start; RESIZE is rejected and FREE does nothing.
 5 |     scratch_allocator_data := cast(*Scratch_Allocator_Data) allocator_data;
 6 |     if !scratch_allocator_data {
 7 |         if !context.scratch_allocator_data
 8 |             context.scratch_allocator_data = New(Scratch_Allocator_Data,, context.default_allocator);  // New initializes the state; a raw ALLOCATE would leave buffers/sizes holding garbage.
 9 |         scratch_allocator_data = context.scratch_allocator_data;
10 |     }
11 |     using scratch_allocator_data;
12 | 
13 |     if #complete mode == {
14 |         case .RESIZE;
15 |             assert(false, "Cannot resize scratch buffer: just allocate instead.");
16 |             return null;
17 | 
18 |         case .ALLOCATE;
19 |             current_buffer_index += 1;
20 |             current_buffer_index %= buffer_count;
21 | 
22 |             if sizes[current_buffer_index] < requested_size {
23 |                 if buffers[current_buffer_index]  free(buffers[current_buffer_index],, context.default_allocator);  // context.allocator may be this scratch allocator, whose FREE does nothing.
24 |                 sizes[current_buffer_index] = cast(s64)(requested_size * buffer_size_factor + 1);
25 |                 buffers[current_buffer_index] = context.default_allocator.proc(.ALLOCATE, sizes[current_buffer_index], 0, null, null);
26 |             }
27 | 
28 |             return buffers[current_buffer_index];
29 | 
30 |         case .FREE;         #through;
31 |         case .STARTUP;      #through;
32 |         case .SHUTDOWN;     #through;
33 |         case .THREAD_START; #through;
34 |         case .THREAD_STOP;
35 |             return null;
36 | 
37 |         case .CREATE_HEAP; #through;
38 |         case .DESTROY_HEAP;
39 |             context.handling_assertion_failure = true;
40 |             context.assertion_failed(#location(), "This allocator does not support multiple heaps.\n");
41 |             context.handling_assertion_failure = false;
42 |             return null;
43 | 
44 |         case .IS_THIS_YOURS;
45 |             context.handling_assertion_failure = true;
46 |             context.assertion_failed(#location(), "This allocator does not support IS_THIS_YOURS.\n");
47 |             context.handling_assertion_failure = false;
48 |             return null;
49 | 
50 |         case .CAPS;
51 |             if old_memory_pointer { <<cast(*string) old_memory_pointer = VERSION_STRING; }
52 |             return cast(*void) (Allocator_Caps.HINT_I_AM_A_FAST_BUMP_ALLOCATOR|.MULTIPLE_THREADS);
53 |     }
54 | }
55 | 
56 | free_scratch_allocator_buffers :: () {
57 |     // If you are done using the scratch allocator and want to release the buffer memory.
58 |     data := context.scratch_allocator_data;
59 |     if !data  return;  // Nothing has been allocated through `scratch` in this context yet.
60 |     for i: 0 .. buffer_count - 1 {
61 |         if data.buffers[i] { free(data.buffers[i],, context.default_allocator); data.buffers[i] = null; }  // Null the slot so the next ALLOCATE does not free it again.
62 |         data.sizes[i] = 0;
63 |     }
64 | }
65 | 
66 | Scratch_Allocator_Data :: struct {
67 |     current_buffer_index := 0;       // Slot used by the most recent ALLOCATE; advanced (modulo buffer_count) before each one.
68 |     buffers: [buffer_count] *void;   // Backing memory for each slot, obtained from context.default_allocator.
69 |     sizes:   [buffer_count] int;     // Size in bytes that was requested for the matching entry in `buffers`.
70 | }
71 | 
72 | 
73 | #scope_file
74 | 
75 | #add_context scratch_allocator_data: *Scratch_Allocator_Data;
76 | 
77 | #import "Basic";
78 | 


--------------------------------------------------------------------------------
/tests/test6.jai:
--------------------------------------------------------------------------------
  1 | #import "Basic";
  2 | #import "Strings";
  3 | jai_string :: #import "String";
  4 | 
  5 | #load "data/shakespeare.jai";
  6 | 
  7 | main :: () {  // Benchmark: locate every "z" in `shakespeare` with four search paths, then print checksums, timings and ratios.
  8 |     t : float64;
  9 | 
 10 |     format_float := *context.print_style.default_format_float;
 11 |     format_float.zero_removal = .NO;
 12 |     // Read every byte of the input once before any timing starts; `total` is not used afterwards (presumably a cache warm-up).
 13 |     total := 0;
 14 |     for 0 .. shakespeare.count - 1 {
 15 |         total += <<cast(*u8)(shakespeare.data + it);
 16 |     }
 17 |     // Baseline: jai_string.find_index_from_left, re-run on the remaining tail of the text after each hit.
 18 |     jai_delta : float64;
 19 |     jai_total : u64;
 20 |     {
 21 |         t = seconds_since_init();
 22 |         s := shakespeare;
 23 |         offset := 0;
 24 |         index := jai_string.find_index_from_left(shakespeare, #char "z");
 25 |         while index >= 0 {
 26 |             index += offset;  // the hit is relative to the tail `s`; make it relative to the whole text
 27 |             jai_total = xor(jai_total, xx index);
 28 |             offset = index + 1;
 29 |             s.data = shakespeare.data + offset;
 30 |             s.count = shakespeare.count - offset;
 31 |             index = jai_string.find_index_from_left(s, #char "z");
 32 |         }
 33 |         jai_delta = seconds_since_init() - t;
 34 |     }
 35 | 
 36 |     compare :: case_sensitive;
 37 |     // The next three blocks run the same first_index loop; only the SIMD mode selected beforehand differs.
 38 |     set_simd_mode(.SSE2);
 39 |     sse_delta : float64;
 40 |     sse_total : u64;
 41 |     {
 42 |         t = seconds_since_init();
 43 |         index, found := first_index(shakespeare, #char "z", compare);
 44 |         while found {
 45 |             sse_total = xor(sse_total, xx index);
 46 |             index, found = first_index(shakespeare, #char "z", index + 1, compare);
 47 |         }
 48 |         sse_delta = seconds_since_init() - t;
 49 |     }
 50 | 
 51 |     set_simd_mode(.AVX2);
 52 |     avx2_delta : float64;
 53 |     avx2_total : u64;
 54 |     {
 55 |         t = seconds_since_init();
 56 |         index, found := first_index(shakespeare, #char "z", compare);
 57 |         while found {
 58 |             avx2_total = xor(avx2_total, xx index);
 59 |             index, found = first_index(shakespeare, #char "z", index + 1, compare);
 60 |         }
 61 |         avx2_delta = seconds_since_init() - t;
 62 |     }
 63 | 
 64 | 
 65 |     set_simd_mode(.OFF);
 66 |     scalar_delta : float64;
 67 |     scalar_total : u64;
 68 |     {
 69 |         t = seconds_since_init();
 70 |         index, found := first_index(shakespeare, #char "z", compare);
 71 |         while found {
 72 |             scalar_total = xor(scalar_total, xx index);
 73 |             index, found = first_index(shakespeare, #char "z", index + 1, compare);
 74 |         }
 75 |         scalar_delta = seconds_since_init() - t;
 76 |     }
 77 |     // Each *_total is the XOR of all match indices found by that method, so the values can be compared across methods.
 78 |     print("scalar: (%) %\n", scalar_total, scalar_delta);
 79 |     print("jai:    (%) %\n", jai_total, jai_delta);
 80 |     print("sse:    (%) %\n", sse_total, sse_delta);
 81 |     print("avx2:   (%) %\n", avx2_total, avx2_delta);
 82 |     print("\n");
 83 |     print("SSE2  > Scalar: %x\n", scalar_delta / sse_delta);
 84 |     print("SSE2  > Jai:    %x\n", jai_delta / sse_delta);
 85 |     print("\n");
 86 |     print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta);
 87 |     print("AVX2 > Jai:    %x\n",  jai_delta / avx2_delta);
 88 |     print("AVX2 > SSE2:    %x\n", sse_delta / avx2_delta);
 89 | }
 90 | 
 91 | 
 92 | xor :: (a: u64, b: u64) -> u64 {  // Bitwise XOR of `a` and `b`, computed inside an inline-assembly block.
 93 |     value := a;
 94 |     #asm {
 95 |         tmp : gpr;
 96 |         mov.q tmp, value;   // tmp = a
 97 |         xor.q tmp, b;       // tmp ^= b
 98 |         mov.q value, tmp;   // hand the result back to the Jai variable
 99 |     }
100 |     return value;
101 | }
102 | 


--------------------------------------------------------------------------------
/tests/test11.jai:
--------------------------------------------------------------------------------
  1 | #import "Basic";
  2 | #import "Strings";
  3 | #import "Scratch";
  4 | jai_string :: #import "String";
  5 | 
  6 | #load "data/shakespeare.jai";
  7 | //#load "data/dna.jai";
  8 | 
  9 | main :: () {  // Benchmark: time reverse() on a copy of the input under each SIMD mode and check the first result of each mode.
 10 |     data := shakespeare;
 11 |     //data := "abcdefghijklm0123456789nopqrstuvwxyzABCDEFGHIJKLM0123456789NOPQRSTUVWXYZ";
 12 |     times :: 10;//1000;
 13 | 
 14 |     print_output :: false;
 15 | 
 16 |     t : float64;
 17 | 
 18 |     format_float := *context.print_style.default_format_float;
 19 |     format_float.zero_removal = .NO;
 20 |     format_float.width = 6;
 21 |     format_float.trailing_width = 3;
 22 |     // Build the expected result by reversing a copy of the input with a plain two-index swap loop.
 23 |     valid := copy_string(data);
 24 |     low_index := 0;
 25 |     high_index := valid.count - 1;
 26 |     while low_index < high_index {
 27 |         c := valid[low_index];
 28 |         valid[low_index] = valid[high_index];
 29 |         valid[high_index] = c;
 30 |         low_index += 1;
 31 |         high_index -= 1;
 32 |     }
 33 |     #if print_output  print("valid:  %\n", valid);
 34 | 
 35 |     // Reads every byte of `str`; it is called on each fresh copy just before the timed reverse.
 36 |     warm :: (str: string) {
 37 |         total := 0;
 38 |         for 0 .. str.count - 1 {
 39 |             total += <<cast(*u8)(str.data + it);
 40 |         }
 41 |     }
 42 |     // For each SIMD mode: copy, warm, time reverse() `times` times; only iteration 1 is compared against `valid`.
 43 |     {
 44 |         set_simd_mode(.SSE2);
 45 |         sse2_delta : float64;
 46 |         sse2_valid := true;
 47 |         {
 48 |             for 1..times {
 49 |                 s := copy_string(data,, scratch);
 50 |                 warm(s);
 51 |                 t = seconds_since_init();
 52 |                 reverse(*s);
 53 |                 sse2_delta += seconds_since_init() - t;
 54 |                 #if print_output  print("sse2:   %\n", s);
 55 |                 if it == 1 && s != valid  sse2_valid = false;
 56 |                 reset_temporary_storage();
 57 |             }
 58 |         }
 59 | 
 60 | 
 61 |         set_simd_mode(.AVX2);
 62 |         avx2_delta : float64;
 63 |         avx2_valid := true;
 64 |         {
 65 |             for 1..times {
 66 |                 s := copy_string(data,, scratch);
 67 |                 warm(s);
 68 |                 t = seconds_since_init();
 69 |                 reverse(*s);
 70 |                 avx2_delta += seconds_since_init() - t;
 71 |                 #if print_output  print("avx2:   %\n", s);
 72 |                 if it == 1 &&  s != valid  avx2_valid = false;
 73 |                 reset_temporary_storage();
 74 |             }
 75 |         }
 76 | 
 77 | 
 78 |         set_simd_mode(.OFF);
 79 |         scalar_delta : float64;
 80 |         scalar_valid := true;
 81 |         {
 82 |             for 1..times {
 83 |                 s := copy_string(data,, scratch);
 84 |                 warm(s);
 85 |                 t = seconds_since_init();
 86 |                 reverse(*s);
 87 |                 scalar_delta += seconds_since_init() - t;
 88 |                 #if print_output  print("scalar: %\n", s);
 89 |                 if it == 1 && s != valid  scalar_valid = false;
 90 |                 reset_temporary_storage();
 91 |             }
 92 |         }
 93 |         // Report the validity flag and accumulated time per mode, then the speed ratios between modes.
 94 |         print("scalar: (%) %\n", scalar_valid, scalar_delta);
 95 |         print("sse2:   (%) %\n", sse2_valid, sse2_delta);
 96 |         print("avx2:   (%) %\n", avx2_valid, avx2_delta);
 97 |         print("\n");
 98 |         print("SSE2 > Scalar: %x\n", scalar_delta / sse2_delta);
 99 |         print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta);
100 |         print("AVX2 > SSE2:   %x\n", sse2_delta / avx2_delta);
101 |     }
102 | }
103 | 


--------------------------------------------------------------------------------
/Scratch/module.jai:
--------------------------------------------------------------------------------
  1 | VERSION_STRING :: "Scratch Allocator v1.0";
  2 | 
  3 | #module_parameters ()( // program parameters
  4 |     // Number of buffers the allocator will cycle through.
  5 |     // Usually you only need 2; one to read from and one to write to.
  6 |     buffer_count := 2,
  7 | 
  8 |     // Amount of space allocated when a buffer needs to increase in size
  9 |     // to accommodate an allocation request.  The amount requested times this
 10 |     // number will be allocated.  Overallocating one request means you do
 11 |     // not need to allocate again on a slightly-bigger following request.
 12 |     buffer_size_factor := 1.25
 13 | );
 14 | 
 15 | #assert buffer_count >= 2;          // compile-time check: at least two buffers
 16 | #assert buffer_size_factor >= 1.0;  // compile-time check: a factor below 1 would allocate less than requested
 17 | 
 18 | /** # Scratch Allocator
 19 | You will often want to perform a series of string operations, one after
 20 | another.
 21 | For example, replacing HTML character markers with the actual characters:
 22 | 
 23 | *Example 1*
 24 | ```jai
 25 |     text = replace(text, "&lt;", "<");
 26 |     text = replace(text, "&gt;", ">");
 27 |     text = replace(text, "&amp;", "&");
 28 |     text = replace(text, "&nbsp;", Chars.NBSP);
 29 | ```
 30 | 
 31 | The above code leaks because each call to replace allocates another copy of
 32 | the string.
 33 | To fix it with the default allocator you would need to free after every call:
 34 | 
 35 | *Example 2*
 36 | ```jai
 37 |     text1 := replace(text0, "&lt;", "<");
 38 |     free(text0);
 39 |     text2 := replace(text1, "&gt;", ">");
 40 |     free(text1);
 41 |     text3 := replace(text2, "&amp;", "&");
 42 |     free(text2);
 43 |     text4 := replace(text3, "&nbsp;", Chars.NBSP);
 44 | ```
 45 | 
 46 | This is obviously cumbersome and error-prone.  Alternatively you could use
 47 | temporary storage:
 48 | 
 49 | *Example 3*
 50 | ```jai
 51 |     text = replace(text, "&lt;", "<",, temp);
 52 |     text = replace(text, "&gt;", ">",, temp);
 53 |     text = replace(text, "&amp;", "&",, temp);
 54 |     text = replace(text, "&nbsp;", Chars.NBSP);
 55 |     reset_temporary_storage();
 56 | ```
 57 | 
 58 | This is much nicer, but it still allocates more data on every call: if you are doing
 59 | a very long sequence of operations then you will allocate a very large amount of
 60 | memory.
 61 | 
 62 | However, notice in `Example 2` that we did not need to make all those variables: we
 63 | free `text0` before creating `text2`, so we could have written the code like this:
 64 | 
 65 | *Example 4*
 66 | ```jai
 67 |     text1 := replace(text0, "&lt;", "<");
 68 |     free(text0);
 69 |     text0 = replace(text1, "&gt;", ">");
 70 |     free(text1);
 71 |     text1 = replace(text0, "&amp;", "&");
 72 |     free(text0);
 73 |     text0 = replace(text1, "&nbsp;", Chars.NBSP);
 74 |     free(text1);
 75 | ```
 76 | 
 77 | This is because we are only ever dealing with two pieces of memory: the area we are
 78 | reading from and the area we are writing to.
 79 | 
 80 | The scratch allocator applies this reasoning to memory allocation: it will only ever allocate
 81 | two* pieces of memory: the buffer we are reading from and the buffer we are writing to.
 82 | 
 83 | Thus our code becomes:
 84 | 
 85 | *Example 5*
 86 | ```jai
 87 |     text = replace(text, "&lt;", "<",, scratch);       // writes to scratch buffer 0
 88 |     text = replace(text, "&gt;", ">",, scratch);       // writes to scratch buffer 1
 89 |     text = replace(text, "&amp;", "&",, scratch);      // writes to scratch buffer 0
 90 |     text = replace(text, "&nbsp;", Chars.NBSP);
 91 | ```
 92 | 
 93 | We never want to use the scratch allocator to hold data we wish to utilize elsewhere (as
 94 | it will be overwritten as soon as the scratch allocator is used anywhere else) so the final
 95 | call to replace uses the heap allocator instead.
 96 | 
 97 | * *Though see the `buffer_count` module parameter if you require more than two*
 98 | */
 99 | 
100 | 
101 | #load "Scratch.jai";
102 | 


--------------------------------------------------------------------------------
/Strings/knuth_morris_pratt.jai:
--------------------------------------------------------------------------------
 1 | // @TODO last_index variants reverse indexing so they can use the mechanics of first_index variants
 2 | //       (using `get` etc.) - this makes logic simple but is slower than if it was coded directly,
 3 | //       so... code it directly!
 4 | 
 5 | // Knuth-Morris-Pratt search: scans `haystack` forward from byte offset `start_index` for `needle`, comparing with `character_compare`.
 6 | // Returns the index where the match starts and true, or -1 and false.  The needle table is cached in context.knuth_morris_pratt_thread_data.
 7 | knuth_morris_pratt_first_index :: (haystack: string, needle: string, start_index: int, character_compare: Character_Compare) -> index: int, found: bool {
 8 |     #insert,scope() first_index_header;
 9 |     using context.knuth_morris_pratt_thread_data;
10 |     if !context.knuth_morris_pratt_thread_data  context.knuth_morris_pratt_thread_data = New(Knuth_Morris_Pratt_Thread_Data,, context.default_allocator);
11 |     // The cache outlives this call, so it is created, copied and freed with the default allocator, never the caller's (which may be scratch or temp).
12 |     if needle != last_used_needle || character_compare != last_used_compare || last_search_was_reversed {
13 |         last_search_was_reversed = false;
14 |         last_used_compare = character_compare;
15 |         if last_used_needle  free(last_used_needle,, context.default_allocator);
16 |         last_used_needle = copy_string(needle,, context.default_allocator);
17 |         make_needle_table(needle, character_compare, false);
18 |     }
19 |     // `i` walks the haystack by address; `j` is the needle position, sent back through needle_table on a mismatch.
20 |     j := 0;
21 |     needle_end := needle.count - 1;
22 |     end := cast(u64)haystack.data + cast(u64)haystack.count - 1;
23 |     for i: cast(u64)haystack.data + cast(u64)start_index .. end {
24 |         while j >= 0 && !character_compare.compare(<<cast(*u8)i, needle[j])
25 |             j = needle_table[j];
26 |         if j == needle_end  return cast(s64)((i - cast(u64)j) - cast(u64)haystack.data), true;  // address of match start minus base = index
27 |         j += 1;
28 |     }
29 | 
30 |     return -1, false;
31 | }
32 | // Knuth-Morris-Pratt search run over the reversed view of `haystack` and `needle` (via get(..., true)), comparing with `character_compare`.
33 | // Returns the forward index where the match starts and true, or -1 and false.  The needle table is cached in context.knuth_morris_pratt_thread_data.
34 | knuth_morris_pratt_last_index :: (haystack: string, needle: string, start_index: int, character_compare: Character_Compare) -> index: int, found: bool {
35 |     #insert,scope() last_index_header;
36 |     using context.knuth_morris_pratt_thread_data;
37 |     if !context.knuth_morris_pratt_thread_data  context.knuth_morris_pratt_thread_data = New(Knuth_Morris_Pratt_Thread_Data,, context.default_allocator);
38 | 
39 |     start_index = haystack.count - start_index;  // the loop below counts positions from the end of the haystack
40 |     // The cache outlives this call, so it is created, copied and freed with the default allocator, never the caller's (which may be scratch or temp).
41 |     if needle != last_used_needle || !last_search_was_reversed || character_compare != last_used_compare {
42 |         last_search_was_reversed = true;
43 |         last_used_compare = character_compare;
44 |         if last_used_needle  free(last_used_needle,, context.default_allocator);
45 |         last_used_needle = copy_string(needle,, context.default_allocator);
46 |         make_needle_table(needle, character_compare, true);
47 |     }
48 | 
49 |     j := 0;
50 |     needle_end := needle.count - 1;
51 |     for i: start_index .. haystack.count - 1 {
52 |         while j >= 0 && !character_compare.compare(get(haystack, i, true), get(needle, j, true))
53 |             j = needle_table[j];
54 |         if j == needle_end  return haystack.count - needle.count - (i - j), true;  // (i - j) is the reversed match start; map it back to a forward index
55 |         j += 1;
56 |     }
57 | 
58 |     return -1, false;
59 | }
60 | 
61 | 
62 | #scope_file
63 | 
64 | 
65 | #add_context knuth_morris_pratt_thread_data: *Knuth_Morris_Pratt_Thread_Data;  // created on first use by the search procedures in this file
66 | 
67 | Knuth_Morris_Pratt_Thread_Data :: struct {  // state kept from the most recent search so the table is rebuilt only when needle, compare or direction changes
68 |     last_used_needle : string;  // copy of the needle the table was built for
69 |     last_used_compare : Character_Compare;  // comparator the table was built with
70 |     last_search_was_reversed : bool;  // true when the table was built by knuth_morris_pratt_last_index
71 |     needle_table : [..] int;  // fallback table filled in by make_needle_table
72 | };
73 | 
74 | 
75 | get :: inline (str: string, index: int, $reversed: bool) -> u8 {  // Byte at `index`, counted from the end of `str` when `reversed` is true.
76 |     #if !reversed  return str[index];
77 |     else           return str[(str.count - index) - 1];
78 | }
79 | 
80 | // Fills the context's needle_table with the Knuth-Morris-Pratt fallback table for `needle` (entry 0 is -1).  With `reversed` the needle is read back to front.
81 | make_needle_table :: (needle: string, character_compare: Character_Compare, $reversed: bool) {
82 |     using context.knuth_morris_pratt_thread_data;
83 | 
84 |     push_allocator(context.default_allocator);  // the table is kept in the context between calls, so grow it with the default allocator
85 |     // Make sure the table can hold needle.count + 1 entries.
86 |     if needle_table.count <= needle.count
87 |         array_resize(*needle_table, needle.count + 1, false);
88 | 
89 |     needle_table.data[0] = -1;
90 |     // Entry i + 1 starts as entry i plus one, then is walked back through earlier entries until the characters compare equal or it reaches 0.
91 |     for i: 0 .. needle.count - 1 {
92 |         j := i + 1;
93 |         needle_table[j] = needle_table[i] + 1;
94 |         while needle_table[j] > 0 && !character_compare.compare(get(needle, i, reversed), get(needle, needle_table[j] - 1, reversed))
95 |             needle_table[j] = needle_table[needle_table[j] - 1] + 1;
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/tests/test9.jai:
--------------------------------------------------------------------------------
  1 | #import "Basic";
  2 | #import "Strings";
  3 | 
  4 | #load "data/shakespeare.jai";
  5 | //#load "data/dna.jai";
  6 | 
  7 | /* Timings as one asm block
  8 | CASE SENSITIVE:
  9 | scalar: (24427000)  0.705
 10 | sse2:   (24427000)  0.044
 11 | avx2:   (24427000)  0.032
 12 | 
 13 | SSE2 > Scalar: 15.983x
 14 | AVX2 > Scalar: 22.271x
 15 | AVX2 > SSE2:    1.393x
 16 | 
 17 | 
 18 | IGNORE CASE:
 19 | scalar: (28859400)  2.564
 20 | sse2:   (28859400)  0.057
 21 | avx2:   (28859400)  0.033
 22 | 
 23 | SSE2 > Scalar: 45.147x
 24 | AVX2 > Scalar: 78.874x
 25 | AVX2 > SSE2:    1.747x
 26 | 
 27 | 
 28 | After 2xASM
 29 | 
 30 | CASE SENSITIVE:
 31 | scalar: (24427000)  0.706
 32 | sse2:   (24427000)  0.059
 33 | avx2:   (24427000)  0.023
 34 | 
 35 | SSE2 > Scalar: 12.044x
 36 | AVX2 > Scalar: 30.381x
 37 | AVX2 > SSE2:    2.522x
 38 | 
 39 | 
 40 | IGNORE CASE:
 41 | scalar: (28859400)  2.565
 42 | sse2:   (28859400)  0.053
 43 | avx2:   (28859400)  0.029
 44 | 
 45 | SSE2 > Scalar: 48.216x
 46 | AVX2 > Scalar: 87.188x
 47 | AVX2 > SSE2:    1.808x
 48 | 
 49 | */
 50 | 
 51 | main :: () {  // Benchmark: time count(haystack, needle) under each SIMD mode, case-sensitive first and then with ignore_case.
 52 |     haystack := shakespeare;
 53 |     needle : u8 = #char "a";
 54 |     times :: 1;
 55 | 
 56 |     t : float64;
 57 | 
 58 |     format_float := *context.print_style.default_format_float;
 59 |     format_float.zero_removal = .NO;
 60 |     format_float.width = 6;
 61 |     format_float.trailing_width = 3;
 62 |     // Read every byte of the haystack once before any timing starts; `total` is not used afterwards (presumably a cache warm-up).
 63 |     total := 0;
 64 |     for 0 .. haystack.count - 1 {
 65 |         total += <<cast(*u8)(haystack.data + it);
 66 |     }
 67 |     // Case-sensitive pass: same count() call under SSE2, AVX2 and OFF; each *_total accumulates the returned counts.
 68 |     {
 69 |         set_simd_mode(.SSE2);
 70 |         sse_delta : float64;
 71 |         sse_total : int;
 72 |         {
 73 |             t = seconds_since_init();
 74 |             for 1..times  sse_total += count(haystack, needle);
 75 |             sse_delta = seconds_since_init() - t;
 76 |         }
 77 | 
 78 | 
 79 |         set_simd_mode(.AVX2);
 80 |         avx2_delta : float64;
 81 |         avx2_total : int;
 82 |         {
 83 |             t = seconds_since_init();
 84 |             for 1..times  avx2_total += count(haystack, needle);
 85 |             avx2_delta = seconds_since_init() - t;
 86 |         }
 87 | 
 88 | 
 89 |         set_simd_mode(.OFF);
 90 |         scalar_delta : float64;
 91 |         scalar_total : int;
 92 |         {
 93 |             t = seconds_since_init();
 94 |             for 1..times  scalar_total += count(haystack, needle);
 95 |             scalar_delta = seconds_since_init() - t;
 96 |         }
 97 | 
 98 |         print("CASE SENSITIVE:\n");
 99 |         print("scalar: (%) %\n", scalar_total, scalar_delta);
100 |         print("sse2:   (%) %\n", sse_total, sse_delta);
101 |         print("avx2:   (%) %\n", avx2_total, avx2_delta);
102 |         print("\n");
103 |         print("SSE2 > Scalar: %x\n", scalar_delta / sse_delta);
104 |         print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta);
105 |         print("AVX2 > SSE2:   %x\n", sse_delta / avx2_delta);
106 |     }
107 |     // Same measurement again, passing ignore_case to count().
108 |     {
109 |         set_simd_mode(.SSE2);
110 |         sse_delta : float64;
111 |         sse_total : int;
112 |         {
113 |             t = seconds_since_init();
114 |             for 1..times  sse_total += count(haystack, needle, ignore_case);
115 |             sse_delta = seconds_since_init() - t;
116 |         }
117 | 
118 | 
119 |         set_simd_mode(.AVX2);
120 |         avx2_delta : float64;
121 |         avx2_total : int;
122 |         {
123 |             t = seconds_since_init();
124 |             for 1..times  avx2_total += count(haystack, needle, ignore_case);
125 |             avx2_delta = seconds_since_init() - t;
126 |         }
127 | 
128 | 
129 |         set_simd_mode(.OFF);
130 |         scalar_delta : float64;
131 |         scalar_total : int;
132 |         {
133 |             t = seconds_since_init();
134 |             for 1..times  scalar_total += count(haystack, needle, ignore_case);
135 |             scalar_delta = seconds_since_init() - t;
136 |         }
137 | 
138 |         print("\n\nIGNORE CASE:\n");
139 |         print("scalar: (%) %\n", scalar_total, scalar_delta);
140 |         print("sse2:   (%) %\n", sse_total, sse_delta);
141 |         print("avx2:   (%) %\n", avx2_total, avx2_delta);
142 |         print("\n");
143 |         print("SSE2 > Scalar: %x\n", scalar_delta / sse_delta);
144 |         print("AVX2 > Scalar: %x\n", scalar_delta / avx2_delta);
145 |         print("AVX2 > SSE2:   %x\n", sse_delta / avx2_delta);
146 |     }
147 | }
148 | 


--------------------------------------------------------------------------------
/tests/test8.jai:
--------------------------------------------------------------------------------
  1 | #import "Basic";
  2 | #import "Sort";
  3 | #import "Strings"(INDEX_ALGORITHM=.SIMPLE);
  4 | jai_string :: #import "String";
  5 | 
  6 | 
  7 | #load "data/shakespeare.jai";
  8 | #load "data/degenerate.jai";
  9 | 
 10 | main :: () {  // Benchmark: time several substring-search methods on the same haystack/needle, verify each by checksum, print a sorted table.
 11 |     haystack :: shakespeare;// degenerate;
 12 |     needle :: " and "; //"ffffffffffffffffffffffffffffffffffffffffn";
 13 |     times :: 10;
 14 |     //needle :: "zealous";
 15 |     // Compile-time switches: each one enables the matching timed block and result row below.
 16 |     jai         :: true;
 17 |     boyer_moore :: true;
 18 |     kmp         :: true;
 19 |     simd_off    :: true;
 20 |     sse         :: true;
 21 |     avx2        :: true;
 22 | 
 23 | 
 24 |     format_float := *context.print_style.default_format_float;
 25 |     format_float.zero_removal = .NO;
 26 |     format_float.trailing_width = 3;
 27 |     // Reference checksum: XOR of every match index found with jai_string.find_index_from_left, multiplied by `times` because each method below sums one XOR total per iteration.
 28 |     checksum : u64;
 29 |     {
 30 |         s := haystack;
 31 |         offset := 0;
 32 |         index := jai_string.find_index_from_left(s, needle);
 33 |         while index >= 0 {
 34 |             index += offset;
 35 |             checksum = xor(checksum, xx index);
 36 |             offset = index + 1;
 37 |             s.data = haystack.data + offset;
 38 |             s.count = haystack.count - offset;
 39 |             index = jai_string.find_index_from_left(s, needle);
 40 |         }
 41 |         checksum *= times;
 42 |     }
 43 | 
 44 |     t : float64;
 45 | 
 46 | 
 47 |     // Per-method accumulators: elapsed seconds and summed XOR totals across all iterations.
 48 |     jai_delta : float64;
 49 |     jai_total : u64;
 50 |     kmp_delta : float64;
 51 |     kmp_total : u64;
 52 |     simd_off_delta : float64;
 53 |     simd_off_total : u64;
 54 |     sse_delta : float64;
 55 |     sse_total : u64;
 56 |     avx2_delta : float64;
 57 |     avx2_total : u64;
 58 |     boyer_moore_delta : float64;
 59 |     boyer_moore_total : u64;
 60 | 
 61 |     // One timed pass per enabled method on every iteration; the algorithm/SIMD selection happens before the timer starts.
 62 |     for 1 .. times {
 63 |         print("%", it % 10);
 64 |         #if jai
 65 |         {{
 66 |             t = seconds_since_init();
 67 |             s := haystack;
 68 |             offset := 0;
 69 |             index := jai_string.find_index_from_left(haystack, needle);
 70 |             total : u64 = 0;
 71 |             while index >= 0 {
 72 |                 index += offset;
 73 |                 total = xor(total, xx index);
 74 |                 offset = index + 1;
 75 |                 s.data = haystack.data + offset;
 76 |                 s.count = haystack.count - offset;
 77 |                 index = jai_string.find_index_from_left(s, needle);
 78 |             }
 79 |             jai_total += total;
 80 |             jai_delta += seconds_since_init() - t;
 81 |         }}
 82 | 
 83 |         #if boyer_moore
 84 |         {{
 85 |             set_index_algorithm(boyer_moore_first_index);
 86 |             t = seconds_since_init();
 87 |             index, found := first_index(haystack, needle);
 88 |             total : u64 = 0;
 89 |             while found {
 90 |                 total = xor(total, xx index);
 91 |                 index, found = first_index(haystack, needle, index + 1);
 92 |             }
 93 |             boyer_moore_total += total;
 94 |             boyer_moore_delta += seconds_since_init() - t;
 95 |         }}
 96 | 
 97 |         #if kmp
 98 |         {{
 99 |             set_index_algorithm(knuth_morris_pratt_first_index);
100 |             t = seconds_since_init();
101 |             index, found := first_index(haystack, needle);
102 |             total : u64 = 0;
103 |             while found {
104 |                 total = xor(total, xx index);
105 |                 index, found = first_index(haystack, needle, index + 1);
106 |             }
107 |             kmp_total += total;
108 |             kmp_delta += seconds_since_init() - t;
109 |         }}
110 | 
111 |         #if simd_off
112 |         {{
113 |             set_index_algorithm(simple_first_index);
114 |             set_simd_mode(.OFF);
115 |             t = seconds_since_init();
116 |             index, found := first_index(haystack, needle);
117 |             total : u64 = 0;
118 |             while found {
119 |                 total = xor(total, xx index);
120 |                 index, found = first_index(haystack, needle, index + 1);
121 |             }
122 |             simd_off_total += total;
123 |             simd_off_delta += seconds_since_init() - t;
124 |         }}
125 | 
126 |         #if sse
127 |         {{
128 |             set_index_algorithm(simple_sse2_first_index);
129 |             set_simd_mode(.SSE2);
130 |             t = seconds_since_init();
131 |             index, found := first_index(haystack, needle);
132 |             total : u64 = 0;
133 |             while found {
134 |                 total = xor(total, xx index);
135 |                 index, found = first_index(haystack, needle, index + 1);
136 |             }
137 |             sse_total += total;
138 |             sse_delta += seconds_since_init() - t;
139 |         }}
140 | 
141 |         #if avx2
142 |         {{
143 |             set_index_algorithm(simple_avx2_first_index);
144 |             set_simd_mode(.AVX2);
145 |             t = seconds_since_init();
146 |             index, found := first_index(haystack, needle);
147 |             total : u64 = 0;
148 |             while found {
149 |                 total = xor(total, xx index);
150 |                 index, found = first_index(haystack, needle, index + 1);
151 |             }
152 |             avx2_total += total;
153 |             avx2_delta += seconds_since_init() - t;
154 |         }}
155 |     }
156 | 
157 |     // One result row per method: display name, accumulated time, accumulated checksum.
158 |     Entry :: struct {
159 |         name : string;
160 |         time : float64;
161 |         checksum : u64;
162 |     }
163 | 
164 |     entry :: (name: string, time: float64, checksum: u64) -> Entry {
165 |         result : Entry = ---;
166 |         result.name = name;
167 |         result.time = time;
168 |         result.checksum = checksum;
169 |         return result;
170 |     }
171 | 
172 |     entries : [..] Entry;
173 | 
174 |     #if jai          array_add(*entries, entry("Jai", jai_delta, jai_total));
175 |     #if kmp          array_add(*entries, entry("Knuth-Morris-Pratt", kmp_delta, kmp_total));
176 |     #if boyer_moore  array_add(*entries, entry("Boyer-Moore", boyer_moore_delta, boyer_moore_total));
177 |     #if simd_off     array_add(*entries, entry("Naive", simd_off_delta, simd_off_total));
178 |     #if sse          array_add(*entries, entry("Naive + SSE2", sse_delta, sse_total));
179 |     #if avx2         array_add(*entries, entry("Naive + AVX2", avx2_delta, avx2_total));
180 |     // Order the rows by their accumulated time.
181 |     quick_sort(entries, (a: Entry) -> float64 { return a.time; });
182 | 
183 |     print("\n\n");
184 | 
185 |     for entries {
186 |         print("%", pad_end(it.name, 20));
187 |         time := sprint("%", it.time);
188 |         if it.checksum != checksum {
189 |             print("BAD CHECKSUM: % != %\n", it.checksum, checksum);
190 |             continue;
191 |         }
192 |         // When the Jai baseline is enabled, also show how many times faster than jai_delta this row is.
193 |         #if jai {
194 |             factor := sprint("%", jai_delta / it.time);
195 |             print("%   = %x\n", pad_start(time, 7), pad_start(factor, 7));
196 |         }
197 |         else {
198 |             print("%\n", pad_end(time, 7));
199 |         }
200 |         if !it_index  print("\n");
201 |     }
202 | }
203 | 
204 | 
205 | xor :: (a: u64, b: u64) -> u64 {  // Bitwise XOR of `a` and `b`, computed inside an inline-assembly block.
206 |     value := a;
207 |     #asm {
208 |         tmp : gpr;
209 |         mov.q tmp, value;   // tmp = a
210 |         xor.q tmp, b;       // tmp ^= b
211 |         mov.q value, tmp;   // hand the result back to the Jai variable
212 |     }
213 |     return value;
214 | }
215 | 


--------------------------------------------------------------------------------
/tests/simd_test.jai:
--------------------------------------------------------------------------------
  1 | #import "Basic";
  2 | #import "Strings";
  3 | jai_string :: #import "String";
  4 | 
  5 | #load "data/shakespeare.jai";
  6 | 
  7 | only_problems :: true;
  8 | 
  9 | main :: () {
 10 |     defer {
 11 |         if errors.count != 0
 12 |             print("\nErrors:\n%\n", errors);
 13 |         else
 14 |             print("\nAll OK!\n");
 15 |     }
 16 | 
 17 |     // calculate_checksums_from_jai_string();
 18 |     index_case_sensitive_checksum :: 5029604;
 19 |     index_ignore_case_checksum :: 2957927;
 20 | 
 21 |     haystack :: "   Hello world in string form can you give me a hell yeah hell yeah";
 22 | 
 23 |     for simd: 0 .. 3 {
 24 |         set_simd_mode(xx simd);
 25 | 
 26 |         x := (simd + 1) * 100;
 27 | 
 28 |         test( 1 + x, first_index(haystack, #char "H", case_sensitive),  3);
 29 |         test( 2 + x, first_index(haystack, #char "h", case_sensitive), 48);
 30 |         test( 3 + x, first_index(haystack, #char "E", case_sensitive), -1);
 31 |         test( 4 + x, first_index(haystack, #char "e", case_sensitive),  4);
 32 |         test( 5 + x, first_index(haystack, #char "L", case_sensitive), -1);
 33 |         test( 6 + x, first_index(haystack, #char "l", case_sensitive),  5);
 34 |         test( 7 + x, first_index(haystack, #char "O", case_sensitive), -1);
 35 |         test( 8 + x, first_index(haystack, #char "o", case_sensitive),  7);
 36 |         test( 9 + x, first_index(haystack, #char " ", case_sensitive),  0);
 37 |         test(10 + x, first_index(haystack, #char "W", case_sensitive), -1);
 38 |         test(11 + x, first_index(haystack, #char "w", case_sensitive),  9);
 39 |         test(12 + x, first_index(haystack, #char "R", case_sensitive), -1);
 40 |         test(13 + x, first_index(haystack, #char "r", case_sensitive), 11);
 41 |         test(14 + x, first_index(haystack, #char "D", case_sensitive), -1);
 42 |         test(15 + x, first_index(haystack, #char "d", case_sensitive), 13);
 43 |         test(16 + x, first_index(haystack, #char "m", case_sensitive), 28);
 44 |         test(17 + x, first_index(haystack, #char "A", case_sensitive), -1);
 45 |         test(18 + x, first_index(haystack, #char "a", case_sensitive), 31);
 46 |         test(19 + x, first_index(haystack, #char "Z", case_sensitive), -1);
 47 |         test(20 + x, first_index(haystack, #char "z", case_sensitive), -1);
 48 | 
 49 |         test(21 + x, first_index(haystack, #char "H", ignore_case),  3);
 50 |         test(22 + x, first_index(haystack, #char "h", ignore_case),  3);
 51 |         test(23 + x, first_index(haystack, #char "E", ignore_case),  4);
 52 |         test(24 + x, first_index(haystack, #char "e", ignore_case),  4);
 53 |         test(25 + x, first_index(haystack, #char "L", ignore_case),  5);
 54 |         test(26 + x, first_index(haystack, #char "l", ignore_case),  5);
 55 |         test(27 + x, first_index(haystack, #char "O", ignore_case),  7);
 56 |         test(28 + x, first_index(haystack, #char "o", ignore_case),  7);
 57 |         test(29 + x, first_index(haystack, #char " ", ignore_case),  0);
 58 |         test(30 + x, first_index(haystack, #char "W", ignore_case),  9);
 59 |         test(31 + x, first_index(haystack, #char "w", ignore_case),  9);
 60 |         test(32 + x, first_index(haystack, #char "R", ignore_case), 11);
 61 |         test(33 + x, first_index(haystack, #char "r", ignore_case), 11);
 62 |         test(34 + x, first_index(haystack, #char "D", ignore_case), 13);
 63 |         test(35 + x, first_index(haystack, #char "d", ignore_case), 13);
 64 |         test(36 + x, first_index(haystack, #char "m", ignore_case), 28);
 65 |         test(37 + x, first_index(haystack, #char "A", ignore_case), 31);
 66 |         test(38 + x, first_index(haystack, #char "a", ignore_case), 31);
 67 |         test(39 + x, first_index(haystack, #char "Z", ignore_case), -1);
 68 |         test(40 + x, first_index(haystack, #char "z", ignore_case), -1);
 69 | 
 70 |         test(41 + x, last_index(haystack, #char "H", case_sensitive),  3);
 71 |         test(42 + x, last_index(haystack, #char "h", case_sensitive), 66);
 72 |         test(43 + x, last_index(haystack, #char "E", case_sensitive), -1);
 73 |         test(44 + x, last_index(haystack, #char "e", case_sensitive), 64);
 74 |         test(45 + x, last_index(haystack, #char "L", case_sensitive), -1);
 75 |         test(46 + x, last_index(haystack, #char "l", case_sensitive), 61);
 76 |         test(47 + x, last_index(haystack, #char "O", case_sensitive), -1);
 77 |         test(48 + x, last_index(haystack, #char "o", case_sensitive), 35);
 78 |         test(49 + x, last_index(haystack, #char " ", case_sensitive), 62);
 79 |         test(50 + x, last_index(haystack, #char "W", case_sensitive), -1);
 80 |         test(51 + x, last_index(haystack, #char "w", case_sensitive),  9);
 81 |         test(52 + x, last_index(haystack, #char "R", case_sensitive), -1);
 82 |         test(53 + x, last_index(haystack, #char "r", case_sensitive), 27);
 83 |         test(54 + x, last_index(haystack, #char "D", case_sensitive), -1);
 84 |         test(55 + x, last_index(haystack, #char "d", case_sensitive), 13);
 85 |         test(56 + x, last_index(haystack, #char "m", case_sensitive), 43);
 86 |         test(57 + x, last_index(haystack, #char "A", case_sensitive), -1);
 87 |         test(58 + x, last_index(haystack, #char "a", case_sensitive), 65);
 88 |         test(59 + x, last_index(haystack, #char "Z", case_sensitive), -1);
 89 |         test(60 + x, last_index(haystack, #char "z", case_sensitive), -1);
 90 | 
 91 |         test(61 + x, last_index(haystack, #char "H", ignore_case), 66);
 92 |         test(62 + x, last_index(haystack, #char "h", ignore_case), 66);
 93 |         test(63 + x, last_index(haystack, #char "E", ignore_case), 64);
 94 |         test(64 + x, last_index(haystack, #char "e", ignore_case), 64);
 95 |         test(65 + x, last_index(haystack, #char "L", ignore_case), 61);
 96 |         test(66 + x, last_index(haystack, #char "l", ignore_case), 61);
 97 |         test(67 + x, last_index(haystack, #char "O", ignore_case), 35);
 98 |         test(68 + x, last_index(haystack, #char "o", ignore_case), 35);
 99 |         test(69 + x, last_index(haystack, #char " ", ignore_case), 62);
100 |         test(70 + x, last_index(haystack, #char "W", ignore_case),  9);
101 |         test(71 + x, last_index(haystack, #char "w", ignore_case),  9);
102 |         test(72 + x, last_index(haystack, #char "R", ignore_case), 27);
103 |         test(73 + x, last_index(haystack, #char "r", ignore_case), 27);
104 |         test(74 + x, last_index(haystack, #char "D", ignore_case), 13);
105 |         test(75 + x, last_index(haystack, #char "d", ignore_case), 13);
106 |         test(76 + x, last_index(haystack, #char "m", ignore_case), 43);
107 |         test(77 + x, last_index(haystack, #char "A", ignore_case), 65);
108 |         test(78 + x, last_index(haystack, #char "a", ignore_case), 65);
109 |         test(79 + x, last_index(haystack, #char "Z", ignore_case), -1);
110 |         test(80 + x, last_index(haystack, #char "z", ignore_case), -1);
111 | 
112 |         checksum : u64;
113 |         index, found := first_index(shakespeare, #char "z", case_sensitive);
114 |         while found {
115 |             checksum = xor(checksum, xx index);
116 |             index, found = first_index(shakespeare, #char "z", index + 1, case_sensitive);
117 |         }
118 |         test(81 + x, checksum, index_case_sensitive_checksum);
119 | 
120 |         checksum = 0;
121 |         index, found = last_index(shakespeare, #char "z", case_sensitive);
122 |         while found {
123 |             checksum = xor(checksum, xx index);
124 |             index, found = last_index(shakespeare, #char "z", index, case_sensitive);
125 |         }
126 |         test(82 + x, checksum, index_case_sensitive_checksum);
127 | 
128 | 
129 |         checksum = 0;
130 |         index, found = first_index(shakespeare, #char "z", ignore_case);
131 |         while found {
132 |             checksum = xor(checksum, xx index);
133 |             index, found = first_index(shakespeare, #char "z", index + 1, ignore_case);
134 |         }
135 |         test(83 + x, checksum, index_ignore_case_checksum);
136 | 
137 | 
138 |         checksum = 0;
139 |         index, found = last_index(shakespeare, #char "z", ignore_case);
140 |         while found {
141 |             checksum = xor(checksum, xx index);
142 |             index, found = last_index(shakespeare, #char "z", index, ignore_case);
143 |         }
144 |         test(84 + x, checksum, index_ignore_case_checksum);
145 |     }
146 | }
147 | 
148 | 
149 | 
150 | errors : [..] int;  // IDs of the checks that failed; test() appends to this on every mismatch.
151 | 
152 | test :: (test_id: int, value: $T, expected: T) {
153 |     // Scalar check: report OK (unless only_problems is set) or log the mismatch and record its ID.
154 |     if value == expected {
155 |         if !only_problems  print("[%] OK\n", test_id);
156 |         return;
157 |     }
158 |     print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value);
159 |     array_add(*errors, test_id);
160 | }
161 | 
162 | test :: (test_id: int, value: [] string, expected: [] string) {
163 |     // String-array overload: the two views are compared element by element through array_equals.
164 |     if array_equals(value, expected) {
165 |         if !only_problems  print("[%] OK\n", test_id);
166 |         return;
167 |     }
168 |     print("[%]:\nWanted: [%]\nGot: [%]\n\n", test_id, expected, value);
169 |     array_add(*errors, test_id);
170 | }
171 | 
172 | array_equals :: (a: [] $T, b: [] T) -> bool {  // true when both views have equal length and pairwise-equal elements
173 |     if b.count != a.count  return false;
174 |     for a  if it != b[it_index]  return false;
175 |     return true;
176 | }
177 | 
178 | 
179 | // Bitwise XOR of two 64-bit values. The tests fold every match index into a
180 | // running checksum with this; because XOR is commutative and associative the
181 | // checksum does not depend on the order in which the indices are visited.
182 | //
183 | // Implemented with the built-in `^` operator rather than hand-written x64
184 | // inline assembly, so the helper is not tied to one CPU architecture and the
185 | // compiler is free to optimise it.
186 | xor :: (a: u64, b: u64) -> u64 {
187 |     return a ^ b;
188 | }
189 | 
190 | 
191 | calculate_checksums_from_jai_string :: () {  // prints reference checksums computed with the jai_string search procedures
192 |     // Left-to-right, case sensitive: search a shrinking tail and rebase each hit to an absolute position.
193 |     first_index_case_sensitive_checksum : u64;
194 |     tail := shakespeare;
195 |     base := 0;
196 |     while true {
197 |         hit := jai_string.find_index_from_left(tail, #char "z");
198 |         if hit < 0  break;
199 |         position := base + hit;
200 |         first_index_case_sensitive_checksum = xor(first_index_case_sensitive_checksum, xx position);
201 |         base = position + 1;
202 |         tail.data  = shakespeare.data + base;
203 |         tail.count = shakespeare.count - base;
204 |     }
205 |     // Left-to-right, either case: the same walk, matching any character of "zZ".
206 |     first_index_ignore_case_checksum : u64;
207 |     tail = shakespeare;
208 |     base = 0;
209 |     while true {
210 |         hit := jai_string.find_index_of_any_from_left(tail, "zZ");
211 |         if hit < 0  break;
212 |         position := base + hit;
213 |         first_index_ignore_case_checksum = xor(first_index_ignore_case_checksum, xx position);
214 |         base = position + 1;
215 |         tail.data  = shakespeare.data + base;
216 |         tail.count = shakespeare.count - base;
217 |     }
218 |     // Right-to-left, case sensitive: truncate the text at each hit and search again.
219 |     last_index_case_sensitive_checksum : u64;
220 |     head := shakespeare;
221 |     while true {
222 |         hit := jai_string.find_index_from_right(head, #char "z");
223 |         if hit < 0  break;
224 |         last_index_case_sensitive_checksum = xor(last_index_case_sensitive_checksum, xx hit);
225 |         head.count = hit;
226 |     }
227 |     // Right-to-left, either case.
228 |     last_index_ignore_case_checksum : u64;
229 |     head = shakespeare;
230 |     while true {
231 |         hit := jai_string.find_index_of_any_from_right(head, "zZ");
232 |         if hit < 0  break;
233 |         last_index_ignore_case_checksum = xor(last_index_ignore_case_checksum, xx hit);
234 |         head.count = hit;
235 |     }
236 |     print("first_index_case_sensitive_checksum: %\n", first_index_case_sensitive_checksum);
237 |     print("first_index_ignore_case_checksum: %\n", first_index_ignore_case_checksum);
238 |     print("last_index_case_sensitive_checksum: %\n", last_index_case_sensitive_checksum);
239 |     print("last_index_ignore_case_checksum: %\n", last_index_ignore_case_checksum);
240 | }
241 | 


--------------------------------------------------------------------------------
/tests/indexing_test.jai:
--------------------------------------------------------------------------------
  1 | #import "Basic";
  2 | #import "Hash_Table";
  3 | #import "Sort";
  4 | 
  5 | jai_string :: #import "String";
  6 | 
  7 | 
  8 | #load "data/shakespeare.jai";
  9 | #load "data/degenerate.jai";
 10 | 
 11 | no_case :: false;  // compile-time switch: picks the CHARACTER_COMPARE parameter below and the reference search used in main
 12 | #if no_case {
 13 |     #import "Strings"(CHARACTER_COMPARE=.IGNORE_CASE);
 14 | }
 15 | else {
 16 |     #import "Strings"(CHARACTER_COMPARE=.CASE_SENSITIVE);
 17 | }
 18 | 
 19 | main :: () {  // Benchmark driver: times each enabled search algorithm on `haystack` and checks its match checksums.
 20 |     haystack :: shakespeare;// degenerate;
 21 |     needle :: " and ";
 22 |     //needle :: "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffN";
 23 |     //needle :: "nffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";
 24 |     //"THE end\n";//" and "; //"ffffffffffffffffffffffffffffffffffffffffn"; "zealous";
 25 |     times :: 10;  // number of benchmark iterations
 26 |     debug :: false;  // when true, every match index is printed
 27 |     // Per-algorithm compile-time switches: assign `x` to run a section, `_` to leave it out.
 28 |     x :: true;
 29 |     _ :: false;
 30 |     simple           :: x;
 31 |     jai              :: x;
 32 |     simple_sse2      :: x;
 33 |     simple_avx2      :: x;
 34 |     kmp              :: x;
 35 |     boyer_moore      :: x;
 36 |     boyer_moore_sse2 :: x;
 37 |     boyer_moore_avx2 :: x;
 38 | 
 39 |     // Adjust the context's default float format; the timings printed at the end are float64 values.
 40 |     format_float := *context.print_style.default_format_float;
 41 |     format_float.zero_removal = .NO;
 42 |     format_float.trailing_width = 3;
 43 |     // Reference search procedures, chosen to match the `no_case` setting.
 44 |     #if no_case {
 45 |         find_index_from_left :: find_index_from_left_nocase;
 46 |         find_index_from_right :: find_index_from_right_nocase;
 47 |     }
 48 |     else {
 49 |         find_index_from_left :: jai_string.find_index_from_left;
 50 |         find_index_from_right :: jai_string.find_index_from_right;
 51 |     }
 52 | 
 53 |     first_checksum : u64;
 54 |     last_checksum : u64;
 55 |     // Expected checksums: XOR of every match position, scanning from the left and then from the right.
 56 |     {
 57 |         s := haystack;
 58 |         offset := 0;
 59 |         total := 0;
 60 |         index := find_index_from_left(s, needle);
 61 |         while index >= 0 {
 62 |             index += offset;  // window-relative index -> position in haystack
 63 |             total += 1;
 64 |             #if debug  print("%\n", index);
 65 |             first_checksum = xor(first_checksum, xx index);
 66 |             offset = index + 1;  // resume one byte past the start of this match
 67 |             s.data = haystack.data + offset;
 68 |             s.count = haystack.count - offset;
 69 |             index = find_index_from_left(s, needle);
 70 |         }
 71 |         #if debug  print("\n\n");
 72 |         //print("INDEX: %\n", total);
 73 | 
 74 |         s = haystack;
 75 |         index = find_index_from_right(haystack, needle);
 76 |         while index >= 0 {
 77 |             last_checksum = xor(last_checksum, xx index);
 78 |             s.count = index;  // keep searching in the part before this match
 79 |             index = find_index_from_right(s, needle);
 80 |         }
 81 |         // Every entry adds up its per-iteration checksum over `times` runs, so scale the reference the same way.
 82 |         first_checksum *= times;
 83 |         last_checksum *= times;
 84 |     }
 85 | 
 86 | 
 87 |     t : float64;  // start timestamp of the section currently being timed
 88 |     // Accumulated timings and checksums for one algorithm.
 89 |     Entry :: struct {
 90 |         name : string;
 91 |         first_time : float64 = 0;
 92 |         last_time  : float64 = 0;
 93 |         first_checksum : u64 = 0;
 94 |         last_checksum  : u64 = 0;
 95 |     }
 96 |     // Returns an Entry carrying `name`; all other fields keep their defaults.
 97 |     entry :: (name: string) -> Entry {
 98 |         result : Entry;
 99 |         result.name = name;
100 |         return result;
101 |     }
102 | 
103 |     #if jai               jai_entry              := entry("Jai");
104 |     #if boyer_moore       boyer_moore_entry      := entry("Boyer-Moore");
105 |     #if boyer_moore_sse2  boyer_moore_sse2_entry := entry("Boyer-Moore SSE2");
106 |     #if boyer_moore_avx2  boyer_moore_avx2_entry := entry("Boyer-Moore AVX2");
107 |     #if kmp               kmp_entry              := entry("Knuth-Morris-Pratt");
108 |     #if simple            simple_entry           := entry("Simple");
109 |     #if simple_sse2       simple_sse2_entry      := entry("Simple SSE2");
110 |     #if simple_avx2       simple_avx2_entry      := entry("Simple AVX2");
111 | 
112 |     prev_marker := -1;
113 | 
114 |     for 1 .. times {
115 |         #if times >= 10 {  // progress indicator: print the marker whenever it changes, values below 10 only
116 |             marker := it / (times / 10);
117 |             if marker != prev_marker {
118 |                 prev_marker = marker;
119 |                 if marker < 10
120 |                     print("%", marker);
121 |             }
122 |         }
123 |         // Baseline: the same reference search used for the expected checksums, timed per direction.
124 |         #if jai {{
125 |             t = seconds_since_init();
126 |             s := haystack;
127 |             offset := 0;
128 |             index := find_index_from_left(haystack, needle);
129 |             checksum : u64 = 0;
130 |             while index >= 0 {
131 |                 index += offset;
132 |                 checksum = xor(checksum, xx index);
133 |                 offset = index + 1;
134 |                 s.data = haystack.data + offset;
135 |                 s.count = haystack.count - offset;
136 |                 index = find_index_from_left(s, needle);
137 |             }
138 |             jai_entry.first_checksum += checksum;
139 |             jai_entry.first_time += seconds_since_init() - t;
140 | 
141 |             t = seconds_since_init();
142 |             s = haystack;
143 |             index = find_index_from_right(haystack, needle);
144 |             checksum = 0;
145 |             while index >= 0 {
146 |                 checksum = xor(checksum, xx index);
147 |                 s.count  = index;
148 |                 index = find_index_from_right(s, needle);
149 |             }
150 |             jai_entry.last_checksum += checksum;
151 |             jai_entry.last_time += seconds_since_init() - t;
152 |         }}
153 |         // Each section below has the same shape: call set_index_algorithm, then time a first_index scan and a last_index scan.
154 |         #if boyer_moore {{
155 |             set_index_algorithm(boyer_moore_first_index,boyer_moore_last_index);
156 |             // NOTE(review): set_index_algorithm presumably selects the implementation behind first_index/last_index - confirm in the Strings module.
157 |             t = seconds_since_init();
158 |             index, found := first_index(haystack, needle);
159 |             checksum : u64 = 0;
160 |             while found {
161 |                 checksum = xor(checksum, xx index);
162 |                 index, found = first_index(haystack, needle, index + 1);
163 |             }
164 |             boyer_moore_entry.first_checksum += checksum;
165 |             boyer_moore_entry.first_time += seconds_since_init() - t;
166 |             // NOTE(review): the third argument of last_index looks like a bound that restricts the search to before `index` - confirm.
167 |             t = seconds_since_init();
168 |             index, found = last_index(haystack, needle);
169 |             checksum = 0;
170 |             while found {
171 |                 checksum = xor(checksum, xx index);
172 |                 index, found = last_index(haystack, needle, index);
173 |             }
174 |             boyer_moore_entry.last_checksum += checksum;
175 |             boyer_moore_entry.last_time += seconds_since_init() - t;
176 |         }}
177 | 
178 |         #if boyer_moore_sse2 {{
179 |             set_index_algorithm(boyer_moore_sse2_first_index,boyer_moore_sse2_last_index);
180 | 
181 |             t = seconds_since_init();
182 |             index, found := first_index(haystack, needle);
183 |             checksum : u64 = 0;
184 |             total := 0;
185 |             while found {
186 |                 #if debug  print("%\n", index);
187 |                 total += 1;
188 |                 checksum = xor(checksum, xx index);
189 |                 index, found = first_index(haystack, needle, index + 1);
190 |             }
191 |             //print("BM: %\n", total);
192 |             boyer_moore_sse2_entry.first_checksum += checksum;
193 |             boyer_moore_sse2_entry.first_time += seconds_since_init() - t;
194 | 
195 |             t = seconds_since_init();
196 |             index, found = last_index(haystack, needle);
197 |             checksum = 0;
198 |             while found {
199 |                 checksum = xor(checksum, xx index);
200 |                 index, found = last_index(haystack, needle, index);
201 |             }
202 |             boyer_moore_sse2_entry.last_checksum += checksum;
203 |             boyer_moore_sse2_entry.last_time += seconds_since_init() - t;
204 |         }}
205 | 
206 |         #if boyer_moore_avx2 {{
207 |             set_index_algorithm(boyer_moore_avx2_first_index,boyer_moore_avx2_last_index);
208 | 
209 |             t = seconds_since_init();
210 |             index, found := first_index(haystack, needle);
211 |             checksum : u64 = 0;
212 |             total := 0;
213 |             while found {
214 |                 #if debug  print("%\n", index);
215 |                 total += 1;
216 |                 checksum = xor(checksum, xx index);
217 |                 index, found = first_index(haystack, needle, index + 1);
218 |             }
219 |             //print("BM: %\n", total);
220 |             boyer_moore_avx2_entry.first_checksum += checksum;
221 |             boyer_moore_avx2_entry.first_time += seconds_since_init() - t;
222 | 
223 |             t = seconds_since_init();
224 |             index, found = last_index(haystack, needle);
225 |             checksum = 0;
226 |             while found {
227 |                 checksum = xor(checksum, xx index);
228 |                 index, found = last_index(haystack, needle, index);
229 |             }
230 |             boyer_moore_avx2_entry.last_checksum += checksum;
231 |             boyer_moore_avx2_entry.last_time += seconds_since_init() - t;
232 |         }}
233 | 
234 |         #if kmp {{
235 |             set_index_algorithm(knuth_morris_pratt_first_index,knuth_morris_pratt_last_index);
236 | 
237 |             t = seconds_since_init();
238 |             index, found := first_index(haystack, needle);
239 |             checksum : u64 = 0;
240 |             while found {
241 |                 checksum = xor(checksum, xx index);
242 |                 index, found = first_index(haystack, needle, index + 1);
243 |             }
244 |             kmp_entry.first_checksum += checksum;
245 |             kmp_entry.first_time += seconds_since_init() - t;
246 | 
247 |             t = seconds_since_init();
248 |             index, found = last_index(haystack, needle);
249 |             checksum = 0;
250 |             while found {
251 |                 checksum = xor(checksum, xx index);
252 |                 index, found = last_index(haystack, needle, index);
253 |             }
254 |             kmp_entry.last_checksum += checksum;
255 |             kmp_entry.last_time += seconds_since_init() - t;
256 |         }}
257 | 
258 |         #if simple {{
259 |             set_index_algorithm(simple_first_index, simple_last_index);
260 | 
261 |             t = seconds_since_init();
262 |             index, found := first_index(haystack, needle);
263 |             checksum : u64 = 0;
264 |             while found {
265 |                 checksum = xor(checksum, xx index);
266 |                 index, found = first_index(haystack, needle, index + 1);
267 |             }
268 |             simple_entry.first_checksum += checksum;
269 |             simple_entry.first_time += seconds_since_init() - t;
270 | 
271 |             t = seconds_since_init();
272 |             index, found = last_index(haystack, needle);
273 |             checksum = 0;
274 |             while found {
275 |                 checksum = xor(checksum, xx index);
276 |                 index, found = last_index(haystack, needle, index);
277 |             }
278 |             simple_entry.last_checksum += checksum;
279 |             simple_entry.last_time += seconds_since_init() - t;
280 |         }}
281 | 
282 |         #if simple_sse2 {{
283 |             set_index_algorithm(simple_sse2_first_index, simple_sse2_last_index);
284 | 
285 |             t = seconds_since_init();
286 |             index, found := first_index(haystack, needle);
287 |             checksum : u64 = 0;
288 |             while found {
289 |                 checksum = xor(checksum, xx index);
290 |                 index, found = first_index(haystack, needle, index + 1);
291 |             }
292 |             simple_sse2_entry.first_checksum += checksum;
293 |             simple_sse2_entry.first_time += seconds_since_init() - t;
294 | 
295 |             t = seconds_since_init();
296 |             index, found = last_index(haystack, needle);
297 |             checksum = 0;
298 |             while found {
299 |                 checksum = xor(checksum, xx index);
300 |                 index, found = last_index(haystack, needle, index);
301 |             }
302 |             simple_sse2_entry.last_checksum += checksum;
303 |             simple_sse2_entry.last_time += seconds_since_init() - t;
304 |         }}
305 | 
306 |         #if simple_avx2 {{
307 |             set_index_algorithm(simple_avx2_first_index, simple_avx2_last_index);
308 | 
309 |             t = seconds_since_init();
310 |             index, found := first_index(haystack, needle);
311 |             checksum : u64 = 0;
312 |             while found {
313 |                 checksum = xor(checksum, xx index);
314 |                 index, found = first_index(haystack, needle, index + 1);
315 |             }
316 |             simple_avx2_entry.first_checksum += checksum;
317 |             simple_avx2_entry.first_time += seconds_since_init() - t;
318 | 
319 |             t = seconds_since_init();
320 |             index, found = last_index(haystack, needle);
321 |             checksum = 0;
322 |             while found {
323 |                 checksum = xor(checksum, xx index);
324 |                 index, found = last_index(haystack, needle, index);
325 |             }
326 |             simple_avx2_entry.last_checksum += checksum;
327 |             simple_avx2_entry.last_time += seconds_since_init() - t;
328 |         }}
329 |     }
330 | 
331 | 
332 |     // Gather the enabled entries and sort them with first_time as the key.
333 |     entries : [..] Entry;
334 | 
335 |     #if jai               array_add(*entries, jai_entry);
336 |     #if kmp               array_add(*entries, kmp_entry);
337 |     #if boyer_moore       array_add(*entries, boyer_moore_entry);
338 |     #if boyer_moore_sse2  array_add(*entries, boyer_moore_sse2_entry);
339 |     #if boyer_moore_avx2  array_add(*entries, boyer_moore_avx2_entry);
340 |     #if simple            array_add(*entries, simple_entry);
341 |     #if simple_sse2       array_add(*entries, simple_sse2_entry);
342 |     #if simple_avx2       array_add(*entries, simple_avx2_entry);
343 | 
344 |     quick_sort(entries, (a: Entry) -> float64 { return a.first_time; });
345 | 
346 |     print("\n\n");
347 |     // One row per entry; a checksum mismatch is reported in place of the timings.
348 |     for entries {
349 |         print("%", pad_end(it.name, 20));
350 |         time := sprint("% / %", it.first_time, it.last_time);
351 |         if it.first_checksum != first_checksum {
352 |             print("BAD FIRST CHECKSUM: % != %\n", it.first_checksum, first_checksum);
353 |             if it.last_checksum != last_checksum
354 |                 print("                    BAD LAST CHECKSUM: % != %\n", it.last_checksum, last_checksum);
355 |             continue;
356 |         }
357 |         else if it.last_checksum != last_checksum {
358 |             print("BAD LAST CHECKSUM: % != %\n", it.last_checksum, last_checksum);
359 |             continue;
360 |         }
361 |         // When the baseline section is enabled, also print its time divided by this entry's time, per direction.
362 |         #if jai {
363 |             first_factor := sprint("%", jai_entry.first_time / it.first_time);
364 |             last_factor :=  sprint("%", jai_entry.last_time / it.last_time);
365 |             print("%   = %x / %x\n", pad_start(time, 7), pad_start(first_factor, 7), pad_start(last_factor, 7));
366 |         }
367 |         else {
368 |             print("% %\n", it.first_checksum, pad_end(time, 7));
369 |         }
370 |         if !it_index  print("\n");  // extra blank line after the first row (it_index == 0) when it was not skipped above
371 |     }
372 | }
373 | 
374 | 
375 | // Bitwise XOR of two 64-bit values. The benchmark folds every match index
376 | // into a running checksum with this; because XOR is commutative and
377 | // associative the checksum does not depend on the visiting order.
378 | //
379 | // Implemented with the built-in `^` operator rather than hand-written x64
380 | // inline assembly, so the helper is not tied to one CPU architecture and the
381 | // compiler is free to optimise it.
382 | xor :: (a: u64, b: u64) -> u64 {
383 |     return a ^ b;
384 | }
385 | 
386 | 
387 | find_index_from_left_nocase :: (haystack: string, needle: string) -> int {
388 |     // Brute-force case-insensitive search: try each start offset in ascending
389 |     // order and return the first whose window equals `needle`, or -1 if none does.
390 |     last_start := haystack.count - needle.count;
391 |     if last_start < 0  return -1;
392 |     for 0 .. last_start {
393 |         if jai_string.equal_nocase(jai_string.slice(haystack, it, needle.count), needle)  return it;
394 |     }
395 |     return -1;
396 | }
397 | 
398 | 
399 | find_index_from_right_nocase :: (haystack: string, needle: string) -> int {
400 |     // Brute-force case-insensitive search from the end: walk the start offsets from
401 |     // the highest possible one down to 0 and return the first match, or -1 if none.
402 |     last_start := haystack.count - needle.count;
403 |     if last_start < 0  return -1;
404 |     for < last_start .. 0 {
405 |         if jai_string.equal_nocase(jai_string.slice(haystack, it, needle.count), needle)  return it;
406 |     }
407 |     return -1;
408 | }
409 | 


--------------------------------------------------------------------------------
/tools/index_profile.jai:
--------------------------------------------------------------------------------
  1 | #import "Basic";
  2 | #import "File";
  3 | #import "File_Utilities";
  4 | #import "Sort";
  5 | #import "Strings";
  6 | jai_string :: #import "String";
  7 | 
  8 | 
  9 | main :: () {
 10 |     args := get_command_line_arguments();
 11 |     haystack_file := "";
 12 |     needle := "";
 13 |     no_case := false;
 14 |     times : s64 = 100;
 15 |     i := 1;
 16 |     test_only := false;
 17 |     valid_args := true;
 18 |     while i < args.count {
 19 |         arg := args[i];
 20 |         if arg == "-i" {
 21 |             no_case = true;
 22 |         }
 23 |         else if arg == "-t" {
 24 |             test_only = true;
 25 |         }
 26 |         else if arg == "-c" {
 27 |             i += 1;
 28 |             if i >= args.count {
 29 |                 valid_args = false;
 30 |                 break;
 31 |             }
 32 |             n, ok := jai_string.parse_int(*args[i]);
 33 |             if !ok || n < 1 {
 34 |                 valid_args = false;
 35 |                 break;
 36 |             }
 37 |             times = n;
 38 |         }
 39 |         else if haystack_file == "" {
 40 |             haystack_file = arg;
 41 |         }
 42 |         else if needle == "" {
 43 |             needle = apply_backslash(arg);
 44 |         }
 45 |         else {
 46 |             valid_args = false;
 47 |             break;
 48 |         }
 49 | 
 50 |         i += 1;
 51 |     }
 52 | 
 53 |     if !(haystack_file && needle && valid_args) {
 54 |         print("\nUSAGE: index_profile.exe [-i] [-c <N>] <haystackfile> <needle>\n\n");
 55 |         print(" <haystackfile> = path of file containing haystack text\n");
 56 |         print(" <needle>       = text to search for\n");
 57 |         print(" -i             = ignore case\n");
 58 |         print(" -c <N>         = perform N iterations, default is 100\n");
 59 |         exit(1);
 60 |     }
 61 | 
 62 |     if !file_exists(haystack_file) {
 63 |         print("\nFile not found: %\n", haystack_file);
 64 |         exit(2);
 65 |     }
 66 | 
 67 |     if test_only  times = 1;
 68 | 
 69 |     haystack := read_entire_file(haystack_file);
 70 | 
 71 |     jai               :: true;
 72 |     simple            :: true;
 73 |     super_simple      :: false;
 74 |     simple_sse2       :: true;
 75 |     simple_avx2       :: true;
 76 |     simple_unsafe     :: true;
 77 |     kmp               :: true;
 78 |     boyer_moore       :: true;
 79 |     boyer_moore_sse2  :: true;
 80 |     boyer_moore_avx2  :: true;
 81 | 
 82 |     #if kmp {
 83 |         #load "../Strings/knuth_morris_pratt.jai";
 84 |     }
 85 | 
 86 |     format_float := *context.print_style.default_format_float;
 87 |     format_float.zero_removal = .NO;
 88 |     format_float.width = 6;
 89 |     format_float.trailing_width = 3;
 90 | 
 91 |     find_index_from_left := jai_find_index_from_left;
 92 |     find_index_from_right := jai_find_index_from_right;
 93 | 
 94 |     if no_case {
 95 |         find_index_from_left = jai_find_index_from_left_nocase;
 96 |         find_index_from_right = jai_find_index_from_right_nocase;
 97 |     }
 98 |     else {
 99 |     }
100 |     compare := ifx no_case then ignore_case else case_sensitive;
101 | 
102 |     expected_first_checksum : u64;
103 |     expected_last_checksum : u64;
104 |     expected_total := 0;
105 | 
106 |     {
107 |         s := haystack;
108 |         offset := 0;
109 |         index := find_index_from_left(s, needle);
110 |         while index >= 0 {
111 |             index += offset;
112 |             expected_first_checksum = xor(expected_first_checksum, xx index);
113 |             expected_total += 1;
114 |             offset = index + 1;
115 |             s.data = haystack.data + offset;
116 |             s.count = haystack.count - offset;
117 |             index = find_index_from_left(s, needle);
118 |         }
119 | 
120 |         s = haystack;
121 |         index = find_index_from_right(haystack, needle);
122 |         while index >= 0 {
123 |             expected_last_checksum = xor(expected_last_checksum, xx index);
124 |             s.count = index;
125 |             index = find_index_from_right(s, needle);
126 |         }
127 | 
128 |         expected_first_checksum *= cast(u64)times;
129 |         expected_last_checksum  *= cast(u64)times;
130 |         expected_total *= times;
131 |     }
132 | 
133 | 
134 |     Entry :: struct {
135 |         name : string;
136 |         id : string;
137 |         prefix : string;
138 |         first_time : float64 = 0;
139 |         last_time  : float64 = 0;
140 |         first_total := 0;
141 |         first_checksum : u64 = 0;
142 |         last_checksum  : u64 = 0;
143 |         last_total := 0;
144 |     }
145 | 
146 |     entry :: (id: string, name: string, prefix: string) -> Entry {
147 |         result : Entry;
148 |         result.id = id;
149 |         result.name = name;
150 |         result.prefix = prefix;
151 |         return result;
152 |     }
153 | 
154 |     #if jai               jai_entry              := entry("?", "Jai", "jai");
155 |     #if boyer_moore       boyer_moore_entry      := entry("BOYER_MOORE", "Boyer-Moore", "boyer_moore");
156 |     #if boyer_moore_sse2  boyer_moore_sse2_entry := entry("BOYER_MOORE_SSE2", "Boyer-Moore SSE2", "boyer_moore_sse2");
157 |     #if boyer_moore_avx2  boyer_moore_avx2_entry := entry("BOYER_MOORE_AVX2", "Boyer-Moore AVX2", "boyer_moore_avx2");
158 |     #if kmp               kmp_entry              := entry("KNUTH_MORRIS_PRATT", "Knuth-Morris-Pratt", "knuth_morris_pratt");
159 |     #if simple            simple_entry           := entry("SIMPLE", "Simple", "simple");
160 |     #if simple_unsafe     simple_unsafe_entry    := entry("SIMPLE_UNSAFE", "Simple Unsafe", "unsafe_simple");
161 |     #if simple_sse2       simple_sse2_entry      := entry("SIMPLE_SSE2", "Simple SSE2", "simple_sse2");
162 |     #if simple_avx2       simple_avx2_entry      := entry("SIMPLE_AVX2", "Simple AVX2", "simple_avx2");
163 |     #if super_simple      super_simple_entry     := entry("SUPER_SIMPLE", "Super Simple", "super_simple");
164 | 
165 |     prev_marker := -1;
166 | 
167 |     for 1 .. times {
168 |         if times >= 10 {
169 |             marker : s64 = it / (times / 10);
170 |             if marker != prev_marker {
171 |                 prev_marker = marker;
172 |                 if marker < 10
173 |                     print("%", marker);
174 |             }
175 |         }
176 | 
177 |         #if jai {{
178 |             t := seconds_since_init();
179 |             s := haystack;
180 |             offset := 0;
181 |             index := find_index_from_left(haystack, needle);
182 |             checksum : u64 = 0;
183 |             total := 0;
184 |             while index >= 0 {
185 |                 index += offset;
186 |                 checksum = xor(checksum, xx index);
187 |                 total += 1;
188 |                 offset = index + 1;
189 |                 s.data = haystack.data + offset;
190 |                 s.count = haystack.count - offset;
191 |                 index = find_index_from_left(s, needle);
192 |             }
193 |             jai_entry.first_checksum += checksum;
194 |             jai_entry.first_total += total;
195 |             jai_entry.first_time += seconds_since_init() - t;
196 | 
197 |             t = seconds_since_init();
198 |             s = haystack;
199 |             index = find_index_from_right(haystack, needle);
200 |             checksum = 0;
201 |             total = 0;
202 |             while index >= 0 {
203 |                 checksum = xor(checksum, xx index);
204 |                 total += 1;
205 |                 s.count  = index;
206 |                 index = find_index_from_right(s, needle);
207 |             }
208 |             jai_entry.last_checksum += checksum;
209 |             jai_entry.last_total += total;
210 |             jai_entry.last_time += seconds_since_init() - t;
211 |         }}
212 | 
213 |         test :: (first_index_proc: type_of(simple_first_index), last_index_proc: type_of(simple_last_index), entry: *Entry) #expand {
214 |             // Installs the given index procedures, then times one full forward and one full backward scan of the caller's haystack.
215 |             set_index_algorithm(first_index_proc, last_index_proc);
216 |             scan_start := seconds_since_init();
217 |             match_at, matched := first_index(`haystack, `needle, compare);
218 |             running_xor : u64 = 0;
219 |             match_count := 0;
220 |             while matched {
221 |                 running_xor = xor(running_xor, xx match_at);
222 |                 match_count += 1;
223 |                 match_at, matched = first_index(`haystack, `needle, match_at + 1, compare);  // resume one past this hit
224 |             }
225 |             entry.first_checksum += running_xor;
226 |             entry.first_total += match_count;
227 |             entry.first_time += seconds_since_init() - scan_start;
228 |             // Backward pass: identical bookkeeping, driven by last_index and accumulated into the last_* fields.
229 |             scan_start = seconds_since_init();
230 |             match_at, matched = last_index(`haystack, `needle, compare);
231 |             running_xor = 0;
232 |             match_count = 0;
233 |             while matched {
234 |                 running_xor = xor(running_xor, xx match_at);
235 |                 match_count += 1;
236 |                 match_at, matched = last_index(`haystack, `needle, match_at, compare);
237 |             }
238 |             entry.last_checksum += running_xor;
239 |             entry.last_total += match_count;
240 |             entry.last_time += seconds_since_init() - scan_start;
241 |         }
242 | 
243 |         #if boyer_moore       test(boyer_moore_first_index, boyer_moore_last_index, *boyer_moore_entry);
244 |         #if boyer_moore_sse2  test(boyer_moore_sse2_first_index, boyer_moore_sse2_last_index, *boyer_moore_sse2_entry);
245 |         #if boyer_moore_avx2  test(boyer_moore_avx2_first_index, boyer_moore_avx2_last_index, *boyer_moore_avx2_entry);
246 |         #if kmp               test(knuth_morris_pratt_first_index, knuth_morris_pratt_last_index, *kmp_entry);
247 |         #if simple            test(simple_first_index, simple_last_index, *simple_entry);
248 |         #if simple_unsafe     test(unsafe_simple_first_index, unsafe_simple_last_index, *simple_unsafe_entry);
249 |         #if simple_sse2       test(simple_sse2_first_index, simple_sse2_last_index, *simple_sse2_entry);
250 |         #if simple_avx2       test(simple_avx2_first_index, simple_avx2_last_index, *simple_avx2_entry);
251 |         #if super_simple      test(super_simple_first_index, super_simple_last_index, *super_simple_entry);
252 |     }
253 | 
254 |     entries : [..] Entry;
255 | 
256 |     #if jai               array_add(*entries, jai_entry);
257 |     #if kmp               array_add(*entries, kmp_entry);
258 |     #if boyer_moore       array_add(*entries, boyer_moore_entry);
259 |     #if boyer_moore_sse2  array_add(*entries, boyer_moore_sse2_entry);
260 |     #if boyer_moore_avx2  array_add(*entries, boyer_moore_avx2_entry);
261 |     #if simple            array_add(*entries, simple_entry);
262 |     #if simple_unsafe     array_add(*entries, simple_unsafe_entry);
263 |     #if simple_sse2       array_add(*entries, simple_sse2_entry);
264 |     #if simple_avx2       array_add(*entries, simple_avx2_entry);
265 |     #if super_simple      array_add(*entries, super_simple_entry);
266 | 
267 |     quick_sort(entries, (a: Entry) -> float64 { return a.first_time; });
268 | 
269 |     fastest_last_time := entries[0].last_time;
270 |     for entries  if it.last_time < fastest_last_time  fastest_last_time = it.last_time;
271 | 
272 |     if test_only {
273 |         all_ok := true;
274 |         print("Testing: %\nNeedle:  %\nFound:   %\n", haystack_file, needle, expected_total);
275 |         for entries {
276 |             print("%", pad_end(it.name, 20));
277 | 
278 |             valid := true;
279 |             if it.first_checksum != expected_first_checksum {
280 |                 print("BAD FIRST CHECKSUM: Expected [%] Got [%]\n", expected_first_checksum, it.first_checksum);
281 |                 valid = false;
282 |             }
283 |             if it.last_checksum != expected_last_checksum {
284 |                 if !valid  print("                    ");
285 |                 print("BAD LAST CHECKSUM:  Expected [%] Got [%]\n", expected_last_checksum, it.last_checksum);
286 |                 valid = false;
287 |             }
288 |             if it.first_total != expected_total {
289 |                 if !valid  print("                    ");
290 |                 print("BAD FIRST TOTAL:  Expected [%] Got [%]\n", expected_total, it.first_total);
291 |                 valid = false;
292 |             }
293 |             if it.last_checksum != expected_last_checksum {
294 |                 if !valid  print("                    ");
295 |                 print("BAD LAST TOTAL:  Expected [%] Got [%]\n", expected_total, it.last_total);
296 |                 valid = false;
297 |             }
298 | 
299 |             if valid  print("OK\n");
300 |         }
301 |         print("\n");
302 |         if !all_ok  exit(1);
303 |     }
304 |     else {
305 |         print("\n\n");
306 | 
307 |         for entries {
308 |             if !it_index  print("\e[0;32m");
309 |             print("%", pad_end(it.name, 20));
310 |             first_color := "";
311 |             if !it_index {
312 |                 print("\e[m");
313 |                 first_color = "\e[0;33m";
314 |             }
315 |             last_color := "";
316 |             if it.last_time == fastest_last_time
317 |                 last_color = "\e[0;33m";
318 |             time := sprint("%4%1%3 / %5%2%3", it.first_time, it.last_time, "\e[m", first_color, last_color);
319 | 
320 |             valid := true;
321 |             if it.first_checksum != expected_first_checksum {
322 |                 print("BAD FIRST CHECKSUM: Expected [%] Got [%]\n", expected_first_checksum, it.first_checksum);
323 |                 valid = false;
324 |             }
325 |             if it.last_checksum != expected_last_checksum {
326 |                 if !valid  print("                    ");
327 |                 print("BAD LAST CHECKSUM:  Expected [%] Got [%]\n", expected_last_checksum, it.last_checksum);
328 |                 valid = false;
329 |             }
330 |             if it.first_total != expected_total {
331 |                 if !valid  print("                    ");
332 |                 print("BAD FIRST TOTAL:  Expected [%] Got [%]\n", expected_total, it.first_total);
333 |                 valid = false;
334 |             }
335 |             if it.last_checksum != expected_last_checksum {
336 |                 if !valid  print("                    ");
337 |                 print("BAD LAST TOTAL:  Expected [%] Got [%]\n", expected_total, it.last_total);
338 |                 valid = false;
339 |             }
340 | 
341 |             if !valid {
342 |                 print("\n                    Bad checksums/totals only happen if there's a bug in the\n                    Strings library, please report it!\n\n");
343 |                 continue;
344 |             }
345 | 
346 |             #if jai {
347 |                 format_float.width = 5;
348 |                 first_factor := sprint("%", jai_entry.first_time / it.first_time);
349 |                 last_factor :=  sprint("%", jai_entry.last_time / it.last_time);
350 |                 format_float.width = 6;
351 |                 print("%1   = %4%2x\e[m / %5%3x\e[m\n", pad_start(time, 7), pad_start(first_factor, 7), pad_start(last_factor, 7), first_color, last_color);
352 |             }
353 |             else {
354 |                 print("%\n", pad_end(time, 7));
355 |             }
356 |             if !it_index  print("\n");
357 |         }
358 | 
359 |         print(#string __info
360 | 
361 | Results are listed for first_index / last_index, and sorted by first_index (as
362 | first_index is used internally so is more important).  If you want to use a
363 | different algorithm for last_index than for first_index you can use the bottom
364 | call below with different arguments.
365 | 
366 | The winning algorithm for this dataset is: %5%1%4
367 | 
368 | To use it import with module parameters:
369 | 
370 |     %6#import "Strings"(index_algorithm = .%2);%4
371 | 
372 | or call set_index_algorithm:
373 | 
374 |     %6set_index_algorithm(%3_first_index, %3_last_index);%4
375 |     __info, entries[0].name, entries[0].id, entries[0].prefix, "\e[m", "\e[0;32m", "\e[0;33m");
376 |     }
377 | }
378 | 
379 | 
380 | // Bitwise exclusive-or of two 64-bit values.  The benchmark loops in this file
381 | // call it to fold each match index into a running checksum.
382 | //
383 | // Implemented with the built-in `^` operator, so no x64-specific `#asm` block
384 | // is required and the helper compiles on any target.
385 | //
386 | // Returns: a XOR b.
387 | xor :: (a: u64, b: u64) -> u64 {
388 |     return a ^ b;
389 | }
390 | 
391 | 
392 | // Lowest index at which `needle` matches `haystack` according to jai_string.equal_nocase, or -1 when no position matches.
393 | jai_find_index_from_left_nocase :: (haystack: string, needle: string) -> int {
394 |     last_start := haystack.count - needle.count;
395 |     if last_start < 0  return -1;  // needle is longer than haystack
396 |     for start: 0 .. last_start {
397 |         window := jai_string.slice(haystack, start, needle.count);
398 |         if jai_string.equal_nocase(window, needle)  return start;
399 |     }
400 |     return -1;
401 | }
402 | 
403 | 
404 | // Highest index at which `needle` matches `haystack` according to jai_string.equal_nocase, or -1 when no position matches.
405 | jai_find_index_from_right_nocase :: (haystack: string, needle: string) -> int {
406 |     last_start := haystack.count - needle.count;
407 |     if last_start < 0  return -1;  // needle is longer than haystack
408 |     for < start: last_start .. 0 {
409 |         window := jai_string.slice(haystack, start, needle.count);
410 |         if jai_string.equal_nocase(window, needle)  return start;
411 |     }
412 |     return -1;
413 | }
414 | 
415 | 
416 | // Naive forward search: walks candidate start positions from 0 upwards and
417 | // returns the first one whose slice equals `substring`, else -1.
418 | jai_find_index_from_left :: (s: string, substring: string) -> int {
419 |     if !substring  return -1;  // nothing to look for
420 |     // One slice plus one compare per position, so this is slow.
421 |     for start: 0 .. s.count - substring.count {
422 |         candidate := jai_string.slice(s, start, substring.count);
423 |         if candidate == substring  return start;
424 |     }
425 |     return -1;
426 | }
427 | 
428 | // Naive backward search: walks candidate start positions from the end of `s`
429 | // towards 0 and returns the first one whose slice equals `substring`, else -1.
430 | jai_find_index_from_right :: (s: string, substring: string) -> int {
431 |     if !substring  return -1;  // nothing to look for
432 |     // One slice plus one compare per position, so this is slow.
433 |     for < start: s.count - substring.count .. 0 {
434 |         candidate := jai_string.slice(s, start, substring.count);
435 |         if candidate == substring  return start;
436 |     }
437 |     return -1;
438 | }
439 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # jai-string
  2 | 
  3 | Modules present:
  4 | 
  5 | * `Strings` Fairly performant and well reasoned api for working with strings.
  6 | * `Scratch` A simple allocator for doing multiple operations in a row without grabbing more memory on each one. [Info](Scratch/module.jai)
  7 | 
  8 | To use, clone the repo and then copy the `Strings` folder into your `jai/modules` folder, or symlink it: `mklink /d c:\jai\modules\Strings c:\repos\jai-string\Strings`
  9 | Optionally do the same for the `Scratch` folder if you want to have access to the scratch allocator.
 10 | 
 11 | 
 12 | ## Mechanics
 13 | 
 14 | ### Mutating in-place vs returning result
 15 | 
 16 | Any proc in this module which writes to the string's data will have a pointer to the string as its parameter instead of just a string.  This gives a clear indicator of intent, and also delineates between different versions of a proc.  For example:
 17 | ```jai
 18 | bar := to_upper(foo); // returns a copy of foo converted to uppercase (allocates!)
 19 | to_upper(*foo);       // mutates foo in-place, converting it to uppercase.
 20 | ```
 21 | 
 22 | ### Generating strings
 23 | 
 24 | Any proc which generates (allocates) a string will take an optional `null_terminate` parameter; setting this to true ensures the resulting string ends in `\0`.
 25 | 
 26 | 
 27 | ### Character Comparison
 28 | 
 29 | By default characters being compared between two strings are compared using the `case_sensitive` function (unless you override it with the module parameter).  In this library any procedure which involves comparing strings will take a `character_compare` parameter in which you can specify a different procedure from the default.  For example:
 30 | 
 31 | ```jai
 32 |     assert( contains("Hello", "h")              == false );
 33 |     assert( contains("Hello", "h", ignore_case) == true  );
 34 | ```
 35 | 
 36 | The comparator is a struct; you can make your own like this:
 37 | 
 38 | ```jai
 39 | are_numbers :: Character_Compare.{
 40 |     .CUSTOM,
 41 |     (a: u8, b: u8) -> bool {
 42 |         return (a >= #char "0" && a <= #char "9")
 43 |             == (b >= #char "0" && b <= #char "9");
 44 |     }
 45 | };
 46 | 
 47 | share_case :: Character_Compare.{
 48 |     .CUSTOM,
 49 |     (a: u8, b: u8) -> bool {
 50 |         a_is_alpha := is_alpha(a);
 51 |         if a_is_alpha != is_alpha(b)  return false;
 52 |         if !a_is_alpha  return true;
 53 |         return is_upper(a) == is_upper(b);
 54 |     }
 55 | };
 56 | ```
 57 | 
 58 | The two comparators built-in to the module are `case_sensitive`, `ignore_case`.
 59 | 
 60 | *(The other two options to `.CUSTOM` are `.CASE_SENSITIVE` and `.IGNORE_CASE`: you may roll your own versions of those comparators if you wish, and by choosing the relevant identifier the correct SIMD optimisations will be invoked - however, there's not a lot of point in doing so...)*
 61 | 
 62 | 
 63 | ### Tool types: u8 / [] u8 / string / Index_Proc
 64 | 
 65 | In a string library it is often the case that you have a string which you are applying an operation to using a *tool* parameter.  In this library there will generally be four versions of such procedures, the first three of which are the single parameters: `u8`, `[] u8`, `string`.  As tools these types behave consistently across the library:
 66 | 
 67 | * `u8`<br>
 68 | The single character specified will be used.
 69 | 
 70 | * `[] u8`<br>
 71 | A match to any of the characters in the array will be used.
 72 | 
 73 | * `string`<br>
 74 | The exact string will be used: i.e. the characters specified in the sequence specified.
 75 | 
 76 | 
 77 | For example:
 78 | ```jai
 79 |     assert( trim( " apple  ",    #char " "        )  == "apple"  );
 80 |     assert( trim( "banana pear", cast([]u8) "ban" )  == " pear" );
 81 |     assert( trim( "banana pear", "ban"            )  == "ana pear" );
 82 | ```
 83 | 
 84 | Additionally, any time the tool is a `string` you may specify an `Index_Proc`.  An `Index_Proc` is a procedure with the signature:
 85 | 
 86 | `(haystack: string, needle: string, initial_index: int, reversed: bool) -> from_index: int, to_index: int, found: bool`
 87 | 
 88 | This allows you to feed an arbitrarily complex pattern match into the procedure you are using.  When using an `Index_Proc`, a character comparator is not used (as your own code is instead).
 89 | 
 90 | For example:
 91 | ```jai
 92 |     question_mark_index :: (haystack: string, needle: string, initial_index: int, $$reversed: bool) -> from_index: int, to_index: int, found: bool {
 93 |         if reversed {
 94 |             from_index, to_index, found := reverse_index_proc(question_mark_index, haystack, needle, initial_index);
 95 |             return from_index, to_index, found;
 96 |         }
 97 |         else {
 98 |             index := slice_index(haystack, initial_index);
 99 |             if index >= haystack.count  return -1, -1, false;
100 | 
101 |             for haystack_index: index .. haystack.count - needle.count {
102 |                 for needle_index: 0 .. needle.count - 1 {
103 |                     c := needle[needle_index];
104 |                     if c != #char "?" && c != haystack[haystack_index + needle_index]
105 |                         continue haystack_index;
106 |                 }
107 | 
108 |                 return haystack_index, haystack_index + needle.count, true;
109 |             }
110 | 
111 |             return -1, -1, false;
112 |         }
113 |     }
114 | 
115 |     assert( starts_with("Hello World", "He??o")                      == false );
116 |     assert( starts_with("Hello World", "He??o", question_mark_index) == true );
117 | ```
118 | 
119 | Notice the use of `reverse_index_proc` to handle when the `reversed` parameter is set.  This is a library procedure that you can use if you don't want to write out the reverse algorithm yourself, but note that it is extremely inefficient!
120 | 
121 | In the docs below, any time a parameter of type `%Tool` is specified, it means there are four versions of the procedure, each corresponding to the behaviour described above (the fourth being that the %Tool is two parameters: `string`+`Index_Proc`).
122 | 
123 | <hr>
124 | 
125 | ### `#module_parameters`
126 | 
127 | * `CHARACTER_COMPARE`<br>Default comparator used to check if two string characters are equal.  One of:
128 |     * `.CASE_SENSITIVE`
129 |     * `.IGNORE_CASE`
130 | 
131 | * `INDEX_ALGORITHM`<br>Determines the default string search algorithm to use (they can be changed later using `set_index_algorithm`).  One of:
132 |     * `.SIMPLE`, `.SIMPLE_SSE2`, `.SIMPLE_AVX2`, `.SIMPLE_UNSAFE`<br>Simplest algorithm, no memory overhead.
133 |     * `.BOYER_MOORE`, `.BOYER_MOORE_SSE2`, `.BOYER_MOORE_AVX2`<br>[Boyer-Moore algorithm](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm).  Fastest tested scalar algorithm overall, has a small memory footprint that increases with needle size.
134 |     * `.KNUTH_MORRIS_PRATT`<br>[Knuth-Morris-Pratt algorithm](https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm). Another fast algorithm, with a similar memory footprint.
135 | 
136 | #### A note on indexing algorithms
137 | 
138 | The indexing algorithm set by `set_index_algorithm` is used internally in the module for most operations: any time you call things like `first_index`, `replace`, `split` it will be employed.
139 | Whereas other functions in the library will utilize SIMD features (SSE2 & AVX2) when told to with the `set_simd_mode` command, you must explicitly set an index algorithm to use them if that is what you wish<sup>*</sup>:  the default indexing algorithm is scalar `Boyer-Moore`, because it is good on practically any dataset; a safe choice.  Choosing a different indexing algorithm can provide impressive performance improvements, but this depends on the dataset you are working on (the specific strings and substrings you are searching with).  SIMD algorithms can be orders of magnitude faster, but they can also be catastrophically slow when facing degenerate datasets.  If you want to get the most performance out of the library then you should choose an appropriate indexing algorithm for your dataset.
140 | 
141 | To help with this there is the `index_profile` tool (in the `tools/` folder): provide it with a file and a typical search string from your data and it will show you how each available algorithm performs with the data you are manipulating.
142 | 
143 | <sup>*</sup> *(Though all the built-in indexing algorithms will detect if the needle is a single character long, and if so will use the relevant built-in character index algorithm, which will obey `set_simd_mode`)*
144 | 
145 | 
146 | ### Procedures
147 | 
148 | 
149 | #### Configuration
150 | 
151 | 
152 | * `set_index_algorithm (first_index_proc := default_first_index, last_index_proc := default_last_index)`<br>
153 | Sets the index procedures used internally when searching through strings with strings (for `replace`, `split`, etc.)
154 | 
155 | 
156 | * `set_simd_mode (mode)`<br>Sets whether to use SIMD optimisations.  One of:
157 |     * `.OFF`<br>Disables all SIMD optimisations, utilizing scalar code only.
158 |     * `.AUTO`<br>Uses the fastest SIMD instruction set available on the CPU.
159 |     * `.SSE2`<br>Uses SSE2 (128bit) optimisations.  This is the default.
160 |     * `.AVX2`<br>Uses AVX2 (256bit) optimisations.
161 | 
162 | 
163 | #### Substrings
164 | 
165 | 
166 | * `slice (str: string, from_index: int, [to_index: int]) -> string, normalized_from_index: int, normalized_to_index: int`<br>
167 | Returns the string inside `str`, between the specified indices.  You may use a negative index to specify backwards from the end of the string.  If you do not specify a `to_index` then it will include all characters up to the end of the string.  The last two return parameters are the positive indexes the slice ends up using, after validation.
168 | 
169 | 
170 | * `substring (str: string, from_index: int, [count: int]) -> string, normalized_from_index: int, normalized_to_index: int`<br>
171 | Same as `slice`, except instead of a `to_index` you specify a character count.  If you do not specify a `count` then it will include all characters up to the end of the string.
172 | 
173 | 
174 | * `slice_index (str: string, index: int) -> normalized_index: int, well_formed: bool`<br>
175 | Returns the validated and normalized index which would be used with the provided string, as well as whether the index was within the bounds of the string.
176 | 
177 | 
178 | * `raw_slice (str: string, from_index: int, to_index: int) -> string`<br>
179 | As `slice`, but without any checking on the indices, and without being able to use negative indices (and thus faster).  If you do not specify a `to_index` then it will include all characters up to the end of the string.  Generally speaking, just use `slice` instead.
180 | 
181 | 
182 | * `raw_substring (str: string, from_index: int, count: int) -> string`<br>
183 | As `substring`, but without any checking on the indices, and without being able to use negative indices (and thus faster).  If you do not specify a `count` then it will include all characters up to the end of the string.  Generally speaking, just use `substring` instead.
184 | 
185 | 
186 | * `trim (str: string) -> string`<br>
187 | Returns the substring of `str` with all characters from the start and end which are <= `#char " "` removed (i.e. all whitespace and control codes).
188 | 
189 | 
190 | * `trim (str: string, tool: %Tool, character_compare := default_compare) -> string`<br>
191 | Returns the substring of `str` with all characters matching tool removed from the start and end.
192 | 
193 | 
194 | * `trim_start (str: string, tool: %Tool, character_compare := default_compare) -> string`<br>
195 | Returns the substring of `str` with all characters matching tool removed from the start.
196 | 
197 | 
198 | * `trim_end (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`<br>
199 | Returns the substring of `str` with all characters matching tool removed from the end.
200 | 
201 | 
202 | * `trim_to (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`<br>
203 | Returns the substring of `str` with all characters before the first instance and after the last instance of tool removed.  If tool is not found then the entire string is returned.
204 | 
205 | 
206 | * `trim_start_to (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`<br>
207 | Returns the substring of `str` with all characters before the first instance of tool removed from the start.  If tool is not found then the entire string is returned.
208 | 
209 | 
210 | * `trim_end_to (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`<br>
211 | Returns the substring of `str` with all characters after the last instance of tool removed from the end.  If tool is not found then the entire string is returned.
212 | 
213 | 
214 | * `trim_through (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`<br>
215 | Returns the substring of `str` with all characters before the first instance and after the last instance of tool, as well as the tool itself, removed.  If tool is not found then the entire string is returned.
216 | 
217 | 
218 | * `trim_start_through (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`<br>
219 | Returns the substring of `str` with all characters before the first instance of tool, and the tool, removed from the start.  If tool is not found then the entire string is returned.
220 | 
221 | 
222 | * `trim_end_through (str: string, tool: %Tool, character_compare := default_compare) -> string, found: bool`<br>
223 | Returns the substring of `str` with all characters after the last instance of tool, and the tool, removed from the end.  If tool is not found then the entire string is returned.
224 | 
225 | 
226 | #### Consuming
227 | 
228 | 
229 | * `advance_to (haystack: *string, needle: %Tool) -> characters_skipped: int, found: bool`<br>
230 | Modifies `haystack` in-place, moving its start point forward until it hits `%Tool` (or empties).
231 | 
232 | 
233 | * `advance_through (haystack: *string, needle: %Tool) -> characters_skipped: int, found: bool`<br>
234 | Modifies `haystack` in-place, moving its start point forward until it hits and reaches the end of `%Tool` (or empties).
235 | 
236 | 
237 | #### Splitting
238 | 
239 | 
240 | All split procedures return an iterator (a for-expansion).  If you want the substrings to be in an array you can feed this iterator into `to_array`.
241 | 
242 | 
243 | * `split (text: string, separator: %Tool, skip_empty := false, max_results := 0, keep_separator := .NO, character_compare := default_compare)`<br>
244 | Used to iterate over `text` in a `for` loop, splitting the string by the chosen tool.
245 | If `skip_empty` is set then your code will not be called with the empty string (i.e. when there are two consecutive `separator`s).
246 | If `max_results` is non-zero then `text` will only be split into at most that
247 | many pieces.
248 | If `keep_separator` is set to `.AS_PREFIX` or `.AS_POSTFIX` then the separator will be included in the strings, at the specified position.
249 | 
250 | For example:
251 | ```jai
252 |     for word, index: split(" aa  bb  cc dd  ", #char " ", skip_empty = true, max_results = 3) {
253 |         if index == {
254 |             case  0; assert(word == "aa");
255 |             case  1; assert(word == "bb");
256 |             case  2; assert(word == "cc");
257 |             case  3; assert(false);
258 |         }
259 |     }
260 | 
261 |     for word, index: split("Hello, World.", ", ", keep_separator = .AS_POSTFIX) {
262 |         if index == {
263 |             case  0; assert(word == "Hello, ");
264 |             case  1; assert(word == "World.");
265 |             case  2; assert(false);
266 |         }
267 |     }
268 | ```
269 | 
270 | 
271 | * `split_into_two (text: string, separator: %Tool, keep_separator := .NO, character_compare := default_compare) -> string, string`<br>
272 | As split with max_results set to 2, but returns the two strings directly rather than an iterator.
273 | 
274 | 
275 | * `to_array (splitter: $T/Splitter, reversed := false) -> [..] string`<br>
276 | Executes the splitter, generating an array.
277 | ```jai
278 | splitter := split("How about a nice game of chess?", #char " ");
279 | words := to_array(splitter,, temp);
280 | assert(words.count == 7);
281 | ```
282 | 
283 | 
284 | * `to_array (array: *[] string, splitter: $T/Splitter, reversed := false, clear_unused := true) -> [] string`<br>
285 | Executes the splitter and places its results into the array.  Returns an array_view over the array with the used count.
286 | If `clear_unused` is set then any trailing slots in the array after the resulting count will be cleared.
287 | ```jai
288 | parts : [20] string;
289 | view := into_array(*parts, split("How about a nice game of chess?", #char " "));
290 | assert(view.count == 7);
291 | ```
292 | 
293 | 
294 | * `count_split (text: string, count: int, max_results := 0)`<br>
295 | As `split`, except the string is split into sections with the specified `count`.
296 | 
297 | 
298 | * `index_split (text: string, indexes: .. int, skip_empty := false, max_results := 0)`<br>
299 | As `split`, except the string is split at the specified indices.
300 | 
301 | 
302 | * `line_split (text: string, keep_end := false, skip_empty := false, max_results := 0, keep_separator := .NO)`<br>
303 | As `split` using `#char "\n"` as the tool, but will automatically handle windows vs unix file formats (i.e. will take care of `"\r\n"`).
304 | 
305 | 
306 | #### Querying
307 | 
308 | 
309 | * `first_index (haystack: string, needle: %Tool, start_index := 0, character_compare := default_compare) -> index: int, found: bool, [to_index: int]`<br>
310 | Returns the first index in `haystack` at which `needle` occurs, or `-1` if it does not occur.  `found` will be true if `needle` was found.  In the case when `%Tool` is an `Index_Proc`, `to_index` will be set to the index the pattern terminates at.
311 | 
312 | 
313 | * `last_index (haystack: string, needle: %Tool, start_index := 0, character_compare := default_compare) -> index: int, found: bool, [to_index: int]`<br>
314 | As per `first_index`, but working backwards from the end of the `haystack`.
315 | 
316 | 
317 | * `contains (haystack: string, needle: %Tool, character_compare := default_compare) -> bool`<br>
318 | Whether `needle` occurs within `haystack`.
319 | 
320 | 
321 | * `count (haystack: string, needle: %Tool, character_compare := default_compare) -> int`<br>
322 | How many times `needle` occurs within `haystack` (non-overlapping).
323 | 
324 | 
325 | * `equal (a: string, b: string, character_compare := default_compare) -> bool`<br>
326 | Returns whether the two strings are equal, using current or specified comparator.
327 | 
328 | 
329 | * `is_any (needle: u8, characters: [] u8, character_compare := default_compare) -> bool`<br>
330 | Returns whether `needle` is equal to any of `characters`.
331 | 
332 | 
333 | * `is_lower (char: u8) -> bool`<br>
334 | Whether `char` falls in the range `#char "a" - #char "z"`.
335 | 
336 | 
337 | * `is_upper (char: u8) -> bool`<br>
338 | Whether `char` falls in the range `#char "A" - #char "Z"`.
339 | 
340 | 
341 | * `starts_with (haystack: string, needle: %Tool, character_compare := default_compare) -> bool`<br>
342 | Returns whether `haystack` begins with `needle`.
343 | 
344 | 
345 | * `ends_with (haystack: string, needle: %Tool, character_compare := default_compare) -> bool`<br>
346 | Returns whether `haystack` ends with `needle`.
347 | 
348 | 
349 | #### Mutating
350 | 
351 | 
352 | * `pad_start (str: string, desired_count: int, pad_with := " ", null_terminate := false) -> string`<br>
353 | Returns a copy of `str` with `pad_with` repeated at the beginning such that the string length reaches the `desired_count`.
354 | Note that `pad_with` can be multiple characters long (and in fact the default value is actually multiple spaces, for performance).
355 | 
356 | 
357 | * `pad_start (str: string, desired_count: int, pad_with: u8, null_terminate := false) -> string`<br>
358 | Returns a copy of `str` with `pad_with` repeated at the beginning such that the string length reaches the `desired_count`.
359 | 
360 | 
361 | * `pad_end (str: string, desired_count: int, pad_with := " ", null_terminate := false) -> string`<br>
362 | Returns a copy of `str` with `pad_with` repeated from the end such that the string length reaches the `desired_count`.
363 | Note that `pad_with` can be multiple characters long (and in fact the default value is actually multiple spaces, for performance).
364 | 
365 | 
366 | * `pad_end (str: string, desired_count: int, pad_with: u8, null_terminate := false) -> string`<br>
367 | Returns a copy of `str` with `pad_with` repeated from the end such that the string length reaches the `desired_count`.
368 | 
369 | 
370 | * `pad (str: string, desired_count: int, pad_with := " ", null_terminate := false) -> string`<br>
371 | Returns a copy of `str` with `pad_with` repeated from the beginning *and* from the end such that the string length reaches the `desired_count`.
372 | Note that `pad_with` can be multiple characters long (and in fact the default value is actually multiple spaces, for performance).
373 | 
374 | 
375 | * `pad (str: string, desired_count: int, pad_with: u8, null_terminate := false) -> string`<br>
376 | Returns a copy of `str` with `pad_with` repeated from the beginning *and* from the end such that the string length reaches the `desired_count`.
377 | 
378 | 
379 | * `repeat (str: string, times: int, null_terminate := false) -> string`<br>
380 | Returns a string consisting of `str` repeated `times` times.
381 | 
382 | 
383 | * `replace (haystack: *string, needle: %Tool, replacement: u8, max_replacements := 0, null_terminate := false) -> change_count: int`<br>
384 | Mutates the haystack in-place, replacing `needle` with the `replacement` character specified.
385 | 
386 | 
387 | * `replace (haystack: string, needle: %Tool, replacement: string,  max_replacements := 0, character_compare := default_compare, null_terminate := false) -> string`<br>
388 | Returns a copy of `haystack` with all (non-overlapping) instances of `needle` replaced with `replacement`.
389 | If `max_replacements` is non-zero then at most that many replacements will be made (starting at the beginning of the string).
390 | 
391 | 
392 | * `reverse (str: *string)`<br>
393 | Reverses the characters in `str` in-place.
394 | 
395 | 
396 | * `reverse (str: string, null_terminate := false) -> string`<br>
397 | Returns a copy of `str` with the characters in the reverse order.
398 | 
399 | 
400 | * `to_upper (str: *string)`<br>
401 | Mutates `str` in-place, overwriting any lower-case characters with their upper-case equivalent.
402 | 
403 | 
404 | * `to_upper (str: string, null_terminate := false) -> string`<br>
405 | Returns a copy of `str` with all lower-case characters converted to their upper-case equivalent.
406 | 
407 | 
408 | * `to_lower (str: *string)`<br>
409 | Mutates `str` in-place, overwriting any upper-case characters with their lower-case equivalent.
410 | 
411 | 
412 | * `to_lower (str: string, null_terminate := false) -> string`<br>
413 | Returns a copy of `str` with all upper-case characters converted to their lower-case equivalent.
414 | 
415 | 
416 | * `to_capitalized (str: *string, preserve_caps := true)`<br>
417 | Sets the first letter of `str` to upper-case.  If `preserve_caps` is set to false, will set all following letters to lower-case.
418 | 
419 | 
420 | * `to_capitalized (str: string, preserve_caps := true, null_terminate := false) -> string`<br>
421 | Returns a copy of `str` with the first letter converted to upper-case.  If `preserve_caps` is disabled then all subsequent letters will be converted to lower-case.
422 | 
423 | 
424 | * `camel_from_snake (str: string, preserve_caps := false, null_terminate := false) -> string`<br>
425 | Returns a copy of underscore-separated `str`, changed into programmer CamelCase; i.e. with the leading letter, and every letter after an underscore, converted to upper-case, and with underscores removed.  If `preserve_caps` is enabled then the underscore removal still happens, but the case is kept.
426 | 
427 | For example:
428 | ```jai
429 |     assert( camel_from_snake("play_RTS")       == "playRts" );
430 |     assert( camel_from_snake("play_RTS", true) == "playRTS" );
431 | ```
432 | 
433 | 
434 | * `snake_from_camel (str: string, preserve_caps := false, null_terminate := false) -> string`<br>
435 | Returns a copy of CamelCased `str`, changed into programmer snake case; i.e. converted to lower-case, but split by `_` at each formerly upper-case letter edge.  If `preserve_caps` is enabled then the split still happens, but the case is kept.
436 | 
437 | For example:
438 | ```jai
439 |     assert( snake_from_camel("PlayRTS")       == "play_rts" );
440 |     assert( snake_from_camel("PlayRTS", true) == "play_RTS" );
441 | ```
442 | 
443 | 
444 | #### Utilities
445 | 
446 | 
447 | * `char_as_string (char: *u8) -> string`<br>
448 | Returns a string representation of the single character provided.
449 | 
450 | 
451 | * `copy_string (str: string, null_terminate: bool) -> string`<br>
452 | Returns a copy of `str`.
453 | 
454 | 
455 | * `join (strings: .. string, null_terminate := false) -> string`<br>
456 | Returns a single string created by concatenating all the provided strings together.
457 | 
458 | 
459 | * `join (strings: [] string, null_terminate := false) -> string`<br>
460 | Returns a single string, the result of joining all the strings in the `strings` array together.
461 | 
462 | 
463 | * `join (strings: [] string, separator: string|u8, null_terminate := false) -> string`<br>
464 | Returns a single string, the result of joining all the strings in the `strings` array together with `separator` between them.
465 | 
466 | 
467 | * `join (strings: $T/Splitter, null_terminate := false) -> string`<br>
468 | Returns a single string, the result of joining all the strings in the `strings` iterator together.
469 | 
470 | 
471 | * `join (strings: $T/Splitter, separator: string|u8, null_terminate := false) -> string`<br>
472 | Returns a single string, the result of joining all the strings in the `strings` iterator together with `separator` between them.
473 | 
474 | 
475 | * `apply_backslash (str: string, null_terminate := false) -> string, well_formed: bool`<br>
476 | Converts legal jai backslash escape sequences (e.g. `\n`, `\t`, etc) into their specified character. For example, the two character string `"\n"` will yield a single character string with byte value `10`.
477 | `well_formed` will be true if all backslash characters in `str` are followed by an appropriate escape sequence.
478 | 
479 | 
480 | * `escape (str: string, null_terminate := false) -> string`<br>
481 | Replaces the special characters which jai uses backslash escapes to represent with said backslash escape sequence. For example, the single character string with byte value `10` will yield the two character string `"\n"`.
482 | 
483 | 
484 | * `reverse_index_proc (index_proc: Index_Proc, haystack: string, needle: string, boundary_index: int) -> from_index: int, to_index: int, found: bool`<br>
485 | Can be used to automatically make a reversed version of an `Index_Proc` (see `question_mark_index` example above).  It does so in an extremely inefficient way; if you care about the performance of the reverse search then you should code it directly.
486 | 


--------------------------------------------------------------------------------
/Strings/splitting.jai:
--------------------------------------------------------------------------------
  1 | Splitter :: struct {  // Options shared by every splitter; each Split_By_* struct embeds this as `base`.
  2 |     text              : string;  // The string being cut into pieces.
  3 |     max_results       : int;  // 0 = no limit; 1 = yield `text` unsplit; otherwise the last piece holds the unsplit remainder.
  4 |     skip_empty        : bool;  // When true, pieces the iterator considers empty are not yielded.
  5 |     keep_separator    : Keep_Separator;  // .NO, .AS_PREFIX or .AS_POSTFIX: whether (and on which side) the matched separator stays in the pieces.
  6 |     character_compare : Character_Compare;  // Forwarded to first_index/last_index by the string, chars and char splitters.
  7 | }
  8 | 
  9 | Split_By_String :: struct {  // Splitter whose separator is a whole string.
 10 |     #as using base: Splitter;
 11 |     separator : string;
 12 |     first_index : String_Index_Proc;  // Search procedure handed to first_index() during forward iteration.
 13 |     last_index  : String_Index_Proc;  // Search procedure handed to last_index() during reverse iteration.
 14 | }
 15 | 
 16 | Split_By_Chars :: struct {  // Splitter whose separator is an array of bytes; its iterator steps over exactly one byte per match.
 17 |     #as using base: Splitter;
 18 |     separator : [] u8;  // presumably "any one of these bytes separates" -- confirm against the [] u8 overloads of first_index/last_index
 19 | }
 20 | 
 21 | Split_By_Char :: struct {  // Splitter whose separator is a single byte.
 22 |     #as using base: Splitter;
 23 |     separator : u8;
 24 | }
 25 | 
 26 | Split_By_Proc :: struct {  // Splitter whose matches are located by a user-supplied Index_Proc.
 27 |     #as using base: Splitter;
 28 |     separator : string;  // Passed to index_proc as the needle.
 29 |     index_proc : Index_Proc;  // Called as index_proc(text, separator, boundary_index, reverse) -> from_index, to_index, found.
 30 | }
 31 | 
 32 | Split_By_Index :: struct {  // Splitter that cuts `text` at explicit positions instead of searching for a separator.
 33 |     #as using base: Splitter;
 34 |     indexes : [] int;  // Cut positions; the iterator visits them in array order (or last-to-first when reversed).
 35 | }
 36 | 
 37 | Split_By_Count :: struct {  // Splitter that cuts `text` into consecutive pieces of a fixed length.
 38 |     #as using base: Splitter;
 39 |     count : int;  // Length of each piece; the iterator advances its cursor by this amount per piece.
 40 | }
 41 | 
 42 | Split_By_Line :: struct {  // Splitter produced by line_split(); it carries no state beyond the shared Splitter options.
 43 |     #as using base: Splitter;
 44 | }
 45 | 
 46 | // Runs `splitter` to completion and collects every piece into a dynamic array (iterating with `for <` when `reversed`).
 47 | to_array :: (splitter: $T/Splitter, reversed := false) -> [..] string {
 48 |     pieces : [..] string;
 49 |     if !reversed  for splitter    array_add(*pieces, it);
 50 |     else          for < splitter  array_add(*pieces, it);
 51 |     return pieces;
 52 | }
 53 | // Writes the pieces of `splitter` into the caller's `array` and returns a view of the filled prefix; unused slots are zeroed when `clear_unused`.
 54 | to_array :: (array: *[] string, splitter: $T/Splitter, reversed := false, clear_unused := true) -> [] string {
 55 |     // @Note We set max_results so we don't overflow the destination array: the last entry in the array may
 56 |     //       therefore contain further potential splits.
 57 | 
 58 |     result : [] string = .{0, array.data};
 59 |     if array.count <= 0  return result;  // max_results == 0 means "no limit", so clamping to an empty destination would let the loop write past it.
 60 |     _splitter := splitter;
 61 |     if _splitter.max_results == 0 || _splitter.max_results > array.count
 62 |         _splitter.max_results = array.count;
 63 |     // Grow the view by one slot before each store so result[it_index] is always inside result.count.
 64 |     if reversed  for < _splitter  { result.count += 1; result[it_index] = it; }
 65 |     else         for _splitter    { result.count += 1; result[it_index] = it; }
 66 | 
 67 |     remainder := array.count - result.count;
 68 |     if clear_unused && remainder > 0
 69 |         memset(array.data + result.count, 0, remainder * size_of(string));
 70 | 
 71 |     return result;
 72 | }
 73 | // Fixed-size-array convenience overload: views `array` as a `[] string` and defers to the `*[] string` version.
 74 | to_array :: inline (array: *[$N] string, splitter: $T/Splitter, reversed := false, clear_unused := true) -> [] string {
 75 |     view : [] string = array.*;
 76 |     return to_array(*view, splitter, reversed = reversed, clear_unused = clear_unused);
 77 | }
 78 | 
 79 | // Cuts `text` at `index`: returns slice(text, 0, index) and slice(text, index). Uses `slice`, not the `raw_slice` used by split_into_two.
 80 | split_at_index :: (text: string, index: int) -> string, string {
 81 |     return slice(text, 0, index), slice(text, index);
 82 | }
 83 | 
 84 | // Splits `text` around the first occurrence of the byte `separator`, which is dropped. No match: returns `text` and "".
 85 | split_into_two :: (text: string, separator: u8) -> string, string {
 86 |     cut, matched := first_index(text, separator);
 87 |     if matched  return raw_slice(text, 0, cut), raw_slice(text, cut + 1);
 88 |     return text, "";
 89 | }
 90 | // Splits `text` at the first index reported by first_index for the byte array `separator`; one byte is dropped. No match: `text` and "".
 91 | split_into_two :: (text: string, separator: [] u8) -> string, string {
 92 |     cut, matched := first_index(text, separator);
 93 |     if matched  return raw_slice(text, 0, cut), raw_slice(text, cut + 1);
 94 |     return text, "";
 95 | }
 96 | // Splits `text` around the first occurrence of the string `separator`; all separator.count bytes are dropped. No match: `text` and "".
 97 | split_into_two :: (text: string, separator: string) -> string, string {
 98 |     cut, matched := first_index(text, separator);
 99 |     if matched  return raw_slice(text, 0, cut), raw_slice(text, cut + separator.count);
100 |     return text, "";
101 | }
102 | // Splits `text` around the first match located via `separator_proc`; the matched span is dropped. No match: returns `text` and "".
103 | split_into_two :: (text: string, separator: string, $$separator_proc: Index_Proc) -> string, string {
104 |     match_start, matched, match_end := first_index(text, separator, separator_proc);
105 |     if matched  return raw_slice(text, 0, match_start), raw_slice(text, match_end);
106 |     return text, "";
107 | }
108 | // Generic fallback for any separator type: fills a two-slot buffer via the destination-array `split` (declared outside this section) and returns both slots.
109 | split_into_two :: (text: string, separator: $T, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> string, string {
110 |     halves : [2] string = ---;
111 |     split(*halves, text, separator, keep_separator = keep_separator, character_compare = character_compare);
112 |     return halves[0], halves[1];
113 | }
114 | 
115 | // Builds a Split_By_String over `text`; it only fills in the struct, taking both index procedures from context.strings_thread_data.
116 | split :: inline (text: string, separator: string, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> Split_By_String {
117 |     return Split_By_String.{.{text, max_results, skip_empty, keep_separator, character_compare}, separator, context.strings_thread_data.string_first_index, context.strings_thread_data.string_last_index};
118 | }
119 | // Iterator for Split_By_String (`for` / `for <`): yields each piece of `text` between separator matches as `it`, numbered from 0 in `it_index`.
120 | for_expansion :: (splitter: *Split_By_String, body: Code, flags: For_Flags) #expand {
121 | 	#assert(!(flags & .POINTER));  // by-pointer iteration is rejected at compile time
122 |     reverse := flags & .REVERSE;
123 | 
124 |     `it : string = ---;
125 |     `it_index : int = -1;  // incremented before each yield, so the first piece sees 0
126 | 
127 |     for escape: 1..1 { // so break avoids the cleanup #insert
128 |         if splitter.max_results == 1 {
129 |             it = splitter.text;
130 |             it_index = 0;
131 |             // A limit of one result means no splitting: the whole text is yielded once.
132 |             #insert body;
133 |         }
134 |         else {
135 |             max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;  // last it_index the loops may yield; the remainder yielded after them is result number max_results
136 |             separator_offset_to_start, separator_offset_to_end, empty_count : int = ---;
137 |             // The two offsets are added to a match index to get the slice bounds either side of it; empty_count is the piece length skip_empty treats as empty.
138 |             if reverse {
139 |                 end_index := splitter.text.count;
140 |                 index, found := last_index(splitter.text, splitter.separator, splitter.last_index, splitter.character_compare);
141 | 
142 |                 if splitter.keep_separator == {
143 |                     case .NO;  // separator is dropped from both neighbouring pieces
144 |                     empty_count = 0;
145 |                     separator_offset_to_start = splitter.separator.count;
146 |                     separator_offset_to_end = 0;
147 | 
148 |                     case .AS_PREFIX;  // separator stays at the start of the piece that follows it in the text
149 |                     empty_count = splitter.separator.count;
150 |                     separator_offset_to_start = 0;
151 |                     separator_offset_to_end = 0;
152 | 
153 |                     case .AS_POSTFIX;  // separator stays at the end of the piece that precedes it in the text
154 |                     empty_count = splitter.separator.count;
155 |                     separator_offset_to_start = splitter.separator.count;
156 |                     separator_offset_to_end = splitter.separator.count;
157 |                 }
158 | 
159 |                 while found {
160 |                     defer index, found = last_index(splitter.text, splitter.separator, index, splitter.last_index, splitter.character_compare);
161 |                     // The deferred search above runs at the end of every iteration, passing the current match index as the boundary for the next search.
162 |                     it = raw_slice(splitter.text, index + separator_offset_to_start, end_index);
163 |                     end_index = index + separator_offset_to_end;
164 | 
165 |                     if !splitter.skip_empty || it.count != empty_count {
166 |                         it_index += 1;
167 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;  // stop after the body so the remainder becomes the final result
168 | 
169 |                         #insert(break = break escape) body;  // a user `break` leaves the outer `escape` loop, skipping the remainder yield
170 |                     }
171 | 
172 |                 }
173 | 
174 |                 it = raw_slice(splitter.text, 0, end_index);
175 |             }
176 |             else {
177 |                 start_index := 0;
178 |                 index, found := first_index(splitter.text, splitter.separator, splitter.first_index, splitter.character_compare);
179 | 
180 |                 if splitter.keep_separator == {
181 |                     case .NO;
182 |                     empty_count = 0;
183 |                     separator_offset_to_start = splitter.separator.count;
184 |                     separator_offset_to_end = 0;
185 | 
186 |                     case .AS_PREFIX;
187 |                     empty_count = splitter.separator.count;
188 |                     separator_offset_to_start = 0;
189 |                     separator_offset_to_end = 0;
190 | 
191 |                     case .AS_POSTFIX;
192 |                     empty_count = splitter.separator.count;
193 |                     separator_offset_to_start = splitter.separator.count;
194 |                     separator_offset_to_end = splitter.separator.count;
195 |                 }
196 | 
197 |                 while found {
198 |                     defer index, found = first_index(splitter.text, splitter.separator, index + splitter.separator.count, splitter.first_index, splitter.character_compare);
199 |                     // The deferred search resumes just past the current match, so matches never overlap.
200 |                     it = raw_slice(splitter.text, start_index, index + separator_offset_to_end);
201 |                     start_index = index + separator_offset_to_start;
202 | 
203 |                     if !splitter.skip_empty || it.count > empty_count {  // NOTE(review): uses `>` where the reverse loop and the remainder check use `!=` -- confirm which is intended
204 |                         it_index += 1;
205 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;
206 | 
207 |                         #insert(break = break escape) body;
208 |                     }
209 |                 }
210 | 
211 |                 it = raw_slice(splitter.text, start_index);
212 |             }
213 |             // Whatever text is left (before the first match in reverse, after the last match going forward) is yielded as the final piece.
214 |             if !splitter.skip_empty || it.count != empty_count {
215 |                 it_index += 1;
216 |                 #insert body;
217 |             }
218 |         }
219 |     }
220 | }
221 | 
222 | // Builds a Split_By_Chars over `text` for a byte-array separator; it only fills in the struct.
223 | split :: inline (text: string, separator: [] u8, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> Split_By_Chars {
224 |     return Split_By_Chars.{.{text, max_results, skip_empty, keep_separator, character_compare}, separator};
225 | }
226 | // Iterator for Split_By_Chars (`for` / `for <`): same scheme as the string splitter, but every match is one byte wide, so the offsets are 0 or 1.
227 | for_expansion :: (splitter: *Split_By_Chars, body: Code, flags: For_Flags) #expand {
228 |     #assert(!(flags & .POINTER));  // by-pointer iteration is rejected at compile time
229 |     reverse := flags & .REVERSE;
230 | 
231 |     `it : string = ---;
232 |     `it_index : int = -1;  // incremented before each yield, so the first piece sees 0
233 | 
234 |     for escape: 1..1 { // so break avoids the cleanup #insert
235 |         if splitter.max_results == 1 {
236 |             it = splitter.text;
237 |             it_index = 0;
238 |             // A limit of one result means no splitting: the whole text is yielded once.
239 |             #insert body;
240 |         }
241 |         else {
242 |             max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;  // last it_index the loops may yield; the remainder yielded after them is result number max_results
243 |             separator_offset_to_start, separator_offset_to_end, empty_count : int = ---;
244 |             // The two offsets are added to a match index to get the slice bounds either side of it; empty_count is the piece length skip_empty treats as empty.
245 |             if reverse {
246 |                 end_index := splitter.text.count;
247 |                 index, found := last_index(splitter.text, splitter.separator, splitter.character_compare);
248 |                 if splitter.keep_separator == {
249 |                     case .NO;  // separator byte is dropped from both neighbouring pieces
250 |                     empty_count = 0;
251 |                     separator_offset_to_start = 1;
252 |                     separator_offset_to_end = 0;
253 | 
254 |                     case .AS_PREFIX;  // separator byte stays at the start of the piece that follows it in the text
255 |                     empty_count = 1;
256 |                     separator_offset_to_start = 0;
257 |                     separator_offset_to_end = 0;
258 | 
259 |                     case .AS_POSTFIX;  // separator byte stays at the end of the piece that precedes it in the text
260 |                     empty_count = 1;
261 |                     separator_offset_to_start = 1;
262 |                     separator_offset_to_end = 1;
263 |                 }
264 | 
265 |                 while found {
266 |                     defer index, found = last_index(splitter.text, splitter.separator, index, splitter.character_compare);
267 |                     // The deferred search above runs at the end of every iteration, passing the current match index as the boundary for the next search.
268 |                     it = raw_slice(splitter.text, index + separator_offset_to_start, end_index);
269 |                     end_index = index + separator_offset_to_end;
270 | 
271 |                     if !splitter.skip_empty || it.count != empty_count {
272 |                         it_index += 1;
273 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;  // stop after the body so the remainder becomes the final result
274 | 
275 |                         #insert(break = break escape) body;  // a user `break` leaves the outer `escape` loop, skipping the remainder yield
276 |                     }
277 | 
278 |                 }
279 | 
280 |                 it = raw_slice(splitter.text, 0, end_index);
281 |             }
282 |             else {
283 |                 start_index := 0;
284 |                 index, found := first_index(splitter.text, splitter.separator, splitter.character_compare);
285 |                 if splitter.keep_separator == {
286 |                     case .NO;
287 |                     empty_count = 0;
288 |                     separator_offset_to_start = 1;
289 |                     separator_offset_to_end = 0;
290 | 
291 |                     case .AS_PREFIX;
292 |                     empty_count = 1;
293 |                     separator_offset_to_start = 0;
294 |                     separator_offset_to_end = 0;
295 | 
296 |                     case .AS_POSTFIX;
297 |                     empty_count = 1;
298 |                     separator_offset_to_start = 1;
299 |                     separator_offset_to_end = 1;
300 |                 }
301 | 
302 |                 while found {
303 |                     defer index, found = first_index(splitter.text, splitter.separator, index + 1, splitter.character_compare);
304 |                     // The deferred search resumes one byte past the current match.
305 |                     it = raw_slice(splitter.text, start_index, index + separator_offset_to_end);
306 |                     start_index = index + separator_offset_to_start;
307 | 
308 |                     if !splitter.skip_empty || it.count != empty_count {
309 |                         it_index += 1;
310 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;
311 | 
312 |                         #insert(break = break escape) body;
313 |                     }
314 |                 }
315 | 
316 |                 it = raw_slice(splitter.text, start_index);
317 |             }
318 |             // Whatever text is left (before the first match in reverse, after the last match going forward) is yielded as the final piece.
319 |             if !splitter.skip_empty || it.count != empty_count {
320 |                 it_index += 1;
321 |                 #insert body;
322 |             }
323 |         }
324 |     }
325 | }
326 | 
327 | // Builds a Split_By_Char over `text` for a single-byte separator; it only fills in the struct.
328 | split :: inline (text: string, separator: u8, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO, character_compare := default_character_compare) -> Split_By_Char {
329 |     return Split_By_Char.{.{text, max_results, skip_empty, keep_separator, character_compare}, separator};
330 | }
331 | // Iterator for Split_By_Char (`for` / `for <`): same scheme as the string splitter, with a one-byte separator, so the offsets are 0 or 1.
332 | for_expansion :: (splitter: *Split_By_Char, body: Code, flags: For_Flags) #expand {
333 | 	#assert(!(flags & .POINTER));  // by-pointer iteration is rejected at compile time
334 |     reverse := flags & .REVERSE;
335 | 
336 |     `it : string = ---;
337 |     `it_index : int = -1;  // incremented before each yield, so the first piece sees 0
338 | 
339 |     for escape: 1..1 { // so break avoids the cleanup #insert
340 |         if splitter.max_results == 1 {
341 |             it = splitter.text;
342 |             it_index = 0;
343 |             // A limit of one result means no splitting: the whole text is yielded once.
344 |             #insert body;
345 |         }
346 |         else {
347 |             max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;  // last it_index the loops may yield; the remainder yielded after them is result number max_results
348 |             separator_offset_to_start, separator_offset_to_end, empty_count : int = ---;
349 |             // The two offsets are added to a match index to get the slice bounds either side of it; empty_count is the piece length skip_empty treats as empty.
350 |             if reverse {
351 |                 end_index := splitter.text.count;
352 |                 index, found := last_index(splitter.text, splitter.separator, splitter.character_compare);
353 |                 if splitter.keep_separator == {
354 |                     case .NO;  // separator byte is dropped from both neighbouring pieces
355 |                     empty_count = 0;
356 |                     separator_offset_to_start = 1;
357 |                     separator_offset_to_end = 0;
358 | 
359 |                     case .AS_PREFIX;  // separator byte stays at the start of the piece that follows it in the text
360 |                     empty_count = 1;
361 |                     separator_offset_to_start = 0;
362 |                     separator_offset_to_end = 0;
363 | 
364 |                     case .AS_POSTFIX;  // separator byte stays at the end of the piece that precedes it in the text
365 |                     empty_count = 1;
366 |                     separator_offset_to_start = 1;
367 |                     separator_offset_to_end = 1;
368 |                 }
369 | 
370 |                 while found {
371 |                     defer index, found = last_index(splitter.text, splitter.separator, index, splitter.character_compare);
372 |                     // The deferred search above runs at the end of every iteration, passing the current match index as the boundary for the next search.
373 |                     it = raw_slice(splitter.text, index + separator_offset_to_start, end_index);
374 |                     end_index = index + separator_offset_to_end;
375 | 
376 |                     if !splitter.skip_empty || it.count != empty_count {
377 |                         it_index += 1;
378 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;  // stop after the body so the remainder becomes the final result
379 | 
380 |                         #insert(break = break escape) body;  // a user `break` leaves the outer `escape` loop, skipping the remainder yield
381 |                     }
382 |                 }
383 | 
384 |                 it = raw_slice(splitter.text, 0, end_index);
385 |             }
386 |             else {
387 |                 start_index := 0;
388 |                 index, found := first_index(splitter.text, splitter.separator, splitter.character_compare);
389 |                 if splitter.keep_separator == {
390 |                     case .NO;
391 |                     empty_count = 0;
392 |                     separator_offset_to_start = 1;
393 |                     separator_offset_to_end = 0;
394 | 
395 |                     case .AS_PREFIX;
396 |                     empty_count = 1;
397 |                     separator_offset_to_start = 0;
398 |                     separator_offset_to_end = 0;
399 | 
400 |                     case .AS_POSTFIX;
401 |                     empty_count = 1;
402 |                     separator_offset_to_start = 1;
403 |                     separator_offset_to_end = 1;
404 |                 }
405 | 
406 |                 while found {
407 |                     defer index, found = first_index(splitter.text, splitter.separator, index + 1, splitter.character_compare);
408 |                     // The deferred search resumes one byte past the current match.
409 |                     it = raw_slice(splitter.text, start_index, index + separator_offset_to_end);
410 |                     start_index = index + separator_offset_to_start;
411 | 
412 |                     if !splitter.skip_empty || it.count != empty_count {
413 |                         it_index += 1;
414 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;
415 | 
416 |                         #insert(break = break escape) body;
417 |                     }
418 |                 }
419 | 
420 |                 it = raw_slice(splitter.text, start_index);
421 |             }
422 |             // Whatever text is left (before the first match in reverse, after the last match going forward) is yielded as the final piece.
423 |             if !splitter.skip_empty || it.count != empty_count {
424 |                 it_index += 1;
425 |                 #insert body;
426 |             }
427 |         }
428 |     }
429 | }
430 | 
431 | // Builds a Split_By_Proc whose matches are located by the caller's `index` procedure; character_compare is left as the `.{}` literal.
432 | split :: inline (text: string, separator: string, $$index: Index_Proc, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO) -> Split_By_Proc {
433 |     return Split_By_Proc.{.{text, max_results, skip_empty, keep_separator, .{}}, separator, index};
434 | }
435 | // Iterator for Split_By_Proc (`for` / `for <`): each match is the span [from_index, to_index) reported by splitter.index_proc.
436 | for_expansion :: (splitter: *Split_By_Proc, body: Code, flags: For_Flags) #expand {
437 | 	#assert(!(flags & .POINTER));  // by-pointer iteration is rejected at compile time
438 |     reverse := flags & .REVERSE;
439 | 
440 |     `it : string = ---;
441 |     `it_index : int = -1;  // incremented before each yield, so the first piece sees 0
442 | 
443 |     for escape: 1..1 { // so break avoids the cleanup #insert
444 |         if splitter.max_results == 1 {
445 |             it = splitter.text;
446 |             it_index = 0;
447 |             // A limit of one result means no splitting: the whole text is yielded once.
448 |             #insert body;
449 |         }
450 |         else {
451 |             max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;  // last it_index the loops may yield; the remainder yielded after them is result number max_results
452 | 
453 |             if reverse {
454 |                 end_index := splitter.text.count;
455 |                 from_index, to_index, found := splitter.index_proc(splitter.text, splitter.separator, splitter.text.count, true);
456 |                 while found {
457 |                     defer from_index, to_index, found = splitter.index_proc(splitter.text, splitter.separator, from_index, true);  // next reverse search is bounded by the start of this match
458 | 
459 |                     if #complete splitter.keep_separator == {
460 |                         case .AS_PREFIX;  // matched span stays at the start of this piece
461 |                         it = raw_slice(splitter.text, from_index, end_index);
462 |                         end_index = from_index;
463 | 
464 |                         case .AS_POSTFIX;  // matched span is left at the end of the piece yielded next
465 |                         it = raw_slice(splitter.text, to_index, end_index);
466 |                         end_index = to_index;
467 | 
468 |                         case .NO;  // matched span is dropped from both pieces
469 |                         it = raw_slice(splitter.text, to_index, end_index);
470 |                         end_index = from_index;
471 |                     }
472 | 
473 |                     if !splitter.skip_empty || it != "" {
474 |                         it_index += 1;
475 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;  // stop after the body so the remainder becomes the final result
476 | 
477 |                         #insert(break = break escape) body;  // a user `break` leaves the outer `escape` loop, skipping the remainder yield
478 |                     }
479 |                 }
480 | 
481 |                 it = raw_slice(splitter.text, 0, end_index);
482 |             }
483 |             else {
484 |                 start_index := 0;
485 |                 from_index, to_index, found := splitter.index_proc(splitter.text, splitter.separator, 0, false);
486 |                 while found {
487 |                     defer from_index, to_index, found = splitter.index_proc(splitter.text, splitter.separator, to_index, false);  // next forward search starts at the end of this match
488 | 
489 |                     if #complete splitter.keep_separator == {
490 |                         case .AS_PREFIX;  // matched span is left at the start of the piece yielded next
491 |                         it = raw_slice(splitter.text, start_index, from_index);
492 |                         start_index = from_index;
493 | 
494 |                         case .AS_POSTFIX;  // matched span stays at the end of this piece
495 |                         it = raw_slice(splitter.text, start_index, to_index);
496 |                         start_index = to_index;
497 | 
498 |                         case .NO;  // matched span is dropped from both pieces
499 |                         it = raw_slice(splitter.text, start_index, from_index);
500 |                         start_index = to_index;
501 |                     }
502 | 
503 |                     if !splitter.skip_empty || it != "" {
504 |                         it_index += 1;
505 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;
506 | 
507 |                         #insert(break = break escape) body;
508 |                     }
509 |                 }
510 | 
511 |                 it = raw_slice(splitter.text, start_index);
512 |             }
513 |             // Whatever text is left (before the first match in reverse, after the last match going forward) is yielded as the final piece.
514 |             if !splitter.skip_empty || it != "" {
515 |                 it_index += 1;
516 | 
517 |                 #insert body;
518 |             }
519 |         }
520 |     }
521 | }
522 | 
523 | // Builds a Split_By_Index that cuts `text` at the given `indexes` (variadic); character_compare is left as the `.{}` literal.
524 | index_split :: inline (text: string, indexes: .. int, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO) -> Split_By_Index {
525 |     return Split_By_Index.{.{text, max_results, skip_empty, keep_separator, .{}}, indexes};
526 | }
527 | // Iterator for Split_By_Index (`for` / `for <`): yields the stretches of `text` between consecutive cut positions, then the remainder.
528 | for_expansion :: (splitter: *Split_By_Index, body: Code, flags: For_Flags) #expand {
529 | 	#assert(!(flags & .POINTER));  // by-pointer iteration is rejected at compile time
530 |     reverse := flags & .REVERSE;
531 | 
532 |     `it : string = ---;
533 |     `it_index : int = -1;  // incremented before each yield, so the first piece sees 0
534 | 
535 |     for escape: 1..1 { // so break avoids the cleanup #insert
536 |         if splitter.max_results == 1 {
537 |             it = splitter.text;
538 |             it_index = 0;
539 |             // A limit of one result means no splitting: the whole text is yielded once.
540 |             #insert body;
541 |         }
542 |         else {
543 |             max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;  // last it_index the loops may yield; the remainder yielded after them is result number max_results
544 | 
545 |             if reverse {
546 |                 end_index := splitter.text.count;
547 |                 for < index, splitter_index: splitter.indexes {
548 |                     start_index := slice_index(splitter.text, ifx index < end_index then index else end_index);  // cut is capped at end_index, then passed through slice_index (helper not visible in this section)
549 |                     it = raw_slice(splitter.text, start_index, end_index);
550 |                     end_index = start_index;
551 | 
552 |                     if splitter.skip_empty && it == ""  continue;
553 | 
554 |                     it_index += 1;
555 |                     defer if splitter.max_results != 0 && it_index >= max_it_index  break;  // stop after the body so the remainder becomes the final result
556 | 
557 |                     #insert(break = break escape) body;  // a user `break` leaves the outer `escape` loop, skipping the remainder yield
558 |                 }
559 | 
560 |                 it = raw_slice(splitter.text, 0, end_index);
561 |             }
562 |             else {
563 |                 start_index := 0;
564 |                 for index, splitter_index: splitter.indexes {
565 |                     start_index = slice_index(splitter.text, start_index);
566 |                     end_index := slice_index(splitter.text, ifx index > start_index then index else start_index);  // cut is raised to at least start_index, so a piece never runs backwards
567 |                     it = raw_slice(splitter.text, start_index, end_index);
568 |                     start_index = end_index;
569 | 
570 |                     if splitter.skip_empty && it == ""  continue;
571 | 
572 |                     it_index += 1;
573 |                     defer if splitter.max_results != 0 && it_index >= max_it_index  break;
574 | 
575 |                     #insert(break = break escape) body;
576 |                 }
577 | 
578 |                 it = raw_slice(splitter.text, start_index);
579 |             }
580 |             // Whatever text is left (before the first cut in reverse, after the last cut going forward) is yielded as the final piece.
581 |             if !splitter.skip_empty || it != "" {
582 |                 it_index += 1;
583 | 
584 |                 #insert body;
585 |             }
586 |         }
587 |     }
588 | }
589 | 
590 | // Builds a Split_By_Count cutting `text` into pieces of `count` bytes; skip_empty is fixed to false and keep_separator to .NO.
591 | count_split :: inline (text: string, count: int, max_results := 0) -> Split_By_Count {
592 |     return Split_By_Count.{.{text, max_results, false, .NO, .{}}, count};
593 | }
594 | // Iterator for Split_By_Count (`for` / `for <`): yields consecutive pieces of splitter.count bytes, then any non-empty remainder.
595 | for_expansion :: (splitter: *Split_By_Count, body: Code, flags: For_Flags) #expand {
596 |     #assert(!(flags & .POINTER));  // by-pointer iteration is rejected at compile time
597 |     reverse := flags & .REVERSE;
598 | 
599 |     `it : string = ---;
600 |     `it_index : int = -1;  // incremented before each yield, so the first piece sees 0
601 | 
602 |     for escape: 1..1 { // so break avoids the cleanup #insert
603 |         if splitter.max_results == 1 || splitter.count <= 0 {
604 |             it = splitter.text;
605 |             it_index = 0;
606 |             // One result, or a non-positive piece length (which would never advance the cursor below): yield the whole text once.
607 |             #insert body;
608 |         }
609 |         else {
610 |             max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;  // last it_index the loops may yield; the remainder yielded after them is result number max_results
611 | 
612 |             if reverse {
613 |                 end_index := splitter.text.count;
614 |                 start_index := end_index - splitter.count;
615 |                 while start_index > 0 {
616 |                     it = raw_slice(splitter.text, start_index, end_index);
617 |                     end_index = start_index;
618 |                     start_index -= splitter.count;
619 | 
620 |                     it_index += 1;
621 |                     defer if splitter.max_results != 0 && it_index >= max_it_index  break;  // stop after the body so the remainder becomes the final result
622 | 
623 |                     #insert(break = break escape) body;  // a user `break` leaves the outer `escape` loop, skipping the remainder yield
624 |                 }
625 | 
626 |                 it = raw_slice(splitter.text, 0, end_index);
627 |             }
628 |             else {
629 |                 start_index := 0;
630 |                 end_index := start_index + splitter.count;
631 |                 while end_index < splitter.text.count {
632 |                     it = raw_slice(splitter.text, start_index, end_index);
633 |                     start_index = end_index;
634 |                     end_index += splitter.count;
635 | 
636 |                     it_index += 1;
637 |                     defer if splitter.max_results != 0 && it_index >= max_it_index  break;
638 | 
639 |                     #insert(break = break escape) body;
640 |                 }
641 | 
642 |                 it = raw_slice(splitter.text, start_index);
643 |             }
644 |             // The leftover (front of the text in reverse, tail going forward) is yielded only when it is non-empty.
645 |             if it {
646 |                 it_index += 1;
647 |                 #insert body;
648 |             }
649 |         }
650 |     }
651 | }
652 | 
653 | 
654 | 
655 | 
656 | // Describes cutting `text` into lines: at most `max_results` results (0 = no cap), optionally skipping empty lines and keeping the line break on each piece.
657 | line_split :: inline (text: string, max_results := 0, skip_empty := false, keep_separator := Keep_Separator.NO) -> Split_By_Line {
658 |     return Split_By_Line.{.{text, max_results, skip_empty, keep_separator, case_sensitive}}; // NOTE(review): case_sensitive is declared outside this procedure - presumably the module's default comparison; confirm
659 | }
660 | 
661 | // Iterates a Split_By_Line: yields splitter.text one line at a time, cutting at each Chars.LF and treating a
662 | // Chars.CR directly before that LF as part of the separator. A reversed loop walks the lines from the end.
663 | // keep_separator picks whether the separator is dropped (.NO), or stays attached to the line after it
664 | // (.AS_PREFIX) or before it (.AS_POSTFIX). skip_empty drops lines that hold no text besides a separator.
665 | for_expansion :: (splitter: *Split_By_Line, body: Code, flags: For_Flags) #expand {
666 |     #assert(!(flags & .POINTER));
667 |     reverse := flags & .REVERSE;
668 | 
669 |     // Only "", "\n" and "\r\n" count as empty: a kept separator is the sole thing an empty line may hold.
670 |     it_is_empty :: () -> bool #expand {
671 |         if `splitter.keep_separator != .NO
672 |             return `it == "" || `it == "\n" || `it == "\r\n";
673 |         else
674 |             return `it == "";
675 |     }
676 | 
677 |     `it : string = ---;
678 |     `it_index : int = -1;
679 | 
680 |     for escape: 1..1 { // so break avoids the cleanup #insert
681 |         if splitter.text == "" || (splitter.max_results != 0 && splitter.max_results < 2) {
682 |             it = splitter.text;
683 |             it_index = 0;
684 |             #insert body;
685 |         }
686 |         else {
687 |             max_it_index := ifx splitter.max_results == 0 then 0 else splitter.max_results - 2;
688 | 
689 |             if reverse {
690 |                 index := splitter.text.count;
691 |                 end_index : int = ---;
692 |                 found := false;
693 | 
694 |                 if #complete splitter.keep_separator == {
695 |                     case .AS_PREFIX;
696 |                     end_index = index;
697 | 
698 |                     case .AS_POSTFIX;
699 |                     end_index = index;
700 |                     if splitter.text[index - 1] == Chars.LF  index -= 1;
701 | 
702 |                     case .NO;
703 |                     // Drop a trailing separator. A CR belongs to it only when the LF is really there.
704 |                     if splitter.text[index - 1] == Chars.LF {
705 |                         index -= 1;
706 |                         if index > 0 && splitter.text[index - 1] == Chars.CR  index -= 1;
707 |                     }
708 |                     end_index = index;
709 |                 }
710 | 
711 |                 index, found = last_index(splitter.text, Chars.LF, index, splitter.character_compare);
712 |                 while found {
713 |                     defer index, found = last_index(splitter.text, Chars.LF, index, splitter.character_compare);
714 | 
715 |                     if #complete splitter.keep_separator == {
716 |                         case .AS_PREFIX;
717 |                         if index > 0 && splitter.text[index - 1] == Chars.CR
718 |                             index -= 1;
719 |                         it = raw_slice(splitter.text, index, end_index);
720 |                         end_index = index;
721 | 
722 |                         case .AS_POSTFIX;
723 |                         it = raw_slice(splitter.text, index + 1, end_index);
724 |                         end_index = index + 1;
725 | 
726 |                         case .NO;
727 |                         it = raw_slice(splitter.text, index + 1, end_index);
728 |                         if index > 0 && splitter.text[index - 1] == Chars.CR
729 |                             end_index = index - 1;
730 |                         else
731 |                             end_index = index;
732 |                     }
733 | 
734 |                     if !splitter.skip_empty || !it_is_empty() {
735 |                         it_index += 1;
736 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;
737 |                         #insert(break = break escape) body;
738 |                     }
739 |                 }
740 | 
741 |                 // The first line is always a result, even when empty (unless skip_empty), as in the forward walk.
742 |                 it = raw_slice(splitter.text, 0, end_index);
743 |                 if !splitter.skip_empty || !it_is_empty() {
744 |                     it_index += 1;
745 |                     #insert body;
746 |                 }
747 |             }
748 |             else {
749 |                 start_index := 0;
750 |                 index, found := first_index(splitter.text, Chars.LF, splitter.character_compare);
751 |                 while found {
752 |                     defer index, found = first_index(splitter.text, Chars.LF, index + 1, splitter.character_compare);
753 | 
754 |                     if #complete splitter.keep_separator == {
755 |                         case .AS_PREFIX;
756 |                         end_index := index;
757 |                         if end_index > 0 && splitter.text[end_index - 1] == Chars.CR
758 |                             end_index -= 1;
759 |                         it = raw_slice(splitter.text, start_index, end_index);
760 |                         start_index = end_index;
761 | 
762 |                         case .AS_POSTFIX;
763 |                         it = raw_slice(splitter.text, start_index, index + 1);
764 |                         start_index = index + 1;
765 | 
766 |                         case .NO;
767 |                         end_index := index;
768 |                         if end_index > 0 && splitter.text[end_index - 1] == Chars.CR
769 |                             end_index -= 1;
770 |                         it = raw_slice(splitter.text, start_index, end_index);
771 |                         start_index = index + 1;
772 |                     }
773 | 
774 |                     if !splitter.skip_empty || !it_is_empty() {
775 |                         it_index += 1;
776 |                         defer if splitter.max_results != 0 && it_index >= max_it_index  break;
777 |                         #insert(break = break escape) body;
778 |                     }
779 |                 }
780 | 
781 |                 if start_index < splitter.text.count {
782 |                     it = raw_slice(splitter.text, start_index, splitter.text.count);
783 |                     if !splitter.skip_empty || !it_is_empty() {
784 |                         it_index += 1;
785 |                         #insert body;
786 |                     }
787 |                 }
788 |             }
789 |         }
790 |     }
791 | }
792 | 


--------------------------------------------------------------------------------
/tests/output.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | Disassembly of 'count' at c:/jai/modules/Strings_Shared/Strings_Shared.jai:1009
  3 | - Stack size 188
  4 | 
  5 | -------- Basic Block 0 -------- defines v5-7 --------
  6 | 
  7 |            (no dominating)
  8 | 
  9 |    0|          lea   v6, stack+0
 10 |    1|        clear   v6 {size 16}
 11 |    2|     string==   v5, v1 == v6
 12 |    3|         jump   1 if v5 == 0
 13 |    4|     constant   v7 = 0
 14 |    5| return_value   v7 -> 1
 15 |    6|       return   
 16 | 
 17 | -------- Basic Block 1 -------- defines v8-11 --------
 18 | 
 19 |            ... dominating: block 0 instruction 3
 20 | 
 21 |    7|          mov   v8, [v3] :1b
 22 |    8|     constant   v9 = 1
 23 |    9|         jump   5 if v8 != v9
 24 |   10|         call   is_upper (v2) -> v10
 25 |   11|         jump   2 if v10 == 0
 26 |   12|          lea   v11, stack+10
 27 |   13|       memcpy   v11, v3 {size 16}
 28 |   14|         jump   4
 29 | 
 30 | -------- Basic Block 2 -------- defines v12-14 --------
 31 | 
 32 |            ... dominating: block 1 instruction 11
 33 | 
 34 |   15|         call   is_lower (v2) -> v12
 35 |   16|         jump   3 if v12 == 0
 36 |   17|         call   to_upper (v2) -> v13
 37 |   18|         copy   v2 = v13
 38 |   19|          lea   v14, stack+10
 39 |   20|       memcpy   v14, v3 {size 16}
 40 |   21|         jump   4
 41 | 
 42 | -------- Basic Block 3 -------- defines v15-16 --------
 43 | 
 44 |            ... dominating: block 2 instruction 16
 45 | 
 46 |   22|          lea   v15, bss+40
 47 |   23|          lea   v16, stack+10
 48 |   24|       memcpy   v16, v15 {size 16}
 49 | 
 50 | -------- Basic Block 4 --------
 51 | 
 52 |            ... dominating: block 1 instruction 11
 53 | 
 54 |   25|         jump   6
 55 | 
 56 | -------- Basic Block 5 -------- defines v17 --------
 57 | 
 58 |            ... dominating: block 1 instruction 9
 59 | 
 60 |   26|          lea   v17, stack+10
 61 |   27|       memcpy   v17, v3 {size 16}
 62 | 
 63 | -------- Basic Block 6 -------- defines v18-19 --------
 64 | 
 65 |            ... dominating: block 1 instruction 9
 66 | 
 67 |   28|     constant   v18 = 0
 68 |   29|          mov   v19, [stack+10] :1b
 69 | 
 70 | -------- Basic Block 7 -------- defines v20 --------
 71 | 
 72 |            ... dominating: block 6 instruction 30
 73 | 
 74 |   30|     constant   v20 = 0
 75 |   31|         jump   23 if v19 != v20
 76 | 
 77 | -------- Basic Block 8 -------- defines v21-26 --------
 78 | 
 79 |            ... dominating: block 7 instruction 31
 80 | 
 81 |   32|          lea   v21, data+8048
 82 |   33|          mov   v22, [v0] :4b
 83 |   34|         imul   v23 = v22 * 8
 84 |   35|   add_extend   v25 = v21 + v23
 85 |   36|          mov   v26, [v25] :8b
 86 | 
 87 | -------- Basic Block 9 -------- defines v27 --------
 88 | 
 89 |            ... dominating: block 8 instruction 37
 90 | 
 91 |   37|     constant   v27 = 0
 92 |   38|         jump   15 if v26 != v27
 93 | 
 94 | -------- Basic Block 10 -------- defines v28-42 --------
 95 | 
 96 |            ... dominating: block 9 instruction 38
 97 | 
 98 |   39|          mov   v29, [v1+8] :8b
 99 |   40|  cast_number   v31 (u64), v29 (*u8)
100 |   41|         copy   v28 = v31
101 |   42|          mov   v33, [v1+8] :8b
102 |   43|  cast_number   v35 (u64), v33 (*u8)
103 |   44|          mov   v36, [v1] :8b
104 |   45|  cast_number   v37 (u64), v36 (s64)
105 |   46|        binop   v32, v35 + v37
106 |   47|          lea   v38, stack+20
107 |   48|     constant   v39 = 0x10
108 |   49|       memset   v38, v2 {count v39}
109 |   50|         copy   v40 = v38
110 |   51|      add_int   v41 = v32 + -15
111 |   52|         call   is_debugger_present () -> v42
112 |   53|         jump   11 if v42 == 0
113 |   54|         call   debug_break ()
114 | 
115 | -------- Basic Block 11 -------- defines v43-50 --------
116 | 
117 |            ... dominating: block 10 instruction 53
118 | 
119 |   55|      asm_x86   (contents not shown)
120 |   56|         copy   v46 = v28
121 |   57|      add_int   v49 = v28 + 15
122 |   58|      add_int   v50 = v32 + -1
123 |   59|         call   min (v49, v50) -> v48
124 |   60|         jump   14 if v46 > v48
125 | 
126 | -------- Basic Block 12 -------- defines v51-54 --------
127 | 
128 |            ... dominating: block 11 instruction 60
129 | 
130 |   61|  cast_number   v52 (*u8), v46 (u64)
131 |   62|          mov   v53, [v52] :1b
132 |   63|         jump   13 if v53 != v2
133 |   64|      add_int   v54 = v18 + 1
134 |   65|         copy   v18 = v54
135 | 
136 | -------- Basic Block 13 -------- defines v55 --------
137 | 
138 |            ... dominating: block 12 instruction 61
139 | 
140 |   66|      compare   v55 = (v46 >= v48)
141 |   67|      add_int   v46 = v46 + 1
142 |   68|         jump   12 if v55 == 0
143 | 
144 | -------- Basic Block 14 --------
145 | 
146 |            ... dominating: block 11 instruction 60
147 | 
148 |   69| return_value   v18 -> 1
149 |   70|       return   
150 | 
151 | -------- Basic Block 15 -------- defines v56 --------
152 | 
153 |            ... dominating: block 8 instruction 37
154 | 
155 |   71|     constant   v56 = 3
156 |   72|         jump   22 if v26 != v56
157 | 
158 | -------- Basic Block 16 -------- defines v57-76 --------
159 | 
160 |            ... dominating: block 15 instruction 72
161 | 
162 |   73|          mov   v58, [v1+8] :8b
163 |   74|  cast_number   v60 (u64), v58 (*u8)
164 |   75|         copy   v57 = v60
165 |   76|          mov   v62, [v1+8] :8b
166 |   77|  cast_number   v64 (u64), v62 (*u8)
167 |   78|          mov   v65, [v1] :8b
168 |   79|  cast_number   v66 (u64), v65 (s64)
169 |   80|        binop   v61, v64 + v66
170 |   81|          lea   v67, stack+30
171 |   82|     constant   v68 = 0x20
172 |   83|       memset   v67, v2 {count v68}
173 |   84|         copy   v69 = v67
174 |   85|          lea   v71, stack+50
175 |   86|         copy   v70 = v71
176 |   87|      add_int   v72 = v61 + -31
177 |   88|      asm_x86   (contents not shown)
178 | 
179 | -------- Basic Block 17 -------- defines v77-86 --------
180 | 
181 |            ... dominating: block 16 instruction 89
182 | 
183 |   89|         jump   18 if v57 >= v72
184 |   90|      asm_x86   (contents not shown)
185 |   91|          mov   v84, [v71] :4b
186 |   92|  cast_number   v85 (s64), v84 (u32)
187 |   93|        binop   v83, v18 + v85
188 |   94|         copy   v18 = v83
189 |   95|      add_int   v86 = v57 + 32
190 |   96|         copy   v57 = v86
191 |   97|         jump   17
192 | 
193 | -------- Basic Block 18 -------- defines v87-91 --------
194 | 
195 |            ... dominating: block 17 instruction 89
196 | 
197 |   98|         copy   v87 = v57
198 |   99|      add_int   v90 = v57 + 31
199 |  100|      add_int   v91 = v61 + -1
200 |  101|         call   min (v90, v91) -> v89
201 |  102|         jump   21 if v87 > v89
202 | 
203 | -------- Basic Block 19 -------- defines v92-97 --------
204 | 
205 |            ... dominating: block 18 instruction 102
206 | 
207 |  103|          mov   v93, [stack+18] :8b
208 |  104|  cast_number   v95 (*u8), v87 (u64)
209 |  105|          mov   v96, [v95] :1b
210 |  106|         call   v93 (v96, v2) -> v92
211 |  107|         jump   20 if v92 == 0
212 |  108|      add_int   v97 = v18 + 1
213 |  109|         copy   v18 = v97
214 | 
215 | -------- Basic Block 20 -------- defines v98 --------
216 | 
217 |            ... dominating: block 19 instruction 103
218 | 
219 |  110|      compare   v98 = (v87 >= v89)
220 |  111|      add_int   v87 = v87 + 1
221 |  112|         jump   19 if v98 == 0
222 | 
223 | -------- Basic Block 21 --------
224 | 
225 |            ... dominating: block 18 instruction 102
226 | 
227 |  113| return_value   v18 -> 1
228 |  114|       return   
229 | 
230 | -------- Basic Block 22 --------
231 | 
232 |            ... dominating: block 8 instruction 37
233 | 
234 |  115|         jump   40
235 | 
236 | -------- Basic Block 23 --------
237 | 
238 |            ... dominating: block 6 instruction 30
239 | 
240 |  116|         jump   40 if v19 != v9
241 | 
242 | -------- Basic Block 24 -------- defines v99-104 --------
243 | 
244 |            ... dominating: block 23 instruction 116
245 | 
246 |  117|          lea   v99, data+8048
247 |  118|          mov   v100, [v0] :4b
248 |  119|         imul   v101 = v100 * 8
249 |  120|   add_extend   v103 = v99 + v101
250 |  121|          mov   v104, [v103] :8b
251 | 
252 | -------- Basic Block 25 -------- defines v105 --------
253 | 
254 |            ... dominating: block 24 instruction 122
255 | 
256 |  122|     constant   v105 = 0
257 |  123|         jump   32 if v104 != v105
258 | 
259 | -------- Basic Block 26 -------- defines v106-128 --------
260 | 
261 |            ... dominating: block 25 instruction 123
262 | 
263 |  124|          mov   v107, [v1+8] :8b
264 |  125|  cast_number   v109 (u64), v107 (*u8)
265 |  126|         copy   v106 = v109
266 |  127|          mov   v111, [v1+8] :8b
267 |  128|  cast_number   v113 (u64), v111 (*u8)
268 |  129|          mov   v114, [v1] :8b
269 |  130|  cast_number   v115 (u64), v114 (s64)
270 |  131|        binop   v110, v113 + v115
271 |  132|          lea   v116, stack+54
272 |  133|     constant   v117 = 0x10
273 |  134|       memset   v116, v2 {count v117}
274 |  135|         copy   v118 = v116
275 |  136|          lea   v119, stack+64
276 |  137|         call   to_lower (v2) -> v120
277 |  138|       memset   v119, v120 {count v117}
278 |  139|         copy   v121 = v119
279 |  140|          lea   v123, stack+74
280 |  141|         copy   v122 = v123
281 |  142|      add_int   v124 = v110 + -15
282 |  143|      asm_x86   (contents not shown)
283 | 
284 | -------- Basic Block 27 -------- defines v129-138 --------
285 | 
286 |            ... dominating: block 26 instruction 144
287 | 
288 |  144|         jump   28 if v106 >= v124
289 |  145|      asm_x86   (contents not shown)
290 |  146|          mov   v136, [v123] :2b
291 |  147|  cast_number   v137 (s64), v136 (u16)
292 |  148|        binop   v135, v18 + v137
293 |  149|         copy   v18 = v135
294 |  150|      add_int   v138 = v106 + 16
295 |  151|         copy   v106 = v138
296 |  152|         jump   27
297 | 
298 | -------- Basic Block 28 -------- defines v139-143 --------
299 | 
300 |            ... dominating: block 27 instruction 144
301 | 
302 |  153|         copy   v139 = v106
303 |  154|      add_int   v142 = v106 + 15
304 |  155|      add_int   v143 = v110 + -1
305 |  156|         call   min (v142, v143) -> v141
306 |  157|         jump   31 if v139 > v141
307 | 
308 | -------- Basic Block 29 -------- defines v144-149 --------
309 | 
310 |            ... dominating: block 28 instruction 157
311 | 
312 |  158|          mov   v145, [stack+18] :8b
313 |  159|  cast_number   v147 (*u8), v139 (u64)
314 |  160|          mov   v148, [v147] :1b
315 |  161|         call   v145 (v148, v2) -> v144
316 |  162|         jump   30 if v144 == 0
317 |  163|      add_int   v149 = v18 + 1
318 |  164|         copy   v18 = v149
319 | 
320 | -------- Basic Block 30 -------- defines v150 --------
321 | 
322 |            ... dominating: block 29 instruction 158
323 | 
324 |  165|      compare   v150 = (v139 >= v141)
325 |  166|      add_int   v139 = v139 + 1
326 |  167|         jump   29 if v150 == 0
327 | 
328 | -------- Basic Block 31 --------
329 | 
330 |            ... dominating: block 28 instruction 157
331 | 
332 |  168| return_value   v18 -> 1
333 |  169|       return   
334 | 
335 | -------- Basic Block 32 -------- defines v151 --------
336 | 
337 |            ... dominating: block 24 instruction 122
338 | 
339 |  170|     constant   v151 = 3
340 |  171|         jump   39 if v104 != v151
341 | 
342 | -------- Basic Block 33 -------- defines v152-178 --------
343 | 
344 |            ... dominating: block 32 instruction 171
345 | 
346 |  172|          mov   v153, [v1+8] :8b
347 |  173|  cast_number   v155 (u64), v153 (*u8)
348 |  174|         copy   v152 = v155
349 |  175|          mov   v157, [v1+8] :8b
350 |  176|  cast_number   v159 (u64), v157 (*u8)
351 |  177|          mov   v160, [v1] :8b
352 |  178|  cast_number   v161 (u64), v160 (s64)
353 |  179|        binop   v156, v159 + v161
354 |  180|          lea   v162, stack+76
355 |  181|     constant   v163 = 0x20
356 |  182|       memset   v162, v2 {count v163}
357 |  183|         copy   v164 = v162
358 |  184|          lea   v165, stack+96
359 |  185|         call   to_lower (v2) -> v166
360 |  186|       memset   v165, v166 {count v163}
361 |  187|         copy   v167 = v165
362 |  188|          lea   v169, stack+b8
363 |  189|         copy   v168 = v169
364 |  190|      add_int   v170 = v156 + -31
365 |  191|      asm_x86   (contents not shown)
366 | 
367 | -------- Basic Block 34 -------- defines v179-196 --------
368 | 
369 |            ... dominating: block 33 instruction 192
370 | 
371 |  192|         jump   35 if v152 >= v170
372 |  193|      asm_x86   (contents not shown)
373 |  194|          mov   v194, [v169] :4b
374 |  195|  cast_number   v195 (s64), v194 (u32)
375 |  196|        binop   v193, v18 + v195
376 |  197|         copy   v18 = v193
377 |  198|      add_int   v196 = v152 + 32
378 |  199|         copy   v152 = v196
379 |  200|         jump   34
380 | 
381 | -------- Basic Block 35 -------- defines v197-201 --------
382 | 
383 |            ... dominating: block 34 instruction 192
384 | 
385 |  201|         copy   v197 = v152
386 |  202|      add_int   v200 = v152 + 31
387 |  203|      add_int   v201 = v156 + -1
388 |  204|         call   min (v200, v201) -> v199
389 |  205|         jump   38 if v197 > v199
390 | 
391 | -------- Basic Block 36 -------- defines v202-207 --------
392 | 
393 |            ... dominating: block 35 instruction 205
394 | 
395 |  206|          mov   v203, [stack+18] :8b
396 |  207|  cast_number   v205 (*u8), v197 (u64)
397 |  208|          mov   v206, [v205] :1b
398 |  209|         call   v203 (v206, v2) -> v202
399 |  210|         jump   37 if v202 == 0
400 |  211|      add_int   v207 = v18 + 1
401 |  212|         copy   v18 = v207
402 | 
403 | -------- Basic Block 37 -------- defines v208 --------
404 | 
405 |            ... dominating: block 36 instruction 206
406 | 
407 |  213|      compare   v208 = (v197 >= v199)
408 |  214|      add_int   v197 = v197 + 1
409 |  215|         jump   36 if v208 == 0
410 | 
411 | -------- Basic Block 38 --------
412 | 
413 |            ... dominating: block 35 instruction 205
414 | 
415 |  216| return_value   v18 -> 1
416 |  217|       return   
417 | 
418 | -------- Basic Block 39 --------
419 | 
420 |            ... dominating: block 24 instruction 122
421 | 
422 |  218|         jump   40
423 | 
424 | -------- Basic Block 40 -------- defines v209-213 --------
425 | 
426 |            ... dominating: block 6 instruction 30
427 | 
428 |  219|     constant   v209 = 0
429 |  220|         copy   v210 = v209
430 |  221|          mov   v212, [v1] :8b
431 |  222|      add_int   v213 = v212 + -1
432 |  223|         jump   43 if v210 > v213
433 | 
434 | -------- Basic Block 41 -------- defines v214-219 --------
435 | 
436 |            ... dominating: block 40 instruction 223
437 | 
438 |  224|          mov   v215, [stack+18] :8b
439 |  225|          mov   v216, [v1+8] :8b
440 |  226|   add_extend   v217 = v216 + v210
441 |  227|          mov   v218, [v217] :1b
442 |  228|         call   v215 (v218, v2) -> v214
443 |  229|         jump   42 if v214 == 0
444 |  230|      add_int   v219 = v18 + 1
445 |  231|         copy   v18 = v219
446 | 
447 | -------- Basic Block 42 --------
448 | 
449 |            ... dominating: block 41 instruction 224
450 | 
451 |  232|      add_int   v210 = v210 + 1
452 |  233|         jump   41 if v210 <= v213
453 | 
454 | -------- Basic Block 43 --------
455 | 
456 |            ... dominating: block 40 instruction 223
457 | 
458 |  234| return_value   v18 -> 1
459 |  235|       return   
460 |    Creating library C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib and object C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.exp
461 | Running linker: "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\Hostx64\x64\link.exe" /nologo C:/Repos/jai-modules/Strings_Modules/tests/.build/test9_2_0.obj /OUT:test9.exe /MACHINE:AMD64 /INCREMENTAL:NO /DEBUG /IMPLIB:C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib /libpath:"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\lib\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\um\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\ucrt\x64" -nodefaultlib libcmt.lib vcruntime.lib ucrt.lib kernel32.lib comdlg32.lib shlwapi.lib Advapi32.lib DbgHelp.lib Dwmapi.lib opengl32.lib Gdi32.lib msvcrt.lib shell32.lib user32.lib winmm.lib kernel32.lib Ntdll.lib c:/jai/modules/stb_sprintf/win/stb_sprintf.lib
462 | 
463 | Stats for Workspace 2 ("Target Program"):
464 | Lexer lines processed: 135047 (141800 including blank lines, comments.)
465 | Front-end time: 0.068047 seconds.
466 | llvm      time: 0.905029 seconds.
467 | 
468 | Compiler  time: 0.973076 seconds.
469 | Link      time: 0.467812 seconds.
470 | Total     time: 1.440889 seconds.
471 | ------- 
472 | 
473 | Disassembly of 'count' at c:/jai/modules/Strings_Shared/Strings_Shared.jai:1009
474 | - Stack size 188
475 | 
476 | -------- Basic Block 0 -------- defines v5-7 --------
477 | 
478 |            (no dominating)
479 | 
480 |    0|          lea   v6, stack+0
481 |    1|        clear   v6 {size 16}
482 |    2|     string==   v5, v1 == v6
483 |    3|         jump   1 if v5 == 0
484 |    4|     constant   v7 = 0
485 |    5| return_value   v7 -> 1
486 |    6|       return   
487 | 
488 | -------- Basic Block 1 -------- defines v8-11 --------
489 | 
490 |            ... dominating: block 0 instruction 3
491 | 
492 |    7|          mov   v8, [v3] :1b
493 |    8|     constant   v9 = 1
494 |    9|         jump   5 if v8 != v9
495 |   10|         call   is_upper (v2) -> v10
496 |   11|         jump   2 if v10 == 0
497 |   12|          lea   v11, stack+10
498 |   13|       memcpy   v11, v3 {size 16}
499 |   14|         jump   4
500 | 
501 | -------- Basic Block 2 -------- defines v12-14 --------
502 | 
503 |            ... dominating: block 1 instruction 11
504 | 
505 |   15|         call   is_lower (v2) -> v12
506 |   16|         jump   3 if v12 == 0
507 |   17|         call   to_upper (v2) -> v13
508 |   18|         copy   v2 = v13
509 |   19|          lea   v14, stack+10
510 |   20|       memcpy   v14, v3 {size 16}
511 |   21|         jump   4
512 | 
513 | -------- Basic Block 3 -------- defines v15-16 --------
514 | 
515 |            ... dominating: block 2 instruction 16
516 | 
517 |   22|          lea   v15, bss+40
518 |   23|          lea   v16, stack+10
519 |   24|       memcpy   v16, v15 {size 16}
520 | 
521 | -------- Basic Block 4 --------
522 | 
523 |            ... dominating: block 1 instruction 11
524 | 
525 |   25|         jump   6
526 | 
527 | -------- Basic Block 5 -------- defines v17 --------
528 | 
529 |            ... dominating: block 1 instruction 9
530 | 
531 |   26|          lea   v17, stack+10
532 |   27|       memcpy   v17, v3 {size 16}
533 | 
534 | -------- Basic Block 6 -------- defines v18-19 --------
535 | 
536 |            ... dominating: block 1 instruction 9
537 | 
538 |   28|     constant   v18 = 0
539 |   29|          mov   v19, [stack+10] :1b
540 | 
541 | -------- Basic Block 7 -------- defines v20 --------
542 | 
543 |            ... dominating: block 6 instruction 30
544 | 
545 |   30|     constant   v20 = 0
546 |   31|         jump   23 if v19 != v20
547 | 
548 | -------- Basic Block 8 -------- defines v21-26 --------
549 | 
550 |            ... dominating: block 7 instruction 31
551 | 
552 |   32|          lea   v21, data+8048
553 |   33|          mov   v22, [v0] :4b
554 |   34|         imul   v23 = v22 * 8
555 |   35|   add_extend   v25 = v21 + v23
556 |   36|          mov   v26, [v25] :8b
557 | 
558 | -------- Basic Block 9 -------- defines v27 --------
559 | 
560 |            ... dominating: block 8 instruction 37
561 | 
562 |   37|     constant   v27 = 0
563 |   38|         jump   15 if v26 != v27
564 | 
565 | -------- Basic Block 10 -------- defines v28-42 --------
566 | 
567 |            ... dominating: block 9 instruction 38
568 | 
569 |   39|          mov   v29, [v1+8] :8b
570 |   40|  cast_number   v31 (u64), v29 (*u8)
571 |   41|         copy   v28 = v31
572 |   42|          mov   v33, [v1+8] :8b
573 |   43|  cast_number   v35 (u64), v33 (*u8)
574 |   44|          mov   v36, [v1] :8b
575 |   45|  cast_number   v37 (u64), v36 (s64)
576 |   46|        binop   v32, v35 + v37
577 |   47|          lea   v38, stack+20
578 |   48|     constant   v39 = 0x10
579 |   49|       memset   v38, v2 {count v39}
580 |   50|         copy   v40 = v38
581 |   51|      add_int   v41 = v32 + -15
582 |   52|         call   is_debugger_present () -> v42
583 |   53|         jump   11 if v42 == 0
584 |   54|         call   debug_break ()
585 | 
586 | -------- Basic Block 11 -------- defines v43-50 --------
587 | 
588 |            ... dominating: block 10 instruction 53
589 | 
590 |   55|      asm_x86   (contents not shown)
591 |   56|      asm_x86   (contents not shown)
592 |   57|         copy   v46 = v28
593 |   58|      add_int   v49 = v28 + 15
594 |   59|      add_int   v50 = v32 + -1
595 |   60|         call   min (v49, v50) -> v48
596 |   61|         jump   14 if v46 > v48
597 | 
598 | -------- Basic Block 12 -------- defines v51-54 --------
599 | 
600 |            ... dominating: block 11 instruction 61
601 | 
602 |   62|  cast_number   v52 (*u8), v46 (u64)
603 |   63|          mov   v53, [v52] :1b
604 |   64|         jump   13 if v53 != v2
605 |   65|      add_int   v54 = v18 + 1
606 |   66|         copy   v18 = v54
607 | 
608 | -------- Basic Block 13 -------- defines v55 --------
609 | 
610 |            ... dominating: block 12 instruction 62
611 | 
612 |   67|      compare   v55 = (v46 >= v48)
613 |   68|      add_int   v46 = v46 + 1
614 |   69|         jump   12 if v55 == 0
615 | 
616 | -------- Basic Block 14 --------
617 | 
618 |            ... dominating: block 11 instruction 61
619 | 
620 |   70| return_value   v18 -> 1
621 |   71|       return   
622 | 
623 | -------- Basic Block 15 -------- defines v56 --------
624 | 
625 |            ... dominating: block 8 instruction 37
626 | 
627 |   72|     constant   v56 = 3
628 |   73|         jump   22 if v26 != v56
629 | 
630 | -------- Basic Block 16 -------- defines v57-76 --------
631 | 
632 |            ... dominating: block 15 instruction 73
633 | 
634 |   74|          mov   v58, [v1+8] :8b
635 |   75|  cast_number   v60 (u64), v58 (*u8)
636 |   76|         copy   v57 = v60
637 |   77|          mov   v62, [v1+8] :8b
638 |   78|  cast_number   v64 (u64), v62 (*u8)
639 |   79|          mov   v65, [v1] :8b
640 |   80|  cast_number   v66 (u64), v65 (s64)
641 |   81|        binop   v61, v64 + v66
642 |   82|          lea   v67, stack+30
643 |   83|     constant   v68 = 0x20
644 |   84|       memset   v67, v2 {count v68}
645 |   85|         copy   v69 = v67
646 |   86|          lea   v71, stack+50
647 |   87|         copy   v70 = v71
648 |   88|      add_int   v72 = v61 + -31
649 |   89|      asm_x86   (contents not shown)
650 | 
651 | -------- Basic Block 17 -------- defines v77-86 --------
652 | 
653 |            ... dominating: block 16 instruction 90
654 | 
655 |   90|         jump   18 if v57 >= v72
656 |   91|      asm_x86   (contents not shown)
657 |   92|          mov   v84, [v71] :4b
658 |   93|  cast_number   v85 (s64), v84 (u32)
659 |   94|        binop   v83, v18 + v85
660 |   95|         copy   v18 = v83
661 |   96|      add_int   v86 = v57 + 32
662 |   97|         copy   v57 = v86
663 |   98|         jump   17
664 | 
665 | -------- Basic Block 18 -------- defines v87-91 --------
666 | 
667 |            ... dominating: block 17 instruction 90
668 | 
669 |   99|         copy   v87 = v57
670 |  100|      add_int   v90 = v57 + 31
671 |  101|      add_int   v91 = v61 + -1
672 |  102|         call   min (v90, v91) -> v89
673 |  103|         jump   21 if v87 > v89
674 | 
675 | -------- Basic Block 19 -------- defines v92-97 --------
676 | 
677 |            ... dominating: block 18 instruction 103
678 | 
679 |  104|          mov   v93, [stack+18] :8b
680 |  105|  cast_number   v95 (*u8), v87 (u64)
681 |  106|          mov   v96, [v95] :1b
682 |  107|         call   v93 (v96, v2) -> v92
683 |  108|         jump   20 if v92 == 0
684 |  109|      add_int   v97 = v18 + 1
685 |  110|         copy   v18 = v97
686 | 
687 | -------- Basic Block 20 -------- defines v98 --------
688 | 
689 |            ... dominating: block 19 instruction 104
690 | 
691 |  111|      compare   v98 = (v87 >= v89)
692 |  112|      add_int   v87 = v87 + 1
693 |  113|         jump   19 if v98 == 0
694 | 
695 | -------- Basic Block 21 --------
696 | 
697 |            ... dominating: block 18 instruction 103
698 | 
699 |  114| return_value   v18 -> 1
700 |  115|       return   
701 | 
702 | -------- Basic Block 22 --------
703 | 
704 |            ... dominating: block 8 instruction 37
705 | 
706 |  116|         jump   40
707 | 
708 | -------- Basic Block 23 --------
709 | 
710 |            ... dominating: block 6 instruction 30
711 | 
712 |  117|         jump   40 if v19 != v9
713 | 
714 | -------- Basic Block 24 -------- defines v99-104 --------
715 | 
716 |            ... dominating: block 23 instruction 117
717 | 
718 |  118|          lea   v99, data+8048
719 |  119|          mov   v100, [v0] :4b
720 |  120|         imul   v101 = v100 * 8
721 |  121|   add_extend   v103 = v99 + v101
722 |  122|          mov   v104, [v103] :8b
723 | 
724 | -------- Basic Block 25 -------- defines v105 --------
725 | 
726 |            ... dominating: block 24 instruction 123
727 | 
728 |  123|     constant   v105 = 0
729 |  124|         jump   32 if v104 != v105
730 | 
731 | -------- Basic Block 26 -------- defines v106-128 --------
732 | 
733 |            ... dominating: block 25 instruction 124
734 | 
735 |  125|          mov   v107, [v1+8] :8b
736 |  126|  cast_number   v109 (u64), v107 (*u8)
737 |  127|         copy   v106 = v109
738 |  128|          mov   v111, [v1+8] :8b
739 |  129|  cast_number   v113 (u64), v111 (*u8)
740 |  130|          mov   v114, [v1] :8b
741 |  131|  cast_number   v115 (u64), v114 (s64)
742 |  132|        binop   v110, v113 + v115
743 |  133|          lea   v116, stack+54
744 |  134|     constant   v117 = 0x10
745 |  135|       memset   v116, v2 {count v117}
746 |  136|         copy   v118 = v116
747 |  137|          lea   v119, stack+64
748 |  138|         call   to_lower (v2) -> v120
749 |  139|       memset   v119, v120 {count v117}
750 |  140|         copy   v121 = v119
751 |  141|          lea   v123, stack+74
752 |  142|         copy   v122 = v123
753 |  143|      add_int   v124 = v110 + -15
754 |  144|      asm_x86   (contents not shown)
755 | 
756 | -------- Basic Block 27 -------- defines v129-138 --------
757 | 
758 |            ... dominating: block 26 instruction 145
759 | 
760 |  145|         jump   28 if v106 >= v124
761 |  146|      asm_x86   (contents not shown)
762 |  147|          mov   v136, [v123] :2b
763 |  148|  cast_number   v137 (s64), v136 (u16)
764 |  149|        binop   v135, v18 + v137
765 |  150|         copy   v18 = v135
766 |  151|      add_int   v138 = v106 + 16
767 |  152|         copy   v106 = v138
768 |  153|         jump   27
769 | 
770 | -------- Basic Block 28 -------- defines v139-143 --------
771 | 
772 |            ... dominating: block 27 instruction 145
773 | 
774 |  154|         copy   v139 = v106
775 |  155|      add_int   v142 = v106 + 15
776 |  156|      add_int   v143 = v110 + -1
777 |  157|         call   min (v142, v143) -> v141
778 |  158|         jump   31 if v139 > v141
779 | 
780 | -------- Basic Block 29 -------- defines v144-149 --------
781 | 
782 |            ... dominating: block 28 instruction 158
783 | 
784 |  159|          mov   v145, [stack+18] :8b
785 |  160|  cast_number   v147 (*u8), v139 (u64)
786 |  161|          mov   v148, [v147] :1b
787 |  162|         call   v145 (v148, v2) -> v144
788 |  163|         jump   30 if v144 == 0
789 |  164|      add_int   v149 = v18 + 1
790 |  165|         copy   v18 = v149
791 | 
792 | -------- Basic Block 30 -------- defines v150 --------
793 | 
794 |            ... dominating: block 29 instruction 159
795 | 
796 |  166|      compare   v150 = (v139 >= v141)
797 |  167|      add_int   v139 = v139 + 1
798 |  168|         jump   29 if v150 == 0
799 | 
800 | -------- Basic Block 31 --------
801 | 
802 |            ... dominating: block 28 instruction 158
803 | 
804 |  169| return_value   v18 -> 1
805 |  170|       return   
806 | 
807 | -------- Basic Block 32 -------- defines v151 --------
808 | 
809 |            ... dominating: block 24 instruction 123
810 | 
811 |  171|     constant   v151 = 3
812 |  172|         jump   39 if v104 != v151
813 | 
814 | -------- Basic Block 33 -------- defines v152-178 --------
815 | 
816 |            ... dominating: block 32 instruction 172
817 | 
818 |  173|          mov   v153, [v1+8] :8b
819 |  174|  cast_number   v155 (u64), v153 (*u8)
820 |  175|         copy   v152 = v155
821 |  176|          mov   v157, [v1+8] :8b
822 |  177|  cast_number   v159 (u64), v157 (*u8)
823 |  178|          mov   v160, [v1] :8b
824 |  179|  cast_number   v161 (u64), v160 (s64)
825 |  180|        binop   v156, v159 + v161
826 |  181|          lea   v162, stack+76
827 |  182|     constant   v163 = 0x20
828 |  183|       memset   v162, v2 {count v163}
829 |  184|         copy   v164 = v162
830 |  185|          lea   v165, stack+96
831 |  186|         call   to_lower (v2) -> v166
832 |  187|       memset   v165, v166 {count v163}
833 |  188|         copy   v167 = v165
834 |  189|          lea   v169, stack+b8
835 |  190|         copy   v168 = v169
836 |  191|      add_int   v170 = v156 + -31
837 |  192|      asm_x86   (contents not shown)
838 | 
839 | -------- Basic Block 34 -------- defines v179-196 --------
840 | 
841 |            ... dominating: block 33 instruction 193
842 | 
843 |  193|         jump   35 if v152 >= v170
844 |  194|      asm_x86   (contents not shown)
845 |  195|          mov   v194, [v169] :4b
846 |  196|  cast_number   v195 (s64), v194 (u32)
847 |  197|        binop   v193, v18 + v195
848 |  198|         copy   v18 = v193
849 |  199|      add_int   v196 = v152 + 32
850 |  200|         copy   v152 = v196
851 |  201|         jump   34
852 | 
853 | -------- Basic Block 35 -------- defines v197-201 --------
854 | 
855 |            ... dominating: block 34 instruction 193
856 | 
857 |  202|         copy   v197 = v152
858 |  203|      add_int   v200 = v152 + 31
859 |  204|      add_int   v201 = v156 + -1
860 |  205|         call   min (v200, v201) -> v199
861 |  206|         jump   38 if v197 > v199
862 | 
863 | -------- Basic Block 36 -------- defines v202-207 --------
864 | 
865 |            ... dominating: block 35 instruction 206
866 | 
867 |  207|          mov   v203, [stack+18] :8b
868 |  208|  cast_number   v205 (*u8), v197 (u64)
869 |  209|          mov   v206, [v205] :1b
870 |  210|         call   v203 (v206, v2) -> v202
871 |  211|         jump   37 if v202 == 0
872 |  212|      add_int   v207 = v18 + 1
873 |  213|         copy   v18 = v207
874 | 
875 | -------- Basic Block 37 -------- defines v208 --------
876 | 
877 |            ... dominating: block 36 instruction 207
878 | 
879 |  214|      compare   v208 = (v197 >= v199)
880 |  215|      add_int   v197 = v197 + 1
881 |  216|         jump   36 if v208 == 0
882 | 
883 | -------- Basic Block 38 --------
884 | 
885 |            ... dominating: block 35 instruction 206
886 | 
887 |  217| return_value   v18 -> 1
888 |  218|       return   
889 | 
890 | -------- Basic Block 39 --------
891 | 
892 |            ... dominating: block 24 instruction 123
893 | 
894 |  219|         jump   40
895 | 
896 | -------- Basic Block 40 -------- defines v209-213 --------
897 | 
898 |            ... dominating: block 6 instruction 30
899 | 
900 |  220|     constant   v209 = 0
901 |  221|         copy   v210 = v209
902 |  222|          mov   v212, [v1] :8b
903 |  223|      add_int   v213 = v212 + -1
904 |  224|         jump   43 if v210 > v213
905 | 
906 | -------- Basic Block 41 -------- defines v214-219 --------
907 | 
908 |            ... dominating: block 40 instruction 224
909 | 
910 |  225|          mov   v215, [stack+18] :8b
911 |  226|          mov   v216, [v1+8] :8b
912 |  227|   add_extend   v217 = v216 + v210
913 |  228|          mov   v218, [v217] :1b
914 |  229|         call   v215 (v218, v2) -> v214
915 |  230|         jump   42 if v214 == 0
916 |  231|      add_int   v219 = v18 + 1
917 |  232|         copy   v18 = v219
918 | 
919 | -------- Basic Block 42 --------
920 | 
921 |            ... dominating: block 41 instruction 225
922 | 
923 |  233|      add_int   v210 = v210 + 1
924 |  234|         jump   41 if v210 <= v213
925 | 
926 | -------- Basic Block 43 --------
927 | 
928 |            ... dominating: block 40 instruction 224
929 | 
930 |  235| return_value   v18 -> 1
931 |  236|       return   
932 |    Creating library C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib and object C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.exp
933 | Running linker: "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\bin\Hostx64\x64\link.exe" /nologo C:/Repos/jai-modules/Strings_Modules/tests/.build/test9_2_0.obj /OUT:test9.exe /MACHINE:AMD64 /INCREMENTAL:NO /DEBUG /IMPLIB:C:/Repos/jai-modules/Strings_Modules/tests/.build/test9.lib /libpath:"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.26.28801\lib\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\um\x64" /libpath:"C:\Program Files (x86)\Windows Kits\10\Lib\10.0.18362.0\ucrt\x64" -nodefaultlib libcmt.lib vcruntime.lib ucrt.lib kernel32.lib comdlg32.lib shlwapi.lib Advapi32.lib DbgHelp.lib Dwmapi.lib opengl32.lib Gdi32.lib msvcrt.lib shell32.lib user32.lib winmm.lib kernel32.lib Ntdll.lib c:/jai/modules/stb_sprintf/win/stb_sprintf.lib
934 | 
935 | Stats for Workspace 2 ("Target Program"):
936 | Lexer lines processed: 135050 (141800 including blank lines, comments.)
937 | Front-end time: 0.070711 seconds.
938 | llvm      time: 0.933580 seconds.
939 | 
940 | Compiler  time: 1.004290 seconds.
941 | Link      time: 0.602430 seconds.
942 | Total     time: 1.606720 seconds.
943 | 


--------------------------------------------------------------------------------