├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── afl
    ├── README.md
    ├── afl_harness.c
    ├── afl_testcases
    │   └── simple
    └── run_afl
├── librope.sln
├── librope.vcxproj
├── librope.xcodeproj
    └── project.pbxproj
├── rope.c
├── rope.h
└── test
    ├── benchmark.c
    ├── slowstring.c
    ├── slowstring.h
    ├── tests.c
    └── tests.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.o
 2 | *.bc
 3 | librope.a
 4 | tests
 5 | *.swp
 6 | .DS_Store
 7 | Debug
 8 | Release
 9 | librope.suo
10 | librope.sdf
11 | librope.vcxproj.*
12 | Build
13 | *.dSYM
14 | 
15 | afl/afl
16 | afl/afl_findings
17 | 
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Licensed under the standard MIT license:
 2 | 
 3 | Copyright 2011 Joseph Gentle.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all clean
 2 | 
 3 | CFLAGS=-O2 -Wall -I. -std=c99
 4 | 
 5 | UNAME := $(shell uname)
 6 | 
 7 | ifeq ($(UNAME), Darwin)
 8 | CFLAGS := $(CFLAGS) -arch x86_64
 9 | endif
10 | 
11 | all: librope.a
12 | 
13 | clean:
14 | 	rm -f librope.a *.bc *.o tests
15 | 
16 | # You can add -emit-llvm here if you're using clang.
17 | rope.o: rope.c rope.h
18 | 	$(CC) $(CFLAGS) $< -c -o $@
19 | 
20 | librope.a: rope.o
21 | 	ar rcs $@ $+
22 | 
23 | # Only need corefoundation to run the tests on mac
24 | tests: test/tests.c test/benchmark.c test/slowstring.c librope.a
25 | 	$(CC) $(CFLAGS) $+ -o $@
26 | 
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | librope
 2 | =======
 3 | 
 4 | This is a little C library for heavyweight utf-8 strings (ropes). Unlike regular C strings, ropes can do substring insertion and deletion in O(log n) time.
 5 | 
 6 | librope is implemented using skip lists, which have the same big-O time complexity as trees but don't require rebalancing.
 7 | 
 8 | librope is _fast_. It will happily perform [~15 million edit operations per second](https://home.seph.codes/public/rope_bench/realworld/C-JumpRope/automerge-paper/report/index.html) on a modern CPU. Inserts and deletes in librope outperform straight C strings for any document longer than a few hundred bytes.
 9 | 
10 | ## Support
11 | 
12 | This library works (C code never dies). But I'm moving to Rust for my newer projects. This library has been rewritten in Rust as [Jumprope](https://crates.io/crates/jumprope). Jumprope is another 2-3x faster than this library on real world editing traces. It's obnoxiously fast.
13 | 
14 | Usage
15 | -----
16 | 
17 | Just add `rope.c` and `rope.h` to your project.
18 | Be sure to add `rope.c` to your compile line as well.
19 | 
20 | ```c
21 | // Import rope library into project
22 | #include "rope.h"
23 | 
24 | // Make a new empty rope
25 | rope *r = rope_new();
26 | 
27 | // Put some content in it (at position 0)
28 | rope_insert(r, 0, "Hi there!");
29 | 
30 | // Delete 6 characters at position 2
31 | rope_del(r, 2, 6);
32 | 
33 | // Get the whole string back out of the rope
34 | uint8_t *str = rope_create_cstr(r);
35 | 
36 | // str now contains "Hi!"! Test it out!:
37 | _rope_print(r);
38 | 
39 | // Done with the rope?
40 | rope_free(r);
41 | ```
42 | 
43 | Wide Character String Compatibility
44 | -----------------------------------
45 | 
46 | String insertion / deletion positions in Javascript, Objective-C (NSString), Java, C# and others are **wrong sometimes**!!!
47 | 
48 | These languages store strings as `wchar` arrays (arrays of two byte characters). Some characters in the unicode character set require more than two bytes. These languages encode such characters using multiple wchars as per UTF-16. This works most of the time. However, insertion and deletion positions in these strings still refer to offsets in the underlying array. So unicode characters which take up 4 bytes in UTF-16 count as two characters for the purpose of deletion ranges, insertion positions and string length.
49 | 
50 | Even though these characters are exceptionally rare, I don't want my editor to go all funky if people start getting creative. About a quarter of librope's code is dedicated to fixing this mismatch. However, bookkeeping isn't free - librope performance drops by 35% when wchar conversion support is enabled.
51 | 
52 | For more information, read my [blog post about it](https://josephg.com/blog/string-length-lies).
53 | 
54 | Long story short, if you need to interoperate with strings from any of these dodgy languages, here's what you do:
55 | 
56 | - Compile with `-DROPE_WCHAR=1`. This macro enables the expensive wchar bookkeeping.
57 | - Use the alternate insert & delete functions `rope_insert_at_wchar(...)` and `rope_del_at_wchar(...)` when your index / size is specified in UTF-16 offsets.
58 | 
59 | Take a look at the header file for documentation.
60 | 
61 | #### Beware:
62 | 
63 | - When using `rope_insert_at_wchar` you still need to convert the string you're inserting into UTF-8 before you pass it into librope.
64 | - The API lets you try to delete or insert halfway through a large character. You probably don't want to do that.
65 | - librope is 100% faithful when it comes to the characters you're inserting. If your string has byte order marks, you might want to remove them before passing the string into librope.
66 | 
67 | 


--------------------------------------------------------------------------------
/afl/README.md:
--------------------------------------------------------------------------------
1 | This is a little harness & set of tools for testing librope with
2 | [american fuzzy lop](http://lcamtuf.coredump.cx/afl/).
3 | 
4 | To get started, have a read through my [blog post on
5 | AFL](https://josephg.com/blog/bug-hunting-with-american-fuzzy-lop/).
6 | 


--------------------------------------------------------------------------------
/afl/afl_harness.c:
--------------------------------------------------------------------------------
 1 | //
 2 | //  afl.c
 3 | //  librope
 4 | //
 5 | //  Created by Joseph Gentle on 11/12/2014.
 6 | //  Copyright (c) 2014 Joseph Gentle. All rights reserved.
 7 | //
 8 | 
 9 | #include <assert.h>
10 | #include <stdio.h>
11 | #include <stdlib.h>
12 | #include <stdbool.h>
13 | #include "rope.h"
14 | 
15 | int main() {
16 |   printf("AFL test harness\n");
17 |   rope *r = rope_new();
18 |   
19 |   //FILE *stream = fopen("/Users/josephg/src/librope/death1", "r");
20 |   FILE *stream = stdin;
21 |   
22 |   char *buffer = NULL;
23 |   size_t buf_cap = 0;
24 |   while (true) {
25 |     // First read the position we're editing the rope
26 |     ssize_t bytes_read = getline(&buffer, &buf_cap, stream);
27 |     if (bytes_read == -1) break;
28 |     
29 |     int pos = atoi(buffer);
30 |     int length = (int)rope_char_count(r);
31 |     pos = pos < 0 ? 0 : pos > length ? length : pos;
32 |     
33 |     // Now read the characters to insert
34 |     bytes_read = getline(&buffer, &buf_cap, stream);
35 |     if (bytes_read == -1) break;
36 | 
37 |     if (bytes_read > 0 && buffer[0] == '-') {
38 |       // Delete some characters
39 |       int to_del = atoi(&buffer[1]);
40 |       rope_del(r, pos, to_del);
41 |     } else {
42 |       // Delete the newline.
43 |       if (bytes_read > 0) buffer[bytes_read - 1] = '\0';
44 |       ROPE_RESULT result = rope_insert(r, pos, (uint8_t *)buffer);
45 |       if (result == ROPE_INVALID_UTF8) {
46 |         fprintf(stderr, "invalid utf8 - insert ignored\n");
47 |       }
48 |     }
49 |   }
50 |   
51 |   _rope_check(r);
52 |   printf("Final length: %zu\n", rope_char_count(r));
53 |   rope_free(r);
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/afl/afl_testcases/simple:
--------------------------------------------------------------------------------
1 | 0
2 | omg hi
3 | 3
4 | -3
5 | 
6 | 


--------------------------------------------------------------------------------
/afl/run_afl:
--------------------------------------------------------------------------------
#!/bin/sh
# Build the fuzzing harness with afl-clang and start afl-fuzz on it.
#
# Stop at the first failing command: without this, a failed compile would
# leave afl-fuzz running against a stale ./afl binary from an earlier build.
set -e

# Every path below is relative to the directory holding this script, so make
# the script work no matter where it is invoked from.
cd "$(dirname "$0")"

# Start from a clean findings directory.
rm -rf afl_findings/*
afl-clang -O2 -Wall -I.. -std=c99 -arch x86_64 ../rope.c afl_harness.c -o afl
afl-fuzz -i afl_testcases -o afl_findings ./afl
4 | 
5 | 


--------------------------------------------------------------------------------
/librope.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 11.00
 3 | # Visual Studio 2010
 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "librope", "librope.vcxproj", "{8BC2F5A9-0E22-C440-4A06-6EA14611419A}"
 5 | EndProject
 6 | Global
 7 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 8 | 		Debug|Win32 = Debug|Win32
 9 | 		Release|Win32 = Release|Win32
10 | 	EndGlobalSection
11 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | 		{8BC2F5A9-0E22-C440-4A06-6EA14611419A}.Debug|Win32.ActiveCfg = Debug|Win32
13 | 		{8BC2F5A9-0E22-C440-4A06-6EA14611419A}.Debug|Win32.Build.0 = Debug|Win32
14 | 		{8BC2F5A9-0E22-C440-4A06-6EA14611419A}.Release|Win32.ActiveCfg = Release|Win32
15 | 		{8BC2F5A9-0E22-C440-4A06-6EA14611419A}.Release|Win32.Build.0 = Release|Win32
16 | 	EndGlobalSection
17 | 	GlobalSection(SolutionProperties) = preSolution
18 | 		HideSolutionNode = FALSE
19 | 	EndGlobalSection
20 | EndGlobal
21 | 


--------------------------------------------------------------------------------
/librope.vcxproj:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup Label="ProjectConfigurations">
 4 |     <ProjectConfiguration Include="Debug|Win32">
 5 |       <Configuration>Debug</Configuration>
 6 |       <Platform>Win32</Platform>
 7 |     </ProjectConfiguration>
 8 |     <ProjectConfiguration Include="Release|Win32">
 9 |       <Configuration>Release</Configuration>
10 |       <Platform>Win32</Platform>
11 |     </ProjectConfiguration>
12 |   </ItemGroup>
13 |   <PropertyGroup Label="Globals">
14 |     <Keyword>Win32Proj</Keyword>
15 |   </PropertyGroup>
16 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
17 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
18 |     <ConfigurationType>StaticLibrary</ConfigurationType>
19 |     <UseDebugLibraries>true</UseDebugLibraries>
20 |   </PropertyGroup>
21 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
22 |     <ConfigurationType>StaticLibrary</ConfigurationType>
23 |     <UseDebugLibraries>false</UseDebugLibraries>
24 |   </PropertyGroup>
25 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
26 |   <ImportGroup Label="ExtensionSettings">
27 |   </ImportGroup>
28 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
29 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
30 |   </ImportGroup>
31 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
32 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
33 |   </ImportGroup>
34 |   <PropertyGroup Label="UserMacros" />
35 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
36 |     <LinkIncremental>true</LinkIncremental>
37 |   </PropertyGroup>
38 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
39 |     <LinkIncremental>true</LinkIncremental>
40 |   </PropertyGroup>
41 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
42 |     <ClCompile>
43 |       <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
44 |       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
45 |       <WarningLevel>Level3</WarningLevel>
46 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
47 |       <Optimization>Disabled</Optimization>
48 |       <CompileAs>CompileAsCpp</CompileAs>
49 |     </ClCompile>
50 |     <Link>
51 |       <TargetMachine>MachineX86</TargetMachine>
52 |       <GenerateDebugInformation>true</GenerateDebugInformation>
53 |       <SubSystem>Console</SubSystem>
54 |     </Link>
55 |   </ItemDefinitionGroup>
56 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
57 |     <ClCompile>
58 |       <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
59 |       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
60 |       <WarningLevel>Level3</WarningLevel>
61 |       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
62 |       <CompileAs>CompileAsCpp</CompileAs>
63 |     </ClCompile>
64 |     <Link>
65 |       <TargetMachine>MachineX86</TargetMachine>
66 |       <GenerateDebugInformation>true</GenerateDebugInformation>
67 |       <SubSystem>Console</SubSystem>
68 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
69 |       <OptimizeReferences>true</OptimizeReferences>
70 |     </Link>
71 |   </ItemDefinitionGroup>
72 |   <ItemGroup>
73 |     <ClCompile Include="rope.c" />
74 |   </ItemGroup>
75 |   <ItemGroup>
76 |     <ClInclude Include="rope.h" />
77 |   </ItemGroup>
78 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
79 |   <ImportGroup Label="ExtensionTargets">
80 |   </ImportGroup>
81 | </Project>


--------------------------------------------------------------------------------
/librope.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
  1 | // !$*UTF8*$!
  2 | {
  3 | 	archiveVersion = 1;
  4 | 	classes = {
  5 | 	};
  6 | 	objectVersion = 46;
  7 | 	objects = {
  8 | 
  9 | /* Begin PBXBuildFile section */
 10 | 		FD5D7C3315E1DC4A00F847DF /* rope.c in Sources */ = {isa = PBXBuildFile; fileRef = FD5D7C3215E1DC4A00F847DF /* rope.c */; };
 11 | 		FD90DBF315EF60900045B2C2 /* librope.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FD5D7C2515E1DBAD00F847DF /* librope.a */; };
 12 | 		FD967CC215ECBF8600B36CA1 /* benchmark.c in Sources */ = {isa = PBXBuildFile; fileRef = FD967CBD15ECBF8600B36CA1 /* benchmark.c */; };
 13 | 		FD967CC315ECBF8600B36CA1 /* slowstring.c in Sources */ = {isa = PBXBuildFile; fileRef = FD967CBE15ECBF8600B36CA1 /* slowstring.c */; };
 14 | 		FD967CC415ECBF8600B36CA1 /* tests.c in Sources */ = {isa = PBXBuildFile; fileRef = FD967CC015ECBF8600B36CA1 /* tests.c */; };
 15 | 		FD967CC515ECBF9400B36CA1 /* rope.h in Headers */ = {isa = PBXBuildFile; fileRef = FD5D7C3515E1DC5300F847DF /* rope.h */; settings = {ATTRIBUTES = (Public, ); }; };
 16 | /* End PBXBuildFile section */
 17 | 
 18 | /* Begin PBXCopyFilesBuildPhase section */
 19 | 		FD5D7C3A15E1DCA100F847DF /* CopyFiles */ = {
 20 | 			isa = PBXCopyFilesBuildPhase;
 21 | 			buildActionMask = 2147483647;
 22 | 			dstPath = /usr/share/man/man1/;
 23 | 			dstSubfolderSpec = 0;
 24 | 			files = (
 25 | 			);
 26 | 			runOnlyForDeploymentPostprocessing = 1;
 27 | 		};
 28 | /* End PBXCopyFilesBuildPhase section */
 29 | 
 30 | /* Begin PBXFileReference section */
 31 | 		FD5D7C2515E1DBAD00F847DF /* librope.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = librope.a; sourceTree = BUILT_PRODUCTS_DIR; };
 32 | 		FD5D7C3215E1DC4A00F847DF /* rope.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; lineEnding = 0; path = rope.c; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.c; };
 33 | 		FD5D7C3515E1DC5300F847DF /* rope.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; lineEnding = 0; path = rope.h; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.objcpp; };
 34 | 		FD5D7C3C15E1DCA100F847DF /* tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = tests; sourceTree = BUILT_PRODUCTS_DIR; };
 35 | 		FD967CBD15ECBF8600B36CA1 /* benchmark.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = benchmark.c; sourceTree = "<group>"; };
 36 | 		FD967CBE15ECBF8600B36CA1 /* slowstring.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = slowstring.c; sourceTree = "<group>"; };
 37 | 		FD967CBF15ECBF8600B36CA1 /* slowstring.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slowstring.h; sourceTree = "<group>"; };
 38 | 		FD967CC015ECBF8600B36CA1 /* tests.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; lineEnding = 0; path = tests.c; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.c; };
 39 | 		FD967CC115ECBF8600B36CA1 /* tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tests.h; sourceTree = "<group>"; };
 40 | /* End PBXFileReference section */
 41 | 
 42 | /* Begin PBXFrameworksBuildPhase section */
 43 | 		FD5D7C2215E1DBAD00F847DF /* Frameworks */ = {
 44 | 			isa = PBXFrameworksBuildPhase;
 45 | 			buildActionMask = 2147483647;
 46 | 			files = (
 47 | 			);
 48 | 			runOnlyForDeploymentPostprocessing = 0;
 49 | 		};
 50 | 		FD5D7C3915E1DCA100F847DF /* Frameworks */ = {
 51 | 			isa = PBXFrameworksBuildPhase;
 52 | 			buildActionMask = 2147483647;
 53 | 			files = (
 54 | 				FD90DBF315EF60900045B2C2 /* librope.a in Frameworks */,
 55 | 			);
 56 | 			runOnlyForDeploymentPostprocessing = 0;
 57 | 		};
 58 | /* End PBXFrameworksBuildPhase section */
 59 | 
 60 | /* Begin PBXGroup section */
 61 | 		FD5D7C1A15E1DBAD00F847DF = {
 62 | 			isa = PBXGroup;
 63 | 			children = (
 64 | 				FD5D7C3215E1DC4A00F847DF /* rope.c */,
 65 | 				FD5D7C3515E1DC5300F847DF /* rope.h */,
 66 | 				FD967CBC15ECBF8600B36CA1 /* test */,
 67 | 				FD5D7C2615E1DBAD00F847DF /* Products */,
 68 | 			);
 69 | 			sourceTree = "<group>";
 70 | 		};
 71 | 		FD5D7C2615E1DBAD00F847DF /* Products */ = {
 72 | 			isa = PBXGroup;
 73 | 			children = (
 74 | 				FD5D7C2515E1DBAD00F847DF /* librope.a */,
 75 | 				FD5D7C3C15E1DCA100F847DF /* tests */,
 76 | 			);
 77 | 			name = Products;
 78 | 			sourceTree = "<group>";
 79 | 		};
 80 | 		FD967CBC15ECBF8600B36CA1 /* test */ = {
 81 | 			isa = PBXGroup;
 82 | 			children = (
 83 | 				FD967CC015ECBF8600B36CA1 /* tests.c */,
 84 | 				FD967CC115ECBF8600B36CA1 /* tests.h */,
 85 | 				FD967CBD15ECBF8600B36CA1 /* benchmark.c */,
 86 | 				FD967CBE15ECBF8600B36CA1 /* slowstring.c */,
 87 | 				FD967CBF15ECBF8600B36CA1 /* slowstring.h */,
 88 | 			);
 89 | 			path = test;
 90 | 			sourceTree = "<group>";
 91 | 		};
 92 | /* End PBXGroup section */
 93 | 
 94 | /* Begin PBXHeadersBuildPhase section */
 95 | 		FD5D7C2315E1DBAD00F847DF /* Headers */ = {
 96 | 			isa = PBXHeadersBuildPhase;
 97 | 			buildActionMask = 2147483647;
 98 | 			files = (
 99 | 				FD967CC515ECBF9400B36CA1 /* rope.h in Headers */,
100 | 			);
101 | 			runOnlyForDeploymentPostprocessing = 0;
102 | 		};
103 | /* End PBXHeadersBuildPhase section */
104 | 
105 | /* Begin PBXNativeTarget section */
106 | 		FD5D7C2415E1DBAD00F847DF /* rope */ = {
107 | 			isa = PBXNativeTarget;
108 | 			buildConfigurationList = FD5D7C2915E1DBAD00F847DF /* Build configuration list for PBXNativeTarget "rope" */;
109 | 			buildPhases = (
110 | 				FD5D7C2115E1DBAD00F847DF /* Sources */,
111 | 				FD5D7C2215E1DBAD00F847DF /* Frameworks */,
112 | 				FD5D7C2315E1DBAD00F847DF /* Headers */,
113 | 			);
114 | 			buildRules = (
115 | 			);
116 | 			dependencies = (
117 | 			);
118 | 			name = rope;
119 | 			productName = librope;
120 | 			productReference = FD5D7C2515E1DBAD00F847DF /* librope.a */;
121 | 			productType = "com.apple.product-type.library.static";
122 | 		};
123 | 		FD5D7C3B15E1DCA100F847DF /* tests */ = {
124 | 			isa = PBXNativeTarget;
125 | 			buildConfigurationList = FD5D7C4315E1DCA100F847DF /* Build configuration list for PBXNativeTarget "tests" */;
126 | 			buildPhases = (
127 | 				FD5D7C3815E1DCA100F847DF /* Sources */,
128 | 				FD5D7C3915E1DCA100F847DF /* Frameworks */,
129 | 				FD5D7C3A15E1DCA100F847DF /* CopyFiles */,
130 | 			);
131 | 			buildRules = (
132 | 			);
133 | 			dependencies = (
134 | 			);
135 | 			name = tests;
136 | 			productName = tests;
137 | 			productReference = FD5D7C3C15E1DCA100F847DF /* tests */;
138 | 			productType = "com.apple.product-type.tool";
139 | 		};
140 | /* End PBXNativeTarget section */
141 | 
142 | /* Begin PBXProject section */
143 | 		FD5D7C1C15E1DBAD00F847DF /* Project object */ = {
144 | 			isa = PBXProject;
145 | 			attributes = {
146 | 				LastUpgradeCheck = 0510;
147 | 				ORGANIZATIONNAME = "Joseph Gentle";
148 | 			};
149 | 			buildConfigurationList = FD5D7C1F15E1DBAD00F847DF /* Build configuration list for PBXProject "librope" */;
150 | 			compatibilityVersion = "Xcode 3.2";
151 | 			developmentRegion = English;
152 | 			hasScannedForEncodings = 0;
153 | 			knownRegions = (
154 | 				en,
155 | 			);
156 | 			mainGroup = FD5D7C1A15E1DBAD00F847DF;
157 | 			productRefGroup = FD5D7C2615E1DBAD00F847DF /* Products */;
158 | 			projectDirPath = "";
159 | 			projectRoot = "";
160 | 			targets = (
161 | 				FD5D7C2415E1DBAD00F847DF /* rope */,
162 | 				FD5D7C3B15E1DCA100F847DF /* tests */,
163 | 			);
164 | 		};
165 | /* End PBXProject section */
166 | 
167 | /* Begin PBXSourcesBuildPhase section */
168 | 		FD5D7C2115E1DBAD00F847DF /* Sources */ = {
169 | 			isa = PBXSourcesBuildPhase;
170 | 			buildActionMask = 2147483647;
171 | 			files = (
172 | 				FD5D7C3315E1DC4A00F847DF /* rope.c in Sources */,
173 | 			);
174 | 			runOnlyForDeploymentPostprocessing = 0;
175 | 		};
176 | 		FD5D7C3815E1DCA100F847DF /* Sources */ = {
177 | 			isa = PBXSourcesBuildPhase;
178 | 			buildActionMask = 2147483647;
179 | 			files = (
180 | 				FD967CC215ECBF8600B36CA1 /* benchmark.c in Sources */,
181 | 				FD967CC315ECBF8600B36CA1 /* slowstring.c in Sources */,
182 | 				FD967CC415ECBF8600B36CA1 /* tests.c in Sources */,
183 | 			);
184 | 			runOnlyForDeploymentPostprocessing = 0;
185 | 		};
186 | /* End PBXSourcesBuildPhase section */
187 | 
188 | /* Begin XCBuildConfiguration section */
189 | 		FD5D7C2715E1DBAD00F847DF /* Debug */ = {
190 | 			isa = XCBuildConfiguration;
191 | 			buildSettings = {
192 | 				ALWAYS_SEARCH_USER_PATHS = NO;
193 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
194 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
195 | 				COPY_PHASE_STRIP = NO;
196 | 				GCC_C_LANGUAGE_STANDARD = gnu11;
197 | 				GCC_DYNAMIC_NO_PIC = NO;
198 | 				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
199 | 				GCC_OPTIMIZATION_LEVEL = 0;
200 | 				GCC_PREPROCESSOR_DEFINITIONS = (
201 | 					"DEBUG=1",
202 | 					"$(inherited)",
203 | 				);
204 | 				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
205 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
206 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
207 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
208 | 				GCC_WARN_UNUSED_VARIABLE = YES;
209 | 				MACOSX_DEPLOYMENT_TARGET = "";
210 | 				ONLY_ACTIVE_ARCH = YES;
211 | 				SDKROOT = macosx;
212 | 			};
213 | 			name = Debug;
214 | 		};
215 | 		FD5D7C2815E1DBAD00F847DF /* Release */ = {
216 | 			isa = XCBuildConfiguration;
217 | 			buildSettings = {
218 | 				ALWAYS_SEARCH_USER_PATHS = NO;
219 | 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
220 | 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
221 | 				COPY_PHASE_STRIP = YES;
222 | 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
223 | 				GCC_C_LANGUAGE_STANDARD = gnu11;
224 | 				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
225 | 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
226 | 				GCC_WARN_ABOUT_RETURN_TYPE = YES;
227 | 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
228 | 				GCC_WARN_UNUSED_VARIABLE = YES;
229 | 				LLVM_LTO = YES;
230 | 				MACOSX_DEPLOYMENT_TARGET = "";
231 | 				SDKROOT = macosx;
232 | 			};
233 | 			name = Release;
234 | 		};
235 | 		FD5D7C2A15E1DBAD00F847DF /* Debug */ = {
236 | 			isa = XCBuildConfiguration;
237 | 			buildSettings = {
238 | 				COMBINE_HIDPI_IMAGES = YES;
239 | 				EXECUTABLE_PREFIX = lib;
240 | 				PRODUCT_NAME = "$(TARGET_NAME)";
241 | 			};
242 | 			name = Debug;
243 | 		};
244 | 		FD5D7C2B15E1DBAD00F847DF /* Release */ = {
245 | 			isa = XCBuildConfiguration;
246 | 			buildSettings = {
247 | 				COMBINE_HIDPI_IMAGES = YES;
248 | 				EXECUTABLE_PREFIX = lib;
249 | 				PRODUCT_NAME = "$(TARGET_NAME)";
250 | 			};
251 | 			name = Release;
252 | 		};
253 | 		FD5D7C4415E1DCA100F847DF /* Debug */ = {
254 | 			isa = XCBuildConfiguration;
255 | 			buildSettings = {
256 | 				PRODUCT_NAME = "$(TARGET_NAME)";
257 | 			};
258 | 			name = Debug;
259 | 		};
260 | 		FD5D7C4515E1DCA100F847DF /* Release */ = {
261 | 			isa = XCBuildConfiguration;
262 | 			buildSettings = {
263 | 				CLANG_USE_OPTIMIZATION_PROFILE = YES;
264 | 				PRODUCT_NAME = "$(TARGET_NAME)";
265 | 			};
266 | 			name = Release;
267 | 		};
268 | /* End XCBuildConfiguration section */
269 | 
270 | /* Begin XCConfigurationList section */
271 | 		FD5D7C1F15E1DBAD00F847DF /* Build configuration list for PBXProject "librope" */ = {
272 | 			isa = XCConfigurationList;
273 | 			buildConfigurations = (
274 | 				FD5D7C2715E1DBAD00F847DF /* Debug */,
275 | 				FD5D7C2815E1DBAD00F847DF /* Release */,
276 | 			);
277 | 			defaultConfigurationIsVisible = 0;
278 | 			defaultConfigurationName = Release;
279 | 		};
280 | 		FD5D7C2915E1DBAD00F847DF /* Build configuration list for PBXNativeTarget "rope" */ = {
281 | 			isa = XCConfigurationList;
282 | 			buildConfigurations = (
283 | 				FD5D7C2A15E1DBAD00F847DF /* Debug */,
284 | 				FD5D7C2B15E1DBAD00F847DF /* Release */,
285 | 			);
286 | 			defaultConfigurationIsVisible = 0;
287 | 			defaultConfigurationName = Release;
288 | 		};
289 | 		FD5D7C4315E1DCA100F847DF /* Build configuration list for PBXNativeTarget "tests" */ = {
290 | 			isa = XCConfigurationList;
291 | 			buildConfigurations = (
292 | 				FD5D7C4415E1DCA100F847DF /* Debug */,
293 | 				FD5D7C4515E1DCA100F847DF /* Release */,
294 | 			);
295 | 			defaultConfigurationIsVisible = 0;
296 | 			defaultConfigurationName = Release;
297 | 		};
298 | /* End XCConfigurationList section */
299 | 	};
300 | 	rootObject = FD5D7C1C15E1DBAD00F847DF /* Project object */;
301 | }
302 | 


--------------------------------------------------------------------------------
/rope.c:
--------------------------------------------------------------------------------
  1 | // Implementation for rope library.
  2 | 
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include <sys/types.h>
  6 | 
  7 | // Needed for VC++, which always compiles in C++ mode and doesn't have stdbool.
  8 | #ifndef __cplusplus
  9 | #include <stdbool.h>
 10 | #endif
 11 | 
 12 | #include <assert.h>
 13 | #include "rope.h"
 14 | 
 15 | // The number of bytes the rope head structure takes up
 16 | static const size_t ROPE_SIZE = sizeof(rope) + sizeof(rope_node) * ROPE_MAX_HEIGHT;
 17 | 
 18 | // Create a new rope with no contents
 19 | rope *rope_new2(void *(*alloc)(size_t bytes),
 20 |                 void *(*realloc)(void *ptr, size_t newsize),
 21 |                 void (*free)(void *ptr)) {
 22 |   rope *r = (rope *)alloc(ROPE_SIZE);
 23 |   r->num_chars = r->num_bytes = 0;
 24 | 
 25 |   r->alloc = alloc;
 26 |   r->realloc = realloc;
 27 |   r->free = free;
 28 | 
 29 |   r->head.height = 1;
 30 |   r->head.num_bytes = 0;
 31 |   r->head.nexts[0].node = NULL;
 32 |   r->head.nexts[0].skip_size = 0;
 33 | #if ROPE_WCHAR
 34 |   r->head.nexts[0].wchar_size = 0;
 35 | #endif
 36 |   return r;
 37 | }
 38 | 
 39 | rope *rope_new() {
 40 |   return rope_new2(malloc, realloc, free);
 41 | }
 42 | 
 43 | // Create a new rope containing the specified string
 44 | rope *rope_new_with_utf8(const uint8_t *str) {
 45 |   rope *r = rope_new();
 46 |   ROPE_RESULT result = rope_insert(r, 0, str);
 47 | 
 48 |   if (result != ROPE_OK) {
 49 |     rope_free(r);
 50 |     return NULL;
 51 |   } else {
 52 |     return r;
 53 |   }
 54 | }
 55 | 
 56 | rope *rope_copy(const rope *other) {
 57 |   rope *r = (rope *)other->alloc(ROPE_SIZE);
 58 | 
 59 |   // Just copy most of the head's data. Note this won't copy the nexts list in head.
 60 |   *r = *other;
 61 | 
 62 |   rope_node *nodes[ROPE_MAX_HEIGHT];
 63 | 
 64 |   for (int i = 0; i < other->head.height; i++) {
 65 |     nodes[i] = &r->head;
 66 |     // non-NULL next pointers will be rewritten below.
 67 |     r->head.nexts[i] = other->head.nexts[i];
 68 |   }
 69 | 
 70 |   for (rope_node *n = other->head.nexts[0].node; n != NULL; n = n->nexts[0].node) {
 71 |     // I wonder if it would be faster if we took this opportunity to rebalance the node list..?
 72 |     size_t h = n->height;
 73 |     rope_node *n2 = (rope_node *)r->alloc(sizeof(rope_node) + h * sizeof(rope_skip_node));
 74 | 
 75 |     // Would it be faster to just *n2 = *n; ?
 76 |     n2->num_bytes = n->num_bytes;
 77 |     n2->height = h;
 78 |     memcpy(n2->str, n->str, n->num_bytes);
 79 |     memcpy(n2->nexts, n->nexts, h * sizeof(rope_skip_node));
 80 | 
 81 |     for (int i = 0; i < h; i++) {
 82 |       nodes[i]->nexts[i].node = n2;
 83 |       nodes[i] = n2;
 84 |     }
 85 |   }
 86 | 
 87 |   return r;
 88 | }
 89 | 
 90 | // Free the specified rope
 91 | void rope_free(rope *r) {
 92 |   assert(r);
 93 |   rope_node *next;
 94 | 
 95 |   for (rope_node *n = r->head.nexts[0].node; n != NULL; n = next) {
 96 |     next = n->nexts[0].node;
 97 |     r->free(n);
 98 |   }
 99 | 
100 |   r->free(r);
101 | }
102 | 
103 | // Get the number of characters in a rope
104 | size_t rope_char_count(const rope *r) {
105 |   assert(r);
106 |   return r->num_chars;
107 | }
108 | 
109 | // Get the number of bytes which the rope would take up if stored as a utf8
110 | // string
111 | size_t rope_byte_count(const rope *r) {
112 |   assert(r);
113 |   return r->num_bytes;
114 | }
115 | 
116 | // Serialises the rope into dest as utf8 bytes followed by a terminating '\0'.
117 | // Returns the number of bytes written, which is rope_byte_count(r) + 1.
118 | size_t rope_write_cstr(rope *r, uint8_t *dest) {
119 |   size_t num_bytes = rope_byte_count(r);
120 |   dest[num_bytes] = '\0';
121 |   if (num_bytes == 0) return 1; // Empty rope: only the terminator is written.
122 | 
123 |   // Append each node's bytes in list order, starting with the inline head node.
124 |   uint8_t *p = dest;
125 |   for (rope_node* restrict n = &r->head; n != NULL; n = n->nexts[0].node) {
126 |     memcpy(p, n->str, n->num_bytes);
127 |     p += n->num_bytes;
128 |   }
129 | 
130 |   assert(p == &dest[num_bytes]);
131 |   return num_bytes + 1;
132 | }
133 | 
134 | // Create a new C string which contains the rope. The string will contain
135 | // the rope encoded as utf8. Returns NULL if the rope's alloc callback fails.
136 | uint8_t *rope_create_cstr(rope *r) {
137 |   uint8_t *bytes = (uint8_t *)r->alloc(rope_byte_count(r) + 1); // Room for a zero.
138 |   if (bytes != NULL) rope_write_cstr(r, bytes); // Never write through a failed allocation.
139 |   return bytes;
140 | }
141 | 
142 | #if ROPE_WCHAR
143 | size_t rope_wchar_count(rope *r) {
144 |   assert(r);
145 |   return r->head.nexts[r->head.height - 1].wchar_size; // The head's topmost link spans the whole rope.
146 | }
147 | #endif
148 | 
149 | #define MIN(x,y) ((x) > (y) ? (y) : (x)) // NB: an argument may be evaluated twice.
150 | #define MAX(x,y) ((x) > (y) ? (x) : (y)) // NB: an argument may be evaluated twice.
151 | 
152 | #ifdef _WIN32
153 | inline static long random() { // Stand-in so random_height() can call random() on Windows builds.
154 |   return rand();
155 | }
156 | #endif
157 | 
158 | static uint8_t random_height() { // Picks the height of a new node, in [1, ROPE_MAX_HEIGHT - 1].
159 |   // This function is horribly inefficient. I'm throwing away heaps of entropy, and
160 |   // the mod could be replaced by some clever shifting.
161 |   //
162 |   // However, random_height barely appears in the profiler output - so it's probably
163 |   // not worth investing the time to optimise.
164 | 
165 |   uint8_t height = 1;
166 | 
167 |   // The root node's height is the height of the largest node + 1, so the largest
168 |   // node can only have ROPE_MAX_HEIGHT - 1.
169 |   while(height < (ROPE_MAX_HEIGHT - 1) && (random() % 100) < ROPE_BIAS) { // Grow with roughly ROPE_BIAS% probability per step.
170 |     height++;
171 |   }
172 | 
173 |   return height;
174 | }
175 | 
176 | // Bytes to allocate for a node of the given height: the rope_node header plus `height` nexts[] entries.
177 | static size_t node_size(uint8_t height) {
178 |   return sizeof(rope_node) + height * sizeof(rope_skip_node);
179 | }
180 | 
181 | // Allocate and return a new node. The new node will be full of junk, except
182 | // for its height. NOTE: the result of r->alloc is not checked for NULL here.
183 | // This function should be replaced at some point with an object pool based version.
184 | static rope_node *alloc_node(rope *r, uint8_t height) {
185 |   rope_node *node = (rope_node *)r->alloc(node_size(height));
186 |   node->height = height;
187 |   return node;
188 | }
189 | 
190 | // Find out how many bytes the unicode character which starts with the specified byte
191 | // will occupy in memory.
192 | // Returns the number of bytes, or SIZE_MAX if the byte is invalid.
193 | static inline size_t codepoint_size(uint8_t byte) {
194 |   if (byte == 0) { return SIZE_MAX; } // NUL byte.
195 |   else if (byte <= 0x7f) { return 1; } // 0x7f = 0111 1111
196 |   else if (byte <= 0xbf) { return SIZE_MAX; } // 1011 1111. Invalid for a starting byte.
197 |   else if (byte <= 0xdf) { return 2; } // 1101 1111
198 |   else if (byte <= 0xef) { return 3; } // 1110 1111
199 |   else if (byte <= 0xf7) { return 4; } // 1111 0111
200 |   else if (byte <= 0xfb) { return 5; } // 1111 1011
201 |   else if (byte <= 0xfd) { return 6; } // 1111 1101
202 |   else { return SIZE_MAX; }
203 | }
204 | 
205 | // Counts how many bytes the first num_chars characters of str take up. str is assumed to be valid utf8.
206 | static size_t count_bytes_in_utf8(const uint8_t *str, size_t num_chars) {
207 |   const uint8_t *p = str;
208 |   for (size_t i = 0; i < num_chars; i++) { // size_t index: same width as num_chars, so it cannot wrap first.
209 |     p += codepoint_size(*p);
210 |   }
211 |   return p - str;
212 | }
213 | 
214 | #if ROPE_WCHAR
215 | 
216 | #define NEEDS_TWO_WCHARS(x) (((x) & 0xf0) == 0xf0) // True for lead bytes 0xf0 and above.
217 | 
218 | static size_t count_wchars_in_utf8(const uint8_t *str, size_t num_chars) { // wchars needed by the first num_chars characters of str.
219 |   size_t wchars = 0;
220 |   for (size_t i = 0; i < num_chars; i++) { // size_t index: same width as num_chars.
221 |     wchars += 1 + NEEDS_TWO_WCHARS(*str);
222 |     str += codepoint_size(*str);
223 |   }
224 |   return wchars;
225 | }
226 | 
227 | static size_t count_utf8_in_wchars(const uint8_t *str, size_t num_wchars) { // Characters covered by the first num_wchars wchars of str.
228 |   size_t chars = num_wchars;
229 |   for (size_t i = 0; i < num_wchars; i++) { // size_t index: same width as num_wchars.
230 |     if (NEEDS_TWO_WCHARS(*str)) {
231 |       chars--; // Two wchars map to this single character...
232 |       i++;     // ...so consume the second wchar as well.
233 |     }
234 |     str += codepoint_size(*str);
235 |   }
236 |   return chars;
237 | }
238 | #endif
239 | 
240 | // Count the utf8 characters in a NUL-terminated string.
241 | static size_t strlen_utf8(const uint8_t *str) {
242 |   size_t num_chars = 0;
243 | 
244 |   // Hop from lead byte to lead byte until the terminator is reached.
245 |   for (const uint8_t *cur = str; *cur; cur += codepoint_size(*cur)) {
246 |     num_chars++;
247 |   }
248 |   return num_chars;
249 | }
250 | 
251 | // Checks if a UTF8 string is ok (lead bytes and continuation-byte markers only). Returns the
252 | // number of bytes in the string, excluding the terminator, if it is ok, otherwise returns -1.
253 | static ssize_t bytelen_and_check_utf8(const uint8_t *str) {
254 |   const uint8_t *p = str;
255 |   while (*p != '\0') {
256 |     size_t size = codepoint_size(*p);
257 |     if (size == SIZE_MAX) return -1;
258 |     p++; size--;
259 |     while (size > 0) {
260 |       // Check that continuation bytes have the binary form 10xx xxxx. A premature '\0' fails this too.
261 |       if ((*p & 0xc0) != 0x80)
262 |         return -1;
263 |       p++; size--;
264 |     }
265 |   }
266 | 
267 | #ifdef DEBUG
268 |   size_t num = p - str;
269 |   assert(num == strlen((char *)str));
270 | #endif
271 | 
272 |   return p - str;
273 | }
274 | 
275 | typedef struct {
276 |   // s[i] stores the previous node at height i, and the number of characters from the start of
277 |   // that node to the current iterator position.
278 |   rope_skip_node s[ROPE_MAX_HEIGHT];
279 | } rope_iter;
280 | 
281 | // Internal function for navigating to a particular character offset in the rope.
282 | // Fills iter with, for each height, the node at which the search descended and how many
283 | // characters char_pos lies past that node's start. Returns the height-0 node, iter->s[0].node.
284 | static rope_node *iter_at_char_pos(rope *r, size_t char_pos, rope_iter *iter) {
285 |   assert(char_pos <= r->num_chars);
286 | 
287 |   rope_node *e = &r->head;
288 |   int height = r->head.height - 1;
289 | 
290 |   // Offset stores how many characters we still need to skip in the current node.
291 |   size_t offset = char_pos;
292 |   size_t skip;
293 | #if ROPE_WCHAR
294 |   size_t wchar_pos = 0; // Current wchar pos from the start of the rope.
295 | #endif
296 | 
297 |   while (true) {
298 |     skip = e->nexts[height].skip_size;
299 |     if (offset > skip) {
300 |       // Go right. The test is strict, so a position exactly on a boundary stays in the earlier node.
301 |       assert(e == &r->head || e->num_bytes);
302 | 
303 |       offset -= skip;
304 | #if ROPE_WCHAR
305 |       wchar_pos += e->nexts[height].wchar_size;
306 | #endif
307 |       e = e->nexts[height].node;
308 |     } else {
309 |       // Go down.
310 |       iter->s[height].skip_size = offset;
311 |       iter->s[height].node = e;
312 | #if ROPE_WCHAR
313 |       iter->s[height].wchar_size = wchar_pos;
314 | #endif
315 | 
316 |       if (height == 0) {
317 |         break;
318 |       } else {
319 |         height--;
320 |       }
321 |     }
322 |   }
323 | 
324 | #if ROPE_WCHAR
325 |   // For some reason, this is _REALLY SLOW_. Like, 5.5Mops/s -> 4Mops/s from this block of code.
326 |   wchar_pos += count_wchars_in_utf8(e->str, offset);
327 | 
328 |   // Each entry holds its node's wchar position from the rope start; turn it into the offset within that node.
329 |   for (int i = 0; i < r->head.height; i++) {
330 |     iter->s[i].wchar_size = wchar_pos - iter->s[i].wchar_size;
331 |   }
332 | #endif
333 | 
334 |   assert(offset <= ROPE_NODE_STR_SIZE);
335 |   assert(iter->s[0].node == e);
336 |   return e;
337 | }
338 | 
339 | #if ROPE_WCHAR
340 | // Equivalent of iter_at_char_pos, but for wchar positions instead. Fills both offsets of every iter entry.
341 | static rope_node *iter_at_wchar_pos(rope *r, size_t wchar_pos, rope_iter *iter) {
342 |   int height = r->head.height - 1;
343 |   assert(wchar_pos <= r->head.nexts[height].wchar_size);
344 | 
345 |   rope_node *e = &r->head;
346 | 
347 |   // Offset stores how many wchar characters we still need to skip in the current node.
348 |   size_t offset = wchar_pos;
349 |   size_t skip;
350 |   size_t char_pos = 0; // Current char pos from the start of the rope.
351 | 
352 |   while (true) {
353 |     skip = e->nexts[height].wchar_size;
354 |     if (offset > skip) {
355 |       // Go right.
356 |       offset -= skip;
357 |       char_pos += e->nexts[height].skip_size;
358 |       e = e->nexts[height].node;
359 |     } else {
360 |       // Go down.
361 |       iter->s[height].skip_size = char_pos;
362 |       iter->s[height].node = e;
363 |       iter->s[height].wchar_size = offset;
364 | 
365 |       if (height == 0) {
366 |         break;
367 |       } else {
368 |         height--;
369 |       }
370 |     }
371 |   }
372 | 
373 |   char_pos += count_utf8_in_wchars(e->str, offset); // Translate the remaining in-node wchar offset into characters.
374 | 
375 |   // Each entry holds the character position of its node's start; turn it into the offset within that node.
376 |   for (int i = 0; i < r->head.height; i++) {
377 |     iter->s[i].skip_size = char_pos - iter->s[i].skip_size;
378 |   }
379 |   assert(e == iter->s[0].node);
380 |   return e;
381 | }
382 | #endif
383 | 
384 | #if ROPE_WCHAR
385 | static void update_offset_list(rope *r, rope_iter *iter, size_t num_chars, size_t num_wchars) {
386 |   for (int i = 0; i < r->head.height; i++) { // Adjust the link leaving the iterator's node at every height.
387 |     iter->s[i].node->nexts[i].skip_size += num_chars; // Callers pass a negated size_t to shrink; it wraps into a subtraction.
388 |     iter->s[i].node->nexts[i].wchar_size += num_wchars;
389 |   }
390 | }
391 | #else
392 | static void update_offset_list(rope *r, rope_iter *iter, size_t num_chars) {
393 |   for (int i = 0; i < r->head.height; i++) { // Adjust the link leaving the iterator's node at every height.
394 |     iter->s[i].node->nexts[i].skip_size += num_chars; // Callers pass a negated size_t to shrink; it wraps into a subtraction.
395 |   }
396 | }
397 | #endif
398 | 
399 | 
400 | // Internal method of rope_insert.
401 | // Creates a new node holding the first num_bytes bytes (num_chars characters) of str, links it in
402 | // right after the iterator position, and leaves iter at the end of the new node.
403 | static void insert_at(rope *r, rope_iter *iter,
404 |     const uint8_t *str, size_t num_bytes, size_t num_chars) {
405 | #if ROPE_WCHAR
406 |   size_t num_wchars = count_wchars_in_utf8(str, num_chars);
407 | #endif
408 | 
409 |   // This describes how many levels of the iter are filled in.
410 |   uint8_t max_height = r->head.height;
411 |   uint8_t new_height = random_height();
412 |   rope_node *new_node = alloc_node(r, new_height);
413 |   new_node->num_bytes = num_bytes;
414 |   memcpy(new_node->str, str, num_bytes);
415 | 
416 |   assert(new_height < ROPE_MAX_HEIGHT); // random_height() stops at ROPE_MAX_HEIGHT - 1.
417 | 
418 |   // Max height (the rope's head's height) must be 1+ the height of the largest node.
419 |   while (max_height <= new_height) {
420 |     r->head.height++;
421 |     r->head.nexts[max_height] = r->head.nexts[max_height - 1];
422 | 
423 |     // The iterator entry for the new level is a copy of the one below it (the former top level).
424 |     iter->s[max_height] = iter->s[max_height - 1];
425 |     max_height++;
426 |   }
427 | 
428 |   // Fill in the new node's nexts array.
429 |   int i;
430 |   for (i = 0; i < new_height; i++) {
431 |     rope_skip_node *prev_skip = &iter->s[i].node->nexts[i];
432 |     new_node->nexts[i].node = prev_skip->node;
433 |     new_node->nexts[i].skip_size = num_chars + prev_skip->skip_size - iter->s[i].skip_size;
434 | 
435 | 
436 |     prev_skip->node = new_node;
437 |     prev_skip->skip_size = iter->s[i].skip_size;
438 | 
439 |     // & move the iterator to the end of the newly inserted node.
440 |     iter->s[i].node = new_node;
441 |     iter->s[i].skip_size = num_chars;
442 | #if ROPE_WCHAR
443 |     new_node->nexts[i].wchar_size = num_wchars + prev_skip->wchar_size - iter->s[i].wchar_size;
444 |     prev_skip->wchar_size = iter->s[i].wchar_size;
445 |     iter->s[i].wchar_size = num_wchars;
446 | #endif
447 |   }
448 | 
449 |   for (; i < max_height; i++) { // Levels above the new node: their links simply span more characters now.
450 |     iter->s[i].node->nexts[i].skip_size += num_chars;
451 |     iter->s[i].skip_size += num_chars;
452 | #if ROPE_WCHAR
453 |     iter->s[i].node->nexts[i].wchar_size += num_wchars;
454 |     iter->s[i].wchar_size += num_wchars;
455 | #endif
456 |   }
457 | 
458 |   r->num_chars += num_chars;
459 |   r->num_bytes += num_bytes;
460 | }
461 | 
462 | // Insert the given utf8 string at the iterator position. Returns ROPE_INVALID_UTF8 before touching the rope if str is invalid.
463 | static ROPE_RESULT rope_insert_at_iter(rope *r, rope_node *e, rope_iter *iter, const uint8_t *str) {
464 |   // iter->s[0].skip_size contains how far (in characters) into the current element to skip.
465 |   // Figure out how much that is in bytes.
466 |   size_t offset_bytes = 0;
467 |   // The insertion offset into the destination node.
468 |   size_t offset = iter->s[0].skip_size;
469 |   if (offset) {
470 |     assert(offset <= e->nexts[0].skip_size);
471 |     offset_bytes = count_bytes_in_utf8(e->str, offset);
472 |   }
473 | 
474 |   // We might be able to insert the new data into the current node, depending on
475 |   // how big it is. We'll count the bytes, and also check that it's valid utf8.
476 |   ssize_t num_inserted_bytes = bytelen_and_check_utf8(str);
477 |   if (num_inserted_bytes == -1) return ROPE_INVALID_UTF8;
478 | 
479 |   // Can we insert into the current node?
480 |   bool insert_here = e->num_bytes + num_inserted_bytes <= ROPE_NODE_STR_SIZE;
481 | 
482 |   // Can we insert into the subsequent node?
483 |   rope_node *next = NULL;
484 |   if (!insert_here && offset_bytes == e->num_bytes) {
485 |     next = e->nexts[0].node;
486 |     // We can insert into the subsequent node if:
487 |     // - We can't insert into the current node
488 |     // - There _is_ a next node to insert into
489 |     // - The insert would be at the start of the next node
490 |     // - There's room in the next node
491 |     if (next && next->num_bytes + num_inserted_bytes <= ROPE_NODE_STR_SIZE) {
492 |       offset = offset_bytes = 0;
493 |       for (int i = 0; i < next->height; i++) {
494 |         iter->s[i].node = next;
495 |         // The iterator's offsets are left stale here; they will not be used.
496 |       }
497 |       e = next;
498 | 
499 |       insert_here = true;
500 |     }
501 |   }
502 | 
503 |   if (insert_here) {
504 |     // First move the current bytes later on in the string.
505 |     if (offset_bytes < e->num_bytes) {
506 |       memmove(&e->str[offset_bytes + num_inserted_bytes],
507 |               &e->str[offset_bytes],
508 |               e->num_bytes - offset_bytes);
509 |     }
510 | 
511 |     // Then copy in the string bytes
512 |     memcpy(&e->str[offset_bytes], str, num_inserted_bytes);
513 |     e->num_bytes += num_inserted_bytes;
514 | 
515 |     r->num_bytes += num_inserted_bytes;
516 |     size_t num_inserted_chars = strlen_utf8(str);
517 |     r->num_chars += num_inserted_chars;
518 | 
519 |     // .... aaaand update all the offset amounts.
520 | #if ROPE_WCHAR
521 |     size_t num_inserted_wchars = count_wchars_in_utf8(str, num_inserted_chars);
522 |     update_offset_list(r, iter, num_inserted_chars, num_inserted_wchars);
523 | #else
524 |     update_offset_list(r, iter, num_inserted_chars);
525 | #endif
526 | 
527 |   } else {
528 |     // There isn't room. We'll need to add at least one new node to the rope.
529 | 
530 |     // If we're not at the end of the current node, we'll need to remove
531 |     // the end of the current node's data and reinsert it later.
532 |     size_t num_end_chars, num_end_bytes = e->num_bytes - offset_bytes;
533 |     if (num_end_bytes) {
534 |       // We'll pretend like the characters have been deleted from the node, while leaving
535 |       // the bytes themselves there (for later).
536 |       e->num_bytes = offset_bytes;
537 |       num_end_chars = e->nexts[0].skip_size - offset;
538 | #if ROPE_WCHAR
539 |       size_t num_end_wchars = count_wchars_in_utf8(&e->str[offset_bytes], num_end_chars);
540 |       update_offset_list(r, iter, -num_end_chars, -num_end_wchars); // Negated size_t wraps around: this subtracts.
541 | #else
542 |       update_offset_list(r, iter, -num_end_chars); // Negated size_t wraps around: this subtracts.
543 | #endif
544 | 
545 |       r->num_chars -= num_end_chars;
546 |       r->num_bytes -= num_end_bytes;
547 |     }
548 | 
549 |     // Now we insert new nodes containing the new character data. The data must be broken into
550 |     // pieces with a maximum size of ROPE_NODE_STR_SIZE. Node boundaries must not occur in the
551 |     // middle of a utf8 codepoint.
552 |     size_t str_offset = 0;
553 |     while (str_offset < num_inserted_bytes) {
554 |       size_t new_node_bytes = 0;
555 |       size_t new_node_chars = 0;
556 | 
557 |       while (str_offset + new_node_bytes < num_inserted_bytes) {
558 |         size_t cs = codepoint_size(str[str_offset + new_node_bytes]);
559 |         if (cs + new_node_bytes > ROPE_NODE_STR_SIZE) {
560 |           break;
561 |         } else {
562 |           new_node_bytes += cs;
563 |           new_node_chars++;
564 |         }
565 |       }
566 | 
567 |       insert_at(r, iter, &str[str_offset], new_node_bytes, new_node_chars);
568 |       str_offset += new_node_bytes;
569 |     }
570 | 
571 |     if (num_end_bytes) {
572 |       insert_at(r, iter, &e->str[offset_bytes], num_end_bytes, num_end_chars); // Re-add the tail that was cut off above.
573 |     }
574 |   }
575 | 
576 |   return ROPE_OK;
577 | }
578 | 
579 | ROPE_RESULT rope_insert(rope *r, size_t pos, const uint8_t *str) { // Insert utf8 str at character position pos.
580 |   assert(r);
581 |   assert(str);
582 | #ifdef DEBUG
583 |   _rope_check(r);
584 | #endif
585 |   pos = MIN(pos, r->num_chars); // A position past the end is clamped to the end of the rope.
586 | 
587 |   rope_iter iter;
588 |   // First we need to search for the node where we'll insert the string.
589 |   rope_node *e = iter_at_char_pos(r, pos, &iter);
590 | 
591 |   ROPE_RESULT result = rope_insert_at_iter(r, e, &iter, str);
592 | 
593 | #ifdef DEBUG
594 |   _rope_check(r);
595 | #endif
596 | 
597 |   return result;
598 | }
599 | 
600 | #if ROPE_WCHAR
601 | // Insert the given utf8 string into the rope at the specified wchar position. Returns the position in characters.
602 | size_t rope_insert_at_wchar(rope *r, size_t wchar_pos, const uint8_t *str) {
603 |   assert(r);
604 |   assert(str);
605 | #ifdef DEBUG
606 |   _rope_check(r);
607 | #endif
608 |   wchar_pos = MIN(wchar_pos, rope_wchar_count(r)); // A position past the end is clamped to the end of the rope.
609 | 
610 |   rope_iter iter;
611 |   // First we need to search for the node where we'll insert the string.
612 |   rope_node *e = iter_at_wchar_pos(r, wchar_pos, &iter);
613 |   size_t pos = iter.s[r->head.height - 1].skip_size;
614 |   rope_insert_at_iter(r, e, &iter, str); // NOTE: the ROPE_RESULT (e.g. ROPE_INVALID_UTF8) is discarded here.
615 | 
616 | #ifdef DEBUG
617 |   _rope_check(r);
618 | #endif
619 |   return pos;
620 | }
621 | 
622 | #endif
623 | 
624 | // Delete `length` characters starting at the iterator position; e is the node the iterator is in.
625 | // length is subtracted from r->num_chars unconditionally, so callers clamp it first (see rope_del).
626 | static void rope_del_at_iter(rope *r, rope_node *e, rope_iter *iter, size_t length) {
627 |   r->num_chars -= length;
628 |   size_t offset = iter->s[0].skip_size;
629 |   while (length) {
630 |     if (offset == e->nexts[0].skip_size) {
631 |       // End of the current node. Skip to the start of the next one.
632 |       e = iter->s[0].node->nexts[0].node;
633 |       offset = 0;
634 |     }
635 | 
636 |     size_t num_chars = e->nexts[0].skip_size;
637 |     size_t removed = MIN(length, num_chars - offset);
638 | #if ROPE_WCHAR
639 |     size_t removed_wchars;
640 | #endif
641 | 
642 |     int i;
643 |     if (removed < num_chars || e == &r->head) { // The head is embedded in the rope, so it is trimmed, never unlinked.
644 |       // Just trim this node down to size.
645 |       size_t leading_bytes = count_bytes_in_utf8(e->str, offset);
646 |       size_t removed_bytes = count_bytes_in_utf8(&e->str[leading_bytes], removed);
647 |       size_t trailing_bytes = e->num_bytes - leading_bytes - removed_bytes;
648 | #if ROPE_WCHAR
649 |       removed_wchars = count_wchars_in_utf8(&e->str[leading_bytes], removed);
650 | #endif
651 |       if (trailing_bytes) {
652 |         memmove(&e->str[leading_bytes], &e->str[leading_bytes + removed_bytes], trailing_bytes);
653 |       }
654 |       e->num_bytes -= removed_bytes;
655 |       r->num_bytes -= removed_bytes;
656 | 
657 |       for (i = 0; i < e->height; i++) {
658 |         e->nexts[i].skip_size -= removed;
659 | #if ROPE_WCHAR
660 |         e->nexts[i].wchar_size -= removed_wchars;
661 | #endif
662 |       }
663 |     } else {
664 |       // Remove the node from the list
665 | #if ROPE_WCHAR
666 |       removed_wchars = e->nexts[0].wchar_size;
667 | #endif
668 |       for (i = 0; i < e->height; i++) {
669 |         iter->s[i].node->nexts[i].node = e->nexts[i].node;
670 |         iter->s[i].node->nexts[i].skip_size += e->nexts[i].skip_size - removed;
671 | #if ROPE_WCHAR
672 |         iter->s[i].node->nexts[i].wchar_size += e->nexts[i].wchar_size - removed_wchars;
673 | #endif
674 |       }
675 | 
676 |       r->num_bytes -= e->num_bytes;
677 |       // TODO: Recycle e.
678 |       rope_node *next = e->nexts[0].node;
679 |       r->free(e);
680 |       e = next;
681 |     }
682 | 
683 |     for (; i < r->head.height; i++) { // Levels above e's height: their links span the removed characters.
684 |       iter->s[i].node->nexts[i].skip_size -= removed;
685 | #if ROPE_WCHAR
686 |       iter->s[i].node->nexts[i].wchar_size -= removed_wchars;
687 | #endif
688 |     }
689 | 
690 |     length -= removed;
691 |   }
692 | }
693 | 
694 | void rope_del(rope *r, size_t pos, size_t length) { // Delete `length` characters starting at character `pos`.
695 |   assert(r); // Validate r before _rope_check dereferences it.
696 | #ifdef DEBUG
697 |   _rope_check(r);
698 | #endif
699 | 
700 |   pos = MIN(pos, r->num_chars); // Clamp both values so that deleting past the end has no effect.
701 |   length = MIN(length, r->num_chars - pos);
702 | 
703 |   rope_iter iter;
704 | 
705 |   // Search for the node where the deletion starts.
706 |   rope_node *e = iter_at_char_pos(r, pos, &iter);
707 | 
708 |   rope_del_at_iter(r, e, &iter, length);
709 | 
710 | #ifdef DEBUG
711 |   _rope_check(r);
712 | #endif
713 | }
714 | 
715 | #if ROPE_WCHAR
716 | size_t rope_del_at_wchar(rope *r, size_t wchar_pos, size_t wchar_num, size_t *char_len_out) {
717 |   assert(r); // Validate r before _rope_check dereferences it.
718 | #ifdef DEBUG
719 |   _rope_check(r);
720 | #endif
721 | 
722 |   size_t wchar_total = rope_wchar_count(r);
723 |   wchar_pos = MIN(wchar_pos, wchar_total); // Clamp the range to the rope's contents.
724 |   wchar_num = MIN(wchar_num, wchar_total - wchar_pos);
725 | 
726 |   rope_iter iter;
727 | 
728 |   // Search for the node where the deletion starts.
729 |   rope_node *start = iter_at_wchar_pos(r, wchar_pos, &iter);
730 |   size_t char_pos = iter.s[r->head.height - 1].skip_size;
731 | 
732 |   rope_iter end_iter; // Only used to translate the end of the wchar range into a character position.
733 |   int h = r->head.height - 1;
734 |   iter_at_wchar_pos(r, iter.s[h].wchar_size + wchar_num, &end_iter);
735 | 
736 |   size_t char_length = end_iter.s[h].skip_size - iter.s[h].skip_size;
737 |   rope_del_at_iter(r, start, &iter, char_length);
738 | 
739 | #ifdef DEBUG
740 |   _rope_check(r);
741 | #endif
742 |   if (char_len_out) {
743 |     *char_len_out = char_length;
744 |   }
745 |   return char_pos;
746 | }
747 | #endif
748 | 
749 | void _rope_check(rope *r) { // Debug helper: asserts the rope's internal invariants.
750 |   assert(r->head.height); // Even empty ropes have a height of 1.
751 |   assert(r->num_bytes >= r->num_chars);
752 | 
753 |   rope_skip_node skip_over = r->head.nexts[r->head.height - 1];
754 |   assert(skip_over.skip_size == r->num_chars);
755 |   assert(skip_over.node == NULL);
756 | 
757 |   size_t num_bytes = 0;
758 |   size_t num_chars = 0;
759 | #if ROPE_WCHAR
760 |   size_t num_wchar = 0;
761 | #endif
762 | 
763 |   // The offsets here are used to store the total distance travelled from the start
764 |   // of the rope. (An empty `{}` initializer is not valid C before C23, hence the explicit zero.)
765 |   rope_iter iter = {{{0}}};
766 |   for (int i = 0; i < r->head.height; i++) {
767 |     iter.s[i].node = &r->head;
768 |   }
769 | 
770 |   for (rope_node *n = &r->head; n != NULL; n = n->nexts[0].node) {
771 |     assert(n == &r->head || n->num_bytes);
772 |     assert(n->height <= ROPE_MAX_HEIGHT);
773 |     assert(count_bytes_in_utf8(n->str, n->nexts[0].skip_size) == n->num_bytes);
774 | #if ROPE_WCHAR
775 |     assert(count_wchars_in_utf8(n->str, n->nexts[0].skip_size) == n->nexts[0].wchar_size);
776 | #endif
777 |     for (int i = 0; i < n->height; i++) {
778 |       assert(iter.s[i].node == n);
779 |       assert(iter.s[i].skip_size == num_chars);
780 |       iter.s[i].node = n->nexts[i].node;
781 |       iter.s[i].skip_size += n->nexts[i].skip_size;
782 | #if ROPE_WCHAR
783 |       assert(iter.s[i].wchar_size == num_wchar);
784 |       iter.s[i].wchar_size += n->nexts[i].wchar_size;
785 | #endif
786 |     }
787 | 
788 |     num_bytes += n->num_bytes;
789 |     num_chars += n->nexts[0].skip_size;
790 | #if ROPE_WCHAR
791 |     num_wchar += n->nexts[0].wchar_size;
792 | #endif
793 |   }
794 | 
795 |   for (int i = 0; i < r->head.height; i++) { // Every level must have been walked to its end.
796 |     assert(iter.s[i].node == NULL);
797 |     assert(iter.s[i].skip_size == num_chars);
798 | #if ROPE_WCHAR
799 |     assert(iter.s[i].wchar_size == num_wchar);
800 | #endif
801 |   }
802 | 
803 |   assert(r->num_bytes == num_bytes);
804 |   assert(r->num_chars == num_chars);
805 | #if ROPE_WCHAR
806 |   assert(skip_over.wchar_size == num_wchar);
807 | #endif
808 | }
809 | 
810 | // For debugging. Dumps the rope's counters, every node's skip sizes and its raw bytes to stdout.
811 | #include <stdio.h>
812 | void _rope_print(rope *r) {
813 |   printf("chars: %zu\tbytes: %zu\theight: %d\n", r->num_chars, r->num_bytes, r->head.height); // %zu: the counts are size_t.
814 | 
815 |   printf("HEAD");
816 |   for (int i = 0; i < r->head.height; i++) {
817 |     printf(" |%3zu ", r->head.nexts[i].skip_size);
818 |   }
819 |   printf("\n");
820 | 
821 |   int num = 0;
822 |   for (rope_node *n = &r->head; n != NULL; n = n->nexts[0].node) {
823 |     printf("%3d:", num++);
824 |     for (int i = 0; i < n->height; i++) {
825 |       printf(" |%3zu ", n->nexts[i].skip_size);
826 |     }
827 |     printf("        : \"");
828 |     fwrite(n->str, n->num_bytes, 1, stdout); // Node data is not NUL-terminated, so write it by length.
829 |     printf("\"\n");
830 |   }
831 | }
832 | 


--------------------------------------------------------------------------------
/rope.h:
--------------------------------------------------------------------------------
  1 | /* UTF-8 Rope implementation by Joseph Gentle
  2 |  *
  3 |  * This library implements a heavyweight utf8 string type with fast
  4 |  * insert-at-position and delete-at-position operations.
  5 |  * 
  6 |  * It uses skip lists instead of trees. Trees might be faster - who knows?
  7 |  *
  8 |  * Ropes are not synchronized. Do not access the same rope from multiple threads
  9 |  * simultaneously.
 10 |  */
 11 | 
 12 | #ifndef librope_rope_h
 13 | #define librope_rope_h
 14 | 
 15 | #include <stdint.h>
 16 | #include <stddef.h>
 17 | 
 18 | // Whether or not the rope should support converting UTF-8 character offsets to
 19 | // wchar array positions. This is useful when interoperating with strings in
 20 | // JS, Objective-C and many other languages. See
 21 | // http://josephg.com/post/31707645955/string-length-lies
 22 | //
 23 | // Adding wchar conversion support decreases performance by about 30%.
 24 | #ifndef ROPE_WCHAR
 25 | #define ROPE_WCHAR 0
 26 | #endif
 27 | 
 28 | // These two magic values seem to be approximately optimal given the benchmark
 29 | // in tests.c which does lots of small inserts.
 30 | 
 31 | // Must be <= UINT16_MAX. Benchmarking says this is pretty close to optimal
 32 | // (tested on a mac using clang 4.0 and x86_64).
 33 | #ifndef ROPE_NODE_STR_SIZE
 34 | #if ROPE_WCHAR
 35 | #define ROPE_NODE_STR_SIZE 64
 36 | #else
 37 | #define ROPE_NODE_STR_SIZE 136
 38 | #endif
 39 | #endif
 40 | 
 41 | // The likelihood (%) a node will have height (n+1) instead of n
 42 | #ifndef ROPE_BIAS
 43 | #define ROPE_BIAS 25
 44 | #endif
 45 | 
 46 | // The rope will stop being efficient after the string is 2 ^ ROPE_MAX_HEIGHT
 47 | // nodes.
 48 | #ifndef ROPE_MAX_HEIGHT
 49 | #define ROPE_MAX_HEIGHT 60
 50 | #endif
 51 | 
 52 | struct rope_node_t;
 53 | 
 54 | // The number of characters in str can be read out of nexts[0].skip_size.
 55 | typedef struct {
 56 |   // The number of _characters_ between the start of the current node
 57 |   // and the start of next.
 58 |   size_t skip_size;
 59 | 
 60 |   // For some reason, librope runs about 1% faster when this next pointer is
 61 |   // exactly _here_ in the struct.
 62 |   struct rope_node_t *node;
 63 | 
 64 | #if ROPE_WCHAR
 65 |   // The number of wide characters between the start of the current node and the start of next.
 66 |   size_t wchar_size;
 67 | #endif
 68 | } rope_skip_node;
 69 | 
 70 | typedef struct rope_node_t {
 71 |   uint8_t str[ROPE_NODE_STR_SIZE]; // Raw utf8 bytes; no terminator is stored.
 72 | 
 73 |   // The number of bytes in str in use
 74 |   uint16_t num_bytes;
 75 |   
 76 |   // This is the number of elements allocated in nexts.
 77 |   // Each height is ROPE_BIAS% as likely as the height before. The minimum height is 1.
 78 |   uint8_t height;
 79 |   
 80 |   rope_skip_node nexts[];
 81 | } rope_node;
 82 | 
 83 | typedef struct {
 84 |   // The total number of characters in the rope.
 85 |   size_t num_chars;
 86 |   
 87 |   // The total number of bytes which the characters in the rope take up.
 88 |   size_t num_bytes;
 89 |   
 90 |   void *(*alloc)(size_t bytes); // Allocator callbacks, as passed to rope_new2.
 91 |   void *(*realloc)(void *ptr, size_t newsize);
 92 |   void (*free)(void *ptr); // rope_free releases every node and the rope itself through this.
 93 | 
 94 |   // The first node exists inline in the rope structure itself. It must stay the last member: head.nexts[] is a flexible array.
 95 |   rope_node head;
 96 | } rope;
 97 | 
 98 | #ifdef __cplusplus
 99 | extern "C" {
100 | #endif
101 |   
102 | // Create a new rope with no contents
103 | rope *rope_new();
104 | 
105 | // Create a new rope using custom allocators.
106 | rope *rope_new2(void *(*alloc)(size_t bytes),
107 |     void *(*realloc)(void *ptr, size_t newsize),
108 |     void (*free)(void *ptr));
109 | 
110 | // Create a new rope containing a copy of the given string. Shorthand for
111 | // r = rope_new(); rope_insert(r, 0, str);
112 | rope *rope_new_with_utf8(const uint8_t *str);
113 | 
114 | // Make a copy of an existing rope
115 | rope *rope_copy(const rope *r);
116 | 
117 | // Free the specified rope
118 | void rope_free(rope *r);
119 | 
120 | // Get the number of characters in a rope
121 | size_t rope_char_count(const rope *r);
122 | 
123 | // Get the number of bytes which the rope would take up if stored as a utf8
124 | // string
125 | size_t rope_byte_count(const rope *r);
126 | 
127 | // Copies the rope's contents into a utf8 encoded C string. Also copies a
128 | // trailing '\0' character.
129 | // Returns the number of bytes written, which is rope_byte_count(r) + 1.
130 | size_t rope_write_cstr(rope *r, uint8_t *dest);
131 | 
132 | // Create a new C string which contains the rope. The string will contain
133 | // the rope encoded as utf8, followed by a trailing '\0'.
134 | // Use rope_byte_count(r) to get the length of the returned string.
135 | uint8_t *rope_create_cstr(rope *r);
136 | 
137 | // If you try to insert data into the rope with an invalid UTF8 encoding,
138 | // nothing will happen and we'll return ROPE_INVALID_UTF8.
139 | typedef enum { ROPE_OK, ROPE_INVALID_UTF8 } ROPE_RESULT;
140 |   
141 | // Insert the given utf8 string into the rope at the specified position.
142 | ROPE_RESULT rope_insert(rope *r, size_t pos, const uint8_t *str);
143 | 
144 | // Delete num characters at position pos. Deleting past the end of the string
145 | // has no effect.
146 | void rope_del(rope *r, size_t pos, size_t num);
147 |   
148 | // This macro expands to a for() loop header which loops over the segments in a
149 | // rope.
150 | // Node data is not NUL-terminated, so always print it with an explicit length.
151 | // Eg:
152 | //  rope *r = rope_new_with_utf8(str);
153 | //  ROPE_FOREACH(r, iter) {
154 | //    fwrite(rope_node_data(iter), 1, rope_node_num_bytes(iter), stdout);
155 | //  }
156 | #define ROPE_FOREACH(rope, iter) \
157 |   for (rope_node *iter = &(rope)->head; iter != NULL; iter = iter->nexts[0].node)
158 | 
159 | // Get the actual data inside a rope node. The bytes are not NUL-terminated; pair with rope_node_num_bytes().
160 | static inline uint8_t *rope_node_data(rope_node *n) {
161 |   return n->str;
162 | }
163 | 
164 | // Get the number of bytes inside a rope node. This is useful when you're
165 | // looping through a rope, since the node data carries no terminator.
166 | static inline size_t rope_node_num_bytes(rope_node *n) {
167 |   return n->num_bytes;
168 | }
169 | 
170 | // Get the number of characters inside a rope node (the node's level-0 skip size).
171 | static inline size_t rope_node_chars(rope_node *n) {
172 |   return n->nexts[0].skip_size;
173 | }
174 |   
175 | #if ROPE_WCHAR
176 | // Get the number of wchar characters in the rope
177 | size_t rope_wchar_count(rope *r);
178 | 
179 | // Insert the given utf8 string into the rope at the specified wchar position.
180 | // This is compatible with NSString, Javascript, etc. The string still needs to
181 | // be passed in using UTF-8.
182 | //
183 | // Returns the insertion position in characters.
184 | size_t rope_insert_at_wchar(rope *r, size_t wchar_pos, const uint8_t *utf8_str);
185 |   
186 | // Delete wchar_num wide characters at the specified wchar position offset.
187 | // If the range is inside character boundaries, behaviour is undefined.
188 | //
189 | // Returns the deletion position in characters. If char_len_out is not NULL,
190 | // *char_len_out is set to the deletion length in characters.
191 | size_t rope_del_at_wchar(rope *r, size_t wchar_pos, size_t wchar_num, size_t *char_len_out);
192 |   
193 | // Get the number of wchars inside a rope node. This is useful when you're
194 | // looping throuhg a rope.
195 | static inline size_t rope_node_wchars(rope_node *n) {
196 |   return n->nexts[0].wchar_size;
197 | }
198 | #endif
199 | 
200 | 
201 |   
202 | // For debugging.
203 | void _rope_check(rope *r);
204 | void _rope_print(rope *r);
205 | 
206 | #ifdef __cplusplus
207 | }
208 | #endif
209 | 
210 | #endif
211 | 


--------------------------------------------------------------------------------
/test/benchmark.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <sys/time.h>
  3 | #include <stdlib.h>
  4 | #include <stdio.h>
  5 | #include <string.h>
  6 | 
  7 | 
  8 | #include "rope.h"
  9 | #include "tests.h"
 10 | 
 11 | #include "slowstring.h"
 12 | 
 13 | #ifdef __cplusplus
 14 | #include <ext/rope>
 15 | #endif
 16 | 
 17 | // Wrapper for rope
 18 | static void *_rope_create() {
 19 |   return (void *)rope_new();
 20 | }
 21 | 
 22 | static void _rope_insert(void *r, size_t pos, const uint8_t *str) {
 23 |   rope_insert((rope *)r, pos, str);
 24 | }
 25 | static void _rope_del(void *r, size_t pos, size_t len) {
 26 |   rope_del((rope *)r, pos, len);
 27 | }
 28 | static void _rope_destroy(void *r) {
 29 |   rope_free((rope *)r);
 30 | }
 31 | 
 32 | static size_t _rope_num_chars(void *r) {
 33 |   return rope_char_count((rope *)r);
 34 | }
 35 | 
 36 | // Wrapper for a vector-based string
 37 | 
 38 | static void *_str_create() {
 39 |   return (void *)str_create();
 40 | }
 41 | 
 42 | static void _str_insert(void *r, size_t pos, const uint8_t *str) {
 43 |   str_insert((_string *)r, pos, str);
 44 | }
 45 | 
 46 | static void _str_del(void *r, size_t pos, size_t len) {
 47 |   str_del((_string *)r, pos, len);
 48 | }
 49 | 
 50 | static void _str_destroy(void *r) {
 51 |   str_destroy((_string *)r);
 52 | }
 53 | 
 54 | static size_t _str_num_chars(void *r) {
 55 |   return str_num_chars((_string *)r);
 56 | }
 57 | 
 58 | // SGI C++ rope. To enable these benchmarks, compile this file using a C++ compiler. There's a
 59 | // bug with some versions of clang and the rope library - you might have to switch to gcc.
 60 | #ifdef __cplusplus
 61 | static void *_sgi_create() {
 62 |   return new __gnu_cxx::crope();
 63 | }
 64 | 
 65 | static void _sgi_insert(void *r, size_t pos, const uint8_t *str) {
 66 |   __gnu_cxx::crope *rope = (__gnu_cxx::crope *)r;
 67 |   rope->insert(pos, (const char *)str);
 68 | }
 69 | static void _sgi_del(void *r, size_t pos, size_t len) {
 70 |   __gnu_cxx::crope *rope = (__gnu_cxx::crope *)r;
 71 |   rope->erase(pos, len);
 72 | }
 73 | static void _sgi_destroy(void *r) {
 74 |   __gnu_cxx::crope *rope = (__gnu_cxx::crope *)r;
 75 |   delete rope;
 76 | }
 77 | 
 78 | static size_t _sgi_num_chars(void *r) {
 79 |   __gnu_cxx::crope *rope = (__gnu_cxx::crope *)r;
 80 |   return rope->size();
 81 | }
 82 | #endif
 83 | 
 84 | 
 85 | struct rope_implementation {
 86 |   const char *name;
 87 |   void* (*create)();
 88 |   void (*insert)(void *r, size_t pos, const uint8_t *str);
 89 |   void (*del)(void *r, size_t pos, size_t len);
 90 |   void (*destroy)(void *r);
 91 |   size_t (*num_chars)(void *r);
 92 | } types[] = {
 93 |   { "librope", &_rope_create, &_rope_insert, &_rope_del, &_rope_destroy, &_rope_num_chars },
 94 | #ifdef __cplusplus
 95 |   { "sgirope", &_sgi_create, &_sgi_insert, &_sgi_del, &_sgi_destroy, &_sgi_num_chars },
 96 | #endif
 97 |   { "c string", &_str_create, &_str_insert, &_str_del, &_str_destroy, &_str_num_chars },
 98 | };
 99 | 
100 | void benchmark() {
101 |   printf("Benchmarking... (node size = %d, wchar support = %d)\n",
102 |          ROPE_NODE_STR_SIZE, ROPE_WCHAR);
103 |   
104 |   long iterations = 20000000;
105 | //  long iterations = 1000000;
106 |   struct timeval start, end;
107 | 
108 |   // Make the test stable
109 |   srandom(1234);
110 |   
111 |   uint8_t *strings[100];
112 |   for (int i = 0; i < 100; i++) {
113 |     size_t len = 1 + random() % 2;//i * i + 1;
114 |     strings[i] = (uint8_t *)calloc(1, len + 1);
115 |     random_ascii_string(strings[i], len + 1);
116 | //    random_unicode_string(strings[i], len + 1);
117 |   }
118 |   
119 |   // We should pick the same random sequence each benchmark run.
120 |   unsigned long *rvals = (unsigned long *)malloc(sizeof(unsigned long) * iterations);
121 |   for (int i = 0; i < iterations; i++) {
122 |     rvals[i] = random();
123 |   }
124 | 
125 | //  for (int t = 0; t < sizeof(types) / sizeof(types[0]); t++) {
126 |   for (int t = 0; t < 1; t++) {
127 |     for (int i = 0; i < 5; i++) {
128 |       printf("benchmarking %s\n", types[t].name);
129 |       void *r = types[t].create();
130 | 
131 |       gettimeofday(&start, NULL);
132 |       
133 |       for (long i = 0; i < iterations; i++) {
134 |         if (types[t].num_chars(r) == 0 || i % 20 > 0) {
135 |           // insert. (Inserts are way more common in practice than deletes.)
136 |           uint8_t *str = strings[i % 100];
137 |           types[t].insert(r, rvals[i] % (types[t].num_chars(r) + 1), str);
138 |         } else {
139 |           size_t pos = rvals[i] % types[t].num_chars(r);
140 |           size_t length = MIN(types[t].num_chars(r) - pos, 1 + (~rvals[i]) % 53);
141 |           types[t].del(r, pos, length);
142 |         }
143 |         
144 |         //printf("%s\n", rope_createcstr(r, NULL));
145 |       }
146 |       
147 |       gettimeofday(&end, NULL);
148 | 
149 |       double elapsedTime = end.tv_sec - start.tv_sec;
150 |       elapsedTime += (end.tv_usec - start.tv_usec) / 1e6;
151 |       printf("did %ld iterations in %f ms: %f Miter/sec\n",
152 |              iterations, elapsedTime * 1000, iterations / elapsedTime / 1000000);
153 |       printf("final string length: %zi\n", types[t].num_chars(r));
154 |       
155 |       types[t].destroy(r);
156 |     }
157 |   }
158 |   
159 |   for (int i = 0; i < 100; i++) {
160 |     free(strings[i]);
161 |   }
162 | }
163 | 
164 | 


--------------------------------------------------------------------------------
/test/slowstring.c:
--------------------------------------------------------------------------------
  1 | //
  2 | //  slowstring.c
  3 | //  librope
  4 | //
  5 | //  Created by Joseph Gentle on 28/08/12.
  6 | //  Copyright (c) 2012 Joseph Gentle. All rights reserved.
  7 | //
  8 | 
  9 | #include <stdlib.h>
 10 | #include <string.h>
 11 | 
 12 | #include "slowstring.h"
 13 | 
 14 | // Private rope methods, stolen for utf8 support in the string.
 15 | static size_t codepoint_size(uint8_t byte) {
 16 |   if (byte <= 0x7f) { return 1; }
 17 |   else if (byte <= 0xdf) { return 2; }
 18 |   else if (byte <= 0xef) { return 3; }
 19 |   else if (byte <= 0xf7) { return 4; }
 20 |   else if (byte <= 0xfb) { return 5; }
 21 |   else if (byte <= 0xfd) { return 6; }
 22 |   else {
 23 |     // The codepoint is invalid... what do?
 24 |     //assert(0);
 25 |     return 1;
 26 |   }
 27 | }
 28 | 
 29 | // This little function counts how many bytes the some characters take up.
 30 | static size_t count_bytes_in_chars(const uint8_t *str, size_t num_chars) {
 31 |   const uint8_t *p = str;
 32 |   for (int i = 0; i < num_chars; i++) {
 33 |     p += codepoint_size(*p);
 34 |   }
 35 |   return p - str;
 36 | }
 37 | 
 38 | static size_t strlen_utf8(const uint8_t *str) {
 39 |   const uint8_t *p = str;
 40 |   size_t i = 0;
 41 |   while (*p) {
 42 |     p += codepoint_size(*p);
 43 |     i++;
 44 |   }
 45 |   return i;
 46 | }
 47 | 
 48 | _string *str_create() {
 49 |   _string *s = (_string *)malloc(sizeof(_string));
 50 |   s->capacity = 64; // A reasonable capacity considering...
 51 |   s->mem = (uint8_t *)malloc(s->capacity);
 52 |   s->mem[0] = '\0';
 53 |   s->len = 0;
 54 |   s->num_chars = 0;
 55 |   return s;
 56 | }
 57 | 
 58 | void str_insert(_string *s, size_t pos, const uint8_t *str) {
 59 |   size_t num_inserted_bytes = strlen((char *)str);
 60 |   // Offset to insert at in the string.
 61 |   size_t offset = count_bytes_in_chars(s->mem, pos);
 62 |   size_t end_size = s->len - offset;
 63 |   
 64 |   // Resize if needed.
 65 |   s->len += num_inserted_bytes;
 66 |   if (s->len >= s->capacity) {
 67 |     while (s->len >= s->capacity) {
 68 |       s->capacity *= 2;
 69 |     }
 70 |     s->mem = (uint8_t *)realloc(s->mem, s->capacity);
 71 |   }
 72 |   s->num_chars += strlen_utf8(str);
 73 |   
 74 |   memmove(&s->mem[offset + num_inserted_bytes], &s->mem[offset], end_size);
 75 |   memcpy(&s->mem[offset], str, num_inserted_bytes);
 76 |   s->mem[s->len] = '\0';
 77 | }
 78 | 
 79 | void str_del(_string *s, size_t pos, size_t len) {
 80 |   // Offset to delete at in the string.
 81 |   size_t offset = count_bytes_in_chars(s->mem, pos);
 82 |   size_t num_bytes = count_bytes_in_chars(s->mem + offset, len);
 83 |   size_t end_size = s->len - offset - num_bytes;
 84 |   
 85 |   if (end_size > 0) {
 86 |     memmove(&s->mem[offset], &s->mem[offset + num_bytes], end_size);
 87 |   }
 88 |   s->len -= num_bytes;
 89 |   s->num_chars -= len;
 90 |   s->mem[s->len] = '\0';
 91 | }
 92 | 
 93 | void str_destroy(_string *s) {
 94 |   free(s->mem);
 95 |   free(s);
 96 | }
 97 | 
 98 | size_t str_num_chars(const _string *s) {
 99 |   return s->num_chars;
100 | }
101 | 


--------------------------------------------------------------------------------
/test/slowstring.h:
--------------------------------------------------------------------------------
 1 | // This is a copy of the rope API using simple C strings.
 2 | //
 3 | // Its used for testing and benchmarking.
 4 | 
 5 | #ifndef librope_slowstring_h
 6 | #define librope_slowstring_h
 7 | 
 8 | #include <stdint.h>
 9 | 
10 | typedef struct {
11 |   uint8_t *mem;
12 |   size_t capacity;
13 |   size_t len;
14 |   size_t num_chars;
15 | } _string;
16 | 
17 | _string *str_create();
18 | 
19 | void str_insert(_string *s, size_t pos, const uint8_t *str);
20 | 
21 | void str_del(_string *s, size_t pos, size_t len);
22 | 
23 | void str_destroy(_string *s);
24 | 
25 | size_t str_num_chars(const _string *s);
26 | 
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/test/tests.c:
--------------------------------------------------------------------------------
  1 | // Tests for librope.
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <assert.h>
  6 | #include <string.h>
  7 | 
  8 | #include "tests.h"
  9 | #include "slowstring.h"
 10 | #include "rope.h"
 11 | 
 12 | static float rand_float() {
 13 |   return (float)random() / INT32_MAX;
 14 | }
 15 | 
// A selection of different unicode characters to pick from, grouped by how
// many bytes each one needs in UTF-8 (1, 2, 2, 3 and 4 respectively).
// As far as I can tell, there are no unicode characters assigned which
// take up more than 4 bytes in utf-8.
static const char *UCHARS[] = {
  "a", "b", "c", "1", "2", "3", " ", "\n", // ASCII
  "©", "¥", "½", // The Latin-1 supplement (U+80 - U+ff)
  "Ύ", "Δ", "δ", "Ϡ", // Greek (U+0370 - U+03FF)
  "←", "↯", "↻", "⇈", // Arrows (U+2190 – U+21FF)
  "𐆐", "𐆔", "𐆘", "𐆚", // Ancient roman symbols (U+10190 – U+101CF)
};
 26 | 
 27 | // s is the size of the buffer, including the \0. This function might use
 28 | // fewer bytes than that.
 29 | void random_unicode_string(uint8_t *buffer, size_t s) {
 30 |   if (s == 0) { return; }
 31 |   uint8_t *pos = buffer;
 32 |   
 33 |   while(1) {
 34 |     uint8_t *c = (uint8_t *)UCHARS[random() % (sizeof(UCHARS) / sizeof(UCHARS[0]))];
 35 |     
 36 |     size_t bytes = strlen((char *)c);
 37 |     
 38 |     size_t remaining_space = buffer + s - pos - 1;
 39 |     
 40 |     if (remaining_space < bytes) {
 41 |       break;
 42 |     }
 43 |     
 44 |     memcpy(pos, c, bytes);
 45 |     pos += bytes;
 46 |   }
 47 |   
 48 |   *pos = '\0';
 49 | }
 50 | 
 51 | static const char CHARS[] = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
 52 | "0123456789!@#$%^&*()[]{}<>?,./";
 53 | void random_ascii_string(uint8_t *buffer, size_t len) {
 54 |   assert(len);
 55 |   for (int i = 0; i < len - 1; i++) {
 56 |     buffer[i] = CHARS[random() % (sizeof(CHARS) - 1)];
 57 |   }
 58 |   buffer[len - 1] = '\0';
 59 | }
 60 | 
 61 | static size_t strlen_utf8(uint8_t *data) {
 62 |   size_t numchars = 0;
 63 |   
 64 |   while (*data) {
 65 |     if ((*data++ & 0xC0) != 0x80) {
 66 |       ++numchars;
 67 |     }
 68 |   }
 69 |   
 70 |   return numchars;
 71 | }
 72 | 
 73 | #if ROPE_WCHAR
 74 | // Count the number of wchars this string would take up if it was encoded using utf16.
 75 | static size_t wchar_size_count(uint8_t *data) {
 76 |   size_t num = 0;
 77 |   
 78 |   while (*data) {
 79 |     if ((*data & 0xC0) != 0x80) {
 80 |       ++num;
 81 |       if ((*data & 0xf0) == 0xf0) {
 82 |         // It'll take up 2 wchars, not just one.
 83 |         ++num;
 84 |       }
 85 |     }
 86 |     
 87 |     ++data;
 88 |   }
 89 |   
 90 |   return num;
 91 | }
 92 | 
 93 | static size_t count_wchars_in_utf8(const uint8_t *str, size_t num_chars) {
 94 |   size_t wchars = num_chars;
 95 |   while (num_chars) {
 96 |     if ((*str & 0xf0) == 0xf0) {
 97 |       wchars++;
 98 |     }
 99 |     if ((*str & 0xc0) != 0x80) {
100 |       num_chars--;
101 |     }
102 |     ++str;
103 |   }
104 |   return wchars;
105 | }
106 | #endif
107 | 
// Test assertion helper: when cond is false, reports "Test failed" on stderr
// and then trips assert(0). A true cond does nothing.
void test(int cond) {
  if (cond) {
    return;
  }
  fprintf(stderr, "Test failed\n");
  assert(0);
}
114 | 
115 | void check(rope *rope, char *expected) {
116 |   // Rope will be null when the inserted data is invalid.
117 |   assert((rope == NULL) == (expected == NULL));
118 |   
119 |   if (rope) {
120 |     _rope_check(rope);
121 |     test(rope_byte_count(rope) == strlen(expected));
122 |     uint8_t *cstr = rope_create_cstr(rope);
123 |     test(strcmp((char *)cstr, expected) == 0);
124 |     free(cstr);
125 |   }
126 | }
127 | 
128 | static void test_empty_rope_has_no_content() {
129 |   rope *r = rope_new();
130 |   check(r, "");
131 |   test(rope_char_count(r) == 0);
132 |   
133 |   uint8_t *bytes = rope_create_cstr(r);
134 |   test(bytes[0] == '\0');
135 |   free(bytes);
136 |   
137 |   rope_free(r);
138 | }
139 | 
140 | static void checked_insert(rope *r, size_t pos, char *str) {
141 |   ROPE_RESULT result = rope_insert(r, pos, (uint8_t *)str);
142 |   assert(result == ROPE_OK);
143 | }
144 | 
145 | static void test_insert_at_location() {
146 |   rope *r = rope_new();
147 |   
148 |   checked_insert(r, 0, "AAA");
149 |   check(r, "AAA");
150 |   
151 |   checked_insert(r, 0, "BBB");
152 |   check(r, "BBBAAA");
153 | 
154 |   checked_insert(r, 6, "CCC");
155 |   check(r, "BBBAAACCC");
156 | 
157 |   checked_insert(r, 5, "DDD");
158 |   check(r, "BBBAADDDACCC");
159 |   
160 |   test(rope_char_count(r) == 12);
161 |   
162 |   rope_free(r);
163 | }
164 | 
165 | static void check_invalid(char *err_str) {
166 |   rope *r = rope_new();
167 |   ROPE_RESULT result = rope_insert(r, 0, (uint8_t *)err_str);
168 |   assert(result == ROPE_INVALID_UTF8);
169 |   
170 |   // And check that nothing happened.
171 |   assert(0 == rope_char_count(r));
172 |   assert(0 == rope_byte_count(r));
173 |   rope_free(r);
174 | }
175 | 
// Every malformed byte sequence below must be refused by the rope.
static void test_invalid_utf8_rejected() {
  // Each row is a NUL-terminated byte string; unused slots are zero-filled.
  unsigned char bad_inputs[][5] = {
    { 0xb0 },                   // trailing middle byte
    { 0xc0 },                   // half of 2 byte sequence
    { 0xc0, 0xb0, 0xb0 },
    { 0xc0, 0xc0, 0xb0 },
    { 0xe0, 0xb0 },             // 2/3 in 3 byte sequence
    { 0xe0, 0xb0, 0xb0, 0xb0 },
    { 0xe0, 0xc0, 0xb0 },
    { 0xe0, 0xc0, 0xb0, 0xb0 },
  };

  for (size_t k = 0; k < sizeof(bad_inputs) / sizeof(bad_inputs[0]); k++) {
    check_invalid((char *)bad_inputs[k]);
  }
}
186 | 
187 | // A rope initialized with a string has that string as its content
188 | static void test_new_string_has_content() {
189 |   rope *r = rope_new_with_utf8((uint8_t *)"Hi there");
190 |   check(r, "Hi there");
191 |   test(rope_char_count(r) == strlen("Hi there"));
192 |   rope_free(r);
193 |   
194 |   // If need be, this could be rewritten as an array of bytes...
195 |   r = rope_new_with_utf8((uint8_t *)"κόσμε");
196 |   check(r, "κόσμε");
197 |   test(rope_char_count(r) == 5);
198 |   
199 |   rope_insert(r, 2, (uint8_t *)"𝕐𝕆𝌀");
200 |   check(r, "κό𝕐𝕆𝌀σμε");
201 |   test(rope_char_count(r) == 8);
202 |   rope_free(r);
203 | }
204 | 
205 | static void test_delete_at_location() {
206 |   rope *r = rope_new_with_utf8((uint8_t *)"012345678");
207 |   
208 |   rope_del(r, 8, 1);
209 |   check(r, "01234567");
210 |   
211 |   rope_del(r, 0, 1);
212 |   check(r, "1234567");
213 |   
214 |   rope_del(r, 5, 1);
215 |   check(r, "123457");
216 |   
217 |   rope_del(r, 5, 1);
218 |   check(r, "12345");
219 |   
220 |   rope_del(r, 0, 5);
221 |   check(r, "");
222 |   
223 |   test(rope_char_count(r) == 0);
224 |   
225 |   rope_free(r);
226 | }
227 | 
228 | static void test_delete_past_end_of_string() {
229 |   rope *r = rope_new();
230 |   
231 |   rope_del(r, 0, 100);
232 |   check(r, "");
233 |   
234 |   rope_insert(r, 0, (uint8_t *)"hi there");
235 |   rope_del(r, 3, 10);
236 |   check(r, "hi ");
237 |   
238 |   test(rope_char_count(r) == 3);
239 |   
240 |   rope_free(r);
241 | }
242 | 
// Exercises the wchar-addressed insert/delete functions. Skipped, with a
// notice, when the library is built without ROPE_WCHAR.
static void test_wchar() {
#if ROPE_WCHAR
  // Three characters that each count as two wchars.
  rope *r = rope_new_with_utf8((uint8_t *)"𐆔𐆚𐆔");
  test(rope_wchar_count(r) == 6);

  // Deleting wchars 2..3 removes the middle character.
  size_t deleted_chars;
  size_t char_pos = rope_del_at_wchar(r, 2, 2, &deleted_chars);
  check(r, "𐆔𐆔");
  test(char_pos == 1);
  test(deleted_chars == 1);

  // wchar offset 2 is character offset 1.
  char_pos = rope_insert_at_wchar(r, 2, (uint8_t *)"abcde");
  check(r, "𐆔abcde𐆔");
  test(char_pos == 1);

  // wchar offset 5 is character offset 4.
  char_pos = rope_insert_at_wchar(r, 5, (uint8_t *)"𐆚");
  check(r, "𐆔abc𐆚de𐆔");
  test(char_pos == 4);

  rope_free(r);
#else
  printf("Skipping wchar tests - wchar conversion support disabled.\n");
#endif
}
267 | 
268 | static void test_really_long_ascii_string() {
269 |   size_t len = 2000;
270 |   uint8_t *str = malloc(len + 1);
271 |   random_ascii_string(str, len + 1);
272 |   
273 |   rope *r = rope_new_with_utf8((uint8_t *)str);
274 |   test(rope_char_count(r) == len);
275 |   check(r, (char *)str);
276 |   
277 |   // Iterate through all the characters using the loop macros and make sure it all works.
278 |   size_t pos = 0;
279 |   ROPE_FOREACH(r, n) {
280 |     test(memcmp(rope_node_data(n), &str[pos], rope_node_num_bytes(n)) == 0);
281 |     pos += rope_node_num_bytes(n);
282 |   }
283 |   test(pos == r->num_bytes);
284 |   
285 |   // Delete everything but the first and last characters.
286 |   rope_del(r, 1, len - 2);
287 |   assert(r->num_bytes == 2);
288 |   assert(r->num_chars == 2);
289 |   char *contents = (char *)rope_create_cstr(r);
290 |   _rope_check(r);
291 |   test(contents[0] == str[0]);
292 |   test(contents[1] == str[len - 1]);
293 |   free(contents);
294 |   
295 |   rope_free(r);
296 | }
297 | 
// Number of _alloc() calls that have not yet been balanced by a _free() call.
static int alloced_regions = 0;

// malloc() wrapper that records one more outstanding region.
void *_alloc(size_t size) {
  alloced_regions += 1;
  void *region = malloc(size);
  return region;
}

// free() wrapper that records one fewer outstanding region.
void _free(void *mem) {
  alloced_regions -= 1;
  free(mem);
}
309 | 
310 | static void test_custom_allocator() {
311 |   // Its really hard to test that malloc is never called, but I can make sure
312 |   // custom frees match custom allocs.
313 |   rope *r = rope_new2(_alloc, realloc, _free);
314 |   for (int i = 0; i < 100; i++) {
315 |     rope_insert(r, random() % (rope_char_count(r) + 1),
316 |         (uint8_t *)"Whoa super happy fun times!\n");
317 |   }
318 | 
319 |   rope_free(r);
320 | 
321 |   test(alloced_regions == 0);
322 | }
323 | 
324 | static void test_copy() {
325 |   // Copy an empty string.
326 |   rope *r1 = rope_new();
327 |   rope *r2 = rope_copy(r1);
328 |   check(r2, "");
329 |   rope_free(r2);
330 |   
331 |   // Insert some text (less than one node worth)
332 |   rope_insert(r1, 0, (uint8_t *)"Eureka!");
333 |   r2 = rope_copy(r1);
334 |   check(r2, "Eureka!");
335 |   
336 |   rope_free(r1);
337 |   rope_free(r2);
338 | }
339 | 
340 | static void test_random_edits() {
341 |   // This string should always have the same content as the rope.
342 |   _string *str = str_create();
343 |   rope *r = rope_new();
344 |   
345 |   const size_t max_stringsize = 1000;
346 |   uint8_t strbuffer[max_stringsize + 1];
347 |   
348 |   for (int i = 0; i < 1000; i++) {
349 |     // First, some sanity checks.
350 |     check(r, (char *)str->mem);
351 |     
352 |     rope *r2 = rope_copy(r);
353 |     check(r2, (char *)str->mem);
354 |     rope_free(r2);
355 |     
356 | //    printf("String contains '%s'\n", str->mem);
357 |     test(rope_byte_count(r) == str->len);
358 |     size_t len = strlen_utf8(str->mem);
359 |     test(rope_char_count(r) == len);
360 |     test(str_num_chars(str) == len);
361 |     
362 |     if (len == 0 || rand_float() < 0.5f) {
363 |       // Insert.
364 |       random_unicode_string(strbuffer, 1 + random() % max_stringsize);
365 |       size_t pos = random() % (len + 1);
366 |       
367 | //      printf("inserting %s at %zd\n", strbuffer, pos);
368 |       rope_insert(r, pos, strbuffer);
369 |       str_insert(str, pos, strbuffer);
370 |     } else {
371 |       // Delete
372 |       size_t pos = random() % len;
373 |       
374 |       size_t dellen = random() % 10;
375 |       dellen = MIN(len - pos, dellen);
376 |       
377 | //      printf("deleting %zd chars at %zd\n", dellen, pos);
378 |       rope_del(r, pos, dellen);
379 |       str_del(str, pos, dellen);
380 |     }
381 |   }
382 |   
383 |   rope_free(r);
384 |   str_destroy(str);
385 | }
386 | 
// Same idea as test_random_edits(), but the rope is edited through the
// wchar-addressed functions. Does nothing when ROPE_WCHAR is disabled.
static void test_random_wchar_edits() {
#if ROPE_WCHAR
  // This string should always have the same content as the rope.
  // Both are stored using UTF-8, but we'll make edits using the wchar functions.
  _string *str = str_create();
  rope *r = rope_new();

  const size_t max_stringsize = 1000;
  uint8_t strbuffer[max_stringsize + 1];

  int round = 0;
  while (round < 1000) {
    round++;

    check(r, (char *)str->mem);

    test(rope_byte_count(r) == str->len);
    const size_t len = strlen_utf8(str->mem);
    test(rope_char_count(r) == len);
    test(str_num_chars(str) == len);
    test(rope_wchar_count(r) == wchar_size_count(str->mem));

    // An empty string can only grow; otherwise flip a coin.
    const int do_insert = (len == 0) || (rand_float() < 0.5f);

    if (!do_insert) {
      // Delete up to 9 characters, clipped to what follows pos.
      const size_t pos = random() % len;
      size_t dellen = random() % 10;
      const size_t room = len - pos;
      if (room <= dellen) {
        dellen = room;
      }

      // Translate the character range into a wchar range for the rope.
      const size_t wchar_pos = count_wchars_in_utf8(str->mem, pos);
      const size_t wchar_end = count_wchars_in_utf8(str->mem, pos + dellen);
      rope_del_at_wchar(r, wchar_pos, wchar_end - wchar_pos, NULL);
      str_del(str, pos, dellen);
      continue;
    }

    // Insert a random string at a random position (the end included).
    random_unicode_string(strbuffer, 1 + random() % max_stringsize);
    const size_t pos = random() % (len + 1);

    // We need to convert pos to the wchar offset. There's a private function in rope.c for this
    // but ...
    const size_t wchar_pos = count_wchars_in_utf8(str->mem, pos);

    rope_insert_at_wchar(r, wchar_pos, strbuffer);
    str_insert(str, pos, strbuffer);
  }

  rope_free(r);
  str_destroy(str);
#endif
}
438 | 
439 | 
440 | void test_all() {
441 |   printf("Running tests...\n");
442 |   test_empty_rope_has_no_content();
443 |   test_insert_at_location();
444 |   test_new_string_has_content();
445 |   test_invalid_utf8_rejected();
446 |   test_delete_at_location();
447 |   test_delete_past_end_of_string();
448 |   test_wchar();
449 |   test_really_long_ascii_string();
450 |   test_custom_allocator();
451 |   test_copy();
452 |   printf("Normal tests passed. Running randomizers...\n");
453 |   test_random_edits();
454 |   test_random_wchar_edits();
455 |   printf("Done!\n");
456 | }
457 | 
// Entry point: always runs the test suite, then runs the benchmark as well if
// the first command-line argument is "-b".
int main(int argc, const char * argv[]) {
  test_all();

  const int wants_benchmark = (argc > 1) && (strcmp(argv[1], "-b") == 0);
  if (wants_benchmark) {
    benchmark();
  }

  return 0;
}
467 | 
468 | 


--------------------------------------------------------------------------------
/test/tests.h:
--------------------------------------------------------------------------------
 1 | #ifndef librope_test_h
 2 | #define librope_test_h
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | #define MIN(x,y) ((x) > (y) ? (y) : (x))
 7 | 
 8 | #ifdef __cplusplus
 9 | extern "C" {
10 | #endif
11 | 
12 | void benchmark();
13 | 
14 | // len is approximate. Might use fewer bytes than that.
15 | void random_unicode_string(uint8_t *buffer, size_t len);
16 | 
17 | // len includes \0.
18 | void random_ascii_string(uint8_t *buffer, size_t len);  
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |     
24 | #endif
25 | 


--------------------------------------------------------------------------------