├── .github
    ├── CODEOWNERS
    └── workflows
    │   └── ci.yml
├── .gitignore
├── CHANGES.md
├── LICENSE
├── Makefile
├── README.md
├── dune-project
├── dune-workspace.dev
├── pkg
    └── pkg.ml
├── src
    ├── dune
    ├── zed_char.ml
    ├── zed_char.mli
    ├── zed_cursor.ml
    ├── zed_cursor.mli
    ├── zed_edit.ml
    ├── zed_edit.mli
    ├── zed_input.ml
    ├── zed_input.mli
    ├── zed_lines.ml
    ├── zed_lines.mli
    ├── zed_macro.ml
    ├── zed_macro.mli
    ├── zed_rope.ml
    ├── zed_rope.mli
    ├── zed_string.ml
    ├── zed_string.mli
    ├── zed_utf8.ml
    ├── zed_utf8.mli
    └── zed_utils.ml
├── style.css
├── test
    ├── dune
    └── test_zed.ml
├── zed.descr
└── zed.opam


/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | * @diml
2 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 |     strategy:
12 |       fail-fast: false
13 |       matrix:
14 |         os:
15 |           - macos-latest
16 |           - ubuntu-latest
17 |           - windows-latest
18 |         ocaml-compiler:
19 |           - 4.02.x
20 |           - 4.03.x
21 |           - 4.04.x
22 |           - 4.06.x
23 |           - 4.08.x
24 |           - 4.10.x
25 |           - 4.12.x
26 |           - 4.14.x
27 |         exclude:
28 |           # react does not build with 4.02.x on Windows
29 |           - os: windows-latest
30 |             ocaml-compiler: 4.02.x
31 | 
32 |     runs-on: ${{ matrix.os }}
33 | 
34 |     steps:
35 |       - name: Checkout code
36 |         uses: actions/checkout@v3
37 | 
38 |       - name: Use OCaml ${{ matrix.ocaml-compiler }}
39 |         uses: ocaml/setup-ocaml@v2
40 |         with:
41 |           ocaml-compiler: ${{ matrix.ocaml-compiler }}
42 | 
43 |       - run: opam install . --deps-only --with-test
44 | 
45 |       - run: opam exec -- dune build
46 | 
47 |       - run: opam exec -- dune runtest
48 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 | /zed-*.tar.gz
3 | /setup.data
4 | /setup.log
5 | /setup.exe
6 | /setup-dev.exe
7 | .merlin
8 | *.install


--------------------------------------------------------------------------------
/CHANGES.md:
--------------------------------------------------------------------------------
  1 | 3.2.3 (2023-08-10)
  2 | ------------------
  3 | * `Zed_edit`: fix the `Next_word` and `Prev_word` actions
  4 | 
  5 | 3.2.2 (2023-06-20)
  6 | ------------------
  7 | 
  8 | * `Zed_utf8.next_error`: raise `Zed_utf8.Out_of_bounds` in case of invalid offset (@Lucccyo, #52)
  9 | * `kill_next_word` should not raise `Out_of_bound` (@Lucccyo, #55)
 10 | * `of_utf8`: add `Uchar.is_valid` to check the input (@Lucccyo, #51)
 11 | 
 12 | 3.2.1 (2022-11-10)
 13 | ------------------
 14 | 
 15 | * Check if decoded values fit in Uchar (Etienne Millon, #50)
 16 | 
 17 | 3.2.0 (2022-06-30)
 18 | ------------------
 19 | 
 20 | * Replace Camomile with uu* (Nicolás Ojeda Bär, ZAN DoYe, Thibaut Mattio, #46)
 21 | 
 22 | 3.1.0 (2020-05-30)
 23 | ------------------
 24 | 
 25 | * `Zed_edit`
 26 |   * `Set_pos` action
 27 |   * `Insert_str` action
 28 | 
 29 | 3.0.1 (2020-04-28)
 30 | ------------------
 31 | 
 32 | * `Zed_edit`: fix `copy_sequence`
 33 | 
 34 | 3.0.0 (2020-04-25)
 35 | ------------------
 36 | 
 37 | * `Zed_edit`:
 38 |   * new actions
 39 |     * `Join_line`
 40 |     * `Goto of int`
 41 |     * `Delete_next_chars of int`
 42 |     * `Delete_prev_chars of int`
 43 |     * `Kill_next_chars of int`
 44 |     * `Kill_prev_chars of int`
 45 |   * function `copy_sequence`
 46 | 
 47 | 2.0.7 (2020-04-08)
 48 | ------------------
 49 | 
 50 | * fix Zed\_edit.undo (#36)
 51 | 
 52 | 2.0.6 (2020-02-27)
 53 | ------------------
 54 | 
 55 | * compabile with `Result` (>= 1.5) (@mjambon, #31)
 56 | 
 57 | 
 58 | 2.0.5 (2020-01-29)
 59 | ------------------
 60 | 
 61 | * Zed\_rope.Zip: fix a bug in function `make_b`
 62 | 
 63 | 2.0.4 (2019-12-31)
 64 | ------------------
 65 | 
 66 | * add wanted\_column support for wide width character
 67 | * Zed\_lines: `get_idx_by_width set row column_width` return the offset of the character at `[row, column_width]`
 68 | 
 69 | 2.0.3 (2019-08-09)
 70 | ------------------
 71 | 
 72 | * Zed\_string
 73 |   * `exception Invalid of string * string` raised when an invalid Zed\_char sequence is encounted
 74 |   * `next_ofs : t -> int -> int` returns the offset of the next zchar in `t`
 75 |   * `prev_ofs : t -> int -> int` returns the offset of the prev zchar in `t`
 76 | 
 77 | 2.0.2 (2019-06-21)
 78 | ------------------
 79 | 
 80 | * Zed\_utf8: fix an ofs-stepping bug in function `unsafe_extract_prev`
 81 | 
 82 | 2.0.1 (2019-06-04)
 83 | ------------------
 84 | 
 85 | * Zed\_char: add an `indv_combining` option to the transforming
 86 |   functions(`of_uChars, zChars_of_uChars, of_utf8`) to determine whether
 87 |   to extract individual combining marks from the parameter (#18)
 88 | * Zed\_char: clarify some documentation comments (#18)
 89 | 
 90 | 2.0 (2019-05-17)
 91 | ----------------
 92 | 
 93 | ### Additions
 94 | 
 95 | * module Zed\_char
 96 | * module Zed\_string
 97 | * Zed\_cursor
 98 |   * `column_display: Zed_cursor.t -> int React.signal`
 99 |   * `get_column: Zed_cursor.t -> int`
100 |   * `coordinates_display: Zed_cursor.t -> (int * int) React.signal`
101 |   * `get_coordinates: Zed_cursor.t -> int * int`
102 | * Zed\_edit
103 |   * `regexp_word_core: Zed_re.Core.t`
104 |   * `regexp_word_raw: Zed_re.raw.t`
105 |   * `match_by_regexp_core`
106 |   * `match_by_regexp_raw`
107 | 
108 | ### Breaking
109 | 
110 | * Zed\_rope
111 |   * Zed\_rope.empty is a function now
112 |   * Other functions in this module take `Zed_char.t` or `Zed_string.t` as arguemnts instead of `UChar.t` or `Zed_utf8.t`
113 |   * module Zipper is divided into two modules, Zip and Zip\_raw, to navigate over a rope by Zed\_char.t or UChar.t, respectively
114 |   * module Text is divided into three modules, Text, Text\_core, Text\_raw, to manager Zed\_rope by Zed\_char.t, the core UChar.t of a `Zed_char.t` and raw `UChar.t`, respectively
115 | * Zed\_re is therefore divided into two modules: Core and Raw
116 | * Zed\_cursor: the type `changes` is defined as a structure and has two more fields: `added_width` and `removed_width`
117 | 
118 | ### General
119 | 
120 | * README: Add Travis badge (Kevin Ji, #11)
121 | * Add travis config (Anurag Soni, #10)
122 | * Switch to dune (Anurag Soni, #9)
123 | 
124 | 1.6 (2017-11-05)
125 | ----------------
126 | 
127 | * safe-string compatibility (#8)
128 | 
129 | 1.5 (2017-04-26)
130 | ----------------
131 | 
132 | * Switch to jbuilder (Rudi Gringberg, #4)
133 | * Make `{delete_,kill_,}{next,prev}_word` consistent near the
134 |   start/end of the buffer (Fabian (github use copy), #5)
135 | 
136 | 1.4 (2015-01-07)
137 | ----------------
138 | 
139 | * added `Zed_edit.get_line`
140 | * added `Zed_line.line_{length,stop}`
141 | * fix a bug in cursor updates
142 | * fix some invalid use of react
143 | 
144 | 1.3 (2014-04-21)
145 | ----------------
146 | 
147 | * `Zed_rope` fixes:
148 |   - `rev_map`: fix recursion
149 |   - enforce evaluation order in `map` & `rev_map`
150 | 
151 | 1.2 (2012-07-30)
152 | ----------------
153 | 
154 | * add escaping functions
155 | * add `Zed_utf8.next_error`
156 | 
157 | 1.1 (2011-08-06)
158 | ----------------
159 | 
160 | * add the `{delete,kill}-{prev,next}-word` actions and functions
161 | * add `Zed_edit.Insert(ch)`
162 | * add `Zed_edit.replace`
163 | * raise an exception when editing a read-only part of a text
164 | * disable the move function
165 | * add support for undo
166 | * add `Zed_input` to ease writing key binders
167 | * add `Zed_macro` to ease writing macro system
168 | * fix `Zed_rope.Zip.sub`
169 | * add `Zed_edit.new_clipboard`
170 | * add `Zed_utf8.add`
171 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2011, Jeremie Dimino <jeremie@dimino.org>
 2 | All rights reserved.
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are met:
 5 | 
 6 |     * Redistributions of source code must retain the above copyright
 7 |       notice, this list of conditions and the following disclaimer.
 8 |     * Redistributions in binary form must reproduce the above copyright
 9 |       notice, this list of conditions and the following disclaimer in the
10 |       documentation and/or other materials provided with the distribution.
11 |     * Neither the name of Jeremie Dimino nor the names of his
12 |       contributors may be used to endorse or promote products derived
13 |       from this software without specific prior written permission.
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | build:
 2 | 	dune build
 3 | 
 4 | test:
 5 | 	dune runtest
 6 | 
 7 | all-supported-ocaml-versions:
 8 | 	dune build @install @runtest --workspace dune-workspace.dev
 9 | 
10 | clean:
11 | 	dune clean
12 | 
13 | .PHONY: build all-supported-ocaml-versions clean test
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Zed
 2 | ===
 3 | 
 4 | [![Build Status](https://travis-ci.org/ocaml-community/zed.svg?branch=master)](https://travis-ci.org/ocaml-community/zed)
 5 | 
 6 | Zed is an abstract engine for text edition. It can be used to write
 7 | text editors, edition widgets, readlines, ... You just have to
 8 | _connect_ an engine to your inputs and rendering functions to get an
 9 | editor.
10 | 
11 | Zed provides:
12 | 
13 | * edition state management,
14 | * multiple cursor support,
15 | * key-binding helpers,
16 | * general purpose unicode rope manipulation functions.
17 | 
18 | [API Documentation](http://ocaml-community.github.io/zed/)
19 | 
20 | Installation
21 | ------------
22 | 
23 | To build and install zed, use opam:
24 | 
25 |     $ opam install zed
26 | 
27 | Modules
28 | -------
29 | 
30 | * `Zed_edit`: the main module, it defines edition engines.
31 | * `Zed_cursor`: manages cursors. Cursors are automatically updated
32 |   when the text is modified.
33 | * `Zed_lines`: maintains the offsets of beginning of lines.
34 | * `Zed_input`: helpers for implementing key bindings.
35 | * `Zed_macro`: helpers for writing macro systems.
36 | * `Zed_utf8`: general purpose UTF-8 strings manipulation.
37 | * `Zed_rope`: general purpose unicode ropes manipulation.
38 | * `Zed_char`: general purpose unicode characters manipulation.
39 | * `Zed_string`: general purpose unicode strings manipulation.
40 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
 1 | (lang dune 3.0)
 2 | 
 3 | (name zed)
 4 | 
 5 | (generate_opam_files true)
 6 | 
 7 | (source
 8 |  (github ocaml-community/zed))
 9 | 
10 | (authors "Jérémie Dimino")
11 | 
12 | (maintainers "opam-devel@lists.ocaml.org")
13 | 
14 | (license BSD-3-Clause)
15 | 
16 | (package
17 |  (name zed)
18 |  (synopsis "Abstract engine for text edition in OCaml")
19 |  (description
20 |   "Zed is an abstract engine for text edition. It can be used to write text
21 | editors, edition widgets, readlines, ... Zed uses Camomile to fully support the
22 | Unicode specification, and implements an UTF-8 encoded string type with
23 | validation, and a rope datastructure to achieve efficient operations on large
24 | Unicode buffers. Zed also features a regular expression search on ropes. To
25 | support efficient text edition capabilities, Zed provides macro recording and
26 | cursor management facilities.")
27 |  (depends
28 |   (ocaml
29 |    (>= 4.02.3))
30 |   react
31 |   result
32 |   uchar
33 |   uutf
34 |   (uucp
35 |    (>= 2.0.0))
36 |   uuseg
37 |   (alcotest :with-test)))
38 | 


--------------------------------------------------------------------------------
/dune-workspace.dev:
--------------------------------------------------------------------------------
 1 | (lang dune 1.1)
 2 | 
 3 | ;; This file is used by `make all-supported-ocaml-versions`
 4 | (context (opam (switch 4.02.3)))
 5 | (context (opam (switch 4.03.0)))
 6 | (context (opam (switch 4.04.2)))
 7 | (context (opam (switch 4.05.0)))
 8 | (context (opam (switch 4.06.1)))
 9 | (context (opam (switch 4.07.0)))
10 | 


--------------------------------------------------------------------------------
/pkg/pkg.ml:
--------------------------------------------------------------------------------
1 | #use "topfind"
2 | #require "topkg-jbuilder.auto"
3 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
1 | (library
2 |  (name zed)
3 |  (public_name zed)
4 |  (wrapped false)
5 |  (flags
6 |   (:standard -safe-string))
7 |  (libraries react result uchar uutf uucp uuseg))
8 | 


--------------------------------------------------------------------------------
/src/zed_char.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_char.ml
  3 |  * -----------
  4 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | 
 11 | open Result
 12 | 
 13 | type t= Zed_utf8.t
 14 | 
 15 | type char_prop=
 16 |   | Printable of int
 17 |   | Other
 18 |   | Null
 19 | 
 20 | let to_raw= Zed_utf8.explode
 21 | let to_array t= Array.of_list (Zed_utf8.explode t)
 22 | 
 23 | let zero= String.make 1 (Char.chr 0)
 24 | 
 25 | let core t= Zed_utf8.unsafe_extract t 0
 26 | let combined t= List.tl (Zed_utf8.explode t)
 27 | 
 28 | let prop_uChar uChar=
 29 |   match Uucp.Break.tty_width_hint uChar with
 30 |   | -1 -> Other
 31 |   | 0->
 32 |     if Uchar.to_int uChar = 0
 33 |     then Null
 34 |     else Printable 0
 35 |   | w-> Printable w
 36 | 
 37 | let prop t= prop_uChar (Zed_utf8.unsafe_extract t 0)
 38 | 
 39 | let is_printable uChar=
 40 |   match prop_uChar uChar with
 41 |   | Printable _ -> true
 42 |   | _-> false
 43 | 
 44 | let is_printable_core uChar=
 45 |   match prop_uChar uChar with
 46 |   | Printable w when w > 0 -> true
 47 |   | _-> false
 48 | 
 49 | let is_combining_mark uChar=
 50 |   match prop_uChar uChar with
 51 |   | Printable w when w = 0 -> true
 52 |   | _-> false
 53 | 
 54 | let length= Zed_utf8.length
 55 | let size= length
 56 | 
 57 | let width t= Uucp.Break.tty_width_hint (Zed_utf8.unsafe_extract t 0)
 58 | 
 59 | let out_of_range t i= i < 0 || i >= size t
 60 | let get= Zed_utf8.get
 61 | 
 62 | let get_opt t i=
 63 |   try Some (get t i)
 64 |   with _-> None
 65 | 
 66 | let append ch mark=
 67 |   match prop_uChar mark with
 68 |   | Printable 0-> ch ^ (Zed_utf8.singleton mark)
 69 |   | _-> failwith "combining mark expected"
 70 | 
 71 | let compare_core t1 t2=
 72 |   let core1= Zed_utf8.unsafe_extract t1 0
 73 |   and core2= Zed_utf8.unsafe_extract t2 0 in
 74 |   Uchar.compare core1 core2
 75 | 
 76 | let compare_raw= Zed_utf8.compare
 77 | 
 78 | let compare= compare_raw
 79 | 
 80 | let mix_uChar zChar uChar=
 81 |   match prop_uChar uChar with
 82 |   | Printable 0->
 83 |     Ok (zChar ^ (Zed_utf8.singleton uChar))
 84 |   | _->
 85 |     Error (Zed_utf8.singleton uChar)
 86 | 
 87 | let first_core ?(trim=false) uChars=
 88 |   let rec aux uChars=
 89 |     match uChars with
 90 |     | []-> None, []
 91 |     | uChar::tl->
 92 |       let prop= prop_uChar uChar in
 93 |       match prop with
 94 |       | Printable w->
 95 |         if w > 0
 96 |         then Some (prop, uChar), tl
 97 |         else aux tl
 98 |       | Other-> Some (prop, uChar), tl
 99 |       | Null-> Some (prop, uChar), tl
100 |   in
101 |   match uChars with
102 |   | []-> None, []
103 |   | uChar::_->
104 |     if not trim && is_combining_mark uChar then
105 |       None, uChars
106 |     else
107 |       aux uChars
108 | 
109 | let rec subsequent uChars=
110 |   match uChars with
111 |   | []-> [], []
112 |   | uChar::tl->
113 |     let prop= prop_uChar uChar in
114 |     match prop with
115 |     | Printable w->
116 |       if w > 0 then
117 |         [], uChars
118 |       else
119 |         let seq, remain= subsequent tl in
120 |         uChar :: seq, remain
121 |     | _-> [], uChars
122 | 
123 | let of_uChars ?(trim=false) ?(indv_combining=true) uChars=
124 |   match uChars with
125 |   | []-> None, []
126 |   | uChar::tl->
127 |     match first_core ~trim uChars with
128 |     | None, _->
129 |       if indv_combining then
130 |         Some (Zed_utf8.singleton uChar), tl
131 |       else
132 |         None, uChars
133 |     | Some (Printable _w, uChar), tl->
134 |       let combined, tl= subsequent tl in
135 |       Some (Zed_utf8.implode (uChar::combined)), tl
136 |     | Some (Null, uChar), tl->
137 |       Some (Zed_utf8.singleton uChar) ,tl
138 |     | Some (Other, uChar), tl->
139 |       Some (Zed_utf8.singleton uChar) ,tl
140 | 
141 | let zChars_of_uChars ?(trim=false) ?(indv_combining=true) uChars=
142 |   let rec aux zChars uChars=
143 |     match of_uChars ~trim ~indv_combining uChars with
144 |     | None, tl-> List.rev zChars, tl
145 |     | Some zChar, tl-> aux (zChar::zChars) tl
146 |   in
147 |   aux [] uChars
148 | 
149 | external id : 'a -> 'a = "%identity"
150 | let unsafe_of_utf8 : string -> t=
151 |   fun str-> if String.length str > 0
152 |     then str
153 |     else failwith "malformed Zed_char sequence"
154 | let of_utf8 ?(indv_combining=true) str=
155 |   match of_uChars ~indv_combining (Zed_utf8.explode str) with
156 |   | Some zChar, []-> zChar
157 |   | _-> failwith "malformed Zed_char sequence"
158 | 
159 | let to_utf8 : t -> string= id
160 | 
161 | let unsafe_of_char c=
162 |   Zed_utf8.singleton (Uchar.of_char c)
163 | 
164 | let unsafe_of_uChar uChar= Zed_utf8.singleton uChar
165 | 
166 | let for_all= Zed_utf8.for_all
167 | let iter= Zed_utf8.iter
168 | 
169 | 


--------------------------------------------------------------------------------
/src/zed_char.mli:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_char.mli
  3 |  * ------------
  4 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | open Result
 11 | 
 12 | (** The type for glyphs. *)
 13 | type t
 14 | (**
 15 |   To represent a grapheme in unicode is a bit more complicated than what
 16 |   is expected: a printable UChar. For example, diacritics are added to
 17 |   IPA(international phonetic alphabet) letter to produce a modified
 18 |   pronunciation. Variation selectors are added to a CJK character to
 19 |   specify a specific glyph variant for the character.
 20 | 
 21 |   Therefore the logical type definition of [Zed_char.t] can be seen as
 22 |   {[
 23 |     type Zed_char.t= {
 24 |       core: UChar.t;
 25 |       combined: UChar.t list;
 26 |     }
 27 |   ]}
 28 |   *)
 29 | 
 30 | type char_prop =
 31 |     Printable of int | Other | Null
 32 | 
 33 |   (** The property of a character. It can be either [Printable of width],
 34 |     [Other](unprintable character) or [Null](code 0). *)
 35 | 
 36 | val to_raw : t -> Uchar.t list
 37 | val to_array : t -> Uchar.t array
 38 | 
 39 | val core : t -> Uchar.t
 40 |   (** [core char] returns the core of the [char] *)
 41 | 
 42 | val combined : t -> Uchar.t list
 43 |   (** [combined char] returns the combining marks of the [char] *)
 44 | 
 45 | val unsafe_of_utf8 : string -> t
 46 |   (** [unsafe_of_utf8 str] returns a [zed_char] from utf8 encoded [str] without any validation. *)
 47 | 
 48 | val of_utf8 : ?indv_combining:bool -> string -> t
 49 |   (** [of_utf8 str] returns a [zed_char] from utf8 encoded [str].
 50 |     This function checks whether [str] represents a single [UChar] or a
 51 |     legal grapheme, i.e. a printable core with optional combining marks.
 52 |     It will raise [Failure "malformed Zed_char sequence"] If the validation
 53 |     is not passed.
 54 |     @param indv_combining allow to create a [Zed_char.t] from a single
 55 |     combining mark, default to [true]
 56 |    *)
 57 | 
 58 | val to_utf8 : t -> string
 59 |   (** [to_utf8 chr] converts a [chr] to a string encoded in UTF-8. *)
 60 | 
 61 | val zero : t
 62 |   (** The Character 0. *)
 63 | 
 64 | val prop_uChar : Uchar.t -> char_prop
 65 |   (** [prop_uChar uChar] returns the char_prop of [uChar] *)
 66 | 
 67 | val prop : t -> char_prop
 68 |   (** [prop ch] returns the char_prop of [ch] *)
 69 | 
 70 | val is_printable : Uchar.t -> bool
 71 |   (** Returns whether a [Uchar.t] is a printable character or not. *)
 72 | 
 73 | val is_printable_core : Uchar.t -> bool
 74 |   (** Returns whether a [Uchar.t] is a printable character and its width is not zero. *)
 75 | 
 76 | val is_combining_mark : Uchar.t -> bool
 77 |   (** Returns whether a [Uchar.t] is a combining mark. *)
 78 | 
 79 | val size : t -> int
 80 |   (** [size ch] returns the size (number of characters) of [ch]. *)
 81 | 
 82 | val length : t -> int
 83 |   (** Aliase of size *)
 84 | 
 85 | val width : t -> int
 86 |   (** [width ch] returns the width of [ch]. *)
 87 | 
 88 | val out_of_range : t -> int -> bool
 89 |   (** [out_of_range ch idx] returns whether [idx] is out of range of [ch]. *)
 90 | 
 91 | val get : t -> int -> Uchar.t
 92 |   (** [get ch n] returns the [n]-th character of [ch]. *)
 93 | 
 94 | val get_opt : t -> int -> Uchar.t option
 95 |   (** [get ch n] returns an optional value of the [n]-th character of [ch]. *)
 96 | 
 97 | val append : t -> Uchar.t -> t
 98 |   (** [append ch cm] append the combining mark [cm] to ch and returns it. If [cm] is not a combining mark, then the original [ch] is returned. *)
 99 | 
100 | val compare_core : t -> t -> int
101 |   (** [compare_core ch1 ch2] compares the core components of ch1 and ch2*)
102 | 
103 | val compare_raw : t -> t -> int
104 |   (** [compare_raw ch1 ch2] compares over the internal characters of ch1 and ch2 sequentially *)
105 | 
106 | val compare : t -> t -> int
107 |   (** Alias of compare_raw *)
108 | 
109 | val mix_uChar : t -> Uchar.t -> (t, t) result
110 |   (** [mix_uChar chr uChar] tries to append [uChar] to [chr] and returns
111 |     [Ok result]. If [uChar] is not a combining mark, then an
112 |     [Error (Zed_char.t consists of uChar)] is returned. *)
113 | 
114 | val of_uChars : ?trim:bool -> ?indv_combining:bool -> Uchar.t list -> t option * Uchar.t list
115 |   (** [of_uChars uChars] transforms [uChars] to a tuple. The first value
116 |     is an optional [Zed_char.t] and the second is a list of remaining
117 |     uChars. The optional [Zed_char.t] is either a legal grapheme(a core
118 |     printable char with optinal combining marks) or a wrap for an
119 |     arbitrary Uchar.t. After that, all remaining uChars returned as the
120 |     second value in the tuple.
121 |     @param trim trim leading combining marks before transforming, default to [false]
122 |     @param indv_combining create a [Zed_char] from an individual dissociated combining mark, default to [true]
123 |   *)
124 | 
125 | val zChars_of_uChars : ?trim:bool -> ?indv_combining:bool -> Uchar.t list -> t list * Uchar.t list
126 |   (** [zChars of_uChars uChars] transforms [uChars] to a tuple. The first
127 |     value is a list of [Zed_char.t] and the second is a list of remaining uChars.
128 |     @param trim trim leading combining marks before transforming, default to [false]
129 |     @param indv_combining create a [Zed_char] from an individual dissociated combining mark, default to [true]
130 |   *)
131 | 
132 | val for_all : (Uchar.t -> bool) -> t -> bool
133 |   (** [for_all p zChar] checks if all elements of [zChar]
134 |    satisfy the predicate [p]. *)
135 | 
136 | val iter : (Uchar.t -> unit) -> t -> unit
137 |   (** [iter f char] applies [f] on all characters of [char]. *)
138 | 
139 | (** The prefix 'unsafe_' of [unsafe_of_char] and [unsafe_of_uChar] means
140 |   the two functions do not check if [char] or [uChar] being transformed
141 |   is a valid grapheme. There is no 'safe_' version, because the scenario
142 |   we should deal with a single [char] or [uChar] is when the char
143 |   sequence are individual, incomplete. For example, when we are reading
144 |   user input. Even if a user wants to input a legal grapheme, say,
145 |   'a' with a hat(a combining mark) on top. the user will input 'a' and
146 |   then '^' individually, the later combining mark is always illegal.
147 |   What we should do is to invoke [unsafe_of_uChar user_input] and send
148 |   the result to the edit engine. Other modules in zed, like Zed_string,
149 |   Zed_lines, Zed_edit ... are already well designed to deal with such a
150 |   situation. They will do combining mark joining, grapheme validation for
151 |   you automatically. Use the two 'unsafe_' functions directly,
152 |   you're doing things the right way. *)
153 | 
154 | val unsafe_of_char : char -> t
155 |   (** [unsafe_of_char ch] returns a [Zed_char] whose core is [ch]. *)
156 | 
157 | val unsafe_of_uChar : Uchar.t -> t
158 |   (** [unsafe_of_uChar ch] returns a [Zed_char] whose core is [ch]. *)


--------------------------------------------------------------------------------
/src/zed_cursor.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_cursor.ml
  3 |  * -------------
  4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | open React
 11 | 
 12 | exception Out_of_bounds
 13 | 
 14 | type changes= {
 15 |   position: int;
 16 |   added: int;
 17 |   removed: int;
 18 |   added_width: int;
 19 |   removed_width: int;
 20 | }
 21 | 
 22 | type action =
 23 |   | User_move of int
 24 |   | Text_modification of changes (* start, added, removed *)
 25 | 
 26 | type t = {
 27 |   position : int signal;
 28 |   send : action -> unit;
 29 |   length : int ref;
 30 |   changes :  changes event;
 31 |   get_lines : unit -> Zed_lines.t;
 32 |   coordinates : (int * int) signal;
 33 |   coordinates_display : (int * int) signal;
 34 |   line : int signal;
 35 |   column : int signal;
 36 |   column_display : int signal;
 37 |   wanted_column : int signal;
 38 |   set_wanted_column : int -> unit;
 39 | }
 40 | 
 41 | let create length changes get_lines position wanted_column =
 42 |   if position < 0 || position > length then raise Out_of_bounds;
 43 |   let length = ref length in
 44 |   let user_moves, send = E.create () in
 45 |   let update_position position action =
 46 |     match action with
 47 |     | User_move pos -> pos
 48 |     | Text_modification changes ->
 49 |       let delta = changes.added - changes.removed in
 50 |       length := !length + delta;
 51 |       if !length < 0 then raise Out_of_bounds;
 52 |       (* Move the cursor if it is after the start of the changes. *)
 53 |       if position > changes.position then begin
 54 |         if delta >= 0 then
 55 |           (* Text has been inserted, advance the cursor. *)
 56 |           position + delta
 57 |         else if position < changes.position - delta  then
 58 |           (* Text has been removed and the removed block contains the
 59 |              cursor, move it at the beginning of the removed block. *)
 60 |           changes.position
 61 |         else
 62 |           (* Text has been removed before the cursor, move back the
 63 |              cursor. *)
 64 |           position + delta
 65 |       end else
 66 |         position
 67 |   in
 68 |   let text_modifications = E.map (fun x -> Text_modification x) changes in
 69 |   let position =
 70 |     S.fold update_position position (E.select [user_moves; text_modifications])
 71 |   in
 72 |   let compute_coordinates_and_display position =
 73 |     let lines = get_lines () in
 74 |     let index = Zed_lines.line_index lines position in
 75 |     let bol= Zed_lines.line_start lines index in
 76 |     let column= position - bol in
 77 |     let width= Zed_lines.force_width lines bol column in
 78 |     (index, column, bol, width)
 79 |   in
 80 |   let coordinates_and_display= S.map compute_coordinates_and_display position in
 81 |   let coordinates = S.map (fun (row, column,_,_)-> (row, column)) coordinates_and_display in
 82 |   let coordinates_display = S.map (fun (row,_,_,width)-> (row, width)) coordinates_and_display in
 83 |   let line= S.map fst coordinates in
 84 |   let column= S.map snd coordinates in
 85 |   let column_display= S.map snd coordinates_display in
 86 |   let wanted_column, set_wanted_column = S.create wanted_column in
 87 |   {
 88 |     position;
 89 |     send;
 90 |     length;
 91 |     changes;
 92 |     get_lines;
 93 |     coordinates;
 94 |     coordinates_display;
 95 |     line;
 96 |     column;
 97 |     column_display;
 98 |     wanted_column;
 99 |     set_wanted_column;
100 |   }
101 | 
102 | let copy cursor =
103 |   create
104 |     !(cursor.length)
105 |     cursor.changes
106 |     cursor.get_lines
107 |     (S.value cursor.position)
108 |     (S.value cursor.wanted_column)
109 | 
110 | let position cursor = cursor.position
111 | let get_position cursor = S.value cursor.position
112 | let line cursor = cursor.line
113 | let get_line cursor = S.value cursor.line
114 | let column cursor = cursor.column
115 | let column_display cursor = cursor.column_display
116 | let get_column cursor = S.value cursor.column
117 | let get_column_display cursor = S.value cursor.column_display
118 | let coordinates cursor = cursor.coordinates
119 | let coordinates_display cursor = cursor.coordinates
120 | let get_coordinates cursor = S.value cursor.coordinates
121 | let get_coordinates_display cursor = S.value cursor.coordinates_display
122 | let wanted_column cursor = cursor.wanted_column
123 | let get_wanted_column cursor = S.value cursor.wanted_column
124 | let set_wanted_column cursor column = cursor.set_wanted_column column
125 | 
126 | let move cursor ?(set_wanted_column=true) delta =
127 |   let new_position = S.value cursor.position + delta in
128 |   if new_position < 0 || new_position > !(cursor.length) then
129 |     raise Out_of_bounds
130 |   else begin
131 |     cursor.send (User_move new_position);
132 |     if set_wanted_column then cursor.set_wanted_column (S.value cursor.column_display)
133 |   end
134 | 
135 | let goto cursor ?(set_wanted_column=true) position =
136 |   if position < 0 || position > !(cursor.length) then
137 |     raise Out_of_bounds
138 |   else begin
139 |     cursor.send (User_move position);
140 |     if set_wanted_column then cursor.set_wanted_column (S.value cursor.column_display)
141 |   end
142 | 


--------------------------------------------------------------------------------
/src/zed_cursor.mli:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_cursor.mli
  3 |  * --------------
  4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | (** Cursors *)
 11 | 
 12 | (** A cursor is a pointer in an edition buffer. When some text is
 13 |     inserted or removed, all cursors after the modification are
 14 |     automatically moved accordingly. *)
 15 | 
 16 | open React
 17 | 
 18 | type t
 19 |   (** Type of a cursor. *)
 20 | 
 21 | type changes= {
 22 |   position: int;
 23 |   added: int;
 24 |   removed: int;
 25 |   added_width: int;
 26 |   removed_width: int;
 27 | }
 28 | 
 29 | exception Out_of_bounds
 30 |   (** Exception raised when trying to move a cursor outside the bounds
 31 |       of the text it points to. *)
 32 | 
 33 | val create : int -> changes event -> (unit -> Zed_lines.t) -> int -> int -> t
 34 |   (** [create length changes get_lines position wanted_column] creates
 35 |       a new cursor pointing to position [position].
 36 | 
 37 |       [length] is the current length of the text the cursor points
 38 |       to. It raises {!Out_of_bounds} if [position] is greater than
 39 |       [length].
 40 | 
 41 |       [changes] is an event which occurs with values of the form
 42 |       [(start, added, removed)] when the text changes, with the same
 43 |       semantic as {!Zed_edit.changes}.
 44 | 
 45 |       [get_lines] is used to retreive the current set of line
 46 |       positions of the text. It is used to compute the line and column
 47 |       of the cursor.
 48 | 
 49 |       [wanted_column] is the column on which the cursor want to be, if
 50 |       there is enough room on the line. *)
 51 | 
 52 | val copy : t -> t
 53 |   (** [copy cursor] creates a copy of the given cursor. The new cursor
 54 |       initially points to the same location as [cursor]. *)
 55 | 
 56 | val position : t -> int signal
 57 |   (** [position cursor] returns the signal holding the current
 58 |       position of the given cursor. *)
 59 | 
 60 | val get_position : t -> int
 61 |   (** [get_position cursor] returns the current position of
 62 |       [cursor]. *)
 63 | 
 64 | val line : t -> int signal
 65 |   (** [line cursor] returns the signal holding the current line on
 66 |       which the cursor is. *)
 67 | 
 68 | val get_line : t -> int
 69 |   (** [get_line cursor] returns the current line of the cursor. *)
 70 | 
 71 | val column : t -> int signal
 72 |   (** [column cursor] returns the signal holding the current column of
 73 |       the cursor. *)
 74 | 
 75 | val column_display : t -> int React.signal
 76 |   (** [column_display cursor] returns the signal holding the current display column of
 77 |       the cursor. *)
 78 | 
 79 | val get_column : t -> int
 80 |   (** [get_column cursor] returns the current column of the cursor. *)
 81 | 
 82 | val get_column_display : t -> int
 83 |   (** [get_column_display cursor] returns the current display column of the cursor. *)
 84 | 
 85 | val coordinates : t -> (int * int) signal
 86 |   (** [coordinates cursor] returns the signal holding the current
 87 |       line & column of the cursor. *)
 88 | 
 89 | val coordinates_display : t -> (int * int) React.signal
 90 |   (** [coordinates cursor] returns the signal holding the current
 91 |       line & display column of the cursor. *)
 92 | 
 93 | val get_coordinates : t -> int * int
 94 |   (** [get_coordinates cursor] returns the
 95 |       current line & column of the cursor. *)
 96 | 
 97 | val get_coordinates_display : t -> int * int
 98 |   (** [get_coordinates_display cursor] returns the
 99 |       current line & display column of the cursor. *)
100 | 
101 | val wanted_column : t -> int signal
102 |   (** [wanted_column cursor] returns the signal holding the column on
103 |       which the cursor wants to be. *)
104 | 
105 | val get_wanted_column : t -> int
106 |   (** [get_wanted_column cursor] returns the column on which the
107 |       cursor wants to be. *)
108 | 
109 | val set_wanted_column : t -> int -> unit
110 |   (** [set_wanted_column cursor] sets the column on which the cursor
111 |       want to be. *)
112 | 
113 | val goto : t -> ?set_wanted_column : bool -> int -> unit
114 |   (** [goto cursor position] moves the given cursor to the given
115 |       position. It raises {!Out_of_bounds} if [position] is outside
116 |       the bounds of the text. If [set_wanted_column] is [true] (the
117 |       default), then the wanted column will be set to the column of
118 |       the cursor at given position. *)
119 | 
120 | val move : t -> ?set_wanted_column : bool -> int -> unit
121 |   (** [move cursor delta] moves the given cursor by the given number
122 |       of characters. It raises {!Out_of_bounds} if the result is
123 |       outside the bounds of the text. *)
124 | 


--------------------------------------------------------------------------------
/src/zed_edit.mli:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_edit.mli
  3 |  * ------------
  4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | (** Edition engines *)
 11 | 
 12 | open React
 13 | 
 14 | type 'a t
 15 |   (** Type of edition engines. ['a] is the type of custom data
 16 |       attached to the engine in order to extend it. *)
 17 | 
 18 | (** Type of clipboards. *)
 19 | type clipboard = {
 20 |   clipboard_get : unit -> Zed_rope.t;
 21 |   (** Returns the current contents of the clipboard. *)
 22 |   clipboard_set : Zed_rope.t -> unit;
 23 |   (** Sets the contents of the clipboard. *)
 24 | }
 25 | 
 26 | val new_clipboard : unit -> clipboard
 27 |   (** [new_clipboard ()] creates a new clipboard using a reference. *)
 28 | 
 29 | val create :
 30 |   ?editable : (int -> int -> bool) ->
 31 |   ?move : (int -> int -> int) ->
 32 |   ?clipboard : clipboard ->
 33 |   ?match_word : (Zed_rope.t -> int -> int option) ->
 34 |   ?locale : string option signal ->
 35 |   ?undo_size : int ->
 36 |   unit -> 'a t
 37 |   (** [create ?editable ?move ?clipboard ()] creates a new edition
 38 |       engine in the initial state.
 39 | 
 40 |       [editable] is used to determine whether the text at given
 41 |       position is editable or not. It takes as argument the position
 42 |       and the length of the text to remove.
 43 | 
 44 |       [move] is unused.
 45 | 
 46 |       [clipboard] is the clipboard to use for this engine. If none is
 47 |       defined, a new one using a reference is created.
 48 | 
 49 |       [match_word] is used to recognize words. It must returns the end
 50 |       of the matched word if any.
 51 | 
 52 |       [locale] is the locale of this buffer. It is used for case
 53 |       mapping.
 54 | 
 55 |       [undo_size] is the size of the undo buffer. It is the number of
 56 |       state zed will remember. It defaults to [1000]. *)
 57 | 
 58 | 
 59 | val get_data : 'a t -> 'a
 60 |   (** [get_data edit] returns the custom data attached to the
 61 |       engine. It raises [Not_found] if no data is attached to the
 62 |       engine. *)
 63 | 
 64 | val set_data : 'a t -> 'a -> unit
 65 |   (** [set_data edit data] attach [data] to the engine. *)
 66 | 
 67 | val clear_data : 'a t -> unit
 68 |   (** [clear_data edit] removes the custom data of engine. *)
 69 | 
 70 | val text : 'a t -> Zed_rope.t
 71 |   (** [text edit] returns the signal holding the current contents of
 72 |       the buffer. *)
 73 | 
 74 | val lines : 'a t -> Zed_lines.t
 75 |   (** [lines edit] returns the set of line position of [text edit]. *)
 76 | 
 77 | val get_line : 'a t -> int -> Zed_rope.t
 78 |   (** [get_line edit n] returns the rope corresponding to the [n]th line
 79 |       without the newline character. *)
 80 | 
 81 | val changes : 'a t -> Zed_cursor.changes  event
 82 |   (** [changes edit] returns an event which occurs with values of the
 83 |       form [(start, added, removed)] when the contents of the engine
 84 |       changes. [start] is the start of modifications, [added] is the
 85 |       number of characters added and [removed] is the number of
 86 |       characters removed. *)
 87 | 
 88 | val update : 'a t -> Zed_cursor.t list -> unit event
 89 |   (** [update edit cursors] returns an event which occurs each the
 90 |       rendering of the engine should be updated. *)
 91 | 
 92 | val erase_mode : 'a t -> bool signal
 93 |   (** [erase_mode edit] returns the ``erase'' mode of the buffer. In
 94 |       this mode character inserted in the buffer erase existing
 95 |       ones. *)
 96 | 
 97 | val get_erase_mode : 'a t -> bool
 98 |   (** [erase_mode edit] returns the current erase mode of the
 99 |       buffer. *)
100 | 
101 | val set_erase_mode : 'a t -> bool -> unit
102 |   (** [set_erase_mode edit state] sets the status of the erase mode
103 |       for the given engine. *)
104 | 
105 | val mark : 'a t -> Zed_cursor.t
106 |   (** [mark edit] returns the cursor used to for the mark in the given
107 |       engine. *)
108 | 
109 | val selection : 'a t -> bool signal
110 |   (** [selection edit] returns the signal holding the current
111 |       selection state. If [true], text is being selectionned. *)
112 | 
113 | val get_selection : 'a t -> bool
114 |   (** [selection edit] returns the current selection state. *)
115 | 
116 | val set_selection : 'a t -> bool -> unit
117 |   (** [set_selection edit state] sets the selection state. *)
118 | 
119 | (** {5 Cursors} *)
120 | 
121 | val new_cursor : 'a t -> Zed_cursor.t
122 |   (** [new_cursor edit] creates a new cursor for the given edition
123 |       engine. The cursor initially points to the beginning of the
124 |       buffer. *)
125 | 
126 | (** {5 Actions} *)
127 | 
128 | exception Cannot_edit
129 |   (** Exception raised when trying to edit a non-editable portion of a
130 |       buffer. *)
131 | 
132 | type 'a context
133 |   (** Type of contexts. Contexts are used to modify an edition
134 |       buffer. *)
135 | 
136 | val context : ?check : bool -> 'a t -> Zed_cursor.t -> 'a context
137 |   (** [context ?check edit cursor] creates a new context with given
138 |       parameters. [cursor] is the cursor that will be used for all
139 |       modification of the text. If [check] is [true] (the default)
140 |       then all modification of the text will be checked with the
141 |       [editable] function of the engine. *)
142 | 
143 | val edit : 'a context -> 'a t
144 |   (** [edit ctx] returns the edition engine used by the given
145 |       context. *)
146 | 
147 | val cursor : 'a context -> Zed_cursor.t
148 |   (** [cursor ctx] returns the cursor used by this context. *)
149 | 
150 | val check : 'a context -> bool
151 |   (** [check ctx] returns whether the context has been created with
152 |       the [check] flag. *)
153 | 
154 | val with_check : bool -> 'a context -> 'a context
155 |   (** [with_check check ctx] retuns [ctx] with the check flag set to
156 |       [check]. *)
157 | 
158 | val goto : 'a context -> ?set_wanted_column : bool -> int -> unit
159 |   (** [goto ctx ?set_column position] moves the cursor to the given
160 |       position. It raises {!Zed_cursor.Out_of_bounds} if the position
161 |       is outside the bounds of the text. If [set_wanted_column] is
162 |       [true], the wanted column of the cursor is set to the new
163 |       column. *)
164 | 
165 | val move : 'a context -> ?set_wanted_column : bool -> int -> unit
166 |   (** [move ctx ?set_wanted_column delta] moves the cursor by the
167 |       given number of characters. It raises
168 |       {!Zed_cursor.Out_of_bounds} if the current plus [delta] is
169 |       outside the bounds of the text. *)
170 | 
171 | val move_line : 'a context -> int -> unit
172 |   (** [move_line ctx ?set_wanted_column delta] moves the cursor by the
173 |       given number of lines. *)
174 | 
175 | val position : 'a context -> int
176 |   (** [position ctx] returns the position of the cursor. *)
177 | 
178 | val line : 'a context -> int
179 |   (** [line ctx] returns the line of the cursor. *)
180 | 
181 | val column : 'a context -> int
182 |   (** [column ctx] returns the column of the cursor. *)
183 | 
184 | val column_display : 'a context -> int
185 |   (** [column_display ctx] returns the display column of the cursor. *)
186 | 
187 | val at_bol : 'a context -> bool
188 |   (** [at_bol ctx] returns [true] iff the cursor is at the beginning
189 |       of the current line. *)
190 | 
191 | val at_eol : 'a context -> bool
192 |   (** [at_eol ctx] returns [true] iff the cursor is at the end of the
193 |       current line. *)
194 | 
195 | val at_bot : 'a context -> bool
196 |   (** [at_bot ctx] returns [true] iff the cursor is at the beginning
197 |       of the text. *)
198 | 
199 | val at_eot : 'a context -> bool
200 |   (** [at_eot ctx] returns [true] iff the cursor is at the end of the
201 |       text. *)
202 | 
203 | val insert : 'a context -> Zed_rope.t -> unit
204 |   (** [insert ctx rope] inserts the given rope at current position. *)
205 | 
206 | val insert_char : 'a context -> Uchar.t -> unit
207 |   (** [insert ctx rope] inserts the given Uchar at current position. *)
208 | 
209 | val insert_no_erase : 'a context -> Zed_rope.t -> unit
210 |   (** [insert ctx rope] inserts the given rope at current position but
211 |       do not erase text if the buffer is currently in erase mode. *)
212 | 
213 | val remove_next : 'a context -> int -> unit
214 |   (** [remove_next ctx n] removes [n] characters at current
215 |       position. If there is less than [n] characters at current
216 |       position, it removes everything until the end of the text. *)
217 | 
218 | val remove_prev : 'a context -> int -> unit
219 |   (** [remove_prev ctx n] removes [n] characters before current
220 |       position. If there is less than [n] characters before current
221 |       position, it removes everything until the beginning of the
222 |       text. *)
223 | 
224 | val remove : 'a context -> int -> unit
225 |   (** Alias for {!remove_next} *)
226 | 
227 | val replace : 'a context -> int -> Zed_rope.t -> unit
228 |   (** [replace ctx n rope] does the same as:
229 | 
230 |       {[
231 |         remove ctx n;
232 |         insert_no_erase ctx rope
233 |       ]}
234 | 
235 |       but in one atomic operation. *)
236 | 
237 | val newline : 'a context -> unit
238 |   (** Insert a newline character. *)
239 | 
240 | val next_char : 'a context -> unit
241 |   (** [next_char ctx] moves the cursor to the next character. It does
242 |       nothing if the cursor is at the end of the text. *)
243 | 
244 | val prev_char : 'a context -> unit
245 |   (** [prev_char ctx] moves the cursor to the previous character. It
246 |       does nothing if the cursor is at the beginning of the text. *)
247 | 
248 | val next_line : 'a context -> unit
249 |   (** [next_line ctx] moves the cursor to the next line. If the cursor
250 |       is on the last line, it is moved to the end of the buffer. *)
251 | 
252 | val prev_line : 'a context -> unit
253 |   (** [prev_line ctx] moves the cursor to the previous line. If the
254 |       cursor is on the first line, it is moved to the beginning of the
255 |       buffer. *)
256 | 
257 | val goto_bol : 'a context -> unit
258 |   (** [goto_bol ctx] moves the cursor to the beginning of the current
259 |       line. *)
260 | 
261 | val goto_eol : 'a context -> unit
262 |   (** [goto_eol ctx] moves the cursor to the end of the current
263 |       line. *)
264 | 
265 | val goto_bot : 'a context -> unit
266 |   (** [goto_bot ctx] moves the cursor to the beginning of the text. *)
267 | 
268 | val goto_eot : 'a context -> unit
269 |   (** [goto_eot ctx] moves the cursor to the end of the text. *)
270 | 
271 | val delete_next_char : 'a context -> unit
272 |   (** [delete_next_char ctx] deletes the character after the cursor,
273 |       if any. *)
274 | 
275 | val delete_prev_char : 'a context -> unit
276 |   (** [delete_prev_char ctx] delete the character before the
277 |       cursor. *)
278 | 
279 | val delete_next_line : 'a context -> unit
280 |   (** [delete_next_line ctx] delete everything until the end of the
281 |       current line. *)
282 | 
283 | val delete_prev_line : 'a context -> unit
284 |   (** [delete_next_line ctx] delete everything until the beginning of
285 |       the current line. *)
286 | 
287 | val kill_next_line : 'a context -> unit
288 |   (** [kill_next_line ctx] delete everything until the end of the
289 |       current line and save it to the clipboard. *)
290 | 
291 | val kill_prev_line : 'a context -> unit
292 |   (** [kill_next_line ctx] delete everything until the beginning of
293 |       the current line and save it to the clipboard. *)
294 | 
295 | val switch_erase_mode : 'a context -> unit
296 |   (** [switch_erase_mode ctx] switch the current erase mode. *)
297 | 
298 | val set_mark : 'a context -> unit
299 |   (** [set_mark ctx] sets the mark at current position. *)
300 | 
301 | val goto_mark : 'a context -> unit
302 |   (** [goto_mark ctx] moves the cursor to the mark. *)
303 | 
304 | val copy : 'a context -> unit
305 |   (** [copy ctx] copies the current selectionned region to the
306 |       clipboard. *)
307 | 
308 | val copy_sequence: 'a context -> int -> int -> unit
309 |   (** [copy_sequence ctx start len] copies [len] characters start
310 |       from [start] to the clipboard. *)
311 | 
312 | val kill : 'a context -> unit
313 |   (** [kill ctx] copies the current selectionned region to the
314 |       clipboard and remove it. *)
315 | 
316 | val yank : 'a context -> unit
317 |   (** [yank ctx] inserts the contents of the clipboard at current
318 |       position. *)
319 | 
320 | val capitalize_word : 'a context -> unit
321 |   (** [capitalize_word ctx] capitalizes the first word after the
322 |       cursor. *)
323 | 
324 | val lowercase_word : 'a context -> unit
325 |   (** [lowercase_word ctx] converts the first word after the cursor to
326 |       lowercase. *)
327 | 
328 | val uppercase_word : 'a context -> unit
329 |   (** [uppercase_word ctx] converts the first word after the cursor to
330 |       uppercase. *)
331 | 
332 | val next_word : 'a context -> unit
333 |   (** [next_word ctx] moves the cursor to the end of the next word. *)
334 | 
335 | val prev_word : 'a context -> unit
336 |   (** [prev_word ctx] moves the cursor to the beginning of the
337 |       previous word. *)
338 | 
339 | val delete_next_word : 'a context -> unit
340 |   (** [delete_next_word ctx] deletes the word after the cursor. *)
341 | 
342 | val delete_prev_word : 'a context -> unit
343 |   (** [delete_prev_word ctx] deletes the word before the cursor. *)
344 | 
345 | val kill_next_word : 'a context -> unit
346 |   (** [kill_next_word ctx] deletes the word after the cursor and save
347 |       it to the clipboard. *)
348 | 
349 | val kill_prev_word : 'a context -> unit
350 |   (** [kill_prev_word ctx] deletes the word before the cursor and save
351 |       it to the clipboard. *)
352 | 
353 | val undo : 'a context -> unit
354 |   (** [undo ctx] reverts the last performed action. *)
355 | 
356 | (** {5 Action by names} *)
357 | 
358 | (** Type of actions. *)
359 | type action =
360 |   | Insert of Zed_char.t
361 |   | Insert_str of Zed_string.t
362 |   | Newline
363 |   | Next_char
364 |   | Prev_char
365 |   | Next_line
366 |   | Prev_line
367 |   | Join_line
368 |   | Set_pos of int
369 |   | Goto of int
370 |   | Goto_bol
371 |   | Goto_eol
372 |   | Goto_bot
373 |   | Goto_eot
374 |   | Delete_next_chars of int
375 |   | Delete_prev_chars of int
376 |   | Kill_next_chars of int
377 |   | Kill_prev_chars of int
378 |   | Delete_next_char
379 |   | Delete_prev_char
380 |   | Delete_next_line
381 |   | Delete_prev_line
382 |   | Kill_next_line
383 |   | Kill_prev_line
384 |   | Switch_erase_mode
385 |   | Set_mark
386 |   | Goto_mark
387 |   | Copy
388 |   | Kill
389 |   | Yank
390 |   | Capitalize_word
391 |   | Lowercase_word
392 |   | Uppercase_word
393 |   | Next_word
394 |   | Prev_word
395 |   | Delete_next_word
396 |   | Delete_prev_word
397 |   | Kill_next_word
398 |   | Kill_prev_word
399 |   | Undo
400 | 
401 | val get_action : action -> ('a context -> unit)
402 |   (** [get_action action] returns the function associated to the given
403 |       action. *)
404 | 
405 | val actions : (action * string) list
406 |   (** List of actions with their names, except {!Insert}. *)
407 | 
408 | val doc_of_action : action -> string
409 |   (** [doc_of_action action] returns a short description of the
410 |       action. *)
411 | 
412 | val action_of_name : string -> action
413 |   (** [action_of_name str] converts the given action name into an
414 |       action. Action name are the same as function name but with '_'
415 |       replaced by '-', number parameter replaced with "(number)".
416 |       It raises [Not_found] if the name does not correspond to an action.
417 | 
418 |       [Insert ch] is represented by "insert(<char>)" where [<char>] is:
419 | 
420 |       - a literal ascii character, such as "a", "b", ...
421 |       - a unicode character, written "U+< code >", such as "U+0041"
422 | 
423 |       [Insert_str str] is represented by "insert-str(<str>)" where
424 |       [<str>] is raw utf8 string.
425 |   *)
426 | 
427 | val name_of_action : action -> string
428 |   (** [name_of_action act] returns the name of the given action. *)
429 | 


--------------------------------------------------------------------------------
/src/zed_input.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_input.ml
  3 |  * ------------
  4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | module type S = sig
 11 |   type event
 12 |   type +'a t
 13 |   val empty : 'a t
 14 |   val add : event list -> 'a -> 'a t -> 'a t
 15 |   val remove : event list -> 'a t -> 'a t
 16 |   val fold : (event list -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
 17 |   val bindings : 'a t -> (event list * 'a) list
 18 |   type 'a resolver
 19 |   type 'a pack
 20 |   val pack : ('a -> 'b) -> 'a t -> 'b pack
 21 |   val resolver : 'a pack list -> 'a resolver
 22 |   type 'a result =
 23 |     | Accepted of 'a
 24 |     | Continue of 'a resolver
 25 |     | Rejected
 26 |   val resolve : event -> 'a resolver -> 'a result
 27 | end
 28 | 
 29 | module Make (Event : Map.OrderedType) =
 30 | struct
 31 |   type event = Event.t
 32 | 
 33 |   module Event_map = Map.Make (Event)
 34 | 
 35 |   type 'a t = 'a node Event_map.t
 36 | 
 37 |   and 'a node =
 38 |     | Set of 'a t
 39 |     | Val of 'a
 40 | 
 41 |   let empty = Event_map.empty
 42 | 
 43 |   let rec add events value set =
 44 |     match events with
 45 |       | [] ->
 46 |           invalid_arg "Zed_input.Make.add"
 47 |       | [event] ->
 48 |           Event_map.add event (Val value) set
 49 |       | event :: events ->
 50 |           match try Some (Event_map.find event set) with Not_found -> None with
 51 |             | None
 52 |             | Some (Val _) ->
 53 |                 Event_map.add event (Set (add events value empty)) set
 54 |             | Some (Set s) ->
 55 |                 Event_map.add event (Set (add events value s)) set
 56 | 
 57 |   let rec remove events set =
 58 |     match events with
 59 |       | [] ->
 60 |           invalid_arg "Zed_input.Make.remove"
 61 |       | [event] ->
 62 |           Event_map.remove event set
 63 |       | event :: events ->
 64 |           match try Some (Event_map.find event set) with Not_found -> None with
 65 |             | None
 66 |             | Some (Val _) ->
 67 |                 set
 68 |             | Some (Set s) ->
 69 |                 let s = remove events s in
 70 |                 if Event_map.is_empty s then
 71 |                   Event_map.remove event set
 72 |                 else
 73 |                   Event_map.add event (Set s) set
 74 | 
 75 |   let fold f set acc =
 76 |     let rec loop prefix set acc =
 77 |       Event_map.fold
 78 |         (fun event node acc ->
 79 |            match node with
 80 |              | Val v ->
 81 |                  f (List.rev (event :: prefix)) v acc
 82 |              | Set s ->
 83 |                  loop (event :: prefix) s acc)
 84 |         set
 85 |         acc
 86 |     in
 87 |     loop [] set acc
 88 | 
 89 |   let bindings set =
 90 |     List.rev (fold (fun events action l -> (events, action) :: l) set [])
 91 | 
 92 |   module type Pack =
 93 |   sig
 94 |     type a
 95 |     type b
 96 |     val set : a t
 97 |     val map : a -> b
 98 |   end
 99 | 
100 |   type 'a pack = (module Pack with type b = 'a)
101 |   type 'a resolver = 'a pack list
102 | 
103 |   let pack (type u) (type v) map set =
104 |     let module Pack = struct type a = u type b = v let set = set let map = map end in
105 |     (module Pack : Pack with type b = v)
106 | 
107 |   let resolver l = l
108 | 
109 |   type 'a result =
110 |     | Accepted of 'a
111 |     | Continue of 'a resolver
112 |     | Rejected
113 | 
114 |   let rec resolve_rec : 'a. event -> 'a pack list -> 'a pack list -> 'a result = fun (type u) event acc packs ->
115 |     match packs with
116 |       | [] ->
117 |           if acc = [] then
118 |             Rejected
119 |           else
120 |             Continue (List.rev acc)
121 |       | p :: packs ->
122 |           let module Pack = (val p : Pack with type b = u) in
123 |           match try Some (Event_map.find event Pack.set) with Not_found -> None with
124 |             | Some (Set set) ->
125 |                 resolve_rec event (pack Pack.map set :: acc) packs
126 |             | Some (Val v) ->
127 |                 Accepted (Pack.map v)
128 |             | None ->
129 |                 resolve_rec event acc packs
130 | 
131 |   let resolve event sets =
132 |     resolve_rec event [] sets
133 | end
134 | 


--------------------------------------------------------------------------------
/src/zed_input.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * zed_input.mli
 3 |  * -------------
 4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
 5 |  * Licence   : BSD3
 6 |  *
 7 |  * This file is a part of Zed, an editor engine.
 8 |  *)
 9 | 
10 | (** Helpers for writing key bindings *)
11 | 
12 | (** Signature for binders. *)
13 | module type S = sig
14 | 
15 |   type event
16 |     (** Type of events. *)
17 | 
18 |   type +'a t
19 |     (** Type of set of bindings mapping input sequence to values of
20 |         type ['a]. *)
21 | 
22 |   val empty : 'a t
23 |     (** The empty set of bindings. *)
24 | 
25 |   val add : event list -> 'a -> 'a t -> 'a t
26 |     (** [add events x bindings] binds [events] to [x]. It raises
27 |         [Invalid_argument] if [events] is empty. *)
28 | 
29 |   val remove : event list -> 'a t -> 'a t
30 |     (** [remove events bindings] unbinds [events]. It raises
31 |         [Invalid_argument] if [events] is empty. *)
32 | 
33 |   val fold : (event list -> 'a -> 'b -> 'b) -> 'a t -> 'b -> 'b
34 |     (** [fold f set acc] executes [f] on all sequence of [set],
35 |         accumulating a value. *)
36 | 
37 |   val bindings : 'a t -> (event list * 'a) list
38 |     (** [bindings set] returns all bindings of [set]. *)
39 | 
40 |   type 'a resolver
41 |     (** Type of a resolver. A resolver is used to resolve an input
42 |         sequence, i.e. to find the value associated to one. It returns
43 |         a value of type ['a] when a matching sequence is found. *)
44 | 
45 |   type 'a pack
46 |     (** A pack is a pair of a set of bindings and a mapping
47 |         function. *)
48 | 
49 |   val pack : ('a -> 'b) -> 'a t -> 'b pack
50 |     (** [pack f set] creates a pack. *)
51 | 
52 |   val resolver : 'a pack list -> 'a resolver
53 |     (** [resolver packs] creates a resolver from a list of pack. *)
54 | 
55 |   (** Result of a resolving operation. *)
56 |   type 'a result =
57 |     | Accepted of 'a
58 |         (** The sequence is terminated and associated to the given
59 |             value. *)
60 |     | Continue of 'a resolver
61 |         (** The sequence is not terminated. *)
62 |     | Rejected
63 |         (** None of the sequences is prefixed by the one. *)
64 | 
65 |   val resolve : event -> 'a resolver -> 'a result
66 |     (** [resolve event resolver] tries to resolve [event] using
67 |         [resolver]. *)
68 | end
69 | 
70 | module Make (Event : Map.OrderedType) : S with type event = Event.t
71 |   (** [Make (Event)] makes a a new binder. *)
72 | 


--------------------------------------------------------------------------------
/src/zed_lines.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_lines.ml
  3 |  * ------------
  4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
  5 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
  6 |  * Licence   : BSD3
  7 |  *
  8 |  * This file is a part of Zed, an editor engine.
  9 |  *)
 10 | 
 11 | open Result
 12 | 
 13 | exception Out_of_bounds
 14 | 
 15 | (* +-----------------------------------------------------------------+
 16 |    | Representation                                                  |
 17 |    +-----------------------------------------------------------------+ *)
 18 | 
 19 | (* Sets are represented by ropes. *)
 20 | 
 21 | type line=
 22 |   {
 23 |     length: int;
 24 |     width: int;
 25 |     width_info: int array;
 26 |   }
 27 | 
 28 | type t =
 29 |   | String of line
 30 |       (* [String len] is a string of length [len] without newline
 31 |          character. *)
 32 |   | Return
 33 |       (* A newline character. *)
 34 |   | Concat of t * t * int * int * int
 35 |       (* [Concat(t1, t2, len, count, depth)] *)
 36 | 
 37 | (* +-----------------------------------------------------------------+
 38 |    | Basic functions                                                 |
 39 |    +-----------------------------------------------------------------+ *)
 40 | 
 41 | let empty_line ()= { length= 0; width= 0; width_info= [||] }
 42 | 
 43 | let length = function
 44 |   | String line -> line.length
 45 |   | Return -> 1
 46 |   | Concat(_, _, len, _, _) -> len
 47 | 
 48 | let count = function
 49 |   | String _ -> 0
 50 |   | Return -> 1
 51 |   | Concat(_, _, _, count, _) -> count
 52 | 
 53 | let depth = function
 54 |   | String _ | Return -> 0
 55 |   | Concat(_, _, _, _, d) -> d
 56 | 
 57 | let empty = String (empty_line ())
 58 | 
 59 | let unsafe_width ?(tolerant=false) set idx len=
 60 |   let start= idx
 61 |   and len_all= len
 62 |   and acc=
 63 |     if tolerant then
 64 |       fun a b-> (+)
 65 |         (if a < 0 then 1 else a)
 66 |         (if b < 0 then 1 else b)
 67 |     else
 68 |       (+)
 69 |   in
 70 |   let rec unsafe_width set idx len=
 71 |     if len = 0 then
 72 |       Ok 0
 73 |     else
 74 |       match set with
 75 |       | Return-> Error (start + len_all - len)
 76 |       | String line->
 77 |         Ok (Array.fold_left acc 0 (Array.sub line.width_info idx len))
 78 |       | Concat (set1, set2, _,_,_)->
 79 |         let len1= length set1 in
 80 |         if idx + len <= len1 then
 81 |           unsafe_width set1 idx len
 82 |         else if idx >= len1 then
 83 |           unsafe_width set2 (idx-len1) len
 84 |         else
 85 |           let r1= unsafe_width set1 idx (len1 - idx)
 86 |           and r2= unsafe_width set2 0 (len - len1 + idx) in
 87 |           match r1, r2 with
 88 |           | Error ofs, _-> Error ofs
 89 |           | Ok _, Error ofs-> Error ofs
 90 |           | Ok w1, Ok w2-> Ok (w1 + w2)
 91 |   in
 92 |   unsafe_width set idx len
 93 | 
 94 | let width ?(tolerant=false) set idx len =
 95 |   if idx < 0 || len < 0 || idx + len > length set then
 96 |     raise Out_of_bounds
 97 |   else
 98 |     unsafe_width ~tolerant set idx len
 99 | 
100 | let force_width set idx len=
101 |   let acc a b= (+)
102 |     (if a < 0 then 1 else a)
103 |     (if b < 0 then 1 else b)
104 |   in
105 |   let rec force_width set idx len=
106 |     if len = 0 then
107 |       0
108 |     else
109 |       match set with
110 |       | Return-> 0
111 |       | String line->
112 |         Array.fold_left acc 0 (Array.sub line.width_info idx len)
113 |       | Concat (set1, set2, _,_,_)->
114 |         let len1= length set1 in
115 |         if idx + len <= len1 then
116 |           force_width set1 idx len
117 |         else if idx >= len1 then
118 |           force_width set2 (idx-len1) len
119 |         else
120 |           let r1= force_width set1 idx (len1 - idx)
121 |           and r2= force_width set2 0 (len - len1 + idx) in
122 |           r1 + r2
123 |   in
124 |   if idx < 0 || len < 0 || idx + len > length set then
125 |     raise Out_of_bounds
126 |   else
127 |     force_width set idx len
128 | 
129 | (* +-----------------------------------------------------------------+
130 |    | Offset/line resolution                                          |
131 |    +-----------------------------------------------------------------+ *)
132 | 
133 | let rec line_index_rec set ofs acc =
134 |   match set with
135 |   | String _ ->
136 |     acc
137 |   | Return ->
138 |     if ofs = 0 then
139 |       acc
140 |     else
141 |       acc + 1
142 |   | Concat(s1, s2, _, _, _) ->
143 |     let len1 = length s1 in
144 |     if ofs < len1 then
145 |       line_index_rec s1 ofs acc
146 |     else
147 |       line_index_rec s2 (ofs - len1) (acc + count s1)
148 | 
149 | let line_index set ofs =
150 |   if ofs < 0 || ofs > length set then
151 |     raise Out_of_bounds
152 |   else
153 |     line_index_rec set ofs 0
154 | 
155 | let rec line_start_rec set idx acc =
156 |   match set with
157 |   | String _ ->
158 |     acc
159 |   | Return ->
160 |     if idx = 0 then
161 |       acc
162 |     else
163 |       acc + 1
164 |   | Concat(s1, s2, _, _, _) ->
165 |     let count1 = count s1 in
166 |     if idx <= count1 then
167 |       line_start_rec s1 idx acc
168 |     else
169 |       line_start_rec s2 (idx - count1) (acc + length s1)
170 | 
171 | let line_start set idx =
172 |   if idx < 0 || idx > count set then
173 |     raise Out_of_bounds
174 |   else
175 |     line_start_rec set idx 0
176 | 
177 | let line_stop set idx =
178 |   if idx = count set
179 |   then length set
180 |   else line_start set (idx + 1) - 1
181 | 
182 | let line_length set idx =
183 |   line_stop set idx - line_start set idx
184 | 
185 | 
186 | (* +-----------------------------------------------------------------+
187 |    | Operations on sets                                              |
188 |    +-----------------------------------------------------------------+ *)
189 | 
190 | let concat set1 set2 =
191 |   Concat(
192 |     set1, set2,
193 |     length set1 + length set2,
194 |     count set1 + count set2,
195 |     1 + max (depth set1) (depth set2))
196 | 
197 | let append_line l1 l2=
198 |   { length= l1.length + l2.length;
199 |     width= l1.width + l2.width;
200 |     width_info= Array.append l1.width_info l2.width_info
201 |   }
202 | 
203 | let append set1 set2 =
204 |   match set1, set2 with
205 |   | String {length= 0;_}, _ -> set2
206 |   | _, String {length= 0;_} -> set1
207 |   | String l1, String l2 -> String (append_line l1 l2)
208 |   | String l1, Concat(String l2, set, len, count, h) ->
209 |     Concat(String (append_line l1 l2), set, len + l1.length, count, h)
210 |   | Concat(set, String l1, len, count, h), String l2 ->
211 |     Concat(set, String(append_line l1 l2), len + l2.length, count, h)
212 |   | _ ->
213 |     let d1 = depth set1 and d2 = depth set2 in
214 |     if d1 > d2 + 2 then begin
215 |       match set1 with
216 |       | String _ | Return ->
217 |         assert false
218 |       | Concat(set1_1, set1_2, _, _, _) ->
219 |         if depth set1_1 >= depth set1_2 then
220 |           concat set1_1 (concat set1_2 set2)
221 |         else begin
222 |           match set1_2 with
223 |           | String _ | Return ->
224 |             assert false
225 |           | Concat(set1_2_1, set1_2_2, _, _, _) ->
226 |             concat (concat set1_1 set1_2_1) (concat set1_2_2 set2)
227 |         end
228 |     end else if d2 > d1 + 2 then begin
229 |       match set2 with
230 |       | String _ | Return ->
231 |         assert false
232 |       | Concat(set2_1, set2_2, _, _, _) ->
233 |         if depth set2_2 >= depth set2_1 then
234 |           concat (concat set1 set2_1) set2_2
235 |         else begin
236 |           match set2_1 with
237 |           | String _ | Return ->
238 |             assert false
239 |           | Concat(set2_1_1, set2_1_2, _, _, _) ->
240 |             concat (concat set1 set2_1_1) (concat set2_1_2 set2_2)
241 |         end
242 |     end else
243 |       concat set1 set2
244 | 
245 | let rec unsafe_sub set idx len =
246 |   match set with
247 |   | String line ->
248 |     let length= len in
249 |     let width_info= Array.sub line.width_info idx length in
250 |     let width= Array.fold_left (+) 0 width_info in
251 |     String { length; width; width_info }
252 |   | Return ->
253 |     if len = 1 then
254 |       Return
255 |     else
256 |       String (empty_line ())
257 |   | Concat(set_l, set_r, len', _, _) ->
258 |     let len_l = length set_l in
259 |     if len = len' then
260 |       set
261 |     else if idx >= len_l then
262 |       unsafe_sub set_r (idx - len_l) len
263 |     else if idx + len <= len_l then
264 |       unsafe_sub set_l idx len
265 |     else
266 |       append
267 |         (unsafe_sub set_l idx (len_l - idx))
268 |         (unsafe_sub set_r 0 (len - len_l + idx))
269 | 
270 | let sub set idx len =
271 |   if idx < 0 || len < 0 || idx + len > length set then
272 |     raise Out_of_bounds
273 |   else
274 |     unsafe_sub set idx len
275 | 
276 | let break set ofs =
277 |   let len = length set in
278 |   if ofs < 0 || ofs > len then
279 |     raise Out_of_bounds
280 |   else
281 |     (unsafe_sub set 0 ofs, unsafe_sub set ofs (len - ofs))
282 | 
283 | let insert set ofs set' =
284 |   let set1, set2 = break set ofs in
285 |   append set1 (append set' set2)
286 | 
287 | let remove set ofs len =
288 |   append (sub set 0 ofs) (sub set (ofs + len) (length set - ofs - len))
289 | 
290 | let replace set ofs len repl =
291 |   append (sub set 0 ofs) (append repl (sub set (ofs + len) (length set - ofs - len)))
292 | 
293 | (* +-----------------------------------------------------------------+
294 |    | Sets from ropes                                                 |
295 |    +-----------------------------------------------------------------+ *)
296 | 
297 | let of_rope rope =
298 |   let calc_widths widths=
299 |     let width_info= widths |> List.rev |> Array.of_list in
300 |     let width= Array.fold_left (+) 0 width_info in
301 |     (width, width_info)
302 |   in
303 |   let rec loop zip (length, widths) acc =
304 |     if Zed_rope.Zip.at_eos zip then
305 |       let width, width_info= calc_widths widths in
306 |       append acc (String { length; width; width_info })
307 |     else
308 |       let ch, zip = Zed_rope.Zip.next zip in
309 |       if Uchar.to_int (Zed_char.core ch) = 10 then
310 |         let width, width_info= calc_widths widths in
311 |         loop0 zip (append (append acc (String { length; width; width_info })) Return)
312 |       else
313 |         loop zip (length + 1, Zed_char.width ch::widths) acc
314 |   and loop0 zip acc =
315 |     if Zed_rope.Zip.at_eos zip then
316 |       acc
317 |     else
318 |       let ch, zip = Zed_rope.Zip.next zip in
319 |       if Uchar.to_int (Zed_char.core ch) = 10 then
320 |         loop0 zip (append acc Return)
321 |       else
322 |         loop zip (1, [Zed_char.width ch]) acc
323 |   in
324 |   loop0 (Zed_rope.Zip.make_f rope 0) empty
325 | 
326 | (* +-----------------------------------------------------------------+
327 |    | Index and width                                                 |
328 |    +-----------------------------------------------------------------+ *)
329 | 
330 | let get_idx_by_width set row column=
331 |   let start= line_start set row in
332 |   let stop= line_stop set row in
333 |   let rec get idx acc_width=
334 |     if acc_width >= column || idx >= stop then
335 |       idx
336 |     else
337 |       let curr_width= force_width set idx 1 in
338 |       if acc_width + curr_width > column
339 |       then idx (* the width of the current char covers the column *)
340 |       else get (idx+1) (acc_width + curr_width)
341 |   in
342 |   get start 0
343 | 
344 | 


--------------------------------------------------------------------------------
/src/zed_lines.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * zed_lines.mli
 3 |  * -------------
 4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
 5 |  * Licence   : BSD3
 6 |  *
 7 |  * This file is a part of Zed, an editor engine.
 8 |  *)
 9 | 
10 | (** Sets of line positions. *)
11 | 
12 | (** This module implement sets of line positions. They allow to
13 |     efficiently find the beginning of a line and to convert offset to
14 |     line and column number. *)
15 | 
16 | open Result
17 | 
18 | exception Out_of_bounds
19 |   (** Exception raised when trying to access a position outside the
20 |       bounds of a set. *)
21 | 
22 | type line
23 | 
24 | type t
25 |   (** Type of sets of line positions. *)
26 | 
27 | val length : t -> int
28 |   (** Returns the length of the set, i.e. the number of characters in
29 |       the set. *)
30 | 
31 | val count : t -> int
32 |   (** Returns the number of newlines in the set. *)
33 | 
34 | val of_rope : Zed_rope.t -> t
35 |   (** [of_rope rope] returns the set of newline positions in [rope]. *)
36 | 
37 | val empty : t
38 |   (** The empty set. *)
39 | 
40 | val width : ?tolerant:bool -> t -> int -> int -> (int, int) result
41 |   (** Returns the width of the given string. *)
42 | 
43 | val force_width : t -> int -> int -> int
44 |   (** Returns the width of the given string. If error encounted, returns the width of the legit part *)
45 | 
46 | val line_index : t -> int -> int
47 |   (** [line_index set ofs] returns the line number of the line
48 |       containing [ofs]. *)
49 | 
50 | val line_start : t -> int -> int
51 |   (** [line_start set idx] returns the offset of the beginning of the
52 |       [idx]th line of [set] . *)
53 | 
54 | val line_stop : t -> int -> int
55 |   (** [line_stop set idx] returns the offset of the end of the
56 |       [idx]th line of [set] . *)
57 | 
58 | val line_length : t -> int -> int
59 |   (** [line_length set idx] returns the length of the
60 |       [idx]th line of [set] . *)
61 | 
62 | val append : t -> t -> t
63 |   (** [append s1 s2] concatenates two sets of line positions. *)
64 | 
65 | val insert : t -> int -> t -> t
66 |   (** [insert set offset set'] inserts [set] at given positon in
67 |       [set'].*)
68 | 
69 | val remove : t -> int -> int -> t
70 |   (** [remove set offet length] removes [length] characters at
71 |       [offset] in set. *)
72 | 
73 | val replace : t -> int -> int -> t -> t
74 |   (** [replace set offset length repl] replaces the subset at offset
75 |       [offset] and length [length] by [repl] in [set]. *)
76 | 
77 | val get_idx_by_width : t -> int -> int -> int
78 |   (** [get_idx_by_width set row column_width] return the offset of the char
79 |       at \[row, column_width\]. *)
80 | 
81 | 


--------------------------------------------------------------------------------
/src/zed_macro.ml:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * zed_macro.ml
 3 |  * ------------
 4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
 5 |  * Licence   : BSD3
 6 |  *
 7 |  * This file is a part of Zed, an editor engine.
 8 |  *)
 9 | 
10 | open React
11 | 
12 | type 'a t = {
13 |   recording : bool signal;
14 |   set_recording : bool -> unit;
15 |   mutable tmp_macro : 'a list;
16 |   mutable macro : 'a list;
17 |   count : int signal;
18 |   set_count : int -> unit;
19 |   counter : int signal;
20 |   set_counter : int -> unit;
21 | }
22 | 
23 | let create macro =
24 |   let recording, set_recording = S.create false in
25 |   let count, set_count = S.create 0 in
26 |   let counter, set_counter = S.create 0 in
27 |   {
28 |     recording;
29 |     set_recording;
30 |     macro;
31 |     tmp_macro = [];
32 |     count;
33 |     set_count;
34 |     counter;
35 |     set_counter;
36 |   }
37 | 
38 | let recording r = r.recording
39 | 
40 | let get_recording r = S.value r.recording
41 | 
42 | let set_recording r state =
43 |   match state with
44 |     | true ->
45 |         r.tmp_macro <- [];
46 |         r.set_recording true;
47 |         r.set_count 0;
48 |         r.set_counter 0
49 |     | false ->
50 |         if S.value r.recording then begin
51 |           r.macro <- List.rev r.tmp_macro;
52 |           r.tmp_macro <- [];
53 |           r.set_recording false;
54 |           r.set_count 0
55 |         end
56 | 
57 | let cancel r =
58 |   if S.value r.recording then begin
59 |     r.tmp_macro <- [];
60 |     r.set_recording false;
61 |     r.set_count 0
62 |   end
63 | 
64 | let count r = r.count
65 | let get_count r = S.value r.count
66 | 
67 | let counter r = r.counter
68 | let get_counter r = S.value r.counter
69 | let set_counter r v = r.set_counter v
70 | let add_counter r v = r.set_counter (S.value r.counter + v)
71 | 
72 | let add r x =
73 |   if S.value r.recording then begin
74 |     r.tmp_macro <- x :: r.tmp_macro;
75 |     r.set_count (S.value r.count + 1)
76 |   end
77 | 
78 | let contents r = r.macro
79 | 


--------------------------------------------------------------------------------
/src/zed_macro.mli:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * zed_macro.mli
 3 |  * -------------
 4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
 5 |  * Licence   : BSD3
 6 |  *
 7 |  * This file is a part of Zed, an editor engine.
 8 |  *)
 9 | 
10 | (** Macro recorder *)
11 | 
12 | type 'a t
13 |   (** Type of macro recorders. *)
14 | 
15 | val create : 'a list -> 'a t
16 |   (** [create macro] create a new macro recorder, with initial
17 |       contents [macro]. *)
18 | 
19 | val recording : 'a t -> bool React.signal
20 |   (** Whether the recorder is recording a macro. *)
21 | 
22 | val get_recording : 'a t -> bool
23 |   (** Returns the current state of the recorder. *)
24 | 
25 | val set_recording : 'a t -> bool -> unit
26 |   (** Starts or stops the macro recorder. *)
27 | 
28 | val cancel : 'a t -> unit
29 |   (** Cancels the current macro if recording one. *)
30 | 
31 | val count : 'a t -> int React.signal
32 |   (** The number of actions in the macro recorder. It is [0] if the
33 |       recorder is not currently recording. *)
34 | 
35 | val get_count : 'a t -> int
36 |   (** Returns the current number of actions in the macro recorder. *)
37 | 
38 | val add : 'a t -> 'a -> unit
39 |   (** [add recorder x] adds [x] to the recorder if it is recording a
40 |       macro. *)
41 | 
42 | val contents : 'a t -> 'a list
43 |   (** Returns the currently recorded macro. *)
44 | 
45 | val counter : 'a t -> int React.signal
46 |   (** The contents of the macro counter. *)
47 | 
48 | val get_counter : 'a t -> int
49 |   (** Gets the contents of the macro counter. *)
50 | 
51 | val set_counter : 'a t -> int -> unit
52 |   (** Sets the macro counter to the given value. *)
53 | 
54 | val add_counter : 'a t -> int -> unit
55 |   (** Adds the given value to the macro counter. *)
56 | 


--------------------------------------------------------------------------------
/src/zed_rope.ml:
--------------------------------------------------------------------------------
   1 | (*
   2 |  * zed_rope.ml
   3 |  * -----------
   4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
   5 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
   6 |  * Licence   : BSD3
   7 |  *
   8 |  * This file is a part of Zed, an editor engine.
   9 |  *)
  10 | 
  11 | (* Maximum length of a leaf *)
  12 | let max_leaf_size= 256
  13 | 
  14 | exception Out_of_bounds
  15 | 
  16 | (* +-----------------------------------------------------------------+
  17 |    | Ropes representation                                            |
  18 |    +-----------------------------------------------------------------+ *)
  19 | 
  20 | type t=
  21 |   (* the size is the number of Uchar.t in the rope *)
  22 |   | Leaf of Zed_string.t * (int * int)
  23 |     (* [Leaf(str, (len, size))] *)
  24 |   | Node of int * (int * int) * t * (int * int) * t
  25 |     (* [Node(depth, (length_left, size_left), left, (length_right, size_right), right)] *)
  26 | 
  27 | type rope= t
  28 | 
  29 | let empty ()= Leaf (Zed_string.empty (), (0, 0))
  30 | 
  31 | (* +-----------------------------------------------------------------+
  32 |    | Basic operations                                                |
  33 |    +-----------------------------------------------------------------+ *)
  34 | 
  35 | let length= function
  36 |   | Leaf(_, (len, _)) -> len
  37 |   | Node(_, (len_l,_), _, (len_r,_), _) -> len_l + len_r
  38 | 
  39 | let size= function
  40 |   | Leaf(_, (_,size)) -> size
  41 |   | Node(_, (_,size_l), _, (_,size_r), _) -> size_l + size_r
  42 | 
  43 | let depth= function
  44 |   | Leaf _ -> 0
  45 |   | Node(d, _, _, _, _) -> d
  46 | 
  47 | let is_empty= function
  48 |   | Leaf(_, (0, 0)) -> true
  49 |   | _ -> false
  50 | 
  51 | let rec trim_hd t=
  52 |   match t with
  53 |   | Leaf (str, (l, _))->
  54 |     let hd, _= Zed_string.extract_next str 0 in
  55 |     let hd= hd
  56 |       |> Zed_char.to_utf8
  57 |       |> Zed_string.unsafe_of_utf8
  58 |     in
  59 |     let after= Zed_string.after str 1 in
  60 |     let size= Zed_string.size after in
  61 |     (Leaf (after, (l-1, size)), hd)
  62 |   | Node (d, (ll, _sl), l, (lr, sr), r)->
  63 |     let t, hd= trim_hd l in
  64 |     let size= size t in
  65 |     (Node (d, (ll-1, size), t, (lr, sr), r), hd)
  66 | 
  67 | let append_cm t cm=
  68 |   let size= Zed_string.size cm in
  69 |   let rec append_cm t=
  70 |     match t with
  71 |     | Leaf (str, (l, s))-> Leaf (Zed_string.append str cm, (l, s + size))
  72 |     | Node (d, (ll, sl), l, (lr, sr), r)->
  73 |       Node (d, (ll, sl), l, (lr, sr + size), append_cm r)
  74 |   in
  75 |   append_cm t
  76 | 
  77 | (* +-----------------------------------------------------------------+
  78 |    | Balancing                                                       |
  79 |    +-----------------------------------------------------------------+ *)
  80 | 
  81 | let rec make_fibo acc a b=
  82 |   let c= a + b in
  83 |   if c < b then
  84 |     (* overflow *)
  85 |     acc
  86 |   else
  87 |     make_fibo (c :: acc) b c
  88 | 
  89 | let fibo=
  90 |   let l= make_fibo [1; 1; 0] 1 1 in
  91 |   let n= List.length l in
  92 |   let fibo= Array.make n 0 in
  93 |   let rec loop i= function
  94 |     | [] ->
  95 |         fibo
  96 |     | x :: l ->
  97 |         fibo.(i) <- x;
  98 |         loop (i - 1) l
  99 |   in
 100 |   loop (n - 1) l
 101 | 
 102 | let max_depth= Array.length fibo
 103 | 
 104 | let unsafe_concat rope1 rope2=
 105 |   match rope1, rope2 with
 106 |     | Leaf(_, (0,_)), _ -> rope2
 107 |     | _, Leaf(_, (0,_)) -> rope1
 108 |     | _ -> Node(
 109 |       1 + max (depth rope1) (depth rope2),
 110 |       (length rope1, size rope1), rope1,
 111 |       (length rope2, size rope2), rope2)
 112 | 
 113 | let rec insert_to_forest forest acc idx=
 114 |   let acc= unsafe_concat forest.(idx) acc in
 115 |   if length acc < fibo.(idx + 1) then
 116 |     forest.(idx) <- acc
 117 |   else begin
 118 |     forest.(idx) <- empty ();
 119 |     insert_to_forest forest acc (idx + 1)
 120 |   end
 121 | 
 122 | let rec concat_forest_until forest acc idx rope=
 123 |   if length rope < fibo.(idx + 1) then
 124 |     insert_to_forest forest (unsafe_concat acc rope) idx
 125 |   else begin
 126 |     let acc= unsafe_concat forest.(idx) acc in
 127 |     forest.(idx) <- empty ();
 128 |     concat_forest_until forest acc (idx + 1) rope
 129 |   end
 130 | 
 131 | let rec balance_rec forest rope=
 132 |   match rope with
 133 |     | Leaf _ ->
 134 |         concat_forest_until forest (empty ()) 2 rope
 135 |     | Node(_depth, _len_l, rope_l, _len_r, rope_r) ->
 136 |         balance_rec forest rope_l;
 137 |         balance_rec forest rope_r
 138 | 
 139 | let rec concat_forest forest acc idx=
 140 |   if idx = max_depth then
 141 |     acc
 142 |   else
 143 |     concat_forest forest (unsafe_concat forest.(idx) acc) (idx + 1)
 144 | 
 145 | let balance rope=
 146 |   match length rope with
 147 |     | 0 | 1 ->
 148 |         rope
 149 |     | len when len >= fibo.(depth rope + 2) ->
 150 |         rope
 151 |     | _len ->
 152 |         let forest= Array.make max_depth (empty ()) in
 153 |         balance_rec forest rope;
 154 |         concat_forest forest (empty ()) 2
 155 | 
 156 | 
 157 | (* +-----------------------------------------------------------------+
 158 |    | Leaf operations                                               |
 159 |    +-----------------------------------------------------------------+ *)
 160 | 
 161 | let rec unsafe_get idx rope =
 162 |   match rope with
 163 |     | Leaf(text, _) ->
 164 |         Zed_string.get text idx
 165 |     | Node(_, (len_l,_), rope_l, _len_r, rope_r) ->
 166 |         if idx < len_l then
 167 |           unsafe_get idx rope_l
 168 |         else
 169 |           unsafe_get (idx - len_l) rope_r
 170 | 
 171 | let get rope idx =
 172 |   if idx < 0 || idx >= length rope then
 173 |     raise Out_of_bounds
 174 |   else
 175 |     unsafe_get idx rope
 176 | 
 177 | let rec unsafe_get_raw idx rope =
 178 |   match rope with
 179 |     | Leaf(text, _) ->
 180 |         Zed_string.get_raw text idx
 181 |     | Node(_, (_,size_l), rope_l, _len_r, rope_r) ->
 182 |         if idx < size_l then
 183 |           unsafe_get_raw idx rope_l
 184 |         else
 185 |           unsafe_get_raw (idx - size_l) rope_r
 186 | 
 187 | let get_raw rope idx =
 188 |   if idx < 0 || idx >= size rope then
 189 |     raise Out_of_bounds
 190 |   else
 191 |     unsafe_get_raw idx rope
 192 | 
 193 | let append rope1 rope2 =
 194 |   let len_12_comb=
 195 |     if length rope1 > 0 && length rope2 > 0 then
 196 |       Zed_char.is_combining_mark (Zed_char.core (get rope2 0))
 197 |     else
 198 |       false
 199 |   in
 200 |   let len12 l1 l2= if len_12_comb then l1 + l2 - 1 else l1 + l2 in
 201 |   match rope1, rope2 with
 202 |     | Leaf(_, (0,_)), _ ->
 203 |         rope2
 204 |     | _, Leaf(_, (0,_)) ->
 205 |         rope1
 206 |     | Leaf(text1, (len1, size1)), Leaf(text2, (len2, size2))
 207 |       when len12 len1 len2 <= max_leaf_size ->
 208 |         Leaf(Zed_string.append text1 text2, (len12 len1 len2, size1+size2))
 209 |     | Node(d, len_l, rope_l, _, Leaf(text1, (len1,size1))), Leaf(text2, (len2,size2))
 210 |       when len12 len1 len2 <= max_leaf_size ->
 211 |       let ls= len12 len1 len2, size1+size2 in
 212 |         Node(
 213 |           d,
 214 |           len_l,
 215 |           rope_l,
 216 |           ls,
 217 |           Leaf(Zed_string.append text1 text2, ls))
 218 |     | Leaf(text1, (len1,size1)), Node(d, _, Leaf(text2, (len2,size2)), len_r, rope_r)
 219 |       when len12 len1 len2 <= max_leaf_size ->
 220 |       let ls= len12 len1 len2, size1+size2 in
 221 |         Node(
 222 |           d,
 223 |           ls,
 224 |           Leaf(Zed_string.append text1 text2, ls),
 225 |           len_r,
 226 |           rope_r)
 227 |     | _ ->
 228 |       let rope1, rope2=
 229 |         if length rope1 > 0 && length rope2 > 0 then
 230 |           if Zed_char.is_combining_mark (Zed_char.core (get rope2 0)) then
 231 |             let r2, hd= trim_hd rope2 in
 232 |             let r1= append_cm rope1 hd in
 233 |             r1, r2
 234 |           else
 235 |             rope1, rope2
 236 |         else
 237 |           rope1, rope2
 238 |       in
 239 |       balance (Node(
 240 |         1 + max (depth rope1) (depth rope2),
 241 |         (length rope1, size rope1), rope1,
 242 |         (length rope2, size rope2), rope2))
 243 | 
 244 | let concat sep l =
 245 |   let rec loop acc = function
 246 |     | [] -> acc
 247 |     | x :: l -> loop (append (append acc sep) x) l
 248 |   in
 249 |   match l with
 250 |     | [] -> empty ()
 251 |     | x :: l -> loop x l
 252 | 
 253 | let rec unsafe_sub rope idx len =
 254 |   match rope with
 255 |     | Leaf(text, _) ->
 256 |       let str= Zed_string.sub ~pos:idx ~len text in
 257 |       let size= Zed_string.size str in
 258 |         Leaf(str, (len,size))
 259 |     | Node(_, (len_l,_), rope_l, (len_r,_), rope_r) ->
 260 |         if len = len_l + len_r then
 261 |           rope
 262 |         else if idx >= len_l then
 263 |           unsafe_sub rope_r (idx - len_l) len
 264 |         else if idx + len <= len_l then
 265 |           unsafe_sub rope_l idx len
 266 |         else
 267 |           append
 268 |             (unsafe_sub rope_l idx (len_l - idx))
 269 |             (unsafe_sub rope_r 0 (len - len_l + idx))
 270 | 
 271 | let sub rope idx len =
 272 |   if idx < 0 || len < 0 || idx + len > length rope then
 273 |     raise Out_of_bounds
 274 |   else
 275 |     unsafe_sub rope idx len
 276 | 
 277 | let make length char =
 278 |   if length < max_leaf_size then
 279 |     Leaf(Zed_string.make length char, (length, length))
 280 |   else begin
 281 |     let text = Zed_string.make max_leaf_size char in
 282 |     let chunk = Leaf(text, (max_leaf_size, max_leaf_size)) in
 283 |     let rec loop acc n =
 284 |       if n = 0 then
 285 |         acc
 286 |       else if n < max_leaf_size then
 287 |         let str= Zed_string.sub ~pos:0 ~len:n text in
 288 |         let size= Zed_string.size str in
 289 |         append acc (Leaf(str, (n, size)))
 290 |       else
 291 |         loop (append acc chunk) (n - max_leaf_size)
 292 |     in
 293 |     loop (empty ()) length
 294 |   end
 295 | 
 296 | let singleton ch =
 297 |   Leaf(Zed_string.make 1 ch, (1, 1))
 298 | 
 299 | let break rope pos =
 300 |   let len = length rope in
 301 |   if pos < 0 || pos > len then raise Out_of_bounds;
 302 |   (unsafe_sub rope 0 pos, unsafe_sub rope pos (len - pos))
 303 | 
 304 | let before rope pos =
 305 |   sub rope 0 pos
 306 | 
 307 | let after rope pos =
 308 |   sub rope pos (length rope - pos)
 309 | 
 310 | let insert rope pos sub =
 311 |   let before, after = break rope pos in
 312 |   append before (append sub after)
 313 | 
 314 | let remove rope pos len =
 315 |   append (sub rope 0 pos) (sub rope (pos + len) (length rope - pos - len))
 316 | 
 317 | let replace rope pos len repl =
 318 |   append (sub rope 0 pos) (append repl (sub rope (pos + len) (length rope - pos - len)))
 319 | 
 320 | let insert_uChar rope pos ch =
 321 |   if Uchar.to_int ch = 0 then
 322 |     rope
 323 |   else
 324 |     if Zed_char.is_combining_mark ch then
 325 |       if length rope = 0 then
 326 |         failwith "inserting an individual combining mark"
 327 |       else
 328 |         if pos = 0 then
 329 |           failwith "inserting an individual combining mark"
 330 |         else
 331 |           let pos= if pos > 0 then pos - 1 else pos in
 332 |           let glyph= get rope pos in
 333 |           if Zed_char.is_printable_core (Zed_char.core glyph) then
 334 |             let glyph= Zed_char.append glyph ch in
 335 |             replace rope pos 1 (Leaf (Zed_string.implode [glyph], (1, 1)))
 336 |           else
 337 |             failwith "inserting an individual combining mark"
 338 |     else
 339 |       let sub= Leaf (Zed_string.implode [Zed_char.unsafe_of_uChar ch], (1, 1)) in
 340 |       insert rope pos sub
 341 | 
 342 | let lchop = function
 343 |   | Leaf(_, (0,_)) -> empty ()
 344 |   | rope -> sub rope 1 (length rope - 1)
 345 | 
 346 | let rchop = function
 347 |   | Leaf(_, (0,_)) -> empty ()
 348 |   | rope -> sub rope 0 (length rope - 1)
 349 | 
 350 | (* +-----------------------------------------------------------------+
 351 |    | Iterating, folding and mapping                                  |
 352 |    +-----------------------------------------------------------------+ *)
 353 | 
 354 | let rec iter f = function
 355 |   | Leaf(text, _) ->
 356 |       Zed_string.iter f text
 357 |   | Node(_, _, rope_l, _, rope_r) ->
 358 |       iter f rope_l;
 359 |       iter f rope_r
 360 | 
 361 | let rec rev_iter f = function
 362 |   | Leaf(text, _) ->
 363 |       Zed_string.rev_iter f text
 364 |   | Node(_, _, rope_l, _, rope_r) ->
 365 |       rev_iter f rope_r;
 366 |       rev_iter f rope_l
 367 | 
 368 | let rec fold f rope acc =
 369 |   match rope with
 370 |     | Leaf(text, _) ->
 371 |         Zed_string.fold f text acc
 372 |     | Node(_, _, rope_l, _, rope_r) ->
 373 |         fold f rope_r (fold f rope_l acc)
 374 | 
 375 | let rec rev_fold f rope acc =
 376 |   match rope with
 377 |     | Leaf(text, _) ->
 378 |         Zed_string.rev_fold f text acc
 379 |     | Node(_, _, rope_l, _, rope_r) ->
 380 |         rev_fold f rope_l (rev_fold f rope_r acc)
 381 | 
 382 | let rec map f = function
 383 |   | Leaf(txt, len) ->
 384 |       Leaf(Zed_string.map f txt, len)
 385 |   | Node(depth, length_l, rope_l, length_r, rope_r) ->
 386 |       let rope_l' = map f rope_l in
 387 |       let rope_r' = map f rope_r in
 388 |       Node(depth, length_l, rope_l', length_r, rope_r')
 389 | 
 390 | let rec rev_map f = function
 391 |   | Leaf(txt, len) ->
 392 |       Leaf(Zed_string.rev_map f txt, len)
 393 |   | Node(depth, length_l, rope_l, length_r, rope_r) ->
 394 |       let rope_l' = rev_map f rope_l in
 395 |       let rope_r' = rev_map f rope_r in
 396 |       Node(depth, length_r, rope_r', length_l, rope_l')
 397 | 
 398 | let rec iter_leaf f = function
 399 |   | Leaf(text, _) ->
 400 |       f text
 401 |   | Node(_, _, rope_l, _, rope_r) ->
 402 |       iter_leaf f rope_l;
 403 |       iter_leaf f rope_r
 404 | 
 405 | let rec rev_iter_leaf f = function
 406 |   | Leaf(text, _) ->
 407 |       f text
 408 |   | Node(_, _, rope_l, _, rope_r) ->
 409 |       rev_iter_leaf f rope_r;
 410 |       rev_iter_leaf f rope_l
 411 | 
 412 | let rec fold_leaf f rope acc =
 413 |   match rope with
 414 |     | Leaf(text, _) ->
 415 |         f text acc
 416 |     | Node(_, _, rope_l, _, rope_r) ->
 417 |         fold_leaf f rope_r (fold_leaf f rope_l acc)
 418 | 
 419 | let rec rev_fold_leaf f rope acc =
 420 |   match rope with
 421 |     | Leaf(text, _) ->
 422 |         f text acc
 423 |     | Node(_, _, rope_l, _, rope_r) ->
 424 |         rev_fold_leaf f rope_l (rev_fold_leaf f rope_r acc)
 425 | 
 426 | (* +-----------------------------------------------------------------+
 427 |    | Comparison                                                      |
 428 |    +-----------------------------------------------------------------+ *)
 429 | 
 430 | let rec cmp_loop str1 ofs1 str2 ofs2 rest1 rest2 =
 431 |   if ofs1 = Zed_string.bytes str1 then
 432 |     match rest1 with
 433 |       | [] ->
 434 |           if ofs2 = Zed_string.length str2 && rest2 = [] then
 435 |             0
 436 |           else
 437 |             -1
 438 |       | rope1 :: rest1 ->
 439 |           cmp_search1 rope1 str2 ofs2 rest1 rest2
 440 |   else if ofs2 = Zed_string.bytes str2 then
 441 |     match rest2 with
 442 |       | [] ->
 443 |           1
 444 |       | rope2 :: rest2 ->
 445 |           cmp_search2 rope2 str1 ofs1 rest1 rest2
 446 |   else
 447 |     let chr1, ofs1 = Zed_string.extract_next str1 ofs1
 448 |     and chr2, ofs2 = Zed_string.extract_next str2 ofs2 in
 449 |     let d = Zed_char.compare_raw chr1 chr2 in
 450 |     if d = 0 then
 451 |       cmp_loop str1 ofs1 str2 ofs2 rest1 rest2
 452 |     else
 453 |       d
 454 | 
 455 | and cmp_search1 rope1 str2 ofs2 rest1 rest2 =
 456 |   match rope1 with
 457 |     | Leaf(str1, _) ->
 458 |         cmp_loop str1 0 str2 ofs2 rest1 rest2
 459 |     | Node(_, _, rope1_l, _, rope1_r) ->
 460 |         cmp_search1 rope1_l str2 ofs2 (rope1_r :: rest1) rest2
 461 | 
 462 | and cmp_search2 rope2 str1 ofs1 rest1 rest2 =
 463 |   match rope2 with
 464 |     | Leaf(str2, _) ->
 465 |         cmp_loop str1 ofs1 str2 0 rest1 rest2
 466 |     | Node(_, _, rope2_l, _, rope2_r) ->
 467 |         cmp_search2 rope2_l str1 ofs1 rest1 (rope2_r :: rest2)
 468 | 
 469 | let rec cmp_init rope1 rope2 rest1 =
 470 |   match rope1 with
 471 |     | Leaf(str1, _) ->
 472 |         cmp_search2 rope2 str1 0 rest1 []
 473 |     | Node(_, _, rope1_l, _, rope1_r) ->
 474 |         cmp_init rope1_l rope2 (rope1_r :: rest1)
 475 | 
 476 | let compare r1 r2 = cmp_init r1 r2 []
 477 | 
 478 | let equal r1 r2 = length r1 = length r2 && compare r1 r2 = 0
 479 | 
 480 | 
 481 | (* +-----------------------------------------------------------------+
 482 |    | Zippers                                                         |
 483 |    +-----------------------------------------------------------------+ *)
 484 | 
 485 | module Zip = struct
 486 |   type rope_zipper = {
 487 |     str : Zed_string.t;
 488 |     (* The string of the current leaf. *)
 489 |     ofs : int;
 490 |     (* The offset of the current leaf in the whole rope. *)
 491 |     leaf : t;
 492 |     (* The current leaf. *)
 493 |     rest_b : t list;
 494 |     rest_f : t list;
 495 |   }
 496 | 
 497 |   type t = {
 498 |     idx : int;
 499 |     (* The index in byte of the zipper in the current leaf. *)
 500 |     pos : int;
 501 |     (* The index in character of the zipper in the current leaf. *)
 502 |     zip : rope_zipper;
 503 |   }
 504 | 
 505 |   let rec make_rec ofs rope pos rest_b rest_f =
 506 |     match rope with
 507 |     | Leaf(str, _) ->
 508 |       { idx= Zed_string.move str 0 pos;
 509 |         pos = pos;
 510 |         zip = { str; ofs = ofs - pos; leaf = rope; rest_b; rest_f } }
 511 |     | Node(_, _, r1, _, r2) ->
 512 |       let len1 = length r1 in
 513 |       if pos < len1 then
 514 |         make_rec ofs r1 pos rest_b (r2 :: rest_f)
 515 |       else
 516 |         make_rec ofs r2 (pos - len1) (r1 :: rest_b) rest_f
 517 | 
 518 |   let make_f rope pos =
 519 |     if pos < 0 || pos > length rope then raise Out_of_bounds;
 520 |     make_rec pos rope pos [] []
 521 | 
 522 |   let make_b rope pos =
 523 |     let len = length rope in
 524 |     if pos < 0 || pos > len then raise Out_of_bounds;
 525 |     let pos = len - pos in
 526 |     make_rec pos rope pos [] []
 527 | 
 528 |   let offset zip =
 529 |     zip.zip.ofs + zip.pos
 530 | 
 531 |   let rec next_leaf ofs rope rest_b rest_f =
 532 |     match rope with
 533 |     | Leaf(str, _) ->
 534 |       let chr, idx= Zed_string.extract_next str 0 in
 535 |       (chr,
 536 |        { idx;
 537 |          pos = 1;
 538 |          zip = { str; ofs; leaf = rope; rest_b; rest_f } })
 539 |     | Node(_, _, r1, _, r2) ->
 540 |       next_leaf ofs r1 rest_b (r2 :: rest_f)
 541 | 
 542 |   let next zip =
 543 |     if zip.idx = Zed_string.bytes zip.zip.str then
 544 |       match zip.zip.rest_f with
 545 |       | [] ->
 546 |         raise Out_of_bounds
 547 |       | rope :: rest ->
 548 |         next_leaf (zip.zip.ofs + length zip.zip.leaf) rope (zip.zip.leaf :: zip.zip.rest_b) rest
 549 |     else
 550 |       let chr, idx= Zed_string.extract_next zip.zip.str zip.idx in
 551 |       (chr, { zip with idx; pos = zip.pos + 1 })
 552 | 
 553 |   let rec prev_leaf ofs rope rest_b rest_f =
 554 |     match rope with
 555 |     | Leaf(str, (len,_size)) ->
 556 |       let chr, idx=
 557 |         Zed_string.extract_prev str (Zed_string.bytes str)
 558 |       in
 559 |       (chr,
 560 |        { idx;
 561 |          pos = len - 1;
 562 |          zip = { str; ofs = ofs - len; leaf = rope; rest_b; rest_f } })
 563 |     | Node(_, _, r1, _, r2) ->
 564 |       prev_leaf ofs r2 (r1 :: rest_b) rest_f
 565 | 
 566 |   let prev zip =
 567 |     if zip.pos = 0 then
 568 |       match zip.zip.rest_b with
 569 |       | [] ->
 570 |         raise Out_of_bounds
 571 |       | rope :: rest ->
 572 |         prev_leaf zip.zip.ofs rope rest (zip.zip.leaf :: zip.zip.rest_f)
 573 |     else
 574 |       let chr, idx= Zed_string.extract_prev zip.zip.str zip.idx in
 575 |       (chr, { zip with idx; pos = zip.pos - 1 })
 576 | 
 577 |   let rec move_f n ofs rope rest_b rest_f =
 578 |     match rope with
 579 |     | Leaf(str, (len,_size)) ->
 580 |       if n <= len then
 581 |         { idx= Zed_string.move str 0 n;
 582 |           pos = n;
 583 |           zip = { str; ofs; leaf = rope; rest_b; rest_f } }
 584 |       else begin
 585 |         match rest_f with
 586 |         | [] ->
 587 |           raise Out_of_bounds
 588 |         | rope' :: rest_f ->
 589 |           move_f (n - len) (ofs + len) rope' (rope :: rest_b) rest_f
 590 |       end
 591 |     | Node(_, _, r1, _, r2) ->
 592 |       move_f n ofs r1 rest_b (r2 :: rest_f)
 593 | 
 594 |   let rec move_b n ofs rope rest_b rest_f =
 595 |     match rope with
 596 |     | Leaf(str, (len,_size)) ->
 597 |       if n <= len then
 598 |         { idx= Zed_string.move str (Zed_string.bytes str) (-n);
 599 |           pos = len - n;
 600 |           zip = { str; ofs; leaf = rope; rest_b; rest_f } }
 601 |       else begin
 602 |         match rest_b with
 603 |         | [] ->
 604 |           raise Out_of_bounds
 605 |         | rope' :: rest_b ->
 606 |           move_b (n - len) (ofs - len) rope' rest_b (rope :: rest_f)
 607 |       end
 608 |     | Node(_, _, r1, _, r2) ->
 609 |       move_b n ofs r2 (r1 :: rest_b) rest_f
 610 | 
 611 |   let move n zip =
 612 |     if n > 0 then
 613 |       let len = length zip.zip.leaf in
 614 |       if zip.pos + n <= len then
 615 |         { zip with
 616 |           idx= Zed_string.move zip.zip.str zip.idx n;
 617 |           pos = zip.pos + n }
 618 |       else
 619 |         match zip.zip.rest_f with
 620 |         | [] ->
 621 |           raise Out_of_bounds
 622 |         | rope :: rest_f ->
 623 |           move_f
 624 |             (n - (len - zip.pos))
 625 |             (zip.zip.ofs + len)
 626 |             rope
 627 |             (zip.zip.leaf :: zip.zip.rest_b)
 628 |             rest_f
 629 |     else
 630 |     if zip.pos + n >= 0 then
 631 |       { zip with
 632 |         idx= Zed_string.move zip.zip.str zip.idx n;
 633 |         pos = zip.pos + n }
 634 |     else
 635 |       match zip.zip.rest_b with
 636 |       | [] ->
 637 |         raise Out_of_bounds
 638 |       | rope :: rest_b ->
 639 |         move_b
 640 |           (n - zip.pos)
 641 |           zip.zip.ofs
 642 |           rope
 643 |           rest_b
 644 |           (zip.zip.leaf :: zip.zip.rest_f)
 645 | 
 646 |   let at_bos zip= zip.zip.rest_b = [] && zip.idx = 0
 647 |   let at_eos zip= zip.zip.rest_f = [] && zip.idx = Zed_string.bytes zip.zip.str
 648 | 
 649 |   let rec sub_rec acc ropes len =
 650 |     match ropes with
 651 |     | [] ->
 652 |       if len > 0 then
 653 |         raise Out_of_bounds
 654 |       else
 655 |         acc
 656 |     | rope :: rest ->
 657 |       let len' = length rope in
 658 |       if len <= len' then
 659 |         append acc (sub rope 0 len)
 660 |       else
 661 |         sub_rec (append acc rope) rest (len - len')
 662 | 
 663 |   let sub zip len =
 664 |     if len < 0 then
 665 |       raise Out_of_bounds
 666 |     else
 667 |       let len' = length zip.zip.leaf - zip.pos in
 668 |       if len <= len' then
 669 |         let str= Zed_string.sub ~pos:zip.pos ~len zip.zip.str in
 670 |         let size= Zed_string.size str in
 671 |         Leaf(str, (len,size))
 672 |       else
 673 |         let str= Zed_string.sub ~pos:zip.pos ~len:(Zed_string.length zip.zip.str - zip.pos) zip.zip.str in
 674 |         let size= Zed_string.size str in
 675 |         sub_rec (Leaf(str, (len',size))) zip.zip.rest_f (len - len')
 676 | 
 677 |   let slice zip1 zip2 =
 678 |     let ofs1 = offset zip1 and ofs2 = offset zip2 in
 679 |     if ofs1 <= ofs2 then
 680 |       sub zip1 (ofs2 - ofs1)
 681 |     else
 682 |       sub zip2 (ofs1 - ofs2)
 683 | 
 684 |   let rec find_f f zip =
 685 |     if at_eos zip then
 686 |       zip
 687 |     else
 688 |       let ch, zip' = next zip in
 689 |       if f ch then
 690 |         zip
 691 |       else
 692 |         find_f f zip'
 693 | 
 694 |   let rec find_b f zip =
 695 |     if at_bos zip then
 696 |       zip
 697 |     else
 698 |       let ch, zip' = prev zip in
 699 |       if f ch then
 700 |         zip
 701 |       else
 702 |         find_b f zip'
 703 | end
 704 | 
 705 | module Zip_raw = struct
 706 |   type rope_zipper = {
 707 |     str : Zed_string.t;
 708 |     (* The string of the current leaf. *)
 709 |     ofs : int;
 710 |     (* The offset of the current leaf in the whole rope. *)
 711 |     leaf : t;
 712 |     (* The current leaf. *)
 713 |     rest_b : t list;
 714 |     rest_f : t list;
 715 |   }
 716 | 
 717 |   type t = {
 718 |     idx : int;
 719 |     (* The index in byte of the zipper in the current leaf. *)
 720 |     pos : int;
 721 |     (* The index in character of the zipper in the current leaf. *)
 722 |     zip : rope_zipper;
 723 |   }
 724 | 
 725 |   let rec make_f_rec ofs rope pos rest_b rest_f =
 726 |     match rope with
 727 |     | Leaf(str, _) ->
 728 |       { idx= Zed_string.move_raw str 0 pos;
 729 |         pos = pos;
 730 |         zip = { str; ofs = ofs - pos; leaf = rope; rest_b; rest_f } }
 731 |     | Node(_, _, r1, _, r2) ->
 732 |       let size1= size r1 in
 733 |       if pos < size1 then
 734 |         make_f_rec ofs r1 pos rest_b (r2 :: rest_f)
 735 |       else
 736 |         make_f_rec ofs r2 (pos - size1) (r1 :: rest_b) rest_f
 737 | 
 738 |   let make_f rope pos =
 739 |     if pos < 0 || pos > size rope then raise Out_of_bounds;
 740 |     make_f_rec pos rope pos [] []
 741 | 
 742 |   let rec make_b_rec ofs rope pos rest_b rest_f =
 743 |     match rope with
 744 |       | Leaf(str, (len,_)) ->
 745 |           { idx= Zed_string.move_raw str (Zed_string.bytes str) (- (len - pos));
 746 |             pos = pos;
 747 |             zip = { str; ofs = ofs - pos; leaf = rope; rest_b; rest_f } }
 748 |       | Node(_, _, r1, _, r2) ->
 749 |           let len1 = length r1 in
 750 |           if pos < len1 then
 751 |             make_b_rec ofs r1 pos rest_b (r2 :: rest_f)
 752 |           else
 753 |             make_b_rec ofs r2 (pos - len1) (r1 :: rest_b) rest_f
 754 | 
 755 |   let make_b rope pos =
 756 |     let size = size rope in
 757 |     if pos < 0 || pos > size then raise Out_of_bounds;
 758 |     let pos = size - pos in
 759 |     make_b_rec pos rope pos [] []
 760 | 
 761 |   let offset zip =
 762 |     zip.zip.ofs + zip.pos
 763 | 
 764 |   let rec next_leaf ofs rope rest_b rest_f =
 765 |     match rope with
 766 |     | Leaf(str, _) ->
 767 |       let chr, idx= Zed_utf8.unsafe_extract_next (Zed_string.to_utf8 str) 0 in
 768 |       (chr,
 769 |        { idx;
 770 |          pos = 1;
 771 |          zip = { str; ofs; leaf = rope; rest_b; rest_f } })
 772 |     | Node(_, _, r1, _, r2) ->
 773 |       next_leaf ofs r1 rest_b (r2 :: rest_f)
 774 | 
 775 |   let next zip =
 776 |     if zip.pos = Zed_string.size zip.zip.str then
 777 |       match zip.zip.rest_f with
 778 |       | [] ->
 779 |         raise Out_of_bounds
 780 |       | rope :: rest ->
 781 |         next_leaf (zip.zip.ofs + size zip.zip.leaf) rope (zip.zip.leaf :: zip.zip.rest_b) rest
 782 |     else
 783 |       let chr, idx= Zed_utf8.unsafe_extract_next (Zed_string.to_utf8 zip.zip.str) zip.idx in
 784 |       (chr, { zip with idx; pos = zip.pos + 1 })
 785 | 
 786 |   let rec prev_leaf ofs rope rest_b rest_f =
 787 |     match rope with
 788 |     | Leaf(str, (_len, size)) ->
 789 |       let chr, idx =
 790 |         let str= Zed_string.to_utf8 str in
 791 |         Zed_utf8.unsafe_extract_prev str (String.length str)
 792 |       in
 793 |       (chr,
 794 |        { idx;
 795 |          pos = size - 1;
 796 |          zip = { str; ofs = ofs - size; leaf = rope; rest_b; rest_f } })
 797 |     | Node(_, _, r1, _, r2) ->
 798 |       prev_leaf ofs r2 (r1 :: rest_b) rest_f
 799 | 
 800 |   let prev zip =
 801 |     if zip.pos = 0 then
 802 |       match zip.zip.rest_b with
 803 |       | [] ->
 804 |         raise Out_of_bounds
 805 |       | rope :: rest ->
 806 |         prev_leaf zip.zip.ofs rope rest (zip.zip.leaf :: zip.zip.rest_f)
 807 |     else
 808 |       let chr, idx= Zed_utf8.unsafe_extract_prev (Zed_string.to_utf8 zip.zip.str) zip.idx in
 809 |       (chr, { zip with idx; pos = zip.pos - 1 })
 810 | 
 811 |   let rec move_f n ofs rope rest_b rest_f =
 812 |     match rope with
 813 |     | Leaf(str, (_,size)) ->
 814 |       if n <= size then
 815 |         { idx= Zed_string.move_raw str 0 n;
 816 |           pos = n;
 817 |           zip = { str; ofs; leaf = rope; rest_b; rest_f } }
 818 |       else begin
 819 |         match rest_f with
 820 |         | [] ->
 821 |           raise Out_of_bounds
 822 |         | rope' :: rest_f ->
 823 |           move_f (n - size) (ofs + size) rope' (rope :: rest_b) rest_f
 824 |       end
 825 |     | Node(_, _, r1, _, r2) ->
 826 |       move_f n ofs r1 rest_b (r2 :: rest_f)
 827 | 
 828 |   let rec move_b n ofs rope rest_b rest_f =
 829 |     match rope with
 830 |     | Leaf(str, (_,size)) ->
 831 |       if n <= size then
 832 |         { idx= Zed_string.move_raw str (Zed_string.bytes str) (-n);
 833 |           pos = size - n;
 834 |           zip = { str; ofs; leaf = rope; rest_b; rest_f } }
 835 |       else begin
 836 |         match rest_b with
 837 |         | [] ->
 838 |           raise Out_of_bounds
 839 |         | rope' :: rest_b ->
 840 |           move_b (n - size) (ofs - size) rope' rest_b (rope :: rest_f)
 841 |       end
 842 |     | Node(_, _, r1, _, r2) ->
 843 |       move_b n ofs r2 (r1 :: rest_b) rest_f
 844 | 
 845 |   let move n zip =
 846 |     if n > 0 then
 847 |       let size = size zip.zip.leaf in
 848 |       if zip.pos + n <= size then
 849 |         { zip with
 850 |           idx= Zed_string.move_raw zip.zip.str zip.idx n;
 851 |           pos = zip.pos + n }
 852 |       else
 853 |         match zip.zip.rest_f with
 854 |         | [] ->
 855 |           raise Out_of_bounds
 856 |         | rope :: rest_f ->
 857 |           move_f
 858 |             (n - (size - zip.pos))
 859 |             (zip.zip.ofs + size)
 860 |             rope
 861 |             (zip.zip.leaf :: zip.zip.rest_b)
 862 |             rest_f
 863 |     else
 864 |     if zip.pos + n >= 0 then
 865 |       { zip with
 866 |         idx = Zed_string.move_raw zip.zip.str zip.idx (-n);
 867 |         pos = zip.pos + n }
 868 |     else
 869 |       match zip.zip.rest_b with
 870 |       | [] ->
 871 |         raise Out_of_bounds
 872 |       | rope :: rest_b ->
 873 |         move_b
 874 |           (n - zip.pos)
 875 |           zip.zip.ofs
 876 |           rope
 877 |           rest_b
 878 |           (zip.zip.leaf :: zip.zip.rest_f)
 879 | 
 880 |   let at_bos zip= zip.zip.rest_b = [] && zip.idx = 0
 881 |   let at_eos zip= zip.zip.rest_f = [] && zip.idx = Zed_string.bytes zip.zip.str
 882 | 
 883 |   let rec find_f f zip =
 884 |     if at_eos zip then
 885 |       zip
 886 |     else
 887 |       let ch, zip' = next zip in
 888 |       if f ch then
 889 |         zip
 890 |       else
 891 |         find_f f zip'
 892 | 
 893 |   let rec find_b f zip =
 894 |     if at_bos zip then
 895 |       zip
 896 |     else
 897 |       let ch, zip' = prev zip in
 898 |       if f ch then
 899 |         zip
 900 |       else
 901 |         find_b f zip'
 902 | end
 903 | 
 904 | (* +-----------------------------------------------------------------+
 905 |    | Buffers                                                         |
 906 |    +-----------------------------------------------------------------+ *)
 907 | 
 908 | module String_buffer = Buffer
 909 | 
 910 | module Buffer = struct
 911 |   type t = {
 912 |     mutable acc : rope;
 913 |     mutable buf : Zed_string.Buf.buf;
 914 |     mutable idx : int;
 915 |   }
 916 | 
 917 |   let create () = {
 918 |     acc = empty ();
 919 |     buf = Zed_string.Buf.create 1024;
 920 |     idx = 0;
 921 |   }
 922 | 
 923 |   let add buffer x =
 924 |     if buffer.idx = max_leaf_size then begin
 925 |       let str= Zed_string.Buf.contents buffer.buf in
 926 |       let size= Zed_string.size str in
 927 |       buffer.acc <- append buffer.acc (Leaf(str, (max_leaf_size,size)));
 928 |       Zed_string.Buf.reset buffer.buf;
 929 |       Zed_string.Buf.add_zChar buffer.buf x;
 930 |       buffer.idx <- Zed_string.Buf.length buffer.buf
 931 |     end else begin
 932 |       Zed_string.Buf.add_zChar buffer.buf x;
 933 |       buffer.idx <- Zed_string.Buf.length buffer.buf
 934 |     end
 935 | 
 936 |   let add_uChar buffer x =
 937 |     if buffer.idx = max_leaf_size then begin
 938 |       let str= Zed_string.Buf.contents buffer.buf in
 939 |       let size= Zed_string.size str in
 940 |       buffer.acc <- append buffer.acc (Leaf(str, (max_leaf_size,size)));
 941 |       Zed_string.Buf.reset buffer.buf;
 942 |       Zed_string.Buf.add_uChar buffer.buf x;
 943 |       buffer.idx <- Zed_string.Buf.length buffer.buf
 944 |     end else begin
 945 |       Zed_string.Buf.add_uChar buffer.buf x;
 946 |       buffer.idx <- Zed_string.Buf.length buffer.buf
 947 |     end
 948 | 
 949 |   let add_rope buf rope= iter (add buf) rope
 950 |   let add_string buf str= Zed_string.iter (add buf) str
 951 | 
 952 |   let contents buffer =
 953 |     if buffer.idx = 0 then
 954 |       buffer.acc
 955 |     else
 956 |       let str= Zed_string.Buf.contents buffer.buf in
 957 |       let size= Zed_string.size str in
 958 |       append
 959 |         buffer.acc
 960 |         (Leaf (str, (buffer.idx, size)))
 961 | 
 962 |   let reset buffer =
 963 |     Zed_string.Buf.reset buffer.buf;
 964 |     buffer.acc <- empty ();
 965 |     buffer.idx <- 0
 966 | end
 967 | 
 968 | (* +-----------------------------------------------------------------+
 969 |    | Init                                                            |
 970 |    +-----------------------------------------------------------------+ *)
 971 | 
 972 | let init n f =
 973 |   let buf = Buffer.create () in
 974 |   for i = 0 to n - 1 do
 975 |     Buffer.add buf (f i)
 976 |   done;
 977 |   Buffer.contents buf
 978 | 
 979 | let init_from_uChars len f=
 980 |   match len with
 981 |   | 0-> empty ()
 982 |   | len when len > 0 ->
 983 |     let rec create n=
 984 |       if n > 0 then
 985 |         f (len - n) :: create (n-1)
 986 |       else []
 987 |     in
 988 |     let uChars= create len in
 989 |     let zChars, _= Zed_char.zChars_of_uChars uChars in
 990 |     let buf = Buffer.create () in
 991 |     List.iter (Buffer.add buf) zChars;
 992 |     Buffer.contents buf
 993 |   | _-> raise (Invalid_argument "Zed_rope.init_from_uChars")
 994 | 
 995 | let of_string s=
 996 |   let buf= Buffer.create () in
 997 |   Buffer.add_string buf s;
 998 |   Buffer.contents buf
 999 | 
1000 | let rec to_string t=
1001 |   match t with
1002 |   | Leaf (s,_)-> s
1003 |   | Node (_,_,l,_,r)-> Zed_string.append (to_string l) (to_string r)
1004 | 
1005 | let case_map f ?locale:_ t =
1006 |   let buf = Buffer.create () in
1007 |   let rec loop zip =
1008 |     match Zip_raw.next zip with
1009 |     | exception Out_of_bounds ->
1010 |         Buffer.contents buf
1011 |     | u, zip ->
1012 |         begin match f u with
1013 |         | `Self -> Buffer.add_uChar buf u
1014 |         | `Uchars us -> List.iter (Buffer.add_uChar buf) us
1015 |         end;
1016 | 	loop zip
1017 |   in
1018 |   loop (Zip_raw.make_f t 0)
1019 | 
1020 | let lowercase ?locale t =
1021 |   case_map Uucp.Case.Map.to_lower ?locale t
1022 | 
1023 | let uppercase ?locale t =
1024 |   case_map Uucp.Case.Map.to_upper ?locale t
1025 | 


--------------------------------------------------------------------------------
/src/zed_rope.mli:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_rope.mli
  3 |  * ------------
  4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
  5 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
  6 |  * Licence   : BSD3
  7 |  *
  8 |  * This file is a part of Zed, an editor engine.
  9 |  *)
 10 | 
 11 | (** Unicode ropes *)
 12 | 
 13 | type t
 14 |   (** Type of unicode ropes. *)
 15 | 
 16 | type rope = t
 17 |     (** Alias. *)
 18 | 
 19 | exception Out_of_bounds
 20 |   (** Exception raised when trying to access a character which is
 21 |       outside the bounds of a rope. *)
 22 | 
 23 | (** {5 Construction} *)
 24 | 
 25 | val empty : unit -> rope
 26 |   (** The empty rope. *)
 27 | 
 28 | val make : int -> Zed_char.t -> rope
 29 |   (** [make length char] creates a rope of length [length] containing only [char]. *)
 30 | 
 31 | val singleton : Zed_char.t -> rope
 32 |   (** [singleton ch] creates a rope of length 1 containing only [ch]. *)
 33 | 
 34 | (** {5 Informations} *)
 35 | 
 36 | val length : rope -> int
 37 |   (** Returns the length of the given rope. *)
 38 | 
 39 | val size : rope -> int
 40 |   (** Returns the size of the given rope. *)
 41 | 
 42 | val is_empty : rope -> bool
 43 |   (** [is_empty rope] returns whether [str] is the empty rope or not. *)
 44 | 
 45 | 
 46 | (** {5 Random access} *)
 47 | 
 48 | val get : rope -> int -> Zed_char.t
 49 |   (** [get rope idx] returns the glyph at index [idx] in [rope]. *)
 50 | 
 51 | val get_raw : rope -> int -> Uchar.t
 52 |   (** [get_raw rope idx] returns the character at raw index [idx] in
 53 |       [rope]. *)
 54 | 
 55 | (** {5 Rope manipulation} *)
 56 | 
 57 | val append : rope -> rope -> rope
 58 |   (** Concatenates the two given ropes. *)
 59 | 
 60 | val concat : rope -> rope list -> rope
 61 |   (** [concat sep l] concatenates all strings of [l] separating them
 62 |       by [sep]. *)
 63 | 
 64 | val sub : rope -> int -> int -> rope
 65 |   (** [sub rope ofs len] Returns the sub-rope of [rope] starting at
 66 |       [ofs] and of length [len]. *)
 67 | 
 68 | val break : rope -> int -> rope * rope
 69 |   (** [break rope pos] returns the sub-ropes before and after [pos] in
 70 |       [rope]. It is more efficient than creating two sub-ropes with
 71 |       {!sub}. *)
 72 | 
 73 | val before : rope -> int -> rope
 74 |   (** [before rope pos] returns the sub-rope before [pos] in [rope]. *)
 75 | 
 76 | val after : rope -> int -> rope
 77 |   (** [after rope pos] returns the sub-string after [pos] in [rope]. *)
 78 | 
 79 | val insert : rope -> int -> rope -> rope
 80 |   (** [insert rope pos sub] inserts [sub] in [rope] at position
 81 |       [pos]. *)
 82 | 
 83 | val insert_uChar : rope -> int -> Uchar.t -> rope
 84 |   (** [insert rope pos char] inserts [char] in [rope] at position
 85 |       [pos]. If [char] is a combing mark, it's merged to the character
 86 |       at position [pos-1] *)
 87 | 
 88 | val remove : rope -> int -> int -> rope
 89 |   (** [remove rope pos len] removes the [len] characters at position
 90 |       [pos] in [rope] *)
 91 | 
 92 | val replace : rope -> int -> int -> rope -> rope
 93 |   (** [replace rope pos len repl] replaces the [len] characters at
 94 |       position [pos] in [rope] by [repl]. *)
 95 | 
 96 | val lchop : rope -> rope
 97 |   (** [lchop rope] returns [rope] without is first character. Returns
 98 |       {!empty} if [rope] is empty. *)
 99 | 
100 | val rchop : rope -> rope
101 |   (** [rchop rope] returns [rope] without is last character. Returns
102 |       {!empty} if [rope] is empty. *)
103 | 
104 | (** {5 Iteration, folding and mapping} *)
105 | 
106 | val iter : (Zed_char.t -> unit) -> rope -> unit
107 |   (** [iter f rope] applies [f] on all characters of [rope] starting
108 |       from the left. *)
109 | 
110 | val rev_iter : (Zed_char.t -> unit) -> rope -> unit
111 |   (** [rev_iter f rope] applies [f] an all characters of [rope]
112 |       starting from the right. *)
113 | 
114 | val fold : (Zed_char.t -> 'a -> 'a) -> rope -> 'a -> 'a
115 |   (** [fold f rope acc] applies [f] on all characters of [rope]
116 |       starting from the left, accumulating a value. *)
117 | 
118 | val rev_fold : (Zed_char.t -> 'a -> 'a) -> rope -> 'a -> 'a
119 |   (** [rev_fold f rope acc] applies [f] on all characters of [rope]
120 |       starting from the right, accumulating a value. *)
121 | 
122 | val map : (Zed_char.t -> Zed_char.t) -> rope -> rope
123 |   (** [map f rope] maps all characters of [rope] with [f]. *)
124 | 
125 | val rev_map : (Zed_char.t -> Zed_char.t) -> rope -> rope
126 |   (** [rev_map f str] maps all characters of [rope] with [f] in
127 |       reverse order. *)
128 | 
129 | (** {5 Iteration and folding on leafs} *)
130 | 
131 | (** Note: for all of the following functions, the leaves must
132 |     absolutely not be modified. *)
133 | 
134 | val iter_leaf : (Zed_string.t -> unit) -> rope -> unit
135 |   (** [iter_leaf f rope] applies [f] on all leaves of [rope] starting
136 |       from the left. *)
137 | 
138 | val rev_iter_leaf : (Zed_string.t -> unit) -> rope -> unit
139 |   (** [iter_leaf f rope] applies [f] on all leaves of [rope] starting
140 |       from the right. *)
141 | 
142 | val fold_leaf : (Zed_string.t -> 'a -> 'a) -> rope -> 'a -> 'a
143 |   (** [fold f rope acc] applies [f] on all leaves of [rope] starting
144 |       from the left, accumulating a value. *)
145 | 
146 | val rev_fold_leaf : (Zed_string.t -> 'a -> 'a) -> rope -> 'a -> 'a
147 |   (** [rev_fold f rope acc] applies [f] on all leaves of [rope]
148 |       starting from the right, accumulating a value. *)
149 | 
150 | 
151 | val compare : rope -> rope -> int
152 |   (** Compares two ropes (in code point order). *)
153 | 
154 | val equal : rope -> rope -> bool
155 |   (** [equal r1 r2] retuns [true] if [r1] is equal to [r2]. *)
156 | 
157 | (** {5 Zippers} *)
158 | 
159 | module Zip : sig
160 |   type t
161 |     (** Type of zippers. A zipper allow to naviguate in a rope in a
162 |         convenient and efficient manner. Note that a zipper points to
163 |         a position between two glyphs, not to a glyph, so in a
164 |         rope of length [len] there is [len + 1] positions. *)
165 | 
166 |   val make_f : rope -> int -> t
167 |     (** [make_f rope pos] creates a new zipper pointing to positon
168 |         [pos] of [rope]. *)
169 | 
170 |   val make_b : rope -> int -> t
171 |     (** [make_b rope pos] creates a new zipper pointing to positon
172 |         [length rope - pos] of [rope]. *)
173 | 
174 |   val offset : t -> int
175 |     (** Returns the position of the zipper in the rope. *)
176 | 
177 |   val next : t -> Zed_char.t * t
178 |     (** [next zipper] returns the glyph at the right of the
179 |         zipper and a zipper to the next position. It raises
180 |         [Out_of_bounds] if the zipper points to the end of the
181 |         rope. *)
182 | 
183 |   val prev : t -> Zed_char.t * t
184 |     (** [prev zipper] returns the glyph at the left of the
185 |         zipper and a zipper to the previous position. It raises
186 |         [Out_of_bounds] if the zipper points to the beginning of the
187 |         rope. *)
188 | 
189 |   val move : int -> t -> t
190 |     (** [move n zip] moves the zipper by [n] glyphs. If [n] is
191 |         negative it is moved to the left and if it is positive it is
192 |         moved to the right. It raises [Out_of_bounds] if the result
193 |         is outside the bounds of the rope. *)
194 | 
195 |   val at_bos : t -> bool
196 |     (** [at_bos zipper] returns [true] if [zipper] points to the
197 |         beginning of the rope. *)
198 | 
199 |   val at_eos : t -> bool
200 |     (** [at_eos zipper] returns [true] if [zipper] points to the
201 |         end of the rope. *)
202 | 
203 |   val find_f : (Zed_char.t -> bool) -> t -> t
204 |     (** [find_f f zip] search forward for a glyph to satisfy
205 |         [f]. It returns a zipper pointing to the left of the first
206 |         glyph to satisfy [f], or a zipper pointing to the end of
207 |         the rope if no such glyph exists. *)
208 | 
209 |   val find_b : (Zed_char.t -> bool) -> t -> t
210 |     (** [find_b f zip] search backward for a glyph to satisfy
211 |         [f]. It returns a zipper pointing to the right of the first
212 |         glyph to satisfy [f], or a zipper pointing to the
213 |         beginning of the rope if no such glyph exists. *)
214 | 
215 |   val sub : t -> int -> rope
216 |     (** [sub zipper len] returns the sub-rope of length [len] pointed
217 |         by [zipper]. *)
218 | 
219 |   val slice : t -> t -> rope
220 |     (** [slice zipper1 zipper2] returns the rope between [zipper1]
221 |         and [zipper2]. If [zipper1 > zipper2] then this is the same as
222 |         [slice zipper2 zipper1].
223 | 
224 |         The result is unspecified if the two zippers do not points to
225 |         the same rope. *)
226 | end
227 | 
228 | module Zip_raw : sig
229 |   type t
230 |     (** Type of zippers. A zipper allow to naviguate in a rope in a
231 |         convenient and efficient manner. Note that a zipper points to
232 |         a position between two characters, not to a character, so in a
233 |         rope of length [len] there is [len + 1] positions. *)
234 | 
235 |   val make_f : rope -> int -> t
236 |     (** [make_f rope pos] creates a new zipper pointing to raw positon
237 |         [pos] of [rope]. *)
238 | 
239 |   val make_b : rope -> int -> t
240 |     (** [make_b rope pos] creates a new zipper pointing to raw positon
241 |         [length rope - pos] of [rope]. *)
242 | 
243 |   val offset : t -> int
244 |     (** Returns the raw position of the zipper in the rope. *)
245 | 
246 |   val next : t -> Uchar.t * t
247 |     (** [next zipper] returns the code point at the right of the
248 |         zipper and a zipper to the next raw position. It raises
249 |         [Out_of_bounds] if the zipper points to the end of the
250 |         rope. *)
251 | 
252 |   val prev : t -> Uchar.t * t
253 |     (** [prev zipper] returns the code point at the left of the
254 |         zipper and a zipper to the previous raw position. It raises
255 |         [Out_of_bounds] if the zipper points to the beginning of the
256 |         rope. *)
257 | 
258 |   val move : int -> t -> t
259 |     (** [move n zip] moves the zipper by [n] characters. If [n] is
260 |         negative it is moved to the left and if it is positive it is
261 |         moved to the right. It raises [Out_of_bounds] if the result
262 |         is outside the bounds of the rope. *)
263 | 
264 |   val at_bos : t -> bool
265 |     (** [at_bos zipper] returns [true] if [zipper] points to the
266 |         beginning of the rope. *)
267 | 
268 |   val at_eos : t -> bool
269 |     (** [at_eos zipper] returns [true] if [zipper] points to the
270 |         end of the rope. *)
271 | 
272 |   val find_f : (Uchar.t -> bool) -> t -> t
273 |     (** [find_f f zip] search forward for a character to satisfy
274 |         [f]. It returns a zipper pointing to the left of the first
275 |         character to satisfy [f], or a zipper pointing to the end of
276 |         the rope if no such character exists. *)
277 | 
278 |   val find_b : (Uchar.t -> bool) -> t -> t
279 |     (** [find_b f zip] search backward for a character to satisfy
280 |         [f]. It returns a zipper pointing to the right of the first
281 |         character to satisfy [f], or a zipper pointing to the
282 |         beginning of the rope if no such character exists. *)
283 | end
284 | 
285 | (** {5 Buffers} *)
286 | 
287 | module String_buffer = Buffer
288 | 
289 | module Buffer :
290 |   sig
291 |     type t
292 |       (** Type of rope buffers. *)
293 | 
294 |     val create : unit -> t
295 |       (** Create a new empty buffer. *)
296 | 
297 |     val add : t -> Zed_char.t -> unit
298 |       (** [add buffer zChar] add [zChar] at the end of [buffer]. *)
299 | 
300 |     val add_uChar : t -> Uchar.t -> unit
301 |       (** [add buffer uChar] add [uChar] at the end of [buffer]. *)
302 | 
303 |     val add_rope : t -> rope -> unit
304 |       (** [add buffer rope] add [rope] at the end of [buffer]. *)
305 | 
306 |     val add_string : t -> Zed_string.t -> unit
307 |       (** [add buffer str] add [str] at the end of [buffer]. *)
308 | 
309 |     val contents : t -> rope
310 |       (** [contents buffer] returns the contents of [buffer] as a rope. *)
311 | 
312 |     val reset : t -> unit
313 |       (** [reset buffer] resets [buffer] to its initial state. *)
314 |   end
315 | val init : int -> (int -> Zed_char.t) -> rope
316 | val init_from_uChars : int -> (int -> Uchar.t) -> rope
317 | val of_string : Zed_string.t -> rope
318 | val to_string : rope -> Zed_string.t
319 | 
320 | val lowercase : ?locale:string -> t -> t
321 | val uppercase : ?locale:string -> t -> t
322 | 


--------------------------------------------------------------------------------
/src/zed_string.ml:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_string.ml
  3 |  * -----------
  4 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | (* This aliasing needs to come before 'open Result' which now offers a
 11 |    'compare' function. We don't use 'Pervasives.compare' or 'Stdlib.compare'
 12 |    because neither seems to work with every version of OCaml. *)
 13 | let pervasives_compare= compare
 14 | 
 15 | open Result
 16 | 
 17 | exception Invalid of string * string
 18 | exception Out_of_bounds
 19 |   (** Exception raised when trying to access a character which is
 20 |       outside the bounds of a string. *)
 21 | 
 22 | let fail str pos msg = raise (Invalid(Printf.sprintf "at position %d: %s" pos msg, str))
 23 | 
 24 | module Zed_string0 = struct
 25 | 
 26 |   type seg_width= {
 27 |     start: int;
 28 |     len: int;
 29 |     width: int;
 30 |   }
 31 | 
 32 |   type all_width= {
 33 |     len: int;
 34 |     width: int;
 35 |   }
 36 | 
 37 |   type width= (all_width, seg_width) result
 38 | 
 39 |   type t= Zed_utf8.t
 40 | 
 41 |   let aval_width= function
 42 |     | Ok {len=_;width}-> width
 43 |     | Error {start=_;len=_;width}-> width
 44 | 
 45 |   let bytes str= String.length str
 46 | 
 47 |   let size str= Zed_utf8.length str
 48 | 
 49 |   let copy t= t
 50 | 
 51 |   let unsafe_next str ofs=
 52 |     let str_len= String.length str in
 53 |     let rec skip str ofs=
 54 |       if ofs >= str_len then
 55 |         str_len
 56 |       else
 57 |         let chr, next= Zed_utf8.unsafe_extract_next str ofs in
 58 |         if Zed_char.is_combining_mark chr then
 59 |           skip str next
 60 |         else
 61 |           ofs
 62 |     in
 63 |     if ofs < 0 || ofs >= String.length str then
 64 |       raise Out_of_bounds
 65 |     else
 66 |       let chr, next= Zed_utf8.unsafe_extract_next str ofs in
 67 |       if Zed_char.is_printable chr then
 68 |         skip str next
 69 |       else
 70 |         next
 71 | 
 72 |   let next_ofs str ofs=
 73 |     let str_len= String.length str in
 74 |     let rec skip str ofs=
 75 |       if ofs >= str_len then
 76 |         str_len
 77 |       else
 78 |         let chr, next= Zed_utf8.unsafe_extract_next str ofs in
 79 |         if Zed_char.is_combining_mark chr then
 80 |           skip str next
 81 |         else
 82 |           ofs
 83 |     in
 84 |     if ofs < 0 || ofs >= String.length str then
 85 |       raise Out_of_bounds
 86 |     else
 87 |       let chr, next= Zed_utf8.unsafe_extract_next str ofs in
 88 |       if Zed_char.is_printable_core chr then
 89 |         skip str next
 90 |       else if Zed_char.is_combining_mark chr then
 91 |         fail str ofs "individual combining marks encountered"
 92 |       else
 93 |         next
 94 | 
 95 |   let length str=
 96 |     let eos= String.length str in
 97 |     let rec length len ofs=
 98 |       if ofs < eos then
 99 |         length (len + 1) (unsafe_next str ofs)
100 |       else
101 |         len
102 |     in
103 |     length 0 0
104 | 
105 |   let unsafe_prev str ofs=
106 |     let rec skip str ofs=
107 |       if ofs = 0 then
108 |         ofs
109 |       else
110 |         let chr, prev= Zed_utf8.unsafe_extract_prev str ofs in
111 |         if Zed_char.is_combining_mark chr then
112 |           skip str prev
113 |         else
114 |           prev
115 |     in
116 |     if ofs <= 0 || ofs > String.length str then
117 |       raise Out_of_bounds
118 |     else
119 |       let chr, prev= Zed_utf8.extract_prev str ofs in
120 |       if Zed_char.is_combining_mark chr then
121 |         skip str prev
122 |       else
123 |         prev
124 | 
125 |   let prev_ofs str ofs=
126 |     let rec skip str ofs=
127 |       if ofs = 0 then
128 |         ofs
129 |       else
130 |         let chr, prev= Zed_utf8.unsafe_extract_prev str ofs in
131 |         if Zed_char.is_combining_mark chr then
132 |           skip str prev
133 |         else
134 |           prev
135 |     in
136 |     if ofs <= 0 || ofs > String.length str then
137 |       raise Out_of_bounds
138 |     else
139 |       let chr, prev= Zed_utf8.extract_prev str ofs in
140 |       if Zed_char.is_combining_mark chr then
141 |         let prev= skip str prev in
142 |         if prev = 0 then
143 |           if Zed_char.is_printable_core (Zed_utf8.unsafe_extract str 0) then
144 |             prev
145 |           else
146 |             fail str 0 "individual combining marks encountered"
147 |         else
148 |           let chr, next= Zed_utf8.unsafe_extract_next str prev in
149 |           match Zed_char.prop_uChar chr with
150 |           | Printable 0
151 |           | Other
152 |           | Null -> fail str next "individual combining marks encountered"
153 |           | _-> prev
154 |       else
155 |         prev
156 | 
157 |   let rec move_l str ofs len=
158 |     if len = 0 then
159 |       ofs
160 |     else if ofs >= String.length str then
161 |       raise Out_of_bounds
162 |     else
163 |       move_l str (unsafe_next str ofs) (len - 1)
164 | 
165 |   let move_b str ofs len=
166 |     let rec move str ofs len=
167 |       if len = 0 then
168 |         ofs
169 |       else if ofs < 0 then
170 |         raise Out_of_bounds
171 |       else
172 |         move str (unsafe_prev str ofs) (len - 1)
173 |     in
174 |     if ofs < 0 || ofs > String.length str then
175 |       raise Out_of_bounds
176 |     else
177 |       move str ofs len
178 | 
179 |   let rec move_l_raw str ofs len=
180 |     if len = 0 then
181 |       ofs
182 |     else if ofs >= String.length str then
183 |       raise Out_of_bounds
184 |     else
185 |       move_l_raw str (Zed_utf8.unsafe_next str ofs) (len - 1)
186 | 
187 |   let move_b_raw str ofs len=
188 |     let rec move str ofs len=
189 |       if len = 0 then
190 |         ofs
191 |       else if ofs < 0 then
192 |         raise Out_of_bounds
193 |       else
194 |         move str (Zed_utf8.unsafe_prev str ofs) (len - 1)
195 |     in
196 |     if ofs < 0 || ofs > String.length str then
197 |       raise Out_of_bounds
198 |     else
199 |       move str ofs len
200 | 
201 |   let extract str ofs=
202 |     let next= next_ofs str ofs in
203 |     Zed_char.unsafe_of_utf8 (String.sub str ofs (next - ofs))
204 | 
205 |   let extract_next str ofs=
206 |     let next= next_ofs str ofs in
207 |     (Zed_char.unsafe_of_utf8 (String.sub str ofs (next - ofs)), next)
208 | 
209 |   let extract_prev str ofs=
210 |     let prev= prev_ofs str ofs in
211 |     (Zed_char.unsafe_of_utf8 (String.sub str prev (ofs - prev)), prev)
212 | 
213 |   let to_raw_list str= Zed_utf8.explode str
214 | 
215 |   let to_raw_array str= Array.of_list (to_raw_list str)
216 | 
217 |   type index= int
218 | 
219 |   let get str idx =
220 |     if idx < 0 then
221 |       raise Out_of_bounds
222 |     else
223 |       extract str (move_l str 0 idx)
224 | 
225 |   let get_raw= Zed_utf8.get
226 | 
227 |   let empty ()= ""
228 | 
229 |   let width_ofs ?(start=0) ?num str=
230 |     let str_len= String.length str in
231 |     let rec calc w idx ofs=
232 |       if ofs < str_len then
233 |         let chr, next= extract_next str ofs in
234 |         let chr_width= Zed_char.width chr in
235 |         if chr_width > 0 then
236 |           calc (w + chr_width) (idx+1) next
237 |         else
238 |           Error { start; len= idx - start; width= w }
239 |       else Ok {len= idx - start; width= w }
240 |     in
241 |     let calc_num num w idx ofs=
242 |       let rec calc n w idx ofs=
243 |         if ofs < str_len && n > 0 then
244 |           let chr, next= extract_next str ofs in
245 |           let chr_width= Zed_char.width chr in
246 |           if chr_width > 0 then
247 |             calc (n-1) (w + chr_width) (idx+1) next
248 |           else
249 |             Error { start; len= idx - start; width= w }
250 |         else Ok {len= idx - start; width= w }
251 |       in
252 |       calc num w idx ofs
253 |     in
254 |     match num with
255 |     | Some num-> calc_num num 0 start start
256 |     | None-> calc 0 start start
257 | 
258 |   let width ?(start=0) ?num str=
259 |     let ofs= move_l str 0 start in
260 |     width_ofs ~start:ofs ?num str
261 | 
262 |   let explode str=
263 |     let str_len= String.length str in
264 |     let rec aux acc str ofs=
265 |       if ofs > 0 then
266 |         let chr, prev= extract_prev str ofs in
267 |         aux (chr::acc) str prev
268 |       else
269 |         acc
270 |     in
271 |     if str_len > 0 then
272 |       aux [] str str_len
273 |     else
274 |       []
275 | 
276 |   let rev_explode str=
277 |     let str_len= String.length str in
278 |     let rec aux acc ofs=
279 |       if ofs < str_len then
280 |         let chr, next= extract_next str ofs in
281 |         aux (chr::acc) next
282 |       else
283 |         []
284 |     in
285 |     if str_len > 0 then
286 |       aux [] 0
287 |     else
288 |       []
289 | 
290 |   let unsafe_explode str=
291 |     let str_len= String.length str in
292 |     let rec aux acc str ofs=
293 |       if ofs > 0 then
294 |         let chr, prev= extract_prev str ofs in
295 |         aux (chr::acc) str prev
296 |       else
297 |         acc
298 |     in
299 |     if str_len > 0 then
300 |       aux [] str str_len
301 |     else
302 |       []
303 | 
304 |   let unsafe_rev_explode str=
305 |     let str_len= String.length str in
306 |     let rec aux acc ofs=
307 |       if ofs < str_len then
308 |         let chr, next= extract_next str ofs in
309 |         aux (chr::acc) next
310 |       else
311 |         []
312 |     in
313 |     if str_len > 0 then
314 |       aux [] 0
315 |     else
316 |       []
317 | 
318 |   let implode chars=
319 |     String.concat "" (List.map Zed_char.to_utf8 chars)
320 | 
321 |   let init len (f: int -> Zed_char.t)=
322 |     let rec create acc n=
323 |       if n > 0 then
324 |         create ((f (n-1))::acc) (n-1)
325 |       else acc
326 |     in
327 |     implode (create [] len)
328 | 
329 |   let init_from_uChars len f=
330 |     match len with
331 |     | 0-> empty ()
332 |     | len when len > 0 ->
333 |       let rec create acc n=
334 |         if n > 0 then
335 |           create ((f (n-1))::acc) (n-1)
336 |         else acc
337 |       in
338 |       let uChars= create [] len in
339 |       let zChars, _= Zed_char.zChars_of_uChars uChars in
340 |       implode zChars
341 |     | _-> raise (Invalid_argument "Zed_string0.init_from_uChars")
342 | 
343 | 
344 |   let unsafe_of_uChars uChars=
345 |     match uChars with
346 |     | []-> ""
347 |     | _-> String.concat "" (List.map Zed_utf8.singleton uChars)
348 | 
349 |   let of_uChars uChars=
350 |     match uChars with
351 |     | []-> "", []
352 |     | fst::_->
353 |       if Zed_char.is_combining_mark fst then
354 |         ("", uChars)
355 |       else
356 |         (uChars |> List.map Zed_utf8.singleton |> String.concat "", [])
357 | 
358 |   let unsafe_append s1 s2=
359 |     s1 ^ s2
360 | 
361 |   let append s1 s2=
362 |     let validate_s2 ()=
363 |       let s2_first= Zed_utf8.unsafe_extract s2 0 in
364 |       if Zed_char.is_combining_mark s2_first then
365 |         fail s2 0 "individual combining marks encountered"
366 |       else
367 |         s2
368 |     in
369 |     if s1 = "" then
370 |       validate_s2 ()
371 |     else if s2 = "" then
372 |       s1
373 |     else
374 |       let (s1_last, _)= extract_prev s1 (bytes s1) in
375 |       if Zed_char.(is_printable_core (core s1_last)) then
376 |         unsafe_append s1 s2
377 |       else
378 |         unsafe_append s1 (validate_s2 ())
379 | 
380 |   external id : 'a -> 'a = "%identity"
381 |   let unsafe_of_utf8 : string -> t= id
382 |   let of_utf8 : string -> t= fun str->
383 |     if String.length str = 0 then ""
384 |     else if Zed_char.is_combining_mark (Zed_utf8.extract str 0) then
385 |       fail str 0 "individual combining marks encountered"
386 |     else
387 |       unsafe_of_utf8 str
388 |   let to_utf8 : t -> string= id
389 | 
390 |   let for_all p str= List.for_all p (explode str)
391 | 
392 |   let check_range t n= n >= 0 && n <= length t
393 | 
394 |   let look str ofs= Zed_utf8.extract str ofs
395 | 
396 |   let nth t n= if check_range t n
397 |     then n
398 |     else raise (Invalid_argument "Zed_string.nth")
399 | 
400 |   let next t n=
401 |     let n= n + 1 in
402 |     if check_range t n
403 |     then n
404 |     else raise (Invalid_argument "Zed_string.next")
405 | 
406 |   let prev t n=
407 |     let n= n - 1 in
408 |     if check_range t n
409 |     then n
410 |     else raise (Invalid_argument "Zed_string.prev")
411 | 
412 |   let out_of_range t n= n < 0 || n >= length t
413 | 
414 |   let iter f str= List.iter f (explode str)
415 | 
416 |   let rev_iter f str= List.iter f (rev_explode str)
417 | 
418 |   let fold f str acc=
419 |     let rec aux f chars acc=
420 |       match chars with
421 |       | []-> acc
422 |       | chr::tl-> aux f tl (f chr acc)
423 |     in
424 |     aux f (explode str) acc
425 | 
426 |   let rev_fold f str acc=
427 |     let rec aux f chars acc=
428 |       match chars with
429 |       | []-> acc
430 |       | chr::tl-> aux f tl (f chr acc)
431 |     in
432 |     aux f (rev_explode str) acc
433 | 
434 |   let map f str=
435 |     implode (List.map f (explode str))
436 | 
437 |   let rev_map f str=
438 |     implode (List.map f (rev_explode str))
439 | 
440 |   let compare str1 str2= Zed_utils.list_compare
441 |     ~compare:Zed_char.compare_raw
442 |     (explode str1) (explode str2)
443 | 
444 |   let first (_:t)= 0
445 |   let last t= max (length t - 1) 0
446 | 
447 |   let move t i n=
448 |     if n >= 0 then move_l t i n
449 |     else move_b t i n
450 | 
451 |   let move_raw t i n=
452 |     if n >= 0 then move_l_raw t i n
453 |     else move_b_raw t i n
454 | 
455 |   let compare_index (_:t) i j= pervasives_compare i j
456 | 
457 |   let sub_ofs ~ofs ~len s=
458 |     if ofs < 0 || len < 0 || ofs > bytes s - len then
459 |       invalid_arg "Zed_string.sub"
460 |     else
461 |       String.sub s ofs len
462 | 
463 |   let sub ~pos ~len s=
464 |     if pos < 0 || len < 0 || pos > length s - len then
465 |       invalid_arg "Zed_string.sub"
466 |     else
467 |       let ofs_start= move_l s 0 pos in
468 |       let ofs_end= move_l s ofs_start len in
469 |       String.sub s ofs_start (ofs_end - ofs_start)
470 | 
471 |   let after s i=
472 |     let len= length s in
473 |     if i < len then
474 |       sub ~pos:i ~len:(len-i) s
475 |     else
476 |       empty ()
477 | 
478 |   let rec unsafe_sub_equal str ofs sub ofs_sub=
479 |     if ofs_sub = String.length sub then
480 |       true
481 |     else
482 |       (String.unsafe_get str ofs = String.unsafe_get sub ofs_sub)
483 |       && unsafe_sub_equal str (ofs + 1) sub (ofs_sub + 1)
484 | 
485 |   let starts_with ~prefix str=
486 |     if String.length prefix > String.length str then
487 |       false
488 |     else
489 |       unsafe_sub_equal str 0 prefix 0
490 | 
491 |   let make len c=
492 |     implode (Array.to_list (Array.make len c))
493 | 
494 |   let ends_with ~suffix str=
495 |     Zed_utf8.ends_with str suffix
496 | 
497 |   module Buf0 = struct
498 |     type buf= Buffer.t
499 | 
500 |     let create n= Buffer.create n
501 | 
502 |     let contents b= Buffer.contents b
503 | 
504 |     let clear b= Buffer.clear b
505 | 
506 |     let reset b= Buffer.reset b
507 | 
508 |     let length b= length (contents b)
509 | 
510 |     let add_zChar b zChar=
511 |       Buffer.add_string b (Zed_char.to_utf8 zChar)
512 | 
513 |     let add_uChar b uChar=
514 |       Buffer.add_string b (Zed_utf8.singleton uChar)
515 | 
516 |     let add_string b s= Buffer.add_string b s
517 | 
518 |     let add_buffer b1 b2= Buffer.add_buffer b1 b2
519 |   end
520 | end
521 | 
522 | include Zed_string0
523 | module Buf = Buf0
524 | 
525 | 


--------------------------------------------------------------------------------
/src/zed_string.mli:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_string.mli
  3 |  * -----------
  4 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | open Result
 11 | 
 12 | exception Invalid of string * string
 13 |   (** [Invalid (error, text)] Exception raised when an invalid Zed_char
 14 |       sequence is encountered. [text] is the faulty text and
 15 |       [error] is a description of the first error in [text]. *)
 16 | 
 17 | exception Out_of_bounds
 18 |   (** Exception raised when trying to access a character which is
 19 |       outside the bounds of a string. *)
 20 | 
 21 | type seg_width = { start : int; len : int; width : int; }
 22 |   (** Type of the width of a segment of a Zed_string.t *)
 23 | 
 24 | type all_width = { len : int; width : int; }
 25 |   (** Type of the width of a whole Zed_string.t *)
 26 | 
 27 | type width = (all_width, seg_width) result
 28 |   (** Type of the width of a Zed_string.t *)
 29 | 
 30 | type t
 31 |   (** Type of Zed_string.t *)
 32 | 
 33 | val unsafe_of_utf8 : string -> t
 34 |   (** Create a Zed_string.t from a utf8 encoded string. *)
 35 | 
 36 | val of_utf8 : string -> t
 37 |   (** Create a Zed_string.t from a utf8 encoded string and check whether it's well formed.
 38 |     @raise Invalid
 39 |     @raise Zed_utf8.Invalid
 40 |    *)
 41 | 
 42 | val to_utf8 : t -> string
 43 |   (** Create a utf8 encoded string from a Zed_string.t. *)
 44 | 
 45 | val explode : t -> Zed_char.t list
 46 |   (** [explode str] returns the list of all Zed_char.t of [str]. *)
 47 | 
 48 | val rev_explode : t -> Zed_char.t list
 49 |   (** [explode str] returns the list of all Zed_char.t of [str] in reverse order. *)
 50 | 
 51 | val unsafe_explode : t -> Zed_char.t list
 52 |   (** [explode str] returns the list of all Zed_char.t of [str] even if [str] is malformed. *)
 53 | 
 54 | val unsafe_rev_explode : t -> Zed_char.t list
 55 |   (** [explode str] returns the list of all Zed_char.t of [str] in reverse order even if [str] is malformed. *)
 56 | 
 57 | val implode : Zed_char.t list -> t
 58 |   (** [implode l] returns the concatenation of all Zed_char.t of [l]. *)
 59 | 
 60 | val aval_width : width -> int
 61 |   (** Returns the widest available width *)
 62 | 
 63 | val init : int -> (int -> Zed_char.t) -> t
 64 |   (** [init n f] returns the contenation of [implode [(f 0)]],
 65 |       [implode [(f 1)]], ..., [implode [(f (n - 1))]]. *)
 66 | 
 67 | val init_from_uChars : int -> (int -> Uchar.t) -> t
 68 |   (** [init n f] creates a sequence of Uchar.t of [f 0], [f 1], ..., [f (n-1)] and implode the contenation of it. *)
 69 | 
 70 | val make : int -> Zed_char.t -> t
 71 |   (** [make n ch] creates a Zed_string.t of length [n] filled with [ch]. *)
 72 | 
 73 | val copy : t -> t
 74 |   (** [copy s] returns a copy of [s], that is, a fresh Zed_string.t containing the same elements as [s]. *)
 75 | 
 76 | val to_raw_list : t -> Uchar.t list
 77 |   (** Same as explode, but the elements in the list is [Uchar.t]. *)
 78 | 
 79 | val to_raw_array : t -> Uchar.t array
 80 |   (** Same as explode, but the elements in the array is [Uchar.t]. *)
 81 | 
 82 | type index = int
 83 | val get : t -> int -> Zed_char.t
 84 |   (** [get str idx] returns the Zed_char.t at index [idx] in [str]. *)
 85 | 
 86 | val get_raw : t -> int -> Uchar.t
 87 |   (** [get_raw str idx] returns the Uchar.t at Uchar.t based index [idx] in [str]. *)
 88 | 
 89 | val empty : unit -> t
 90 |   (** [empty ()] creates an empty Zed_string.t. *)
 91 | 
 92 | val width_ofs : ?start:index -> ?num:int -> t -> width
 93 | (** [width_ofs ?start ?num str] returns the [width] of a Zed_string.t that starts at offset [start] and has length less than [num]. *)
 94 | 
 95 | val width : ?start:int -> ?num:int -> t -> width
 96 | (** [width ?start ?num str] returns the [width] of a Zed_string.t that starts at positon [start] and has length less than [num]. *)
 97 | 
 98 | val bytes : t -> index
 99 |   (** [bytes str] returns the number of bytes in [str]. It's also the index point to the end of [str]. *)
100 | 
101 | val size : t -> int
102 |   (** [size str] returns the number of Uchar.t in [str]. *)
103 | 
104 | val length : t -> int
105 |   (** [length str] returns the number of Zed_char.t in [str] *)
106 | 
107 | val next_ofs : t -> int -> int
108 |   (** [next_ofs str ofs] returns the offset of the next zed_char in [str]. *)
109 | 
110 | val prev_ofs : t -> int -> int
111 |   (** [prev_ofs str ofs] returns the offset of the previous zed_char in [str]. *)
112 | 
113 | val extract : t -> index -> Zed_char.t
114 |   (** [extract str ofs] returns the Zed_char.t at offset [ofs] in [str]. *)
115 | 
116 | val extract_next : t -> index -> (Zed_char.t * index)
117 |   (** [extract_next str ofs] returns the Zed_char.t at offset [ofs] in [str] and the offset of the next Zed_char.t *)
118 | 
119 | val extract_prev : t -> index -> (Zed_char.t * index)
120 |   (** [extract_prev str ofs] returns the Zed_char.t at the previous offset [ofs] in [str] and this offset. *)
121 | 
122 | val unsafe_of_uChars : Uchar.t list -> t
123 |   (** [unsafe_of_uChars l] returns the concatenation of all Uchar.t of [l]. *)
124 | 
125 | val of_uChars : Uchar.t list -> t * Uchar.t list
126 |   (** [of_uChars l] returns a tuple of which the first element is a well formed Zed_string.t concatenating of all Uchar.t of [l] and the second element is a list of the remaining Uchar.t. *)
127 | 
128 | val for_all : (Zed_char.t -> bool) -> t -> bool
129 |   (** [for_all p zStr] checks if all Zed_char.t in [zStr]
130 |    satisfy the predicate [p]. *)
131 | 
132 | 
133 | val iter : (Zed_char.t -> unit) -> t -> unit
134 |   (** [iter f str] applies [f] an all characters of [str] starting from the left. *)
135 | 
136 | val rev_iter : (Zed_char.t -> unit) -> t -> unit
137 |   (** [iter f str] applies [f] an all characters of [str] starting from the right. *)
138 | 
139 | val fold : (Zed_char.t -> 'a -> 'a) -> t -> 'a -> 'a
140 |   (** [fold f str acc] applies [f] on all characters of [str] starting from the left, accumulating a value. *)
141 | 
142 | val rev_fold : (Zed_char.t -> 'a -> 'a) -> t -> 'a -> 'a
143 |   (** [fold f str acc] applies [f] on all characters of [str] starting from the right, accumulating a value. *)
144 | 
145 | val map : (Zed_char.t -> Zed_char.t) -> t -> t
146 |   (** [map f str] maps all characters of [str] with [f]. *)
147 | 
148 | val rev_map : (Zed_char.t -> Zed_char.t) -> t -> t
149 |   (** [map f str] maps all characters of [str] with [f] in reverse order. *)
150 | 
151 | 
152 | val check_range : t -> int -> bool
153 | val look : t -> index -> Uchar.t
154 |   (** [look str idx] returns the character in the location [idx] of [str]. *)
155 | 
156 | val nth : t -> int -> index
157 |   (** [nth str n] returns the location of the [n]-th character in [str]. *)
158 | 
159 | (**  [next str i], [prev str i] The operation is valid if [i] points the valid element, i.e. the returned value may point the location beyond valid elements by one. If i does not point a valid element, the results are unspecified. *)
160 | 
161 | val next : t -> index -> index
162 |   (** [next str idx] returns the index of the next zed_char in [str]. *)
163 | 
164 | val prev : t -> index -> index
165 |   (** [prev str idx] returns the index of the previous zed_char in [str]. *)
166 | 
167 | val out_of_range : t -> index -> bool
168 | val compare : t -> t -> int
169 |   (** Compares two strings by [Zed_char.compare]. *)
170 | 
171 | val first : t -> index
172 |   (** [first str] returns the location of the first character in [str]. *)
173 | 
174 | val last : t -> index
175 |   (** [last str] returns the location of the last character in [str]. *)
176 | 
177 | val move : t -> index -> int -> index
178 |   (**  [move str i n] if n >= 0, then returns [n]-th character after [i] and otherwise returns -[n]-th character before [i.] If there is no such character, or [i] does not point a valid character, the result is unspecified. *)
179 | 
180 | val move_raw : t -> index -> int -> index
181 |   (**  [move_raw str i n] if n >= 0, then returns [n]-th Uchar.t after [i] and otherwise returns -[n]-th Uchar.t before [i.] If there is no such Uchar.t, or [i] does not point a valid Uchar.t, the result is unspecified. *)
182 | 
183 | val compare_index : t -> index -> index -> int
184 |   (**  [compare_index str i j] returns a positive integer if [i] is the location placed after [j] in [str], 0 if [i] and [j] point the same location, and a negative integer if [i] is the location placed before [j] in [str]. *)
185 | 
186 | val sub_ofs : ofs:index -> len:int -> t -> t
187 |   (** [sub_ofs ofs len str] returns the sub-string of [str] starting at byte-offset [ofs] and of byte-length [len]. *)
188 | 
189 | val sub : pos:int -> len:int -> t -> t
190 |   (** [sub ~pos ~len str] returns the sub-string of [str] starting at position [pos] and of length [len]. *)
191 | 
192 | val after : t -> int -> t
193 |   (** [after str pos] returns the sub-string after [pos] in [str] *)
194 | 
195 | val unsafe_sub_equal : t -> int -> t -> int -> bool
196 | val starts_with : prefix:t -> t -> bool
197 |   (** [starts_with ~prefix str] returns [true] if [str] starts with [prefix]. *)
198 | 
199 | val ends_with : suffix:t -> t -> bool
200 |   (** [ends_with ~suffix str] returns [true] if [str] ends with [suffix]. *)
201 | 
202 | val unsafe_append : t -> t -> t
203 |   (** [unsafe_append str1 str2] returns the concatenation of [str1] and [str2] without sequence validation. *)
204 | 
205 | val append : t -> t -> t
206 |   (** [append str1 str2] returns the concatenation of [str1] and [str2].
207 |     @raise Invalid
208 |     @raise Zed_utf8.Invalid
209 |    *)
210 | 
211 | module Buf :
212 |   sig
213 |     type buf
214 |       (** Type of Zed_string buffers. *)
215 | 
216 |     val create : int -> buf
217 |       (** Create a new empty buffer. *)
218 | 
219 |     val contents : buf -> t
220 |       (** [contents buffer] returns the contents of [buffer] as a Zed_string.t. *)
221 | 
222 |     val clear : buf -> unit
223 |       (** [clear buffer] clear the contents of [buffer]. *)
224 | 
225 |     val reset : buf -> unit
226 |       (** [reset buffer] resets [buffer] to its initial state. *)
227 | 
228 |     val length : buf -> int
229 |       (** [length buffer] returns the length of the contents in [buffer] *)
230 | 
231 |     val add_zChar : buf -> Zed_char.t -> unit
232 |       (** [add buffer zChar] add [zChar] at the end of [buffer]. *)
233 | 
234 |     val add_uChar : buf -> Uchar.t -> unit
235 |       (** [add buffer uChar] add [uChar] at the end of [buffer]. *)
236 | 
237 |     val add_string : buf -> t -> unit
238 |       (** [add buffer str] add [str] at the end of [buffer]. *)
239 | 
240 |     val add_buffer : buf -> buf -> unit
241 |       (** [add buffer buf] add [buf] at the end of [buffer]. *)
242 |   end
243 | 
244 | 


--------------------------------------------------------------------------------
/src/zed_utf8.ml:
--------------------------------------------------------------------------------
   1 | (*
   2 |  * zed_utf8.ml
   3 |  * -----------
   4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
   5 |  * Licence   : BSD3
   6 |  *
   7 |  * This file is a part of Zed, an editor engine.
   8 |  *)
   9 | 
  10 | type t = string
  11 | exception Invalid of string * string
  12 | exception Out_of_bounds
  13 | 
  14 | let fail str pos msg = raise (Invalid(Printf.sprintf "at position %d: %s" pos msg, str))
  15 | 
  16 | let byte str i = Char.code (String.unsafe_get str i)
  17 | let set_byte str i n = Bytes.unsafe_set str i (Char.unsafe_chr n)
  18 | 
  19 | (* +-----------------------------------------------------------------+
  20 |    | Validation                                                      |
  21 |    +-----------------------------------------------------------------+ *)
  22 | 
  23 | type check_result =
  24 |   | Correct of int
  25 |   | Message of string
  26 | 
  27 | let next_error s i =
  28 |   let len = String.length s in
  29 |   if i < 0 || i >= len then
  30 |     raise Out_of_bounds
  31 |   else
  32 |     let rec main i ulen =
  33 |       if i = len then
  34 |         (i, ulen, "")
  35 |       else
  36 |         let ch = String.unsafe_get s i in
  37 |         match ch with
  38 |           | '\x00' .. '\x7f' ->
  39 |               main (i + 1) (ulen + 1)
  40 |           | '\xc0' .. '\xdf' ->
  41 |               if i + 1 >= len then
  42 |                 (i, ulen, "premature end of UTF8 sequence")
  43 |               else begin
  44 |                 let byte1 = Char.code (String.unsafe_get s (i + 1)) in
  45 |                 if byte1 land 0xc0 != 0x80 then
  46 |                   (i, ulen, "malformed UTF8 sequence")
  47 |                 else if ((Char.code ch land 0x1f) lsl 6) lor (byte1 land 0x3f) < 0x80 then
  48 |                   (i, ulen, "overlong UTF8 sequence")
  49 |                 else
  50 |                   main (i + 2) (ulen + 1)
  51 |               end
  52 |           | '\xe0' .. '\xef' ->
  53 |               if i + 2 >= len then
  54 |                 (i, ulen, "premature end of UTF8 sequence")
  55 |               else begin
  56 |                 let byte1 = Char.code (String.unsafe_get s (i + 1))
  57 |                 and byte2 = Char.code (String.unsafe_get s (i + 2)) in
  58 |                 if byte1 land 0xc0 != 0x80 then
  59 |                   (i, ulen, "malformed UTF8 sequence")
  60 |                 else if byte2 land 0xc0 != 0x80 then
  61 |                   (i, ulen, "malformed UTF8 sequence")
  62 |                 else if ((Char.code ch land 0x0f) lsl 12) lor ((byte1 land 0x3f) lsl 6) lor (byte2 land 0x3f) < 0x800 then
  63 |                   (i, ulen, "overlong UTF8 sequence")
  64 |                 else
  65 |                   main (i + 3) (ulen + 1)
  66 |               end
  67 |           | '\xf0' .. '\xf7' ->
  68 |               if i + 3 >= len then
  69 |                 (i, ulen, "premature end of UTF8 sequence")
  70 |               else begin
  71 |                 let byte1 = Char.code (String.unsafe_get s (i + 1))
  72 |                 and byte2 = Char.code (String.unsafe_get s (i + 2))
  73 |                 and byte3 = Char.code (String.unsafe_get s (i + 3)) in
  74 |                 if byte1 land 0xc0 != 0x80 then
  75 |                   (i, ulen, "malformed UTF8 sequence")
  76 |                 else if byte2 land 0xc0 != 0x80 then
  77 |                   (i, ulen, "malformed UTF8 sequence")
  78 |                 else if byte3 land 0xc0 != 0x80 then
  79 |                   (i, ulen, "malformed UTF8 sequence")
  80 |                 else
  81 |                   let value = ((Char.code ch land 0x07) lsl 18) lor ((byte1 land 0x3f) lsl 12) lor ((byte2 land 0x3f) lsl 6) lor (byte3 land 0x3f) in
  82 |                   if value < 0x10000 then
  83 |                   (i, ulen, "overlong UTF8 sequence")
  84 |                 else if value > Uchar.to_int Uchar.max then
  85 |                   (i, ulen, "scalar value too large in UTF8 sequence")
  86 |                 else
  87 |                   main (i + 4) (ulen + 1)
  88 |               end
  89 |           | _ ->
  90 |               (i, ulen, "invalid start of UTF8 sequence")
  91 |     in
  92 |   main i 0
  93 | 
  94 | let check str =
  95 |   let ofs, len, msg = next_error str 0 in
  96 |   if ofs = String.length str then
  97 |     Correct len
  98 |   else
  99 |     Message (Printf.sprintf "at position %d: %s" ofs msg)
 100 | 
 101 | let validate str =
 102 |   let ofs, len, msg = next_error str 0 in
 103 |   if ofs = String.length str then
 104 |     len
 105 |   else
 106 |     fail str ofs msg
 107 | 
 108 | (* +-----------------------------------------------------------------+
 109 |    | Unsafe UTF-8 manipulation                                       |
 110 |    +-----------------------------------------------------------------+ *)
 111 | 
 112 | let unsafe_next str ofs =
 113 |   match String.unsafe_get str ofs with
 114 |     | '\x00' .. '\x7f' ->
 115 |         ofs + 1
 116 |     | '\xc0' .. '\xdf' ->
 117 |         if ofs + 2 > String.length str then
 118 |           fail str ofs "unterminated UTF-8 sequence"
 119 |         else
 120 |           ofs + 2
 121 |     | '\xe0' .. '\xef' ->
 122 |         if ofs + 3 > String.length str then
 123 |           fail str ofs "unterminated UTF-8 sequence"
 124 |         else
 125 |           ofs + 3
 126 |     | '\xf0' .. '\xf7' ->
 127 |         if ofs + 4 > String.length str then
 128 |           fail str ofs "unterminated UTF-8 sequence"
 129 |         else
 130 |           ofs + 4
 131 |     | _ ->
 132 |         fail str ofs "invalid start of UTF-8 sequence"
 133 | 
 134 | let unsafe_prev str ofs =
 135 |   match String.unsafe_get str (ofs - 1) with
 136 |     | '\x00' .. '\x7f' ->
 137 |         ofs - 1
 138 |     | '\x80' .. '\xbf' ->
 139 |         if ofs >= 2 then
 140 |           match String.unsafe_get str (ofs - 2) with
 141 |             | '\xc0' .. '\xdf' ->
 142 |                 ofs - 2
 143 |             | '\x80' .. '\xbf' ->
 144 |                 if ofs >= 3 then
 145 |                   match String.unsafe_get str (ofs - 3) with
 146 |                     | '\xe0' .. '\xef' ->
 147 |                         ofs - 3
 148 |                     | '\x80' .. '\xbf' ->
 149 |                         if ofs >= 4 then
 150 |                           match String.unsafe_get str (ofs - 4) with
 151 |                             | '\xf0' .. '\xf7' ->
 152 |                                 ofs - 4
 153 |                             | _ ->
 154 |                                 fail str (ofs - 4) "invalid start of UTF-8 sequence"
 155 |                         else
 156 |                           fail str (ofs - 3) "invalid start of UTF-8 string"
 157 |                     | _ ->
 158 |                         fail str (ofs - 3) "invalid middle of UTF-8 sequence"
 159 |                 else
 160 |                   fail str (ofs - 2) "invaild start of UTF-8 string"
 161 |             | _ ->
 162 |                 fail str (ofs - 2) "invalid middle of UTF-8 sequence"
 163 |         else
 164 |           fail str (ofs - 1) "invalid start of UTF-8 string"
 165 |     | _ ->
 166 |         fail str (ofs - 1) "invalid end of UTF-8 sequence"
 167 | 
 168 | let unsafe_extract str ofs =
 169 |   let ch = String.unsafe_get str ofs in
 170 |   match ch with
 171 |     | '\x00' .. '\x7f' ->
 172 |         Uchar.of_char ch
 173 |     | '\xc0' .. '\xdf' ->
 174 |         if ofs + 2 > String.length str then
 175 |           fail str ofs "unterminated UTF-8 sequence"
 176 |         else
 177 |           Uchar.of_int (((Char.code ch land 0x1f) lsl 6) lor (byte str (ofs + 1) land 0x3f))
 178 |     | '\xe0' .. '\xef' ->
 179 |         if ofs + 3 > String.length str then
 180 |           fail str ofs "unterminated UTF-8 sequence"
 181 |         else (
 182 |           let n = ((Char.code ch land 0x0f) lsl 12) lor
 183 |                   ((byte str (ofs + 1) land 0x3f) lsl 6) lor
 184 |                     (byte str (ofs + 2) land 0x3f) in
 185 |           if not (Uchar.is_valid n) then
 186 |             fail str ofs "not a Unicode scalar value"
 187 |           else
 188 |             Uchar.of_int n)
 189 |     | '\xf0' .. '\xf7' ->
 190 |         if ofs + 4 > String.length str then
 191 |           fail str ofs "unterminated UTF-8 sequence"
 192 |         else (
 193 |           let n = (((Char.code ch land 0x07) lsl 18) lor
 194 |                     ((byte str (ofs + 1) land 0x3f) lsl 12) lor
 195 |                     ((byte str (ofs + 2) land 0x3f) lsl  6) lor
 196 |                     (byte str (ofs + 3) land 0x3f)) in
 197 |           if not (Uchar.is_valid n) then
 198 |             fail str ofs "not a Unicode scalar value"
 199 |           else
 200 |             Uchar.of_int n)
 201 |     | _ ->
 202 |         fail str ofs "invalid start of UTF-8 sequence"
 203 | 
 204 | let unsafe_extract_next str ofs =
 205 |   let ch = String.unsafe_get str ofs in
 206 |   match ch with
 207 |     | '\x00' .. '\x7f' ->
 208 |         (Uchar.of_char ch, ofs + 1)
 209 |     | '\xc0' .. '\xdf' ->
 210 |         if ofs + 2 > String.length str then
 211 |           fail str ofs "unterminated UTF-8 sequence"
 212 |         else
 213 |           (Uchar.of_int (((Char.code ch land 0x1f) lsl 6) lor (byte str (ofs + 1) land 0x3f)), ofs + 2)
 214 |     | '\xe0' .. '\xef' ->
 215 |         if ofs + 3 > String.length str then
 216 |           fail str ofs "unterminated UTF-8 sequence"
 217 |         else
 218 |           (Uchar.of_int (((Char.code ch land 0x0f) lsl 12) lor ((byte str (ofs + 1) land 0x3f) lsl 6) lor (byte str (ofs + 2) land 0x3f)), ofs + 3)
 219 |     | '\xf0' .. '\xf7' ->
 220 |         if ofs + 4 > String.length str then
 221 |           fail str ofs "unterminated UTF-8 sequence"
 222 |         else
 223 |           (Uchar.of_int (((Char.code ch land 0x07) lsl 18) lor ((byte str (ofs + 1) land 0x3f) lsl 12) lor ((byte str (ofs + 2) land 0x3f) lsl 6) lor (byte str (ofs + 3) land 0x3f)), ofs + 4)
 224 |     | _ ->
 225 |         fail str ofs "invalid start of UTF-8 sequence"
 226 | 
 227 | let unsafe_extract_prev str ofs =
 228 |   let ch1 = String.unsafe_get str (ofs - 1) in
 229 |   match ch1 with
 230 |     | '\x00' .. '\x7f' ->
 231 |         (Uchar.of_char ch1, ofs - 1)
 232 |     | '\x80' .. '\xbf' ->
 233 |         if ofs >= 2 then
 234 |           let ch2 = String.unsafe_get str (ofs - 2) in
 235 |           match ch2 with
 236 |             | '\xc0' .. '\xdf' ->
 237 |                 (Uchar.of_int (((Char.code ch2 land 0x1f) lsl 6) lor (Char.code ch1 land 0x3f)), ofs - 2)
 238 |             | '\x80' .. '\xbf' ->
 239 |                 if ofs >= 3 then
 240 |                   let ch3 = String.unsafe_get str (ofs - 3) in
 241 |                   match ch3 with
 242 |                     | '\xe0' .. '\xef' ->
 243 |                         (Uchar.of_int (((Char.code ch3 land 0x0f) lsl 12) lor ((Char.code ch2 land 0x3f) lsl 6) lor (Char.code ch1 land 0x3f)), ofs - 3)
 244 |                     | '\x80' .. '\xbf' ->
 245 |                         if ofs >= 4 then
 246 |                           let ch4 = String.unsafe_get str (ofs - 4) in
 247 |                           match ch4 with
 248 |                             | '\xf0' .. '\xf7' ->
 249 |                                 (Uchar.of_int (((Char.code ch4 land 0x07) lsl 18) lor ((Char.code ch3 land 0x3f) lsl 12) lor ((Char.code ch2 land 0x3f) lsl 6) lor (Char.code ch1 land 0x3f)), ofs - 4)
 250 |                             | _ ->
 251 |                                 fail str (ofs - 4) "invalid start of UTF-8 sequence"
 252 |                         else
 253 |                           fail str (ofs - 3) "invalid start of UTF-8 string"
 254 |                     | _ ->
 255 |                         fail str (ofs - 3) "invalid middle of UTF-8 sequence"
 256 |                 else
 257 |                   fail str (ofs - 2) "invaild start of UTF-8 string"
 258 |             | _ ->
 259 |                 fail str (ofs - 2) "invalid middle of UTF-8 sequence"
 260 |         else
 261 |           fail str (ofs - 1) "invalid start of UTF-8 string"
 262 |     | _ ->
 263 |         fail str (ofs - 1) "invalid end of UTF-8 sequence"
 264 | 
 265 | let rec move_l str ofs len =
 266 |   if len = 0 then
 267 |     ofs
 268 |   else if ofs = String.length str then
 269 |     raise Out_of_bounds
 270 |   else
 271 |     move_l str (unsafe_next str ofs) (len - 1)
 272 | 
 273 | let unsafe_sub str ofs len =
 274 |   let res = Bytes.create len in
 275 |   String.unsafe_blit str ofs res 0 len;
 276 |   Bytes.unsafe_to_string res
 277 | 
 278 | (* +-----------------------------------------------------------------+
 279 |    | Construction                                                    |
 280 |    +-----------------------------------------------------------------+ *)
 281 | 
 282 | let singleton char =
 283 |   let code = Uchar.to_int char in
 284 |   Bytes.unsafe_to_string @@
 285 |   if code < 0x80 then begin
 286 |     let s = Bytes.create 1 in
 287 |     set_byte s 0 code;
 288 |     s
 289 |   end else if code <= 0x800 then begin
 290 |     let s = Bytes.create 2 in
 291 |     set_byte s 0 ((code lsr 6) lor 0xc0);
 292 |     set_byte s 1 ((code land 0x3f) lor 0x80);
 293 |     s
 294 |   end else if code <= 0x10000 then begin
 295 |     let s = Bytes.create 3 in
 296 |     set_byte s 0 ((code lsr 12) lor 0xe0);
 297 |     set_byte s 1 (((code lsr 6) land 0x3f) lor 0x80);
 298 |     set_byte s 2 ((code land 0x3f) lor 0x80);
 299 |     s
 300 |   end else if code <= 0x10ffff then begin
 301 |     let s = Bytes.create 4 in
 302 |     set_byte s 0 ((code lsr 18) lor 0xf0);
 303 |     set_byte s 1 (((code lsr 12) land 0x3f) lor 0x80);
 304 |     set_byte s 2 (((code lsr 6) land 0x3f) lor 0x80);
 305 |     set_byte s 3 ((code land 0x3f) lor 0x80);
 306 |     s
 307 |   end else
 308 |     (* Camomile allow characters with code-point greater than
 309 |        0x10ffff *)
 310 |     invalid_arg "Zed_utf8.singleton"
 311 | 
 312 | let make n code =
 313 |   let str = singleton code in
 314 |   let len = String.length str in
 315 |   let res = Bytes.create (n * len) in
 316 |   let ofs = ref 0 in
 317 |   for _ = 1 to n do
 318 |     String.unsafe_blit str 0 res !ofs len;
 319 |     ofs := !ofs + len
 320 |   done;
 321 |   Bytes.unsafe_to_string res
 322 | 
 323 | let init n f =
 324 |   let buf = Buffer.create n in
 325 |   for i = 0 to n - 1 do
 326 |     Buffer.add_string buf (singleton (f i))
 327 |   done;
 328 |   Buffer.contents buf
 329 | 
 330 | let rev_init n f =
 331 |   let buf = Buffer.create n in
 332 |   for i = n - 1 downto 0 do
 333 |     Buffer.add_string buf (singleton (f i))
 334 |   done;
 335 |   Buffer.contents buf
 336 | 
 337 | (* +-----------------------------------------------------------------+
 338 |    | Informations                                                    |
 339 |    +-----------------------------------------------------------------+ *)
 340 | 
 341 | let rec length_rec str ofs len =
 342 |   if ofs = String.length str then
 343 |     len
 344 |   else
 345 |     length_rec str (unsafe_next str ofs) (len + 1)
 346 | 
 347 | let length str =
 348 |   length_rec str 0 0
 349 | 
 350 | (* +-----------------------------------------------------------------+
 351 |    | Comparison                                                      |
 352 |    +-----------------------------------------------------------------+ *)
 353 | 
 354 | let rec compare_rec str1 ofs1 str2 ofs2 =
 355 |   if ofs1 = String.length str1 then
 356 |     if ofs2 = String.length str2 then
 357 |       0
 358 |     else
 359 |       -1
 360 |   else if ofs2 = String.length str2 then
 361 |     1
 362 |   else
 363 |     let code1, ofs1 = unsafe_extract_next str1 ofs1
 364 |     and code2, ofs2 = unsafe_extract_next str2 ofs2 in
 365 |     let d = Uchar.to_int code1 - Uchar.to_int code2 in
 366 |     if d <> 0 then
 367 |       d
 368 |     else
 369 |       compare_rec str1 ofs1 str2 ofs2
 370 | 
 371 | let compare str1 str2 =
 372 |   compare_rec str1 0 str2 0
 373 | 
 374 | (* +-----------------------------------------------------------------+
 375 |    | Random access                                                   |
 376 |    +-----------------------------------------------------------------+ *)
 377 | 
 378 | let get str idx =
 379 |   if idx < 0 then
 380 |     raise Out_of_bounds
 381 |   else
 382 |     unsafe_extract str (move_l str 0 idx)
 383 | 
 384 | (* +-----------------------------------------------------------------+
 385 |    | Manipulation                                                    |
 386 |    +-----------------------------------------------------------------+ *)
 387 | 
 388 | let sub str idx len =
 389 |   if idx < 0 || len < 0 then
 390 |     raise Out_of_bounds
 391 |   else
 392 |     let ofs1 = move_l str 0 idx in
 393 |     let ofs2 = move_l str ofs1 len in
 394 |     unsafe_sub str ofs1 (ofs2 - ofs1)
 395 | 
 396 | let break str idx =
 397 |   if idx < 0 then
 398 |     raise Out_of_bounds
 399 |   else
 400 |     let ofs = move_l str 0 idx in
 401 |     (unsafe_sub str 0 ofs, unsafe_sub str ofs (String.length str - ofs))
 402 | 
 403 | let before str idx =
 404 |   if idx < 0 then
 405 |     raise Out_of_bounds
 406 |   else
 407 |     let ofs = move_l str 0 idx in
 408 |     unsafe_sub str 0 ofs
 409 | 
 410 | let after str idx =
 411 |   if idx < 0 then
 412 |     raise Out_of_bounds
 413 |   else
 414 |     let ofs = move_l str 0 idx in
 415 |     unsafe_sub str ofs (String.length str - ofs)
 416 | 
 417 | let concat3 a b c =
 418 |   let lena = String.length a
 419 |   and lenb = String.length b
 420 |   and lenc = String.length c in
 421 |   let res = Bytes.create (lena + lenb + lenc) in
 422 |   String.unsafe_blit a 0 res 0 lena;
 423 |   String.unsafe_blit b 0 res lena lenb;
 424 |   String.unsafe_blit c 0 res (lena + lenb) lenc;
 425 |   Bytes.unsafe_to_string res
 426 | 
 427 | let insert str idx sub =
 428 |   let a, b = break str idx in
 429 |   concat3 a sub b
 430 | 
 431 | let remove str idx len =
 432 |   if idx < 0 || len < 0 then
 433 |     raise Out_of_bounds
 434 |   else
 435 |     let ofs1 = move_l str 0 idx in
 436 |     let ofs2 = move_l str ofs1 len in
 437 |     unsafe_sub str 0 ofs1 ^ unsafe_sub str ofs2 (String.length str - ofs2)
 438 | 
 439 | let replace str idx len repl =
 440 |   if idx < 0 || len < 0 then
 441 |     raise Out_of_bounds
 442 |   else
 443 |     let ofs1 = move_l str 0 idx in
 444 |     let ofs2 = move_l str ofs1 len in
 445 |     concat3 (unsafe_sub str 0 ofs1) repl (unsafe_sub str ofs2 (String.length str - ofs2))
 446 | 
 447 | (* +-----------------------------------------------------------------+
 448 |    | Exploding and imploding                                         |
 449 |    +-----------------------------------------------------------------+ *)
 450 | 
 451 | let rec rev_rec (res : Bytes.t) str ofs_src ofs_dst =
 452 |   if ofs_src = String.length str then
 453 |     Bytes.unsafe_to_string res
 454 |   else begin
 455 |     let ofs_src' = unsafe_next str ofs_src in
 456 |     let len = ofs_src' - ofs_src in
 457 |     let ofs_dst = ofs_dst - len in
 458 |     String.unsafe_blit str ofs_src res ofs_dst len;
 459 |     rev_rec res str ofs_src' ofs_dst
 460 |   end
 461 | 
 462 | let rev str =
 463 |   let len = String.length str in
 464 |   rev_rec (Bytes.create len) str 0 len
 465 | 
 466 | let concat sep l =
 467 |   match l with
 468 |     | [] ->
 469 |         ""
 470 |     | x :: l ->
 471 |         let sep_len = String.length sep in
 472 |         let len = List.fold_left (fun len str -> len + sep_len + String.length str) (String.length x) l in
 473 |         let res = Bytes.create len in
 474 |         String.unsafe_blit x 0 res 0 (String.length x);
 475 |         ignore
 476 |           (List.fold_left
 477 |              (fun ofs str ->
 478 |                 String.unsafe_blit sep 0 res ofs sep_len;
 479 |                 let ofs = ofs + sep_len in
 480 |                 let len = String.length str in
 481 |                 String.unsafe_blit str 0 res ofs len;
 482 |                 ofs + len)
 483 |              (String.length x) l);
 484 |         Bytes.unsafe_to_string res
 485 | 
 486 | let rev_concat sep l =
 487 |   match l with
 488 |     | [] ->
 489 |         ""
 490 |     | x :: l ->
 491 |         let sep_len = String.length sep in
 492 |         let len = List.fold_left (fun len str -> len + sep_len + String.length str) (String.length x) l in
 493 |         let res = Bytes.create len in
 494 |         let ofs = len - String.length x in
 495 |         String.unsafe_blit x 0 res ofs (String.length x);
 496 |         ignore
 497 |           (List.fold_left
 498 |              (fun ofs str ->
 499 |                 let ofs = ofs - sep_len in
 500 |                 String.unsafe_blit sep 0 res ofs sep_len;
 501 |                 let len = String.length str in
 502 |                 let ofs = ofs - len in
 503 |                 String.unsafe_blit str 0 res ofs len;
 504 |                 ofs)
 505 |              ofs l);
 506 |         Bytes.unsafe_to_string res
 507 | 
 508 | let rec explode_rec str ofs acc =
 509 |   if ofs = 0 then
 510 |     acc
 511 |   else
 512 |     let x, ofs = unsafe_extract_prev str ofs in
 513 |     explode_rec str ofs (x :: acc)
 514 | 
 515 | let explode str =
 516 |   explode_rec str (String.length str) []
 517 | 
 518 | let rec rev_explode_rec str ofs acc =
 519 |   if ofs = String.length str then
 520 |     acc
 521 |   else
 522 |     let x, ofs = unsafe_extract_next str ofs in
 523 |     rev_explode_rec str ofs (x :: acc)
 524 | 
 525 | let rev_explode str =
 526 |   rev_explode_rec str 0 []
 527 | 
 528 | let implode l =
 529 |   let l = List.map singleton l in
 530 |   let len = List.fold_left (fun len str -> len + String.length str) 0 l in
 531 |   let res = Bytes.create len in
 532 |   ignore
 533 |     (List.fold_left
 534 |        (fun ofs str ->
 535 |           let len = String.length str in
 536 |           String.unsafe_blit str 0 res ofs len;
 537 |           ofs + len)
 538 |        0 l);
 539 |   Bytes.unsafe_to_string res
 540 | 
 541 | let rev_implode l =
 542 |   let l = List.map singleton l in
 543 |   let len = List.fold_left (fun len str -> len + String.length str) 0 l in
 544 |   let res = Bytes.create len in
 545 |   ignore
 546 |     (List.fold_left
 547 |        (fun ofs str ->
 548 |           let len = String.length str in
 549 |           let ofs = ofs - len in
 550 |           String.unsafe_blit str 0 res ofs len;
 551 |           ofs)
 552 |        len l);
 553 |   Bytes.unsafe_to_string res
 554 | 
 555 | (* +-----------------------------------------------------------------+
 556 |    | Text transversal                                                |
 557 |    +-----------------------------------------------------------------+ *)
 558 | 
 559 | let rec iter_rec f str ofs =
 560 |   if ofs = String.length str then
 561 |     ()
 562 |   else begin
 563 |     let chr, ofs = unsafe_extract_next str ofs in
 564 |     f chr;
 565 |     iter_rec f str ofs
 566 |   end
 567 | 
 568 | let iter f str =
 569 |   iter_rec f str 0
 570 | 
 571 | let rec rev_iter_rec f str ofs =
 572 |   if ofs = 0 then
 573 |     ()
 574 |   else begin
 575 |     let chr, ofs = unsafe_extract_prev str ofs in
 576 |     f chr;
 577 |     rev_iter_rec f str ofs
 578 |   end
 579 | 
 580 | let rev_iter f str =
 581 |   rev_iter_rec f str (String.length str)
 582 | 
 583 | let rec fold_rec f str ofs acc =
 584 |   if ofs = String.length str then
 585 |     acc
 586 |   else begin
 587 |     let chr, ofs = unsafe_extract_next str ofs in
 588 |     fold_rec f str ofs (f chr acc)
 589 |   end
 590 | 
 591 | let fold f str acc =
 592 |   fold_rec f str 0 acc
 593 | 
 594 | let rec rev_fold_rec f str ofs acc =
 595 |   if ofs = 0 then
 596 |     acc
 597 |   else begin
 598 |     let chr, ofs = unsafe_extract_prev str ofs in
 599 |     rev_fold_rec f str ofs (f chr acc)
 600 |   end
 601 | 
 602 | let rev_fold f str acc =
 603 |   rev_fold_rec f str (String.length str) acc
 604 | 
 605 | let rec map_rec buf f str ofs =
 606 |   if ofs = String.length str then
 607 |     Buffer.contents buf
 608 |   else begin
 609 |     let chr, ofs = unsafe_extract_next str ofs in
 610 |     Buffer.add_string buf (singleton (f chr));
 611 |     map_rec buf f str ofs
 612 |   end
 613 | 
 614 | let map f str =
 615 |   map_rec (Buffer.create (String.length str)) f str 0
 616 | 
 617 | let rec map_concat_rec buf f str ofs =
 618 |   if ofs = String.length str then
 619 |     Buffer.contents buf
 620 |   else begin
 621 |     let chr, ofs = unsafe_extract_next str ofs in
 622 |     Buffer.add_string buf (f chr);
 623 |     map_concat_rec buf f str ofs
 624 |   end
 625 | 
 626 | let map_concat f str =
 627 |   map_concat_rec (Buffer.create (String.length str)) f str 0
 628 | 
 629 | let rec rev_map_rec buf f str ofs =
 630 |   if ofs = 0 then
 631 |     Buffer.contents buf
 632 |   else begin
 633 |     let chr, ofs = unsafe_extract_prev str ofs in
 634 |     Buffer.add_string buf (singleton (f chr));
 635 |     rev_map_rec buf f str ofs
 636 |   end
 637 | 
 638 | let rev_map f str =
 639 |   rev_map_rec (Buffer.create (String.length str)) f str (String.length str)
 640 | 
 641 | let rec rev_map_concat_rec buf f str ofs =
 642 |   if ofs = 0 then
 643 |     Buffer.contents buf
 644 |   else begin
 645 |     let chr, ofs = unsafe_extract_prev str ofs in
 646 |     Buffer.add_string buf (f chr);
 647 |     rev_map_concat_rec buf f str ofs
 648 |   end
 649 | 
 650 | let rev_map_concat f str =
 651 |   rev_map_concat_rec (Buffer.create (String.length str)) f str (String.length str)
 652 | 
 653 | let rec filter_rec buf f str ofs =
 654 |   if ofs = String.length str then
 655 |     Buffer.contents buf
 656 |   else begin
 657 |     let chr, ofs = unsafe_extract_next str ofs in
 658 |     if f chr then
 659 |       Buffer.add_string buf (singleton chr);
 660 |     filter_rec buf f str ofs
 661 |   end
 662 | 
 663 | let filter f str =
 664 |   filter_rec (Buffer.create (String.length str)) f str 0
 665 | 
 666 | let rec rev_filter_rec buf f str ofs =
 667 |   if ofs = 0 then
 668 |     Buffer.contents buf
 669 |   else begin
 670 |     let chr, ofs = unsafe_extract_prev str ofs in
 671 |     if f chr then
 672 |       Buffer.add_string buf (singleton chr);
 673 |     rev_filter_rec buf f str ofs
 674 |   end
 675 | 
 676 | let rev_filter f str =
 677 |   rev_filter_rec (Buffer.create (String.length str)) f str (String.length str)
 678 | 
 679 | let rec filter_map_rec buf f str ofs =
 680 |   if ofs = String.length str then
 681 |     Buffer.contents buf
 682 |   else begin
 683 |     let chr, ofs = unsafe_extract_next str ofs in
 684 |     (match f chr with
 685 |        | Some chr ->
 686 |            Buffer.add_string buf (singleton chr)
 687 |        | None ->
 688 |            ());
 689 |     filter_map_rec buf f str ofs
 690 |   end
 691 | 
 692 | let filter_map f str =
 693 |   filter_map_rec (Buffer.create (String.length str)) f str 0
 694 | 
 695 | let rec filter_map_concat_rec buf f str ofs =
 696 |   if ofs = String.length str then
 697 |     Buffer.contents buf
 698 |   else begin
 699 |     let chr, ofs = unsafe_extract_next str ofs in
 700 |     (match f chr with
 701 |        | Some txt ->
 702 |            Buffer.add_string buf txt
 703 |        | None ->
 704 |            ());
 705 |     filter_map_concat_rec buf f str ofs
 706 |   end
 707 | 
 708 | let filter_map_concat f str =
 709 |   filter_map_concat_rec (Buffer.create (String.length str)) f str 0
 710 | 
 711 | let rec rev_filter_map_rec buf f str ofs =
 712 |   if ofs = 0 then
 713 |     Buffer.contents buf
 714 |   else begin
 715 |     let chr, ofs = unsafe_extract_prev str ofs in
 716 |     (match f chr with
 717 |        | Some chr ->
 718 |            Buffer.add_string buf (singleton chr)
 719 |        | None ->
 720 |            ());
 721 |     rev_filter_map_rec buf f str ofs
 722 |   end
 723 | 
 724 | let rev_filter_map f str =
 725 |   rev_filter_map_rec (Buffer.create (String.length str)) f str (String.length str)
 726 | 
 727 | let rec rev_filter_map_concat_rec buf f str ofs =
 728 |   if ofs = 0 then
 729 |     Buffer.contents buf
 730 |   else begin
 731 |     let chr, ofs = unsafe_extract_prev str ofs in
 732 |     (match f chr with
 733 |        | Some txt ->
 734 |            Buffer.add_string buf txt
 735 |        | None ->
 736 |            ());
 737 |     rev_filter_map_concat_rec buf f str ofs
 738 |   end
 739 | 
 740 | let rev_filter_map_concat f str =
 741 |   rev_filter_map_concat_rec (Buffer.create (String.length str)) f str (String.length str)
 742 | 
 743 | (* +-----------------------------------------------------------------+
 744 |    | Scanning                                                        |
 745 |    +-----------------------------------------------------------------+ *)
 746 | 
 747 | let rec for_all_rec f str ofs =
 748 |   if ofs = String.length str then
 749 |     true
 750 |   else
 751 |     let chr, ofs = unsafe_extract_next str ofs in
 752 |     f chr && for_all_rec f str ofs
 753 | 
 754 | let for_all f str =
 755 |   for_all_rec f str 0
 756 | 
 757 | let rec exists_rec f str ofs =
 758 |   if ofs = String.length str then
 759 |     false
 760 |   else
 761 |     let chr, ofs = unsafe_extract_next str ofs in
 762 |     f chr || exists_rec f str ofs
 763 | 
 764 | let exists f str =
 765 |   exists_rec f str 0
 766 | 
 767 | let rec count_rec f str ofs n =
 768 |   if ofs = String.length str then
 769 |     n
 770 |   else
 771 |     let chr, ofs = unsafe_extract_next str ofs in
 772 |     count_rec f str ofs (if f chr then n + 1 else n)
 773 | 
 774 | let count f str =
 775 |   count_rec f str 0 0
 776 | 
 777 | (* +-----------------------------------------------------------------+
 778 |    | Tests                                                           |
 779 |    +-----------------------------------------------------------------+ *)
 780 | 
 781 | let rec unsafe_sub_equal str ofs sub ofs_sub =
 782 |   if ofs_sub = String.length sub then
 783 |     true
 784 |   else
 785 |     (String.unsafe_get str ofs = String.unsafe_get sub ofs_sub)
 786 |     && unsafe_sub_equal str (ofs + 1) sub (ofs_sub + 1)
 787 | 
 788 | let rec contains_rec str sub ofs =
 789 |   if ofs + String.length sub > String.length str then
 790 |     false
 791 |   else
 792 |     unsafe_sub_equal str ofs sub 0 || contains_rec str sub (unsafe_next str ofs)
 793 | 
 794 | let contains str sub =
 795 |   contains_rec str sub 0
 796 | 
 797 | let starts_with str prefix =
 798 |   if String.length prefix > String.length str then
 799 |     false
 800 |   else
 801 |     unsafe_sub_equal str 0 prefix 0
 802 | 
 803 | let ends_with str suffix =
 804 |   let ofs = String.length str - String.length suffix in
 805 |   if ofs < 0 then
 806 |     false
 807 |   else
 808 |     unsafe_sub_equal str ofs suffix 0
 809 | 
 810 | (* +-----------------------------------------------------------------+
 811 |    | Stripping                                                       |
 812 |    +-----------------------------------------------------------------+ *)
 813 | 
 814 | let rec lfind predicate str ofs =
 815 |   if ofs = String.length str then
 816 |     ofs
 817 |   else
 818 |     let chr, ofs' = unsafe_extract_next str ofs in
 819 |     if predicate chr then
 820 |       lfind predicate str ofs'
 821 |     else
 822 |       ofs
 823 | 
 824 | let rec rfind predicate str ofs =
 825 |   if ofs = 0 then
 826 |     0
 827 |   else
 828 |     let chr, ofs' = unsafe_extract_prev str ofs in
 829 |     if predicate chr then
 830 |       rfind predicate str ofs'
 831 |     else
 832 |       ofs
 833 | 
 834 | let strip ?(predicate=Uucp.White.is_white_space) str =
 835 |   let lofs = lfind predicate str 0 and rofs = rfind predicate str (String.length str) in
 836 |   if lofs < rofs then
 837 |     unsafe_sub str lofs (rofs - lofs)
 838 |   else
 839 |     ""
 840 | 
 841 | let lstrip ?(predicate=Uucp.White.is_white_space) str =
 842 |   let lofs = lfind predicate str 0 in
 843 |   unsafe_sub str lofs (String.length str - lofs)
 844 | 
 845 | let rstrip ?(predicate=Uucp.White.is_white_space) str =
 846 |   let rofs = rfind predicate str (String.length str) in
 847 |   unsafe_sub str 0 rofs
 848 | 
 849 | let lchop = function
 850 |   | "" ->
 851 |       ""
 852 |   | str ->
 853 |       let ofs = unsafe_next str 0 in
 854 |       unsafe_sub str ofs (String.length str - ofs)
 855 | 
 856 | let rchop = function
 857 |   | "" ->
 858 |       ""
 859 |   | str ->
 860 |       let ofs = unsafe_prev str (String.length str) in
 861 |       unsafe_sub str 0 ofs
 862 | 
 863 | (* +-----------------------------------------------------------------+
 864 |    | Buffers                                                         |
 865 |    +-----------------------------------------------------------------+ *)
 866 | 
 867 | let add buf char =
 868 |   let code = Uchar.to_int char in
 869 |   if code < 0x80 then
 870 |     Buffer.add_char buf (Char.unsafe_chr code)
 871 |   else if code <= 0x800 then begin
 872 |     Buffer.add_char buf (Char.unsafe_chr ((code lsr 6) lor 0xc0));
 873 |     Buffer.add_char buf (Char.unsafe_chr ((code land 0x3f) lor 0x80))
 874 |   end else if code <= 0x10000 then begin
 875 |     Buffer.add_char buf (Char.unsafe_chr ((code lsr 12) lor 0xe0));
 876 |     Buffer.add_char buf (Char.unsafe_chr (((code lsr 6) land 0x3f) lor 0x80));
 877 |     Buffer.add_char buf (Char.unsafe_chr ((code land 0x3f) lor 0x80))
 878 |   end else if code <= 0x10ffff then begin
 879 |     Buffer.add_char buf (Char.unsafe_chr ((code lsr 18) lor 0xf0));
 880 |     Buffer.add_char buf (Char.unsafe_chr (((code lsr 12) land 0x3f) lor 0x80));
 881 |     Buffer.add_char buf (Char.unsafe_chr (((code lsr 6) land 0x3f) lor 0x80));
 882 |     Buffer.add_char buf (Char.unsafe_chr ((code land 0x3f) lor 0x80))
 883 |   end else
 884 |     invalid_arg "Zed_utf8.add"
 885 | 
 886 | (* +-----------------------------------------------------------------+
 887 |    | Offset API                                                      |
 888 |    +-----------------------------------------------------------------+ *)
 889 | 
 890 | let extract str ofs =
 891 |   if ofs < 0 || ofs >= String.length str then
 892 |     raise Out_of_bounds
 893 |   else
 894 |     unsafe_extract str ofs
 895 | 
 896 | let next str ofs =
 897 |   if ofs < 0 || ofs >= String.length str then
 898 |     raise Out_of_bounds
 899 |   else
 900 |     unsafe_next str ofs
 901 | 
 902 | let extract_next str ofs =
 903 |   if ofs < 0 || ofs >= String.length str then
 904 |     raise Out_of_bounds
 905 |   else
 906 |     unsafe_extract_next str ofs
 907 | 
 908 | let prev str ofs =
 909 |   if ofs <= 0 || ofs > String.length str then
 910 |     raise Out_of_bounds
 911 |   else
 912 |     unsafe_prev str ofs
 913 | 
 914 | let extract_prev str ofs =
 915 |   if ofs <= 0 || ofs > String.length str then
 916 |     raise Out_of_bounds
 917 |   else
 918 |     unsafe_extract_prev str ofs
 919 | 
 920 | (* +-----------------------------------------------------------------+
 921 |    | Escaping                                                        |
 922 |    +-----------------------------------------------------------------+ *)
 923 | 
 924 | let escaped_char ch =
 925 |   match Uchar.to_int ch with
 926 |     | 7 ->
 927 |         "\\a"
 928 |     | 8 ->
 929 |         "\\b"
 930 |     | 9 ->
 931 |         "\\t"
 932 |     | 10 ->
 933 |         "\\n"
 934 |     | 11 ->
 935 |         "\\v"
 936 |     | 12 ->
 937 |         "\\f"
 938 |     | 13 ->
 939 |         "\\r"
 940 |     | 27 ->
 941 |         "\\e"
 942 |     | 92 ->
 943 |         "\\\\"
 944 |     | code when code >= 32 && code <= 126 ->
 945 |         String.make 1 (Char.chr code)
 946 |     | _ when Uucp.Alpha.is_alphabetic ch ->
 947 |         singleton ch
 948 |     | code when code <= 127 ->
 949 |         Printf.sprintf "\\x%02x" code
 950 |     | code when code <= 0xffff ->
 951 |         Printf.sprintf "\\u%04x" code
 952 |     | code ->
 953 |         Printf.sprintf "\\U%06x" code
 954 | 
 955 | let add_escaped_char buf ch =
 956 |   match Uchar.to_int ch with
 957 |     | 7 ->
 958 |         Buffer.add_string buf "\\a"
 959 |     | 8 ->
 960 |         Buffer.add_string buf "\\b"
 961 |     | 9 ->
 962 |         Buffer.add_string buf "\\t"
 963 |     | 10 ->
 964 |         Buffer.add_string buf "\\n"
 965 |     | 11 ->
 966 |         Buffer.add_string buf "\\v"
 967 |     | 12 ->
 968 |         Buffer.add_string buf "\\f"
 969 |     | 13 ->
 970 |         Buffer.add_string buf "\\r"
 971 |     | 27 ->
 972 |         Buffer.add_string buf "\\e"
 973 |     | 92 ->
 974 |         Buffer.add_string buf "\\\\"
 975 |     | code when code >= 32 && code <= 126 ->
 976 |         Buffer.add_char buf (Char.chr code)
 977 |     | _ when Uucp.Alpha.is_alphabetic ch ->
 978 |         add buf ch
 979 |     | code when code <= 127 ->
 980 |         Printf.bprintf buf "\\x%02x" code
 981 |     | code when code <= 0xffff ->
 982 |         Printf.bprintf buf "\\u%04x" code
 983 |     | code ->
 984 |         Printf.bprintf buf "\\U%06x" code
 985 | 
 986 | let escaped str =
 987 |   let buf = Buffer.create (String.length str) in
 988 |   iter (add_escaped_char buf) str;
 989 |   Buffer.contents buf
 990 | 
 991 | let add_escaped buf str =
 992 |   iter (add_escaped_char buf) str
 993 | 
 994 | let add_escaped_string buf encoding str =
 995 |   let b = Buffer.create (String.length str) in
 996 |   let d = Uutf.decoder ~encoding (`String str) in
 997 |   let rec loop () =
 998 |     match Uutf.decode d with
 999 |     | `Uchar u -> ignore (Uutf.Buffer.add_utf_8 b u); loop ()
1000 |     | `End -> add_escaped buf (Buffer.contents b)
1001 |     | `Malformed _ ->
1002 |         String.iter
1003 |           (function
1004 |              | '\x20' .. '\x7e' as ch ->
1005 |                  Buffer.add_char buf ch
1006 |              | ch ->
1007 |                  Printf.bprintf buf "\\y%02x" (Char.code ch))
1008 |           str
1009 |     | `Await -> assert false
1010 |   in
1011 |   loop ()
1012 | 
1013 | let escaped_string enc str =
1014 |   let buf = Buffer.create (String.length str) in
1015 |   add_escaped_string buf enc str;
1016 |   Buffer.contents buf
1017 | 


--------------------------------------------------------------------------------
/src/zed_utf8.mli:
--------------------------------------------------------------------------------
  1 | (*
  2 |  * zed_utf8.mli
  3 |  * ------------
  4 |  * Copyright : (c) 2011, Jeremie Dimino <jeremie@dimino.org>
  5 |  * Licence   : BSD3
  6 |  *
  7 |  * This file is a part of Zed, an editor engine.
  8 |  *)
  9 | 
 10 | (** UTF-8 enoded strings *)
 11 | 
 12 | type t = string
 13 |     (** Type of UTF-8 encoded strings. *)
 14 | 
 15 | exception Invalid of string * string
 16 |   (** [Invalid(error, text)] Exception raised when an invalid UTF-8
 17 |       encoded string is encountered. [text] is the faulty text and
 18 |       [error] is a description of the first error in [text]. *)
 19 | 
 20 | exception Out_of_bounds
 21 |   (** Exception raised when trying to access a character which is
 22 |       outside the bounds of a string. *)
 23 | 
 24 | (** {5 Validation} *)
 25 | 
 26 | (** Result of cheking a string for correct UTF-8. *)
 27 | type check_result =
 28 |   | Correct of int
 29 |       (** The string is correctly UTF-8 encoded, and the paramter is
 30 |           the length of the string. *)
 31 |   | Message of string
 32 |       (** The string is invalid and the parameter is an error
 33 |           message. *)
 34 | 
 35 | val check : t -> check_result
 36 |   (** [check str] checks that [str] is a valid UTF-8 encoded
 37 |       string. *)
 38 | 
 39 | val validate : t -> int
 40 |   (** Same as check but raises an exception in case the argument is
 41 |       not a valid text, otherwise returns the length of the string. *)
 42 | 
 43 | val next_error : t -> int -> int * int * string
 44 |   (** [next_error str ofs] returns [(ofs', count, msg)] where [ofs']
 45 |       is the offset of the start of the first invalid sequence after
 46 |       [ofs] (inclusive) in [str], [count] is the number of unicode
 47 |       character between [ofs] and [ofs'] (exclusive) and [msg] is an
 48 |       error message. If there is no error until the end of string then
 49 |       [ofs] is [String.length str] and [msg] is the empty string.
 50 | 
 51 |     @raise [Zed_utf8.Invalid] if [str] is not a correct UTF8 sequence.
 52 |     @raise [Zed_utf8.Out_of_bounds] if [ofs] not an index of [str]. *)
 53 | 
 54 | (** {5 Construction} *)
 55 | 
 56 | val singleton : Uchar.t -> t
 57 |   (** [singleton ch] creates a string of length 1 containing only the
 58 |       given character. *)
 59 | 
 60 | val make : int -> Uchar.t -> t
 61 |   (** [make n ch] creates a string of length [n] filled with [ch]. *)
 62 | 
 63 | val init : int -> (int -> Uchar.t) -> t
 64 |   (** [init n f] returns the contenation of [singleton (f 0)],
 65 |       [singleton (f 1)], ..., [singleton (f (n - 1))]. *)
 66 | 
 67 | val rev_init : int -> (int -> Uchar.t) -> t
 68 |   (** [rev_init n f] returns the contenation of [singleton (f (n -
 69 |       1))], ..., [singleton (f 1)], [singleton (f 0)]. *)
 70 | 
 71 | (** {5 Informations} *)
 72 | 
 73 | val length : t -> int
 74 |   (** Returns the length of the given string. *)
 75 | 
 76 | (** {5 Comparison} *)
 77 | 
 78 | val compare : t -> t -> int
 79 |   (** Compares two strings (in code point order). *)
 80 | 
 81 | (** {5 Random access} *)
 82 | 
 83 | val get : t -> int -> Uchar.t
 84 |   (** [get str idx] returns the character at index [idx] in
 85 |       [str]. *)
 86 | 
 87 | (** {5 String manipulation} *)
 88 | 
 89 | val sub : t -> int -> int -> t
 90 |   (** [sub str ofs len] Returns the sub-string of [str] starting at
 91 |       [ofs] and of length [len]. *)
 92 | 
 93 | val break : t -> int -> t * t
 94 |   (** [break str pos] returns the sub-strings before and after [pos]
 95 |       in [str]. It is more efficient than creating two sub-strings
 96 |       with {!sub}. *)
 97 | 
 98 | val before : t -> int -> t
 99 |   (** [before str pos] returns the sub-string before [pos] in [str] *)
100 | 
101 | val after : t -> int -> t
102 |   (** [after str pos] returns the sub-string after [pos] in [str] *)
103 | 
104 | val insert : t -> int -> t -> t
105 |   (** [insert str pos sub] inserts [sub] in [str] at position
106 |       [pos]. *)
107 | 
108 | val remove : t -> int -> int -> t
109 |   (** [remove str pos len] removes the [len] characters at position
110 |       [pos] in [str] *)
111 | 
112 | val replace : t -> int -> int -> t -> t
113 |   (** [replace str pos len repl] replaces the [len] characters at
114 |       position [pos] in [str] by [repl]. *)
115 | 
116 | (** {5 Tranformation} *)
117 | 
118 | val rev : t -> t
119 |   (** [rev str] reverses all characters of [str]. *)
120 | 
121 | val concat : t -> t list -> t
122 |   (** [concat sep l] returns the concatenation of all strings of [l]
123 |       separated by [sep]. *)
124 | 
125 | val rev_concat : t -> t list -> t
126 |   (** [concat sep l] returns the concatenation of all strings of [l]
127 |       in reverse order separated by [sep]. *)
128 | 
129 | val explode : t -> Uchar.t list
130 |   (** [explode str] returns the list of all characters of [str]. *)
131 | 
132 | val rev_explode : t -> Uchar.t list
133 |   (** [rev_explode str] returns the list of all characters of [str] in
134 |       reverse order. *)
135 | 
136 | val implode : Uchar.t list -> t
137 |   (** [implode l] returns the concatenation of all characters of [l]. *)
138 | 
139 | val rev_implode : Uchar.t list -> t
140 |   (** [rev_implode l] is the same as [implode (List.rev l)] but more
141 |       efficient. *)
142 | 
143 | (** {5 Text traversals} *)
144 | 
145 | val iter : (Uchar.t -> unit) -> t -> unit
146 |   (** [iter f str] applies [f] an all characters of [str] starting
147 |       from the left. *)
148 | 
149 | val rev_iter : (Uchar.t -> unit) -> t -> unit
150 |   (** [rev_iter f str] applies [f] an all characters of [str] starting
151 |       from the right. *)
152 | 
153 | val fold : (Uchar.t -> 'a -> 'a) -> t -> 'a -> 'a
154 |   (** [fold f str acc] applies [f] on all characters of [str]
155 |       starting from the left, accumulating a value. *)
156 | 
157 | val rev_fold : (Uchar.t -> 'a -> 'a) -> t -> 'a -> 'a
158 |   (** [rev_fold f str acc] applies [f] on all characters of [str]
159 |       starting from the right, accumulating a value. *)
160 | 
161 | val map : (Uchar.t -> Uchar.t) -> t -> t
162 |   (** [map f str] maps all characters of [str] with [f]. *)
163 | 
164 | val rev_map : (Uchar.t -> Uchar.t) -> t -> t
165 |   (** [rev_map f str] maps all characters of [str] with [f] in reverse
166 |       order. *)
167 | 
168 | val map_concat : (Uchar.t -> t) -> t -> t
169 |   (** [map f str] maps all characters of [str] with [f] and
170 |       concatenate the result. *)
171 | 
172 | val rev_map_concat : (Uchar.t -> t) -> t -> t
173 |   (** [rev_map f str] maps all characters of [str] with [f] in reverse
174 |       order and concatenate the result. *)
175 | 
176 | val filter : (Uchar.t -> bool) -> t -> t
177 |   (** [filter f str] filters characters of [str] with [f]. *)
178 | 
179 | val rev_filter : (Uchar.t -> bool) -> t -> t
180 |   (** [rev_filter f str] filters characters of [str] with [f] in
181 |       reverse order. *)
182 | 
183 | val filter_map : (Uchar.t -> Uchar.t option) -> t -> t
184 |   (** [filter_map f str] filters and maps characters of [str] with
185 |       [f]. *)
186 | 
187 | val rev_filter_map : (Uchar.t -> Uchar.t option) -> t -> t
188 |   (** [rev_filter_map f str] filters and maps characters of [str] with
189 |       [f] in reverse order. *)
190 | 
191 | val filter_map_concat : (Uchar.t -> t option) -> t -> t
192 |   (** [filter_map f str] filters and maps characters of [str] with [f]
193 |       and concatenate the result. *)
194 | 
195 | val rev_filter_map_concat : (Uchar.t -> t option) -> t -> t
196 |   (** [rev_filter_map f str] filters and maps characters of [str] with
197 |       [f] in reverse order and concatenate the result. *)
198 | 
199 | (** {5 Scanning} *)
200 | 
201 | val for_all : (Uchar.t -> bool) -> t -> bool
202 |   (** [for_all f text] returns whether all characters of [text] verify
203 |       the predicate [f]. *)
204 | 
205 | val exists : (Uchar.t -> bool) -> t -> bool
206 |   (** [exists f text] returns whether at least one character of [text]
207 |       verify [f]. *)
208 | 
209 | val count : (Uchar.t -> bool) -> t -> int
210 |   (** [count f text] returhs the number of characters of [text]
211 |       verifying [f]. *)
212 | 
213 | (** {5 Tests} *)
214 | 
215 | val contains : t -> t -> bool
216 |   (** [contains text sub] returns whether [sub] appears in [text] *)
217 | 
218 | val starts_with : t -> t -> bool
219 |   (** [starts_with text prefix] returns [true] iff [s] starts with
220 |       [prefix]. *)
221 | 
222 | val ends_with : t -> t -> bool
223 |   (** [ends_with text suffix] returns [true] iff [s] ends with
224 |       [suffix]. *)
225 | 
226 | (** {5 Stripping} *)
227 | 
228 | val strip : ?predicate : (Uchar.t -> bool) -> t -> t
229 |   (** [strip ?predicate text] returns [text] without its firsts and
230 |       lasts characters that match [predicate]. [predicate] default to
231 |       testing whether the given character has the [`White_Space]
232 |       unicode property. For example:
233 | 
234 |       {[
235 |         strip "\n  foo\n  " = "foo"
236 |       ]}
237 |   *)
238 | 
239 | val lstrip : ?predicate : (Uchar.t -> bool) -> t -> t
240 |   (** [lstrip ?predicate text] is the same as {!strip} but it only
241 |       removes characters at the left of [text]. *)
242 | 
243 | val rstrip : ?predicate : (Uchar.t -> bool) -> t -> t
244 |   (** [lstrip ?predicate text] is the same as {!strip} but it only
245 |       removes characters at the right of [text]. *)
246 | 
247 | val lchop : t -> t
248 |   (** [lchop t] returns [t] without is first character. Returns [""]
249 |       if [t = ""] *)
250 | 
251 | val rchop : t -> t
252 |   (** [rchop t] returns [t] without is last character. Returns [""] if
253 |       [t = ""]. *)
254 | 
255 | (** {5 Buffers} *)
256 | 
257 | val add : Buffer.t -> Uchar.t -> unit
258 |   (** [add buf ch] is the same as [Buffer.add_string buf (singleton
259 |       ch)] but is more efficient. *)
260 | 
261 | (** {5 Escaping} *)
262 | 
263 | val escaped_char : Uchar.t -> t
264 |   (** [escaped_char ch] returns a string containg [ch] or an escaped
265 |       version of [ch] if:
266 |       - [ch] is a control character (code < 32)
267 |       - [ch] is the character with code 127
268 |       - [ch] is a non-ascii, non-alphabetic character
269 | 
270 |       It uses the syntax [\xXX], [\uXXXX], [\UXXXXXX] or a specific
271 |       escape sequence [\n, \r, ...]. *)
272 | 
273 | val add_escaped_char : Buffer.t -> Uchar.t -> unit
274 |   (** [add_escaped_char buf ch] is the same as [Buffer.add_string buf
275 |       (escaped_char ch)] but a bit more efficient. *)
276 | 
277 | val escaped : t -> t
278 |   (** [escaped text] escape all characters of [text] as with
279 |       [escape_char]. *)
280 | 
281 | val add_escaped : Buffer.t -> t -> unit
282 |   (** [add_escaped_char buf text] is the same as [Buffer.add_string
283 |       buf (escaped text)] but a bit more efficient. *)
284 | 
285 | val escaped_string : Uutf.encoding -> string -> t
286 |   (** [escaped_string enc str] escape the string [str] which is
287 |       encoded with encoding [enc]. If decoding [str] with [enc] fails,
288 |       it escape all non-printable bytes of [str] with the syntax
289 |       [\yAB]. *)
290 | 
291 | val add_escaped_string : Buffer.t -> Uutf.encoding -> string -> unit
292 |   (** [add_escaped_char buf enc text] is the same as
293 |       [Buffer.add_string buf (escaped_string enc text)] but a bit more
294 |       efficient. *)
295 | 
296 | (** {5 Safe offset API} *)
297 | 
298 | val next : t -> int -> int
299 |   (** [next str ofs] returns the offset of the next character in
300 |       [str]. *)
301 | 
302 | val prev : t -> int -> int
303 |   (** [prev str ofs] returns the offset of the previous character in
304 |       [str]. *)
305 | 
306 | val extract : t -> int -> Uchar.t
307 |   (** [extract str ofs] returns the code-point at offset [ofs] in
308 |       [str]. *)
309 | 
310 | val extract_next : t -> int -> Uchar.t * int
311 |   (** [extract_next str ofs] returns the code-point at offset [ofs] in
312 |       [str] and the offset of the next character. *)
313 | 
314 | val extract_prev : t -> int -> Uchar.t * int
315 |   (** [extract_prev str ofs] returns the code-point at the previous
316 |       offset in [str] and this offset. *)
317 | 
318 | (** {5 Unsafe offset API} *)
319 | 
320 | (** These functions does not check that the given offset is inside the
321 |     bounds of the given string. *)
322 | 
323 | val unsafe_next : t -> int -> int
324 |   (** [unsafe_next str ofs] returns the offset of the next character
325 |       in [str]. *)
326 | 
327 | val unsafe_prev : t -> int -> int
328 |   (** [unsafe_prev str ofs] returns the offset of the previous
329 |       character in [str]. *)
330 | 
331 | val unsafe_extract : t -> int -> Uchar.t
332 |   (** [unsafe_extract str ofs] returns the code-point at offset [ofs]
333 |       in [str]. *)
334 | 
335 | val unsafe_extract_next : t -> int -> Uchar.t * int
336 |   (** [unsafe_extract_next str ofs] returns the code-point at offset
337 |       [ofs] in [str] and the offset the next character. *)
338 | 
339 | val unsafe_extract_prev : t -> int -> Uchar.t * int
340 |   (** [unsafe_extract_prev str ofs] returns the code-point at the
341 |       previous offset in [str] and this offset. *)
342 | 


--------------------------------------------------------------------------------
/src/zed_utils.ml:
--------------------------------------------------------------------------------
 1 | (*
 2 |  * zed_utils.ml
 3 |  * -----------
 4 |  * Copyright : (c) 2019, ZAN DoYe <zandoye@gmail.com>
 5 |  * Licence   : BSD3
 6 |  *
 7 |  * This file is a part of Zed, an editor engine.
 8 |  *)
 9 | 
10 | 
11 | let array_rev a=
12 |   let len= Array.length a - 1 in
13 |   Array.init len (fun i-> a.(len-i))
14 | 
15 | let rec list_compare ?(compare=compare) l1 l2=
16 |   match l1, l2 with
17 |   | [], []-> 0
18 |   | [], _-> -1
19 |   | _, []-> 1
20 |   | h1::t1, h2::t2->
21 |     match compare h1 h2 with
22 |     | 0-> list_compare ~compare t1 t2
23 |     | _ as r-> r
24 | 
25 | let array_compare ?(compare=compare) a1 a2=
26 |   let len1= Array.length a1
27 |   and len2= Array.length a2 in
28 |   let rec compare_aux pos=
29 |     let remain1= len1 - pos
30 |     and remain2= len2 - pos in
31 |     if remain1 <= 0 && remain2 <= 0 then 0
32 |     else if remain1 <= 0 && remain2 > 0 then -1
33 |     else if remain1 > 0 && remain2 <= 0 then 1
34 |     else match compare a1.(pos) a2.(pos) with
35 |       | 0-> compare_aux (pos + 1)
36 |       | _ as r-> r
37 |   in
38 |   compare_aux 0
39 | 
40 | 


--------------------------------------------------------------------------------
/style.css:
--------------------------------------------------------------------------------
  1 | /* A style for ocamldoc. Daniel C. Buenzli, Jérémie Dimino */
  2 | 
  3 | body {
  4 |     padding: 0em;
  5 |     border: 0em;
  6 |     margin: 2em 10% 2em 10%;
  7 |     font-weight: normal;
  8 |     line-height: 130%;
  9 |     text-align: justify;
 10 |     background: white;
 11 |     color : black;
 12 |     min-width: 40ex;
 13 | }
 14 | 
 15 | pre, p, div, span, img, table, td, ol, ul, li {
 16 |     padding: 0em;
 17 |     border: 0em;
 18 |     margin: 0em
 19 | }
 20 | 
 21 | h1, h2, h3, h4, h5, h6, div.h7, div.h8, div.h9 {
 22 |     fontsize: 100%;
 23 |     margin-bottom: 1em
 24 |     padding: 1ex 0em 0em 0em;
 25 |     border: 0em;
 26 |     margin: 1em 0em 0em 0em;
 27 |     font-weight : bold;
 28 |     text-align: center;
 29 | }
 30 | 
 31 | h1 {
 32 |     font-size : 140%
 33 | }
 34 | 
 35 | h2, h3, h4, h5, h6, div.h7, div.h8, div.h9 {
 36 |     font-size : 100%;
 37 |     border-top-style : none;
 38 |     margin: 1ex 0em 0em 0em;
 39 |     border: 1px solid #000000;
 40 |     margin-top: 5px;
 41 |     margin-bottom: 2px;
 42 |     text-align: center;
 43 |     padding: 2px;
 44 | }
 45 | 
 46 | h2 {
 47 |     font-size : 120%;
 48 |     background-color: #90BDFF ;
 49 | }
 50 | h3 {
 51 |     background-color: #90DDFF;
 52 | }
 53 | h4 {
 54 |     background-color: #90EDFF;
 55 | }
 56 | h5 {
 57 |     background-color: #90FDFF;
 58 | }
 59 | h6 {
 60 |     background-color: #C0FFFF;
 61 | }
 62 | div.h7 {
 63 |     background-color: #E0FFFF;
 64 | }
 65 | div.h8 {
 66 |     background-color: #F0FFFF;
 67 | }
 68 | div.h9 {
 69 |     background-color: #FFFFFF;
 70 | }
 71 | 
 72 | .navbar {
 73 |     padding-bottom : 1em;
 74 |     margin-bottom: 1em;
 75 |     border-bottom: 1px solid #000000;
 76 |     border-bottom-style: dotted;
 77 | }
 78 | 
 79 | p {
 80 |     padding: 1em 0ex 0em 0em
 81 | }
 82 | 
 83 | a, a:link, a:visited, a:active, a:hover {
 84 |     color : #009;
 85 |     text-decoration: none
 86 | }
 87 | a:hover {
 88 |     color : #009;
 89 |     text-decoration : none;
 90 |     background-color: #5FFF88
 91 | }
 92 | 
 93 | hr {
 94 |     border-style: none;
 95 | }
 96 | table {
 97 |     font-size : 100% /* Why ? */
 98 | }
 99 | ul li {
100 |     padding: 1em 0em 0em 0em;
101 |     margin:0em 0em 0em 2.5ex
102 | }
103 | ol li {
104 |     padding: 1em 0em 0em 0em;
105 |     margin:0em 0em 0em 2em
106 | }
107 | 
108 | pre {
109 |     margin: 3ex 0em 1ex 0em;
110 |     background-color: #edf0f9;
111 | }
112 | .keyword {
113 |     font-weight: bold;
114 |     color: #a020f0;
115 | }
116 | .keywordsign {
117 |     font-weight: bold;
118 |     color: #a020f0;
119 | }
120 | .typefieldcomment {
121 |     color : #b22222;
122 | }
123 | .keywordsign {
124 |     color: #a020f0;
125 | 
126 | }
127 | .code {
128 |     font-size: 100%;
129 |     color: #5f5f5f;
130 | }
131 | .info {
132 |     margin: 0em 0em 0em 2em
133 | }
134 | .comment {
135 |     color : #b22222;
136 | }
137 | .constructor {
138 |     color : #072
139 | }
140 | .type {
141 |     color : #228b22;
142 | }
143 | .string {
144 |     color : #bc8f8f;
145 | }
146 | .warning {
147 |     color : Red;
148 |     font-weight : bold
149 | }
150 | 
151 | div.sig_block {
152 |     margin-left: 2em
153 | }
154 | .typetable {
155 |     color : #b8860b;
156 |     border-style : hidden
157 | }
158 | .indextable {
159 |     border-style : hidden
160 | }
161 | .paramstable {
162 |     border-style : hidden;
163 |     padding: 5pt 5pt
164 | }
165 | 
166 | .superscript {
167 |     font-size : 80%
168 | }
169 | .subscript {
170 |     font-size : 80%
171 | }
172 | 


--------------------------------------------------------------------------------
/test/dune:
--------------------------------------------------------------------------------
1 | (test
2 |  (name test_zed)
3 |  (libraries alcotest zed))
4 | 


--------------------------------------------------------------------------------
/test/test_zed.ml:
--------------------------------------------------------------------------------
 1 | (* available in recent alcotest *)
 2 | let alcotest_triple ta tb tc =
 3 |   let pp ppf (a, b, c) =
 4 |           Format.fprintf ppf "(%a, %a, %a)"
 5 |           (Alcotest.pp ta) a
 6 |           (Alcotest.pp tb) b
 7 |           (Alcotest.pp tc) c
 8 |   in
 9 |   let eq (a1, b1, c1) (a2, b2, c2) =
10 |     Alcotest.equal ta a1 a2
11 |     &&
12 |     Alcotest.equal tb b1 b2
13 |     &&
14 |     Alcotest.equal tc c1 c2
15 |   in
16 |   Alcotest.testable pp eq
17 | 
18 | let test_next_error =
19 |   let test ~name input offset ~expected =
20 |     (name, `Quick, (fun () ->
21 |       let got =
22 |         match Zed_utf8.next_error input offset with
23 |         | s -> Result.Ok s
24 |         | exception Zed_utf8.Out_of_bounds -> Error "Out_of_bounds"
25 |       in
26 |       Alcotest.check
27 |         Alcotest.(result (alcotest_triple int int string) string)
28 |         __LOC__ expected got))
29 |   in
30 |   ( "next_error",
31 |   [ test
32 |       ~name:"scalar value too large"
33 |       "\247\165\165\165"
34 |       0
35 |       ~expected:(Ok (0, 0, "scalar value too large in UTF8 sequence"))
36 |   ; test ~name:"out of bounds"
37 |       "cat"
38 |       3
39 |       ~expected:(Error "Out_of_bounds")
40 |   ])
41 | 
42 | let test_of_utf8 =
43 |   let test_invalid ~name input =
44 |     (name, `Quick, fun () ->
45 |      let raised_correctly =
46 |        match Zed_string.of_utf8 input with
47 |        | (_ : Zed_string.t) -> false
48 |        | exception Zed_utf8.Invalid _ -> true
49 |        | exception _ -> false
50 |      in
51 |      Alcotest.check Alcotest.bool __LOC__ true raised_correctly)
52 |   in
53 |   ( "of_utf8",
54 |   [ test_invalid ~name:"uchar_max (U+110000)" "\xf4\x90\x80\x80"
55 |   ; test_invalid ~name:"U+D800" "\xed\xa0\x80"
56 |   ; test_invalid ~name:"U+DFFF" "\xed\xbf\xbf"
57 |   ])
58 | 
59 | let test_kill_next_word =
60 |   let test =
61 |     ("kill_next_word", `Quick, fun () ->
62 |       (* Test that [kill_next_word] does not raise [Out_of_bounds] *)
63 |       let engine = Zed_edit.create () in
64 |       let cursor = Zed_edit.new_cursor engine in
65 |       let ctxt = Zed_edit.context engine cursor in
66 |       Zed_edit.insert ctxt (Zed_rope.of_string (Zed_string.of_utf8 "hello"));
67 |       Zed_edit.set_mark ctxt;
68 |       Zed_edit.insert ctxt (Zed_rope.of_string (Zed_string.of_utf8 " world"));
69 |       Zed_edit.goto_mark ctxt;
70 |       Zed_edit.kill_next_word ctxt)
71 |   in
72 |   ( "kill_next_word", [ test ] )
73 | 
74 | let () = Alcotest.run "zed" [test_next_error; test_of_utf8; test_kill_next_word]
75 | 


--------------------------------------------------------------------------------
/zed.descr:
--------------------------------------------------------------------------------
 1 | Abstract engine for text edition in OCaml
 2 | 
 3 | Zed is an abstract engine for text edition. It can be used to write text
 4 | editors, edition widgets, readlines, ... Zed uses Camomile to fully support the
 5 | Unicode specification, and implements an UTF-8 encoded string type with
 6 | validation, and a rope datastructure to achieve efficient operations on large
 7 | Unicode buffers. Zed also features a regular expression search on ropes. To
 8 | support efficient text edition capabilities, Zed provides macro recording and
 9 | cursor management facilities.
10 | 


--------------------------------------------------------------------------------
/zed.opam:
--------------------------------------------------------------------------------
 1 | # This file is generated by dune, edit dune-project instead
 2 | opam-version: "2.0"
 3 | synopsis: "Abstract engine for text edition in OCaml"
 4 | description: """
 5 | Zed is an abstract engine for text edition. It can be used to write text
 6 | editors, edition widgets, readlines, ... Zed uses Camomile to fully support the
 7 | Unicode specification, and implements an UTF-8 encoded string type with
 8 | validation, and a rope datastructure to achieve efficient operations on large
 9 | Unicode buffers. Zed also features a regular expression search on ropes. To
10 | support efficient text edition capabilities, Zed provides macro recording and
11 | cursor management facilities."""
12 | maintainer: ["ZAN DoYe <zandoye+ocaml@gmail.com>"]
13 | authors: ["Jérémie Dimino"]
14 | license: "BSD-3-Clause"
15 | homepage: "https://github.com/ocaml-community/zed"
16 | bug-reports: "https://github.com/ocaml-community/zed/issues"
17 | depends: [
18 |   "dune" {>= "3.0"}
19 |   "ocaml" {>= "4.02.3"}
20 |   "react"
21 |   "result"
22 |   "uchar"
23 |   "uutf"
24 |   "uucp" {>= "2.0.0"}
25 |   "uuseg"
26 |   "alcotest" {with-test}
27 |   "odoc" {with-doc}
28 | ]
29 | build: [
30 |   ["dune" "subst"] {dev}
31 |   [
32 |     "dune"
33 |     "build"
34 |     "-p"
35 |     name
36 |     "-j"
37 |     jobs
38 |     "@install"
39 |     "@runtest" {with-test}
40 |     "@doc" {with-doc}
41 |   ]
42 | ]
43 | dev-repo: "git+https://github.com/ocaml-community/zed.git"
44 | 


--------------------------------------------------------------------------------