├── .github ├── dependabot.yml └── workflows │ └── haskell.yml ├── .gitignore ├── .nvimrc ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── Setup.hs ├── cabal.project ├── src └── Text │ ├── Ascii.hs │ └── Ascii │ ├── Char.hs │ ├── Internal.hs │ ├── QQ.hs │ └── Unsafe.hs └── text-ascii.cabal /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # From: 2 | # - https://github.com/haskell/hackage-server 3 | # - https://github.com/rhysd/actionlint/issues/228#issuecomment-1272493095 4 | # - https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot 5 | 6 | # Set update schedule for GitHub Actions 7 | 8 | version: 2 9 | updates: 10 | 11 | - package-ecosystem: "github-actions" 12 | directory: "/" 13 | schedule: 14 | # Check for updates to GitHub Actions every week 15 | interval: "weekly" 16 | -------------------------------------------------------------------------------- /.github/workflows/haskell.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main, dev] 7 | 8 | jobs: 9 | generate-matrix: 10 | name: "Generate matrix from cabal" 11 | outputs: 12 | matrix: ${{ steps.set-matrix.outputs.matrix }} 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Extract the tested GHC versions 16 | id: set-matrix 17 | uses: kleidukos/get-tested@v0.1.6.0 18 | with: 19 | cabal-file: text-ascii.cabal 20 | ubuntu: true 21 | macos: true 22 | windows: true 23 | version: 0.1.6.0 24 | tests: 25 | name: ${{ matrix.ghc }} on ${{ matrix.os }} 26 | needs: generate-matrix 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }} 30 | steps: 31 | - uses: actions/checkout@v4 32 | name: Checkout base repo 33 | - uses: haskell-actions/setup@v2 34 | id: haskell-setup 35 | name: Setup Haskell 36 | with: 37 | 
ghc-version: ${{ matrix.ghc }} 38 | cabal-version: 'latest' 39 | - name: Configure 40 | run: | 41 | cabal configure --enable-tests 42 | cabal freeze 43 | - name: Cache 44 | uses: actions/cache@v4.0.0 45 | with: 46 | path: ${{ steps.haskell-setup.outputs.cabal-store }} 47 | key: ${{ runner.os }}-ghc-${{ matrix.ghc }}-cabal-${{ hashFiles('**/plan.json') }} 48 | restore-keys: ${{ runner.os }}-ghc-${{ matrix.ghc }}- 49 | - name: Install base dependencies 50 | run: cabal build --only-dependencies 51 | - name: Build 52 | run: cabal build 53 | - name: Run tests 54 | run: cabal test 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | */.nvimrc 2 | .cabal 3 | dist 4 | dist-newstyle 5 | .ghc.environment.* 6 | cabal.project.local* 7 | .stack-work 8 | stack.yaml.lock 9 | cabal.project.local~ 10 | -------------------------------------------------------------------------------- /.nvimrc: -------------------------------------------------------------------------------- 1 | " Enable hlint and GHC via Cabal 2 | let g:ale_linters = {'haskell': ['hlint', 'cabal-build']} 3 | " ... only 4 | let g:ale_linters_explicit = 1 5 | " Don't lint until I save 6 | let g:ale_lint_on_text_changed = 'never' 7 | let g:ale_lint_on_insert_leave = 0 8 | let g:ale_lint_on_enter = 0 9 | 10 | call ale#Set('haskell_cabal_build_options', '--enable-tests --disable-optimization') 11 | 12 | function! GetCabalCommand(buffer) abort 13 | let l:flags = ale#Var(a:buffer, 'haskell_cabal_build_options') 14 | return 'cabal new-build ' . 
l:flags 15 | endfunction 16 | 17 | call ale#linter#Define('haskell', { 18 | \ 'name': 'cabal_build', 19 | \ 'aliases': ['cabal-build'], 20 | \ 'output_stream': 'stderr', 21 | \ 'executable': 'cabal', 22 | \ 'command': function('GetCabalCommand'), 23 | \ 'callback': 'ale#handlers#haskell#HandleGHCFormat', 24 | \}) 25 | 26 | " Configure Neoformat to use cabal-fmt for Cabal files 27 | let g:neoformat_cabal_cabalfmt = { 'exe': 'cabal-fmt', 'args': [] } 28 | let g:neoformat_enabled_cabal = ['cabalfmt'] 29 | 30 | " Configure Neoformat to use ormolu for Haskell 31 | let g:neoformat_haskell_ormolu = { 'exe': 'ormolu', 'args': [] } 32 | let g:neoformat_enabled_haskell = ['ormolu'] 33 | 34 | " Enable automagic autoformatting 35 | augroup fmt 36 | autocmd! 37 | autocmd BufWritePre * undojoin | Neoformat 38 | augroup end 39 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Revision history for text-ascii 2 | 3 | ## 1.2.1 -- 2023-01-20 4 | 5 | * Dependency bump 6 | 7 | ## 1.2 -- 2021-11-07 8 | 9 | * Add instances of `Factorial`, `FactorialMonoid`, `LeftCancellative`, 10 | `LeftGCDMonoid`, `LeftReductive`, `MonoidNull`, `OverlappingGCDMonoid`, 11 | `PositiveMonoid`, `RightCancellative`, `RightGCDMonoid`, `RightReductive` and 12 | `StableFactorial` for `AsciiText`. 13 | 14 | ## 1.1 -- 2021-10-31 15 | 16 | * Support GHC 9.2. 17 | * Remove support for GHCs below 8.10. 18 | * Add `eitherFromText` and `eitherFromByteString` for better conversion errors. 19 | 20 | ## 1.0.1 -- 2021-03-02 21 | 22 | * Support GHC 9. 23 | * Replace 8.10.3 with 8.10.4 in CI. 24 | * Expose `Text.Ascii.Internal` and `Text.Ascii.QQ`. 25 | * Add `Ixed` instance (and supporting type instances) for `AsciiText`. 26 | * Add `Stream`, `VisualStream` and `TraversableStream` instances (and supporting 27 | type instances) for `AsciiText`. 28 | * Drop Parsec in favour of Megaparsec. 
29 | * Add `FoldCase` instances for `AsciiChar` and `AsciiText`. 30 | * Implement `lines`, `unlines`, `words`, `unwords`, `replicate`, `chunksOf`, 31 | `index`, `zipWith`, `justifyLeft`, `justifyRight`, `center`, `takeEnd`, 32 | `dropEnd`, `dropAround`, `strip`, `stripStart`, `stripEnd`, `commonPrefixes` 33 | for `AsciiText`. 34 | * Implement [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html), as 35 | well as the following functions that use it: 36 | * `count` 37 | * `replace` 38 | * `splitOn` 39 | * `stripInfix` 40 | * `breakOnAll` 41 | * `breakOn` 42 | * `breakOnEnd` 43 | * Add `Unsafe` module containing an `Unsafe` wrapper, plus instances and 44 | functions. 45 | * Add a range of optics for `AsciiText`. 46 | 47 | ## 1.0.0 -- 2021-02-07 48 | 49 | * First version. Released on an unsuspecting world. 50 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution guide 2 | 3 | ## Introduction 4 | 5 | First of all, thank you for wanting to contribute! This guide is designed to 6 | help make sure that your contribution experience is as stress-free and 7 | straightforward as possible. 8 | 9 | ## Git practices 10 | 11 | Please fork, and make PRs to, the `dev` branch. `main` is used only for 12 | releases. 13 | 14 | Ensure that your commits are individually buildable, and that all tests pass on 15 | each commit (doctests and otherwise). Each commit should have a concise, but 16 | clear description of what it fixes or adds. Refer to issues if relevant by 17 | tagging with # followed by the issue number (for example, "Fix #1234"). To check 18 | if your doctests pass, we recommend `cabal-docspec` from 19 | [`cabal-extras`](https://github.com/phadej/cabal-extras). 
20 | 21 | ## Cabal file standards 22 | 23 | The cabal file for this project must be formatted according to 24 | [`cabal-fmt`](http://hackage.haskell.org/package/cabal-fmt). All dependencies 25 | must have bounds; where possible, `^>=`-style bounds are preferable. 26 | 27 | ## Code standards 28 | 29 | We follow the [Package Versioning Policy](https://pvp.haskell.org). If your 30 | changes are significant enough to warrant a version change by the Policy, ensure 31 | that you do so, and update the changelog to match. If you are unsure, you can 32 | use [`Policeman`](http://hackage.haskell.org/package/policeman) to check. 33 | 34 | All code is to be formatted using 35 | [`ormolu`](http://hackage.haskell.org/package/ormolu), and must be free of 36 | warnings as emitted by [Hlint](http://hackage.haskell.org/package/hlint), both 37 | with default settings. If a warning is spurious, it must be silenced in the 38 | narrowest possible scope, with an explanatory comment. 39 | 40 | Imports into a module may take one of the following forms only: 41 | 42 | * `import Foo (Bar, baz, quux)`; or 43 | * `import qualified Foo as Baz` 44 | 45 | For data type imports, wildcard imports should not be used; instead, specify the 46 | constructor(s) you want explicitly: 47 | 48 | ```haskell 49 | -- Not like this: import Foo (Bar(..)) 50 | import Foo (Bar (Baz, Quux)) 51 | ``` 52 | 53 | Every publicly-facing module must have an explicit export list (internal 54 | modules can omit this). All publicly-exported identifiers should have 55 | Haddocks, indicating an `@since` with the version where they first appeared, or 56 | last changed semantically. For functions, doctests should be provided, ideally 57 | demonstrating as much of the functionality as reasonable. Edge cases are 58 | _especially_ critical: provide a clear explanation of these in the Haddocks, or 59 | show the behaviour with doctests, preferably both. 
60 | 61 | Where possible, keep to a similar style to the rest of the module (and the 62 | package). This isn't a hard-and-fast rule, but a good thing to keep in mind for 63 | consistency reasons. 64 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `text-ascii` [![Hackage](https://img.shields.io/hackage/v/text-ascii?style=flat-square)][hackage] 2 | 3 | ## What is this thing? 4 | 5 | A library for handling ASCII text. 6 | 7 | ## What are the goals of this project? 8 | 9 | ### Totality by default 10 | 11 | Partial functions (and type classes which provide them) will not be included: 12 | everything is total. When we include anything unsafe, it will be explicitly 13 | firewalled into its own module, behind a newtype. 14 | 15 | ### No boolean blindness 16 | 17 | [Boolean blindness](http://dev.stephendiehl.com/hask/#boolean-blindness) is not 18 | a good thing, for all the reasons listed in the link. Whenever possible, we'll 19 | try and give more useful information than a `Bool`. 20 | 21 | ### Compatibility with the [`text`](http://hackage.haskell.org/package/text) API 22 | 23 | We want to match the API of the `text` package exactly. 
If you know how to use `text`, 24 | you'll know how to use this package too. Exceptions are made for places where 25 | `text` is either partial or boolean-blind. 26 | 27 | ### Discoverability, documentation and user-friendliness 28 | 29 | In addition to documenting everything with Haddocks, we have over 250 doctests, 30 | which provide _executable_ examples of how the API can be used, and how it will 31 | behave. We aim to clarify _every_ corner case left by the documentation of 32 | `text`, and care strongly about making the API easy to follow, learn and 33 | understand. 34 | 35 | ### Correctness 36 | 37 | We currently use doctests, but plan to add support for more testing. No such 38 | thing as too much! 39 | 40 | ### Low dependencies 41 | 42 | As far as possible, we aim to depend on [GHC boot packages](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/libraries/version-history) only. When we 43 | introduce more dependencies, we do it only when we have to. This way, we ensure 44 | this package builds quickly and doesn't 'lag' more than necessary due to GHC 45 | version changes. 46 | 47 | ## What's with all the cat stuff? 48 | 49 | [I am a Haskell catboy.](https://twitter.com/KozRoss) 50 | 51 | ## What does this run on? 52 | 53 | We support the latest three releases of GHC. Currently, these are: 54 | 55 | * 9.4 56 | * 9.6 57 | * 9.8 58 | 59 | We check on the following platforms: 60 | 61 | * Windows 62 | * Linux 63 | * MacOS 64 | 65 | ## What can I do with this? 66 | 67 | The project is licensed Apache 2.0 (SPDX code 68 | [`Apache-2.0`](https://spdx.org/licenses/Apache-2.0.html)). For more details, 69 | please see the `LICENSE.md` file. 
70 | 71 | [hackage]: https://hackage.haskell.org/package/text-ascii 72 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | 3 | main = defaultMain 4 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | ./text-ascii.cabal 3 | -------------------------------------------------------------------------------- /src/Text/Ascii.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingVia #-} 2 | {-# LANGUAGE LambdaCase #-} 3 | {-# LANGUAGE QuasiQuotes #-} 4 | {-# LANGUAGE Trustworthy #-} 5 | {-# LANGUAGE TypeApplications #-} 6 | {-# LANGUAGE NoImplicitPrelude #-} 7 | 8 | -- | 9 | -- Module: Text.Ascii 10 | -- Copyright: (C) 2021 Koz Ross 11 | -- License: Apache 2.0 12 | -- Maintainer: Koz Ross 13 | -- Stability: stable 14 | -- Portability: GHC only 15 | -- 16 | -- An implementation of ASCII strings. 
17 | -- 18 | -- This module is designed for qualified importing: 19 | -- 20 | -- > import qualified Text.Ascii as Ascii 21 | -- 22 | -- /See also:/ [Wikipedia entry for ASCII](https://en.wikipedia.org/wiki/ASCII) 23 | module Text.Ascii 24 | ( -- * Type 25 | AsciiText, 26 | 27 | -- * Creation 28 | empty, 29 | singleton, 30 | ascii, 31 | 32 | -- * Basic interface 33 | cons, 34 | snoc, 35 | uncons, 36 | unsnoc, 37 | length, 38 | 39 | -- * Transformations 40 | map, 41 | intercalate, 42 | intersperse, 43 | transpose, 44 | reverse, 45 | replace, 46 | 47 | -- ** Justification 48 | justifyLeft, 49 | justifyRight, 50 | center, 51 | 52 | -- * Folds 53 | foldl, 54 | foldl', 55 | foldr, 56 | foldr', 57 | 58 | -- ** Special folds 59 | concat, 60 | concatMap, 61 | 62 | -- * Construction 63 | 64 | -- ** Scans 65 | scanl, 66 | scanr, 67 | 68 | -- ** Accumulating maps 69 | mapAccumL, 70 | mapAccumR, 71 | 72 | -- ** Generation and unfolding 73 | replicate, 74 | unfoldr, 75 | unfoldrN, 76 | 77 | -- * Substrings 78 | 79 | -- ** Breaking strings 80 | take, 81 | takeEnd, 82 | drop, 83 | dropEnd, 84 | takeWhile, 85 | takeWhileEnd, 86 | dropWhile, 87 | dropWhileEnd, 88 | dropAround, 89 | strip, 90 | stripStart, 91 | stripEnd, 92 | splitAt, 93 | breakOn, 94 | breakOnEnd, 95 | break, 96 | span, 97 | group, 98 | groupBy, 99 | inits, 100 | tails, 101 | 102 | -- ** Breaking into many substrings 103 | splitOn, 104 | split, 105 | chunksOf, 106 | 107 | -- ** Breaking into lines and words 108 | lines, 109 | unlines, 110 | words, 111 | unwords, 112 | 113 | -- * View patterns 114 | stripPrefix, 115 | stripSuffix, 116 | stripInfix, 117 | commonPrefixes, 118 | 119 | -- * Searching 120 | filter, 121 | breakOnAll, 122 | find, 123 | partition, 124 | 125 | -- * Indexing 126 | index, 127 | findIndex, 128 | count, 129 | 130 | -- * Zipping 131 | zip, 132 | zipWith, 133 | 134 | -- * Conversions 135 | fromText, 136 | eitherFromText, 137 | fromByteString, 138 | eitherFromByteString, 139 | toText, 140 | 
toByteString, 141 | 142 | -- * Optics 143 | textWise, 144 | byteStringWise, 145 | packedChars, 146 | chars, 147 | packedBytes, 148 | bytes, 149 | ) 150 | where 151 | 152 | import Control.Category ((.)) 153 | import Data.Bifunctor (first) 154 | import Data.Bool (Bool (False, True), otherwise, (&&)) 155 | import Data.ByteString (ByteString) 156 | import qualified Data.ByteString as BS 157 | import qualified Data.ByteString.Optics as BSO 158 | import Data.Char (isAscii) 159 | import Data.Coerce (coerce) 160 | import Data.Foldable (Foldable (foldMap)) 161 | import qualified Data.Foldable as F 162 | import Data.Int (Int64) 163 | import qualified Data.List as L 164 | import Data.Maybe (Maybe (Just, Nothing)) 165 | import Data.Text (Text) 166 | import qualified Data.Text as T 167 | import Data.Text.Encoding (decodeUtf8, encodeUtf8) 168 | import Data.Word (Word8) 169 | import Optics.Coerce (coerceA, coerceB, coerceS, coerceT) 170 | import Optics.Getter (Getter, view) 171 | import Optics.Iso (Iso') 172 | import Optics.IxFold (IxFold) 173 | import Optics.IxTraversal (IxTraversal') 174 | import Optics.Optic (castOptic) 175 | import Optics.Prism (Prism', prism') 176 | import Optics.Review (Review, review) 177 | import Text.Ascii.Internal (AsciiChar (AsciiChar), AsciiText (AsciiText)) 178 | import Text.Ascii.QQ (ascii, char) 179 | import Prelude 180 | ( Int, 181 | not, 182 | pure, 183 | ($), 184 | (+), 185 | (-), 186 | (/=), 187 | (<), 188 | (<$>), 189 | (<=), 190 | (<>), 191 | (==), 192 | (>), 193 | (>=), 194 | (||), 195 | ) 196 | import qualified Prelude as P 197 | 198 | -- Note on pragmata 199 | -- 200 | -- This is cribbed directly from bytestring, as I figure they know what they're 201 | -- doing way better than we do. When we add our own functionality, this probably 202 | -- needs to be considered more carefully. 
- Koz 203 | 204 | -- Creation 205 | 206 | -- $setup 207 | -- >>> :set -XNoImplicitPrelude 208 | -- >>> :seti -XQuasiQuotes 209 | -- >>> :seti -XOverloadedStrings 210 | -- >>> import Text.Ascii 211 | -- >>> import Text.Ascii.Char (char, upcase, AsciiCase (Lower), caseOf) 212 | -- >>> import Prelude ((.), ($), (<>), (==), (<), (/=), (-), max, even) 213 | -- >>> import qualified Prelude as Prelude 214 | -- >>> import Data.Maybe (Maybe (Just), fromMaybe) 215 | -- >>> import qualified Data.ByteString as BS 216 | -- >>> import Optics.AffineFold (preview) 217 | -- >>> import Optics.Review (review) 218 | -- >>> import Optics.Getter (view) 219 | -- >>> import Optics.IxTraversal (elementOf) 220 | -- >>> import Optics.IxSetter (iover) 221 | -- >>> import Data.Bool (bool) 222 | -- >>> import Optics.IxFold (itoListOf) 223 | 224 | -- | The empty text. 225 | -- 226 | -- >>> empty 227 | -- "" 228 | -- 229 | -- /Complexity:/ \(\Theta(1)\) 230 | -- 231 | -- @since 1.0.0 232 | empty :: AsciiText 233 | empty = coerce BS.empty 234 | 235 | -- | A text consisting of a single ASCII character. 236 | -- 237 | -- >>> singleton [char| 'w' |] 238 | -- "w" 239 | -- 240 | -- /Complexity:/ \(\Theta(1)\) 241 | -- 242 | -- @since 1.0.0 243 | {-# INLINE [1] singleton #-} 244 | singleton :: AsciiChar -> AsciiText 245 | singleton = coerce BS.singleton 246 | 247 | -- Basic interface 248 | 249 | -- | Adds a character to the front of a text. This requires copying, which gives 250 | -- its complexity. 251 | -- 252 | -- >>> cons [char| 'n' |] [ascii| "eko" |] 253 | -- "neko" 254 | -- 255 | -- /Complexity:/ \(\Theta(n)\) 256 | -- 257 | -- @since 1.0.0 258 | {-# INLINE cons #-} 259 | cons :: AsciiChar -> AsciiText -> AsciiText 260 | cons = coerce BS.cons 261 | 262 | -- | Adds a character to the back of a text. This requires copying, which gives 263 | -- its complexity. 
264 | -- 265 | -- >>> snoc [ascii| "nek" |] [char| 'o' |] 266 | -- "neko" 267 | -- 268 | -- /Complexity:/ \(\Theta(n)\) 269 | -- 270 | -- @since 1.0.0 271 | {-# INLINE snoc #-} 272 | snoc :: AsciiText -> AsciiChar -> AsciiText 273 | snoc = coerce BS.snoc 274 | 275 | -- | If the argument is non-empty, gives 'Just' the first character and the 276 | -- rest, and 'Nothing' otherwise. 277 | -- 278 | -- >>> uncons empty 279 | -- Nothing 280 | -- >>> uncons . singleton $ [char| 'w' |] 281 | -- Just ('0x77',"") 282 | -- >>> uncons [ascii| "nekomimi" |] 283 | -- Just ('0x6e',"ekomimi") 284 | -- 285 | -- /Complexity:/ \(\Theta(1)\) 286 | -- 287 | -- @since 1.0.0 288 | {-# INLINE uncons #-} 289 | uncons :: AsciiText -> Maybe (AsciiChar, AsciiText) 290 | uncons = coerce BS.uncons 291 | 292 | -- | If the argument is non-empty, gives 'Just' the initial segment and the last 293 | -- character, and 'Nothing' otherwise. 294 | -- 295 | -- >>> unsnoc empty 296 | -- Nothing 297 | -- >>> unsnoc . singleton $ [char| 'w' |] 298 | -- Just ("",'0x77') 299 | -- >>> unsnoc [ascii| "catboy" |] 300 | -- Just ("catbo",'0x79') 301 | -- 302 | -- /Complexity:/ \(\Theta(1)\) 303 | -- 304 | -- @since 1.0.0 305 | {-# INLINE unsnoc #-} 306 | unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar) 307 | unsnoc = coerce BS.unsnoc 308 | 309 | -- | The number of characters (and, since this is ASCII, bytes) in the text. 310 | -- 311 | -- >>> length . singleton $ [char| 'w' |] 312 | -- 1 313 | -- >>> length [ascii| "nyan nyan" |] 314 | -- 9 315 | -- 316 | -- /Complexity:/ \(\Theta(1)\) 317 | -- 318 | -- @since 1.0.0 319 | {-# INLINE length #-} 320 | length :: AsciiText -> Int 321 | length = coerce BS.length 322 | 323 | -- Transformations 324 | 325 | -- | Copy, and apply the function to each element of, the text. 326 | -- 327 | -- >>> map (\c -> fromMaybe c . upcase $ c) [ascii| "nyan!" |] 328 | -- "NYAN!" 
329 | -- 330 | -- /Complexity:/ \(\Theta(n)\) 331 | -- 332 | -- @since 1.0.0 333 | {-# INLINE map #-} 334 | map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText 335 | map = coerce BS.map 336 | 337 | -- | Takes a text and a list of texts, and concatenates the list after 338 | -- interspersing the first argument between each element of the list. 339 | -- 340 | -- >>> intercalate [ascii| " ~ " |] [] 341 | -- "" 342 | -- >>> intercalate [ascii| " ~ " |] [[ascii| "nyan" |]] 343 | -- "nyan" 344 | -- >>> intercalate [ascii| " ~ " |] . Prelude.replicate 3 $ [ascii| "nyan" |] 345 | -- "nyan ~ nyan ~ nyan" 346 | -- >>> intercalate empty . Prelude.replicate 3 $ [ascii| "nyan" |] 347 | -- "nyannyannyan" 348 | -- 349 | -- /Complexity:/ \(\Theta(n)\) 350 | -- 351 | -- @since 1.0.0 352 | {-# INLINE [1] intercalate #-} 353 | intercalate :: AsciiText -> [AsciiText] -> AsciiText 354 | intercalate = coerce BS.intercalate 355 | 356 | -- | Takes a character, and places it between the characters of a text. 357 | -- 358 | -- >>> intersperse [char| '~' |] empty 359 | -- "" 360 | -- >>> intersperse [char| '~' |] . singleton $ [char| 'w' |] 361 | -- "w" 362 | -- >>> intersperse [char| '~' |] [ascii| "nyan" |] 363 | -- "n~y~a~n" 364 | -- 365 | -- /Complexity:/ \(\Theta(n)\) 366 | -- 367 | -- @since 1.0.0 368 | intersperse :: AsciiChar -> AsciiText -> AsciiText 369 | intersperse = coerce BS.intersperse 370 | 371 | -- | Transpose the rows and columns of the argument. This uses 372 | -- 'Data.List.transpose' internally, and thus, isn't very efficient. 373 | -- 374 | -- >>> transpose [] 375 | -- [] 376 | -- >>> transpose [[ascii| "w" |]] 377 | -- ["w"] 378 | -- >>> transpose [[ascii| "nyan" |]] 379 | -- ["n","y","a","n"] 380 | -- >>> transpose . 
Prelude.replicate 3 $ [ascii| "nyan" |] 381 | -- ["nnn","yyy","aaa","nnn"] 382 | -- >>> transpose [[ascii| "cat" |], [ascii| "boy" |], [ascii| "nyan" |]] 383 | -- ["cbn","aoy","tya","n"] 384 | -- 385 | -- /Complexity:/ \(\Theta(n)\) 386 | -- 387 | -- @since 1.0.0 388 | transpose :: [AsciiText] -> [AsciiText] 389 | transpose = coerce BS.transpose 390 | 391 | -- | Reverse the text. 392 | -- 393 | -- >>> reverse empty 394 | -- "" 395 | -- >>> reverse . singleton $ [char| 'w' |] 396 | -- "w" 397 | -- >>> reverse [ascii| "catboy goes nyan" |] 398 | -- "nayn seog yobtac" 399 | -- 400 | -- /Complexity:/ \(\Theta(n)\) 401 | -- 402 | -- @since 1.0.0 403 | reverse :: AsciiText -> AsciiText 404 | reverse = coerce BS.reverse 405 | 406 | -- | @replace needle replacement haystack@, given a @needle@ of length \(n\) and 407 | -- a haystack of length \(h\), replaces each non-overlapping occurrence of 408 | -- @needle@ in @haystack@ with @replacement@. If the @needle@ is empty, no 409 | -- replacement will be performed. Equivalent to @'intercalate' replacement '.' 410 | -- 'splitOn' needle '$' haystack@. 411 | -- 412 | -- >>> replace empty [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |] 413 | -- "catboy goes nyan nyan" 414 | -- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] empty 415 | -- "" 416 | -- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |] 417 | -- "catboy goes NYAN~ NYAN~" 418 | -- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "nyanyan" |] 419 | -- "NYAN~yan" 420 | -- 421 | -- = On complexity 422 | -- 423 | -- This function is based on a variant of the 424 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 425 | -- except it does not detect overlapping needles. 
Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- The analysis below also doesn't factor in the cost of performing the
-- replacement, as this is (among other things) proportional to the number of
-- matches of the needle (and thus is hard to quantify).
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
456 | -- 457 | -- @since 1.0.1 458 | replace :: 459 | -- | @needle@ to search for 460 | AsciiText -> 461 | -- | @replacement@ to replace @needle@ with 462 | AsciiText -> 463 | -- | @haystack@ in which to search 464 | AsciiText -> 465 | AsciiText 466 | replace needle replacement haystack 467 | | length needle == 0 || length haystack == 0 = haystack 468 | | length needle > length haystack = haystack 469 | | otherwise = intercalate replacement . splitOn needle $ haystack 470 | 471 | -- | @justifyLeft n c t@ produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\), 472 | -- consisting of a copy of @t@ followed by (zero or more) copies 473 | -- of @c@. 474 | -- 475 | -- >>> justifyLeft (-100) [char| '~' |] [ascii| "nyan" |] 476 | -- "nyan" 477 | -- >>> justifyLeft 4 [char| '~' |] [ascii| "nyan" |] 478 | -- "nyan" 479 | -- >>> justifyLeft 10 [char| '~' |] [ascii| "nyan" |] 480 | -- "nyan~~~~~~" 481 | -- 482 | -- /Complexity:/ \(\Theta(n)\) 483 | -- 484 | -- @since 1.0.1 485 | justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText 486 | justifyLeft n c t = t <> replicate (n - length t) (singleton c) 487 | 488 | -- | @justifyRight n c t@ produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\), 489 | -- consisting of (zero or more) copies of @c@ followed by a copy of @t@. 
--
-- >>> justifyRight (-100) [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyRight 4 [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyRight 10 [char| '~' |] [ascii| "nyan" |]
-- "~~~~~~nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText
justifyRight n c t = replicate (n - length t) (singleton c) <> t

-- | @center n c t@ produces a result of length \({\tt k } = \max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
-- consisting of:
--
-- * \(\lceil \frac{{\tt k} - {\tt length} \; {\tt t}}{2} \rceil\) copies of @c@;
-- followed by
-- * A copy of @t@; followed by
-- * Zero or more copies of @c@
--
-- This means that the centering is \'left-biased\'. This mimics the behaviour
-- of the function of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:center),
-- although that function's documentation does not describe this behaviour.
--
-- >>> center (-100) [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> center 4 [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> center 5 [char| '~' |] [ascii| "nyan" |]
-- "~nyan"
-- >>> center 6 [char| '~' |] [ascii| "nyan" |]
-- "~nyan~"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
center :: Int -> AsciiChar -> AsciiText -> AsciiText
center n c t
  | n <= length t = t
  -- Even padding splits equally; odd padding places the extra copy of c on
  -- the left ('left-biased'), matching the doctests above.
  | P.even (n - length t) = copied <> t <> copied
  | otherwise = copied <> singleton c <> t <> copied
  where
    copied :: AsciiText
    copied = replicate ((n - length t) `P.div` 2) (singleton c)

-- Folds

-- | Left-associative fold of a text.
541 | -- 542 | -- >>> foldl (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 543 | -- "f(f(f(f(f(f(ac)a)t)b)o)y)" 544 | -- 545 | -- /Complexity:/ \(\Theta(n)\) 546 | -- 547 | -- @since 1.0.0 548 | {-# INLINE foldl #-} 549 | foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a 550 | foldl f x (AsciiText bs) = BS.foldl (coerce f) x bs 551 | 552 | -- | Left-associative fold of a text, strict in the accumulator. 553 | -- 554 | -- >>> foldl' (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 555 | -- "f(f(f(f(f(f(ac)a)t)b)o)y)" 556 | -- 557 | -- /Complexity:/ \(\Theta(n)\) 558 | -- 559 | -- @since 1.0.0 560 | {-# INLINE foldl' #-} 561 | foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a 562 | foldl' f x (AsciiText bs) = BS.foldl' (coerce f) x bs 563 | 564 | -- | Right-associative fold of a text. 565 | -- 566 | -- >>> foldr (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 567 | -- "f(f(f(f(f(f(ay)o)b)t)a)c)" 568 | -- 569 | -- /Complexity:/ \(\Theta(n)\) 570 | -- 571 | -- @since 1.0.0 572 | {-# INLINE foldr #-} 573 | foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a 574 | foldr f x (AsciiText bs) = BS.foldr (coerce f) x bs 575 | 576 | -- | Right-associative fold of a text, strict in the accumulator. 577 | -- 578 | -- >>> foldr' (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 579 | -- "f(f(f(f(f(f(ay)o)b)t)a)c)" 580 | -- 581 | -- /Complexity:/ \(\Theta(n)\) 582 | -- 583 | -- @since 1.0.0 584 | {-# INLINE foldr' #-} 585 | foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a 586 | foldr' f x (AsciiText bs) = BS.foldr' (coerce f) x bs 587 | 588 | -- Special folds 589 | 590 | -- | Concatenate a list of texts. 591 | -- 592 | -- >>> concat [] 593 | -- "" 594 | -- >>> concat [[ascii| "catboy" |]] 595 | -- "catboy" 596 | -- >>> concat . 
Prelude.replicate 4 $ [ascii| "nyan" |] 597 | -- "nyannyannyannyan" 598 | -- 599 | -- /Complexity:/ \(\Theta(n)\) 600 | -- 601 | -- @since 1.0.0 602 | concat :: [AsciiText] -> AsciiText 603 | concat = coerce BS.concat 604 | 605 | -- | Map a text-producing function over a text, then concatenate the results. 606 | -- 607 | -- >>> concatMap singleton empty 608 | -- "" 609 | -- >>> concatMap singleton [ascii| "nyan" |] 610 | -- "nyan" 611 | -- >>> concatMap (\c -> singleton c <> singleton c) [ascii| "nekomimi" |] 612 | -- "nneekkoommiimmii" 613 | -- 614 | -- /Complexity:/ \(\Theta(n)\) 615 | -- 616 | -- @since 1.0.0 617 | concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText 618 | concatMap = coerce BS.concatMap 619 | 620 | -- | 'scanl' is similar to 'foldl', but returns a list of successive values from 621 | -- the left. 622 | -- 623 | -- /Complexity:/ \(\Theta(n)\) 624 | -- 625 | -- @since 1.0.0 626 | {-# INLINE scanl #-} 627 | scanl :: 628 | -- | accumulator -> element -> new accumulator 629 | (AsciiChar -> AsciiChar -> AsciiChar) -> 630 | -- | Starting accumulator value 631 | AsciiChar -> 632 | -- | Input of length \(n\) 633 | AsciiText -> 634 | -- | Output of length \(n + 1\) 635 | AsciiText 636 | scanl = coerce BS.scanl 637 | 638 | -- | 'scanr' is similar to 'foldr', but returns a list of successive values from 639 | -- the right. 640 | -- 641 | -- /Complexity:/ \(\Theta(n)\) 642 | -- 643 | -- @since 1.0.0 644 | {-# INLINE scanr #-} 645 | scanr :: 646 | -- | element -> accumulator -> new accumulator 647 | (AsciiChar -> AsciiChar -> AsciiChar) -> 648 | -- | Starting accumulator value 649 | AsciiChar -> 650 | -- | Input of length \(n\) 651 | AsciiText -> 652 | -- | Output of length \(n + 1\) 653 | AsciiText 654 | scanr = coerce BS.scanr 655 | 656 | -- Accumulating maps 657 | 658 | -- | Like a combination of 'map' and 'foldl''. 
Applies a function to each
-- element of an 'AsciiText', passing an accumulating parameter from left to
-- right, and returns a final 'AsciiText' along with the accumulating
-- parameter's final value.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE mapAccumL #-}
mapAccumL :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
mapAccumL f x (AsciiText bs) = AsciiText <$> BS.mapAccumL (coerce f) x bs

-- | Like a combination of 'map' and 'foldr'. Applies a function to each element
-- of an 'AsciiText', passing an accumulating parameter from right to left, and
-- returns a final 'AsciiText' along with the accumulating parameter's final
-- value.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE mapAccumR #-}
mapAccumR :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
-- NOTE: this previously delegated to 'BS.mapAccumL', which traverses
-- left-to-right, contradicting the documented right-to-left semantics.
mapAccumR f x (AsciiText bs) = AsciiText <$> BS.mapAccumR (coerce f) x bs

-- Generation and unfolding

-- | @replicate n t@ consists of @t@ repeated \(\max \{ 0, {\tt n } \}\) times.
--
-- >>> replicate (-100) [ascii| "nyan" |]
-- ""
-- >>> replicate 0 [ascii| "nyan" |]
-- ""
-- >>> replicate 3 [ascii| "nyan" |]
-- "nyannyannyan"
--
-- /Complexity:/ \(\Theta(n \cdot m)\)
--
-- @since 1.0.1
replicate :: Int -> AsciiText -> AsciiText
replicate n t
  | n <= 0 = empty
  | otherwise = concat . P.replicate n $ t

-- | Similar to 'Data.List.unfoldr'. The function parameter takes a seed value,
-- and produces either 'Nothing' (indicating that we're done) or 'Just' an
-- 'AsciiChar' and a new seed value.
'unfoldr' then, given a starting seed, will 704 | -- repeatedly call the function parameter on successive seed values, returning 705 | -- the resulting 'AsciiText', based on the 'AsciiChar's produced, in the same 706 | -- order. 707 | -- 708 | -- /Complexity:/ \(\Theta(n)\) 709 | -- 710 | -- @since 1.0.0 711 | {-# INLINE unfoldr #-} 712 | unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText 713 | unfoldr f = AsciiText . BS.unfoldr (coerce f) 714 | 715 | -- | Similar to 'unfoldr', but also takes a maximum length parameter. The second 716 | -- element of the result tuple will be 'Nothing' if we finished with the 717 | -- function argument returning 'Nothing', and 'Just' the final seed value if we 718 | -- reached the maximum length before that happened. 719 | -- 720 | -- /Complexity:/ \(\Theta(n)\) 721 | -- 722 | -- @since 1.0.0 723 | {-# INLINE unfoldrN #-} 724 | unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a) 725 | unfoldrN n f = first AsciiText . BS.unfoldrN n (coerce f) 726 | 727 | -- | @take n t@ returns the prefix of @t@ with length 728 | -- \(\min \{ \max \{ 0, {\tt n}\}, {\tt length} \; {\tt t} \}\). 729 | -- 730 | -- >>> take (-100) [ascii| "catboy" |] 731 | -- "" 732 | -- >>> take 0 [ascii| "catboy" |] 733 | -- "" 734 | -- >>> take 4 [ascii| "catboy" |] 735 | -- "catb" 736 | -- >>> take 1000 [ascii| "catboy" |] 737 | -- "catboy" 738 | -- 739 | -- /Complexity:/ \(\Theta(1)\) 740 | -- 741 | -- @since 1.0.0 742 | {-# INLINE take #-} 743 | take :: Int -> AsciiText -> AsciiText 744 | take = coerce BS.take 745 | 746 | -- | @takeEnd n t@ returns the suffix of @t@ with length 747 | -- \(\min \{ \max \{0, {\tt n} \}, {\tt length} \; {\tt t} \}\). 
748 | -- 749 | -- >>> takeEnd (-100) [ascii| "catboy" |] 750 | -- "" 751 | -- >>> takeEnd 0 [ascii| "catboy" |] 752 | -- "" 753 | -- >>> takeEnd 4 [ascii| "catboy" |] 754 | -- "tboy" 755 | -- >>> takeEnd 1000 [ascii| "catboy" |] 756 | -- "catboy" 757 | -- 758 | -- /Complexity:/ \(\Theta(1)\) 759 | -- 760 | -- @since 1.0.1 761 | takeEnd :: Int -> AsciiText -> AsciiText 762 | takeEnd n t = drop (length t - n) t 763 | 764 | -- | @drop n t@ returns the suffix of @t@ with length 765 | -- \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\). 766 | -- 767 | -- >>> drop (-100) [ascii| "catboy" |] 768 | -- "catboy" 769 | -- >>> drop 0 [ascii| "catboy" |] 770 | -- "catboy" 771 | -- >>> drop 4 [ascii| "catboy" |] 772 | -- "oy" 773 | -- >>> drop 1000 [ascii| "catboy" |] 774 | -- "" 775 | -- 776 | -- /Complexity:/ \(\Theta(1)\) 777 | -- 778 | -- @since 1.0.0 779 | {-# INLINE drop #-} 780 | drop :: Int -> AsciiText -> AsciiText 781 | drop = coerce BS.drop 782 | 783 | -- | @dropEnd n t@ returns the prefix of @t@ with length 784 | -- \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\). 785 | -- 786 | -- >>> dropEnd (-100) [ascii| "catboy" |] 787 | -- "catboy" 788 | -- >>> dropEnd 0 [ascii| "catboy" |] 789 | -- "catboy" 790 | -- >>> dropEnd 4 [ascii| "catboy" |] 791 | -- "ca" 792 | -- >>> dropEnd 1000 [ascii| "catboy" |] 793 | -- "" 794 | -- 795 | -- /Complexity:/ \(\Theta(1)\) 796 | -- 797 | -- @since 1.0.1 798 | dropEnd :: Int -> AsciiText -> AsciiText 799 | dropEnd n t = take (length t - n) t 800 | 801 | -- | @takeWhile p t@ returns the longest prefix of @t@ of characters that 802 | -- satisfy @p@. 803 | -- 804 | -- >>> takeWhile ((Just Lower ==) . caseOf) empty 805 | -- "" 806 | -- >>> takeWhile ((Just Lower ==) . 
caseOf) [ascii| "catboy goes nyan" |] 807 | -- "catboy" 808 | -- 809 | -- /Complexity:/ \(\Theta(n)\) 810 | -- 811 | -- @since 1.0.0 812 | {-# INLINE [1] takeWhile #-} 813 | takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 814 | takeWhile f (AsciiText at) = AsciiText . BS.takeWhile (coerce f) $ at 815 | 816 | -- | @takeWhileEnd p t@ returns the longest suffix of @t@ of characters that 817 | -- satisfy @p@. Equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. 818 | -- 819 | -- >>> takeWhileEnd ((Just Lower ==) . caseOf) empty 820 | -- "" 821 | -- >>> takeWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 822 | -- "nyan" 823 | -- 824 | -- /Complexity:/ \(\Theta(n)\) 825 | -- 826 | -- @since 1.0.0 827 | {-# INLINE takeWhileEnd #-} 828 | takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 829 | takeWhileEnd f = AsciiText . BS.takeWhileEnd (coerce f) . coerce 830 | 831 | -- | @dropWhile p t@ returns the suffix remaining after @'takeWhile' p t@. 832 | -- 833 | -- >>> dropWhile ((Just Lower ==) . caseOf) empty 834 | -- "" 835 | -- >>> dropWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 836 | -- " goes nyan" 837 | -- 838 | -- /Complexity:/ \(\Theta(n)\) 839 | -- 840 | -- @since 1.0.0 841 | {-# INLINE [1] dropWhile #-} 842 | dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 843 | dropWhile f (AsciiText at) = AsciiText . BS.dropWhile (coerce f) $ at 844 | 845 | -- | @dropWhileEnd p t@ returns the prefix remaining after @'takeWhileEnd' p t@. 846 | -- Equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. 847 | -- 848 | -- >>> dropWhileEnd ((Just Lower ==) . caseOf) empty 849 | -- "" 850 | -- >>> dropWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 851 | -- "catboy goes " 852 | -- 853 | -- /Complexity:/ \(\Theta(n)\) 854 | -- 855 | -- @since 1.0.0 856 | {-# INLINE dropWhileEnd #-} 857 | dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 858 | dropWhileEnd f = AsciiText . 
BS.dropWhileEnd (coerce f) . coerce 859 | 860 | -- | @dropAround p@ is equivalent to @'dropWhile' p '.' 'dropWhileEnd' p@. 861 | -- 862 | -- >>> dropAround ((Just Lower ==) . caseOf) empty 863 | -- "" 864 | -- >>> dropAround ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 865 | -- " goes " 866 | -- 867 | -- /Complexity:/ \(\Theta(n)\) 868 | -- 869 | -- @since 1.0.1 870 | dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 871 | dropAround p = dropWhile p . dropWhileEnd p 872 | 873 | -- | Remove the longest prefix /and/ suffix of the input comprised entirely of 874 | -- whitespace characters. We define a \'whitespace character\' as any of the 875 | -- following: 876 | -- 877 | -- * TAB (0x09) 878 | -- * LF (0x0a) 879 | -- * VT (0x0b) 880 | -- * FF (0x0c) 881 | -- * CR (0x0d) 882 | -- * Space (0x20) 883 | -- 884 | -- >>> strip empty 885 | -- "" 886 | -- >>> strip [ascii| "catboy goes nyan" |] 887 | -- "catboy goes nyan" 888 | -- >>> strip [ascii| "\n\n \tcatboy goes nyan" |] 889 | -- "catboy goes nyan" 890 | -- >>> strip [ascii| "catboy goes nyan \t\t\n" |] 891 | -- "catboy goes nyan" 892 | -- >>> strip [ascii| "\n\n \tcatboy goes nyan \t\t\n" |] 893 | -- "catboy goes nyan" 894 | -- 895 | -- /Complexity:/ \(\Theta(n)\) 896 | -- 897 | -- @since 1.0.1 898 | strip :: AsciiText -> AsciiText 899 | strip = dropAround isSpace 900 | 901 | -- | Remove the longest prefix of the input comprised entirely of whitespace 902 | -- characters. 
We define a \'whitespace character\' as any of the following: 903 | -- 904 | -- * TAB (0x09) 905 | -- * LF (0x0a) 906 | -- * VT (0x0b) 907 | -- * FF (0x0c) 908 | -- * CR (0x0d) 909 | -- * Space (0x20) 910 | -- 911 | -- >>> stripStart empty 912 | -- "" 913 | -- >>> stripStart [ascii| "catboy goes nyan" |] 914 | -- "catboy goes nyan" 915 | -- >>> stripStart [ascii| "\n\n \tcatboy goes nyan" |] 916 | -- "catboy goes nyan" 917 | -- >>> stripStart [ascii| "catboy goes nyan \t\t\n" |] 918 | -- "catboy goes nyan \t\t\n" 919 | -- >>> stripStart [ascii| "\n\n \tcatboy goes nyan \t\t\n" |] 920 | -- "catboy goes nyan \t\t\n" 921 | -- 922 | -- /Complexity:/ \(\Theta(n)\) 923 | -- 924 | -- @since 1.0.1 925 | stripStart :: AsciiText -> AsciiText 926 | stripStart = dropWhile isSpace 927 | 928 | -- | Remove the longest suffix of the input comprised entirely of whitespace 929 | -- characters. We define a \'whitespace character\' as any of the following: 930 | -- 931 | -- * TAB (0x09) 932 | -- * LF (0x0a) 933 | -- * VT (0x0b) 934 | -- * FF (0x0c) 935 | -- * CR (0x0d) 936 | -- * Space (0x20) 937 | -- 938 | -- >>> stripEnd empty 939 | -- "" 940 | -- >>> stripEnd [ascii| "catboy goes nyan" |] 941 | -- "catboy goes nyan" 942 | -- >>> stripEnd [ascii| "\n\n \tcatboy goes nyan" |] 943 | -- "\n\n \tcatboy goes nyan" 944 | -- >>> stripEnd [ascii| "catboy goes nyan \t\t\n" |] 945 | -- "catboy goes nyan" 946 | -- >>> stripEnd [ascii| "\n\n \tcatboy goes nyan \t\t\n" |] 947 | -- "\n\n \tcatboy goes nyan" 948 | -- 949 | -- /Complexity:/ \(\Theta(n)\) 950 | -- 951 | -- @since 1.0.1 952 | stripEnd :: AsciiText -> AsciiText 953 | stripEnd = dropWhileEnd isSpace 954 | 955 | -- | @splitAt n t@ is equivalent to @('take' n t, 'drop' n t)@. 
956 | -- 957 | -- >>> splitAt (-3) [ascii| "catboy" |] 958 | -- ("","catboy") 959 | -- >>> splitAt 0 [ascii| "catboy" |] 960 | -- ("","catboy") 961 | -- >>> splitAt 3 [ascii| "catboy" |] 962 | -- ("cat","boy") 963 | -- >>> splitAt 1000 [ascii| "catboy" |] 964 | -- ("catboy","") 965 | -- 966 | -- /Complexity:/ \(\Theta(1)\) 967 | -- 968 | -- @since 1.0.0 969 | {-# INLINE splitAt #-} 970 | splitAt :: Int -> AsciiText -> (AsciiText, AsciiText) 971 | splitAt = coerce BS.splitAt 972 | 973 | -- | @breakOn needle haystack@, given a @needle@ of length \(n\) and a 974 | -- @haystack@ of length \(h\), attempts to find the first instance of @needle@ 975 | -- in @haystack@. If successful, return a tuple consisting of: 976 | -- 977 | -- * The prefix of @haystack@ before the match; and 978 | -- * The rest of @haystack@, starting with the match. 979 | -- 980 | -- If the needle is empty, this returns @('empty', haystack)@. If no match can 981 | -- be found, this instead returns @(haystack, 'empty')@. 982 | -- 983 | -- If you need to repeatedly split on the same needle, consider 'breakOnAll', as 984 | -- this will be more efficient due to only having to run the matching algorithm 985 | -- once. 986 | -- 987 | -- >>> breakOn empty [ascii| "catboy goes nyan" |] 988 | -- ("","catboy goes nyan") 989 | -- >>> breakOn [ascii| "nyan" |] empty 990 | -- ("","") 991 | -- >>> breakOn [ascii| "goes" |] [ascii| "catboy goes nyan" |] 992 | -- ("catboy ","goes nyan") 993 | -- >>> breakOn [ascii| "catboy" |] [ascii| "nyan nyan nyan" |] 994 | -- ("nyan nyan nyan","") 995 | -- 996 | -- = On complexity 997 | -- 998 | -- This function is based on a variant of the 999 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 1000 | -- except it does not detect overlapping needles. 
Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
1027 | -- 1028 | -- @since 1.0.1 1029 | breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText) 1030 | breakOn needle@(AsciiText n) haystack@(AsciiText h) 1031 | | length needle == 0 = (empty, haystack) 1032 | | otherwise = case indices n h of 1033 | [] -> (haystack, empty) 1034 | ix : _ -> splitAt ix haystack 1035 | 1036 | -- | @breakOnEnd needle haystack@, given a @needle@ of length \(n\) and a 1037 | -- @haystack@ of length \(h\), attempts to find the last instance of @needle@ in 1038 | -- @haystack@. If successful, return a tuple consisting of: 1039 | -- 1040 | -- * The prefix of @haystack@ up to, and including, the match; and 1041 | -- * The rest of @haystack@. 1042 | -- 1043 | -- If the needle is empty, this returns @(haystack, 'empty')@. If no match can 1044 | -- be found, this instead returns @('empty', haystack)@. 1045 | -- 1046 | -- This function is similar to 'breakOn'. If you need to repeatedly split on the 1047 | -- same needle, consider 'breakOnAll', as this will be more efficient due to 1048 | -- only having to run the matching algorithm once. 1049 | -- 1050 | -- >>> breakOnEnd empty [ascii| "catboy goes nyan" |] 1051 | -- ("catboy goes nyan","") 1052 | -- >>> breakOnEnd [ascii| "nyan" |] empty 1053 | -- ("","") 1054 | -- >>> breakOnEnd [ascii| "goes" |] [ascii| "catboy goes nyan" |] 1055 | -- ("catboy goes"," nyan") 1056 | -- >>> breakOnEnd [ascii| "catboy" |] [ascii| "nyan nyan nyan" |] 1057 | -- ("","nyan nyan nyan") 1058 | -- 1059 | -- = On complexity 1060 | -- 1061 | -- This function is based on a variant of the 1062 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 1063 | -- except it does not detect overlapping needles. 
Its average-case analysis is 1064 | -- based on the assumption that: 1065 | -- 1066 | -- * All ASCII symbols are equally likely to occur in both the needle and the 1067 | -- haystack; and 1068 | -- * The needle has length at least two; and 1069 | -- * Both the needle and the haystack contain at least four unique symbols. 1070 | -- 1071 | -- We fall back to 'split' for singleton needles, and there is no work to be 1072 | -- done on empty needles, which means the second assumption always holds. 1073 | -- 1074 | -- Worst-case behaviour becomes more likely the more your input satisfies the 1075 | -- following conditions: 1076 | -- 1077 | -- * The needle and/or haystack use few unique symbols (less than four is the 1078 | -- worst); or 1079 | -- * The haystack contains many instances of the second symbol of the needle 1080 | -- which don't lead to full matches. 1081 | -- 1082 | -- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case. 1083 | -- 1084 | -- /See also:/ Note that all the below are references for the original 1085 | -- algorithm, which includes searching for overlapping needles. Thus, our 1086 | -- implementation will perform better than the analysis suggests. 1087 | -- 1088 | -- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) 1089 | -- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings). 1090 | -- 1091 | -- @since 1.0.1 1092 | breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText) 1093 | breakOnEnd needle@(AsciiText n) haystack@(AsciiText h) 1094 | | length needle == 0 = (haystack, empty) 1095 | | otherwise = case go . 
indices n $ h of 1096 | Nothing -> (empty, haystack) 1097 | Just ix -> splitAt (ix + length needle) haystack 1098 | where 1099 | go :: [Int] -> Maybe Int 1100 | go = \case 1101 | [] -> Nothing 1102 | [i] -> Just i 1103 | (_ : is) -> go is 1104 | 1105 | -- | @break p t@ is equivalent to @('takeWhile' ('not' p) t, 'dropWhile' ('not' 1106 | -- p) t)@. 1107 | -- 1108 | -- >>> break ([char| ' ' |] ==) [ascii| "catboy goes nyan" |] 1109 | -- ("catboy"," goes nyan") 1110 | -- 1111 | -- /Complexity:/ \(\Theta(n)\) 1112 | -- 1113 | -- @since 1.0.0 1114 | break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) 1115 | break = coerce BS.break 1116 | 1117 | -- | @span p t@ is equivalent to @('takeWhile' p t, 'dropWhile' p t)@. 1118 | -- 1119 | -- >>> span ([char| 'c' |] ==) [ascii| "catboy goes nyan" |] 1120 | -- ("c","atboy goes nyan") 1121 | -- 1122 | -- /Complexity:/ \(\Theta(n)\) 1123 | -- 1124 | -- @since 1.0.0 1125 | {-# INLINE [1] span #-} 1126 | span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) 1127 | span = coerce BS.span 1128 | 1129 | -- | Separate a text into a list of texts such that: 1130 | -- 1131 | -- * Their concatenation is equal to the original argument; and 1132 | -- * Equal adjacent characters in the original argument are in the same text in 1133 | -- the result. 1134 | -- 1135 | -- This is a specialized form of 'groupBy', and is about 40% faster than 1136 | -- @'groupBy' '=='@. 1137 | -- 1138 | -- >>> group empty 1139 | -- [] 1140 | -- >>> group . 
singleton $ [char| 'w' |] 1141 | -- ["w"] 1142 | -- >>> group [ascii| "nyan" |] 1143 | -- ["n","y","a","n"] 1144 | -- >>> group [ascii| "nyaaaan" |] 1145 | -- ["n","y","aaaa","n"] 1146 | -- 1147 | -- /Complexity:/ \(\Theta(n)\) 1148 | -- 1149 | -- @since 1.0.0 1150 | group :: AsciiText -> [AsciiText] 1151 | group = coerce BS.group 1152 | 1153 | -- | Separate a text into a list of texts such that: 1154 | -- 1155 | -- * Their concatenation is equal to the original argument; and 1156 | -- * Adjacent characters for which the function argument returns @True@ are in 1157 | -- the same text in the result. 1158 | -- 1159 | -- 'group' is a special case for the function argument '=='; it is also about 1160 | -- 40% faster. 1161 | -- 1162 | -- >>> groupBy (<) empty 1163 | -- [] 1164 | -- >>> groupBy (<) . singleton $ [char| 'w' |] 1165 | -- ["w"] 1166 | -- >>> groupBy (<) [ascii| "catboy goes nyan" |] 1167 | -- ["c","atboy"," goes"," nyan"] 1168 | -- 1169 | -- /Complexity:/ \(\Theta(n)\) 1170 | -- 1171 | -- @since 1.0.0 1172 | groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText] 1173 | groupBy = coerce BS.groupBy 1174 | 1175 | -- | All prefixes of the argument, from shortest to longest. 1176 | -- 1177 | -- >>> inits empty 1178 | -- [""] 1179 | -- >>> inits . singleton $ [char| 'w' |] 1180 | -- ["","w"] 1181 | -- >>> inits [ascii| "nyan" |] 1182 | -- ["","n","ny","nya","nyan"] 1183 | -- 1184 | -- /Complexity:/ \(\Theta(n)\) 1185 | -- 1186 | -- @since 1.0.0 1187 | inits :: AsciiText -> [AsciiText] 1188 | inits = coerce BS.inits 1189 | 1190 | -- | All suffixes of the argument, from shortest to longest. 1191 | -- 1192 | -- >>> tails empty 1193 | -- [""] 1194 | -- >>> tails . 
singleton $ [char| 'w' |] 1195 | -- ["w",""] 1196 | -- >>> tails [ascii| "nyan" |] 1197 | -- ["nyan","yan","an","n",""] 1198 | -- 1199 | -- /Complexity:/ \(\Theta(n)\) 1200 | -- 1201 | -- @since 1.0.0 1202 | tails :: AsciiText -> [AsciiText] 1203 | tails = coerce BS.tails 1204 | 1205 | -- Breaking into many substrings 1206 | 1207 | -- | @splitOn needle haystack@, given a @needle@ of length \(n\) and a haystack 1208 | -- of length \(h\), breaks @haystack@ into pieces, separated by @needle@. Any 1209 | -- occurrences of @needle@ in @haystack@ are consumed. 1210 | -- 1211 | -- >>> splitOn empty [ascii| "catboy goes nyan and goes nyan" |] 1212 | -- ["catboy goes nyan and goes nyan"] 1213 | -- >>> splitOn [ascii| "nyan" |] empty 1214 | -- [""] 1215 | -- >>> splitOn [ascii| "nyan" |] [ascii| "catboy goes nyan and goes nyan" |] 1216 | -- ["catboy goes "," and goes ",""] 1217 | -- >>> splitOn [ascii| "nyan" |] [ascii| "nyan" |] 1218 | -- ["",""] 1219 | -- >>> splitOn [ascii| "nyan" |] [ascii| "catboy" |] 1220 | -- ["catboy"] 1221 | -- 1222 | -- = On complexity 1223 | -- 1224 | -- This function is based on a variant of the 1225 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 1226 | -- except it does not detect overlapping needles. Its average-case analysis is 1227 | -- based on the assumption that: 1228 | -- 1229 | -- * All ASCII symbols are equally likely to occur in both the needle and the 1230 | -- haystack; and 1231 | -- * The needle has length at least two; and 1232 | -- * Both the needle and the haystack contain at least four unique symbols. 1233 | -- 1234 | -- We fall back to 'split' for singleton needles, and there is no work to be 1235 | -- done on empty needles, which means the second assumption always holds. 
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
splitOn :: AsciiText -> AsciiText -> [AsciiText]
splitOn needle@(AsciiText n) haystack@(AsciiText h)
  | needleLen == 0 = [haystack]
  | length haystack == 0 = [empty]
  | needleLen == 1 = split (== (AsciiChar . BS.head $ n)) haystack
  | otherwise = go 0 (indices n h)
  where
    needleLen :: Int
    needleLen = length needle
    -- Walk the (ascending) list of match positions, emitting the chunk
    -- between the previous cut point and each match, then skipping past the
    -- needle itself so its bytes are consumed.
    go :: Int -> [Int] -> [AsciiText]
    go pos = \case
      [] -> [drop pos haystack]
      (ix : ixes) ->
        let chunkLen = ix - pos
            segment = take chunkLen . drop pos $ haystack
         in segment : go (pos + chunkLen + needleLen) ixes

-- | @split p t@ separates @t@ into components delimited by separators, for
-- which @p@ returns @True@. The results do not contain the separators.
--
-- \(n\) adjacent separators result in \(n - 1\) empty components in the result.
--
-- >>> split ([char| '~' |] ==) empty
-- []
-- >>> split ([char| '~' |] ==) . singleton $ [char| '~' |]
-- ["",""]
-- >>> split ([char| '~' |] ==) [ascii| "nyan" |]
-- ["nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "~nyan" |]
-- ["","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~" |]
-- ["nyan",""]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~nyan" |]
-- ["nyan","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~~nyan" |]
-- ["nyan","","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~~~nyan" |]
-- ["nyan","","","nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE split #-}
split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
split = coerce BS.splitWith

-- | Splits a text into chunks of the specified length. Equivalent to repeatedly
-- 'take'ing the specified length until exhaustion. The last item in the result
-- may thus be shorter than requested.
--
-- For any @n <= 0@ and any @t@, @chunksOf n t@ yields the empty list. This is
-- identical to the behaviour of the function of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:chunksOf),
-- although it doesn't document this fact.
--
-- >>> chunksOf (-100) [ascii| "I am a catboy" |]
-- []
-- >>> chunksOf (-100) empty
-- []
-- >>> chunksOf 0 [ascii| "I am a catboy" |]
-- []
-- >>> chunksOf 0 empty
-- []
-- >>> chunksOf 1 [ascii| "I am a catboy" |]
-- ["I"," ","a","m"," ","a"," ","c","a","t","b","o","y"]
-- >>> chunksOf 1 empty
-- []
-- >>> chunksOf 2 [ascii| "I am a catboy" |]
-- ["I ","am"," a"," c","at","bo","y"]
-- >>> chunksOf 300 [ascii| "I am a catboy" |]
-- ["I am a catboy"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
chunksOf :: Int -> AsciiText -> [AsciiText]
chunksOf n t
  | n <= 0 = []
  | t == empty = []
  | otherwise = case splitAt n t of
      (h, t') -> h : chunksOf n t'

-- Breaking into lines and words

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:lines),
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:lines).
-- Specifically, separates the argument into pieces, with LF characters (0x0a) as
-- separators. A single trailing LF is ignored. None of the final results
-- contain LF.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has some consequences,
-- which the documentation of both the text package and the Prelude does not
-- properly explain. We list them here - bear these in mind when using this
-- function, as well as 'unlines':
--
-- * No platform-specific concept of a \'newline\' is ever used by this
-- function. Separation is done on LF, and /only/ LF, regardless of platform.
-- The documentation in both the text package and the Prelude confusingly refers
-- to \'newline characters\', which is a category error. We thus specify that LF
-- is the character being split on, rather than mentioning \'newlines\' in any
-- way, shape or form.
-- * @'unlines' '.' 'lines'@ is /not/ the same as @'Prelude.id'@. This is
-- misleadingly described in the Prelude, which claims that (its version of)
-- @unlines@ is \'an inverse operation\' to (its version of) @lines@. For a
-- precise explanation of why this is the case, please see the documentation for
-- 'unlines'.
-- * @'lines'@ is not the same as @'split' (['char'| \'\n\' |] '==')@. See the
-- doctests below for a demonstration of how they differ.
--
-- >>> lines empty
-- []
-- >>> split ([char| '\n' |] ==) empty
-- []
-- >>> lines [ascii| "catboy goes nyan" |]
-- ["catboy goes nyan"]
-- >>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan" |]
-- ["catboy goes nyan"]
-- >>> lines [ascii| "catboy goes nyan\n" |]
-- ["catboy goes nyan"]
-- >>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan\n" |]
-- ["catboy goes nyan",""]
-- >>> lines [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
-- ["","catboy","","","goes","","nyan",""]
-- >>> split ([char| '\n' |] ==) [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
-- ["","catboy","","","goes","","nyan","",""]
-- >>> lines [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
-- ["\r","catboy\r","goes\r","nyan\r"]
-- >>> split ([char| '\n' |] ==) [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
-- ["\r","catboy\r","goes\r","nyan\r",""]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- /See also:/ [Wikipedia on newlines](https://en.wikipedia.org/wiki/Newline)
--
-- @since 1.0.1
lines :: AsciiText -> [AsciiText]
lines (AsciiText bs) = coerce . go $ bs
  where
    -- Split before each LF byte (0x0a). A single trailing LF produces no
    -- trailing empty piece, because the empty remainder fails the initial
    -- uncons on the next recursive step.
    go :: ByteString -> [ByteString]
    go rest = case BS.uncons rest of
      Nothing -> []
      Just _ -> case BS.break (0x0a ==) rest of
        (h, t) ->
          h : case BS.uncons t of
            Nothing -> []
            Just (_, t') -> go t'

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:words)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:words).
-- Specifically, separates the argument into pieces, with (non-empty sequences
-- of) word separator characters as separators. A \'word separator character\'
-- is any of the following:
--
-- * TAB (0x09)
-- * LF (0x0a)
-- * VT (0x0b)
-- * FF (0x0c)
-- * CR (0x0d)
-- * Space (0x20)
--
-- None of the final results contain any word separator characters. Any sequence
-- of leading, or trailing, word separator characters will be ignored.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has the consequence that @'unwords' '.' 'words'@ is
-- /not/ the same as 'Prelude.id', although the documentation for the Prelude
-- confusingly describes (its version of) @unwords@ as an \'inverse operation\'
-- to (its version of) @words@. See the documentation for 'unwords' for an
-- explanation of why this is the case.
--
-- >>> words empty
-- []
-- >>> words [ascii| "catboy" |]
-- ["catboy"]
-- >>> words [ascii| " \r\r\r\rcatboy \n\rgoes\t\t\t\t\tnyan\n " |]
-- ["catboy","goes","nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
words :: AsciiText -> [AsciiText]
words (AsciiText bs) = coerce . go $ bs
  where
    -- Drop any leading run of separators, then peel off the next word;
    -- stop once only separators (or nothing) remained.
    go :: ByteString -> [ByteString]
    go rest =
      let rest' = BS.dropWhile isSep rest
       in case BS.length rest' of
            0 -> []
            _ -> case BS.break isSep rest' of
              (h, t) -> h : go t
    -- Word separators: Space (0x20) plus the C0 whitespace range
    -- TAB .. CR (0x09 - 0x0d), exactly as documented above.
    isSep :: Word8 -> Bool
    isSep w8
      | w8 == 32 = True
      | 9 <= w8 && w8 <= 13 = True
      | otherwise = False

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:unlines)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:unlines).
-- Specifically, appends an LF character to each of the texts, then concatenates. Equivalent
-- to @'foldMap' (`'snoc'` [char| '\n' |])@.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has some consequences, which the documentation of both
-- the text package and the Prelude does not properly explain. We list them here
-- - bear these in mind when using this function, as well as 'lines':
--
-- * No platform-specific concept of a \'newline\' is ever used by this
-- function. The documentation in both the text package and the Prelude
-- confusingly refers to appending a \'terminating newline\', which is only a
-- correct statement on platforms where a newline is LF. We thus specify that we
-- append LF, rather than mentioning \'newlines\' in any way, shape or form.
-- * @'unlines' '.' 'lines'@ is /not/ the same as @'Prelude.id'@. This is
-- misleadingly described in the Prelude, which claims that (its version of)
-- @unlines@ is \'an inverse operation\' to (its version of) @lines@. See the
-- doctests below for a demonstration of this.
--
-- >>> unlines []
-- ""
-- >>> unlines [[ascii| "nyan" |]]
-- "nyan\n"
-- >>> unlines . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyan\nnyan\nnyan\n"
-- >>> unlines . lines $ [ascii| "catboy goes nyan" |]
-- "catboy goes nyan\n"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- /See also:/ [Wikipedia on newlines](https://en.wikipedia.org/wiki/Newline)
--
-- @since 1.0.1
unlines :: (Foldable f) => f AsciiText -> AsciiText
unlines = foldMap (`snoc` [char| '\n' |])

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:unwords)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:unwords).
-- Specifically, links together adjacent texts with a Space character. Equivalent to
-- @'intercalate' [ascii| " " |]@.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has the consequence that @'unwords' '.' 'words'@ is
-- /not/ the same as 'Prelude.id', although the documentation for the Prelude
-- confusingly describes (its version of) @unwords@ as an \'inverse operation\'
-- to (its version of) @words@. See the doctests below for a demonstration of
-- this.
--
-- >>> unwords []
-- ""
-- >>> unwords [[ascii| "nyan" |]]
-- "nyan"
-- >>> unwords . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyan nyan nyan"
-- >>> unwords . words $ [ascii| "nyan\nnyan\nnyan" |]
-- "nyan nyan nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
unwords :: [AsciiText] -> AsciiText
unwords = intercalate [ascii| " " |]

-- View patterns

-- | Return 'Just' the suffix of the second text if it has the first text as
-- a prefix, 'Nothing' otherwise.
--
-- >>> stripPrefix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripPrefix empty [ascii| "catboy" |]
-- Just "catboy"
-- >>> stripPrefix [ascii| "nyan" |] [ascii| "nyan" |]
-- Just ""
-- >>> stripPrefix [ascii| "nyan" |] [ascii| "catboy" |]
-- Nothing
-- >>> stripPrefix [ascii| "catboy" |] [ascii| "catboy goes nyan" |]
-- Just " goes nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText
stripPrefix = coerce BS.stripPrefix

-- | Return 'Just' the prefix of the second text if it has the first text as
-- a suffix, 'Nothing' otherwise.
--
-- >>> stripSuffix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripSuffix empty [ascii| "catboy" |]
-- Just "catboy"
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "nyan" |]
-- Just ""
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "catboy" |]
-- Nothing
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "catboy goes nyan" |]
-- Just "catboy goes "
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText
stripSuffix = coerce BS.stripSuffix

-- | @stripInfix needle haystack@, given a needle of length \(n\) and a haystack
-- of length \(h\), attempts to find the first instance of @needle@ in
-- @haystack@. If successful, it returns 'Just' the pair consisting of:
--
-- * All the text in @haystack@ before the first instance of @needle@; and
-- * All the text in @haystack@ after, but not including, the first instance of
-- @needle@.
--
-- If there is no instance of @needle@ in @haystack@, this returns 'Nothing'.
--
-- >>> stripInfix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripInfix empty [ascii| "nyan catboy nyan nyan" |]
-- Nothing
-- >>> stripInfix [ascii| "catboy" |] [ascii| "catboy" |]
-- Just ("","")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy" |]
-- Just ("nyan ","")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "catboy nyan" |]
-- Just (""," nyan")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy nyan nyan" |]
-- Just ("nyan "," nyan nyan")
-- >>> stripInfix [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- Just ("","yanyan")
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText)
stripInfix needle@(AsciiText n) haystack@(AsciiText h)
  | P.min (length needle) (length haystack) == 0 = Nothing
  | otherwise = case indices n h of
      [] -> Nothing
      -- Only the first match matters; everything past its end is the
      -- remainder.
      (ix : _) -> Just (take ix haystack, drop (ix + length needle) haystack)

-- | Find the longest non-empty common prefix of the arguments and return it,
-- along with the remaining suffixes of both arguments. If the arguments lack a
-- common, non-empty prefix, returns 'Nothing'.
--
-- >>> commonPrefixes empty [ascii| "catboy" |]
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] empty
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] [ascii| "nyan" |]
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] [ascii| "catboy" |]
-- Just ("catboy","","")
-- >>> commonPrefixes [ascii| "nyan" |] [ascii| "nyan nyan" |]
-- Just ("nyan",""," nyan")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText)
commonPrefixes (AsciiText t1) (AsciiText t2) = case matchLen of
  -- No shared leading byte means no non-empty common prefix.
  0 -> Nothing
  len -> Just (coerce (BS.take len t1, BS.drop len t1, BS.drop len t2))
  where
    -- Length of the longest common prefix: scan forward from index 0 and stop
    -- at the first mismatch (or when the shorter argument runs out). The
    -- previous fold-based implementation recorded the /last/ matching index
    -- even after an earlier mismatch, so inputs like "ac" and "bc" (which
    -- share no prefix, only a later byte) were wrongly reported as sharing a
    -- two-character prefix.
    matchLen :: Int
    matchLen = go 0
    limit :: Int
    limit = P.min (BS.length t1) (BS.length t2)
    go :: Int -> Int
    go i
      | i < limit && BS.index t1 i == BS.index t2 i = go (i + 1)
      | otherwise = i

-- Searching

-- | Return the text comprised of all the characters that satisfy the function
-- argument (that is, for which it returns 'True'), in the same order as in the
-- original.
--
-- >>> filter ([char| 'n' |] ==) empty
-- ""
-- >>> filter ([char| 'n' |] ==) [ascii| "catboy" |]
-- ""
-- >>> filter ([char| 'n' |] ==) [ascii| "nyan" |]
-- "nn"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE filter #-}
filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
filter = coerce BS.filter

-- | @breakOnAll needle haystack@, given a @needle@ of length \(n\) and a
-- @haystack@ of length \(h\), finds all non-overlapping instances of @needle@
-- in @haystack@. Each result consists of the following elements:
--
-- * The prefix prior to the match; and
-- * The match, followed by the rest of the string.
--
-- If given an empty needle, the result is a singleton list containing a pair of
-- the entire haystack and the empty text. If given an empty haystack, the
-- result is an empty list.
--
-- >>> breakOnAll empty [ascii| "nyan nyan nyan" |]
-- [("nyan nyan nyan","")]
-- >>> breakOnAll [ascii| "nyan" |] empty
-- []
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyan" |]
-- [("","nyan")]
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
-- [("","nyan nyan nyan"),("nyan ","nyan nyan"),("nyan nyan ","nyan")]
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- [("","nyanyanyan"),("nyanya","nyan")]
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)]
breakOnAll needle@(AsciiText n) haystack@(AsciiText h)
  | length needle == 0 = [(haystack, empty)]
  | length haystack == 0 = []
  -- Each match index becomes a (prefix, match-and-rest) split of the
  -- haystack.
  | otherwise = (`splitAt` haystack) <$> indices n h

-- | Returns 'Just' the first character in the text satisfying the predicate,
-- 'Nothing' otherwise.
--
-- >>> find ([char| 'n' |] ==) empty
-- Nothing
-- >>> find ([char| 'n' |] ==) [ascii| "catboy" |]
-- Nothing
-- >>> find ([char| 'n' |] ==) [ascii| "nyan" |]
-- Just '0x6e'
-- >>> find ([char| 'n' |] /=) [ascii| "nyan" |]
-- Just '0x79'
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE find #-}
find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar
find = coerce BS.find

-- | @partition p t@ is equivalent to @('filter' p t, 'filter' ('not' p) t)@.
--
-- >>> partition ([char| 'n' |] ==) empty
-- ("","")
-- >>> partition ([char| 'n' |] ==) . singleton $ [char| 'n' |]
-- ("n","")
-- >>> partition ([char| 'n' |] ==) . singleton $ [char| 'w' |]
-- ("","w")
-- >>> partition ([char| 'n' |] ==) [ascii| "nyan!" |]
-- ("nn","ya!")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
partition = coerce BS.partition

-- Indexing

-- | Retrieve the ASCII character at the given position in the text. Indexes
-- begin from 0. If the index provided is invalid (that is, less than 0, equal
-- to the length of the text, or greater), return 'Nothing'; otherwise, return
-- 'Just' the character at that position.
--
-- >>> index [ascii| "nyan nyan nyan" |] (-100)
-- Nothing
-- >>> index [ascii| "nyan nyan nyan" |] 0
-- Just '0x6e'
-- >>> index [ascii| "nyan nyan nyan" |] 5
-- Just '0x6e'
-- >>> index [ascii| "nyan nyan nyan" |] 2000
-- Nothing
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.1
index :: AsciiText -> Int -> Maybe AsciiChar
index at i
  -- Bounds check first: BS.index is partial, so only call it when safe.
  | i < 0 || i >= length at = Nothing
  | otherwise = Just . coerce BS.index at $ i

-- | Returns 'Just' the first index in the text such that the character at that
-- index satisfies the predicate, 'Nothing' otherwise.
--
-- >>> findIndex ([char| 'n' |] ==) empty
-- Nothing
-- >>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'n' |]
-- Just 0
-- >>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'w' |]
-- Nothing
-- >>> findIndex ([char| 'n' |] ==) [ascii| "nyan" |]
-- Just 0
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE [1] findIndex #-}
findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int
findIndex = coerce BS.findIndex

-- | @count needle haystack@, given a @needle@ of length \(n\) and a haystack of
-- length \(h\), counts the number of non-overlapping occurrences of @needle@ in
-- @haystack@. If @needle@ is empty, the count will be 0.
--
-- >>> count empty [ascii| "nyan nyan nyan" |]
-- 0
-- >>> count [ascii| "nyan" |] empty
-- 0
-- >>> count [ascii| "nyan" |] [ascii| "nyan" |]
-- 1
-- >>> count [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
-- 3
-- >>> count [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- 2
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
count :: AsciiText -> AsciiText -> Int
count needle@(AsciiText n) haystack@(AsciiText h)
  | P.min (length needle) (length haystack) == 0 = 0
  -- Single-byte needles can use bytestring's specialized byte counter.
  | length needle == 1 = BS.count (BS.head n) h
  | otherwise = P.length . indices n $ h

-- Zipping

-- | \'Pair off\' characters in both texts at corresponding indices. The result
-- will be limited to the shorter of the two arguments.
--
-- >>> zip empty [ascii| "catboy" |]
-- []
-- >>> zip [ascii| "catboy" |] empty
-- []
-- >>> zip [ascii| "catboy" |] [ascii| "nyan" |]
-- [('0x63','0x6e'),('0x61','0x79'),('0x74','0x61'),('0x62','0x6e')]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)]
zip = coerce BS.zip

-- | Combine two texts together in lockstep to produce a new text, using the
-- provided function to combine ASCII characters at each step. The length of the
-- result will be the minimum of the lengths of the two text arguments.
--
-- >>> zipWith max [ascii| "I am a catboy" |] empty
-- ""
-- >>> zipWith max empty [ascii| "I am a catboy" |]
-- ""
-- >>> zipWith max [ascii| "I am a catboy" |] [ascii| "Nyan nyan nyan nyan nyan" |]
-- "Nyan nycntnyy"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
zipWith ::
  (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText
zipWith f t1 t2 = unfoldr go (t1, t2)
  where
    -- The unfold stops as soon as either text is exhausted (uncons fails),
    -- which gives the minimum-length behaviour documented above.
    go :: (AsciiText, AsciiText) -> Maybe (AsciiChar, (AsciiText, AsciiText))
    go (acc1, acc2) = do
      (h1, t1') <- uncons acc1
      (h2, t2') <- uncons acc2
      pure (f h1 h2, (t1', t2'))

-- Conversions

-- | Try and convert a 'Text' into an 'AsciiText'. Gives 'Nothing' if the 'Text'
-- contains any symbols which lack an ASCII equivalent.
--
-- >>> fromText "catboy"
-- Just "catboy"
-- >>> fromText "😺😺😺😺😺"
-- Nothing
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
fromText :: Text -> Maybe AsciiText
fromText t = case T.find (not . isAscii) t of
  -- Every code point is ASCII, so UTF-8 encoding yields exactly one byte
  -- per character.
  Nothing -> pure . AsciiText . encodeUtf8 $ t
  Just _ -> Nothing

-- | Try and convert a 'Text' into an 'AsciiText'. Gives @'Prelude.Left' c@ if the 'Text'
-- contains a 'Prelude.Char' @c@ that lacks an ASCII representation.
--
-- >>> eitherFromText "catboy"
-- Right "catboy"
-- >>> eitherFromText "😺😺😺😺😺"
-- Left '\128570'
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.1
eitherFromText :: Text -> P.Either P.Char AsciiText
eitherFromText t = case T.find (not . isAscii) t of
  Nothing -> pure . AsciiText . encodeUtf8 $ t
  Just c -> P.Left c

-- | Try and convert a 'ByteString' into an 'AsciiText'. Gives 'Nothing' if the
-- 'ByteString' contains any bytes outside the ASCII range (that is, from 0 to
-- 127 inclusive).
--
-- >>> fromByteString "catboy"
-- Just "catboy"
-- >>> fromByteString . BS.pack $ [128]
-- Nothing
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
fromByteString :: ByteString -> Maybe AsciiText
fromByteString bs = case BS.find (> 127) bs of
  Nothing -> pure . AsciiText $ bs
  Just _ -> Nothing

-- | Try and convert a 'ByteString' into an 'AsciiText'. Gives @'Prelude.Left' w8@ if
-- the 'ByteString' contains a byte @w8@ that is outside the ASCII range (that
-- is, from 0 to 127 inclusive).
--
-- >>> eitherFromByteString "catboy"
-- Right "catboy"
-- >>> eitherFromByteString . BS.pack $ [128]
-- Left 128
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.1
eitherFromByteString :: ByteString -> P.Either Word8 AsciiText
eitherFromByteString bs = case BS.find (> 127) bs of
  Nothing -> pure . AsciiText $ bs
  Just w8 -> P.Left w8

-- | Convert an 'AsciiText' into a 'Text' (by copying).
1986 | -- 1987 | -- >>> toText empty 1988 | -- "" 1989 | -- >>> toText . singleton $ [char| 'w' |] 1990 | -- "w" 1991 | -- >>> toText [ascii| "nyan" |] 1992 | -- "nyan" 1993 | -- 1994 | -- /Complexity:/ \(\Theta(n)\) 1995 | -- 1996 | -- @since 1.0.0 1997 | toText :: AsciiText -> Text 1998 | toText (AsciiText bs) = decodeUtf8 bs 1999 | 2000 | -- | Reinterpret an 'AsciiText' as a 'ByteString' (without copying). 2001 | -- 2002 | -- >>> toByteString empty 2003 | -- "" 2004 | -- >>> toByteString . singleton $ [char| 'w' |] 2005 | -- "w" 2006 | -- >>> toByteString [ascii| "nyan" |] 2007 | -- "nyan" 2008 | -- 2009 | -- /Complexity:/ \(\Theta(1)\) 2010 | -- 2011 | -- @since 1.0.0 2012 | toByteString :: AsciiText -> ByteString 2013 | toByteString = coerce 2014 | 2015 | -- Optics 2016 | 2017 | -- | A convenient demonstration of the relationship between 'toText' and 2018 | -- 'fromText'. 2019 | -- 2020 | -- >>> preview textWise "catboy goes nyan" 2021 | -- Just "catboy goes nyan" 2022 | -- >>> preview textWise "😺😺😺😺😺" 2023 | -- Nothing 2024 | -- >>> review textWise [ascii| "catboys are amazing" |] 2025 | -- "catboys are amazing" 2026 | -- 2027 | -- @since 1.0.0 2028 | textWise :: Prism' Text AsciiText 2029 | textWise = prism' toText fromText 2030 | 2031 | -- | A convenient demonstration of the relationship between 'toByteString' and 2032 | -- 'fromByteString'. 2033 | -- 2034 | -- >>> preview byteStringWise "catboy goes nyan" 2035 | -- Just "catboy goes nyan" 2036 | -- >>> preview byteStringWise . BS.pack $ [0xff, 0xff] 2037 | -- Nothing 2038 | -- >>> review byteStringWise [ascii| "I love catboys" |] 2039 | -- "I love catboys" 2040 | -- 2041 | -- @since 1.0.0 2042 | byteStringWise :: Prism' ByteString AsciiText 2043 | byteStringWise = prism' toByteString fromByteString 2044 | 2045 | -- | Pack (or unpack) a list of ASCII characters into a text. 
2046 | -- 2047 | -- >>> view packedChars [[char| 'n' |], [char| 'y' |], [char| 'a' |], [char| 'n' |]] 2048 | -- "nyan" 2049 | -- >>> review packedChars [ascii| "nyan" |] 2050 | -- ['0x6e','0x79','0x61','0x6e'] 2051 | -- 2052 | -- @since 1.0.1 2053 | packedChars :: Iso' [AsciiChar] AsciiText 2054 | packedChars = 2055 | coerceS . coerceT . coerceA . coerceB $ BSO.packedBytes @ByteString 2056 | 2057 | -- | Traverse the individual ASCII characters in a text. 2058 | -- 2059 | -- >>> preview (elementOf chars 0) [ascii| "I am a catboy" |] 2060 | -- Just '0x49' 2061 | -- >>> preview (elementOf chars 100) [ascii| "I am a catboy" |] 2062 | -- Nothing 2063 | -- >>> iover chars (\i x -> bool x [char| 'w' |] . even $ i) [ascii| "I am a catboy" |] 2064 | -- "w wmwawcwtwow" 2065 | -- 2066 | -- @since 1.0.1 2067 | chars :: IxTraversal' Int64 AsciiText AsciiChar 2068 | chars = coerceS . coerceT . coerceA . coerceB $ BSO.bytes @ByteString 2069 | 2070 | -- | Pack (or unpack) a list of bytes into a text. This isn't as capable as 2071 | -- 'packedChars', as that would allow construction of invalid texts. 2072 | -- 2073 | -- >>> preview packedBytes [0x6e, 0x79, 0x61, 0x6e] 2074 | -- Just "nyan" 2075 | -- >>> preview packedBytes [0xff, 0xfe] 2076 | -- Nothing 2077 | -- >>> review packedBytes [ascii| "nyan" |] 2078 | -- [110,121,97,110] 2079 | -- 2080 | -- @since 1.0.1 2081 | packedBytes :: Prism' [Word8] AsciiText 2082 | packedBytes = prism' (review go) (P.fmap (view go2) . P.traverse asciify) 2083 | where 2084 | go :: Review [Word8] AsciiText 2085 | go = castOptic . coerceA . coerceB $ BSO.packedBytes @ByteString 2086 | go2 :: Getter [Word8] AsciiText 2087 | go2 = castOptic . coerceA . coerceB $ BSO.packedBytes @ByteString 2088 | 2089 | -- | Access the individual bytes in a text. This isn't as capable as 'chars', as 2090 | -- that would allow modifications of the bytes in ways that aren't valid as 2091 | -- ASCII. 
2092 | -- 2093 | -- >>> itoListOf bytes [ascii| "I am a catboy" |] 2094 | -- [(0,73),(1,32),(2,97),(3,109),(4,32),(5,97),(6,32),(7,99),(8,97),(9,116),(10,98),(11,111),(12,121)] 2095 | -- 2096 | -- @since 1.0.1 2097 | bytes :: IxFold Int64 AsciiText Word8 2098 | bytes = castOptic . coerceS . coerceT $ BSO.bytes @ByteString 2099 | 2100 | -- Helpers 2101 | 2102 | isSpace :: AsciiChar -> Bool 2103 | isSpace (AsciiChar w8) 2104 | | w8 == 32 = True 2105 | | 9 <= w8 && w8 <= 13 = True 2106 | | otherwise = False 2107 | 2108 | asciify :: Word8 -> Maybe Word8 2109 | asciify w8 2110 | | w8 <= 127 = Just w8 2111 | | otherwise = Nothing 2112 | 2113 | indices :: ByteString -> ByteString -> [Int] 2114 | indices needle haystack 2115 | | P.min needleLen haystackLen == 0 = [] 2116 | | needleLen == 1 = BS.elemIndices (BS.head needle) haystack 2117 | | otherwise = L.unfoldr go 0 2118 | where 2119 | go :: Int -> Maybe (Int, Int) 2120 | go j 2121 | | j > (haystackLen - needleLen) = Nothing 2122 | | BS.index needle 1 /= BS.index haystack (j + 1) = go (j + kay) 2123 | | otherwise = do 2124 | let fragment = BS.take needleLen . 
BS.drop j $ haystack 2125 | if fragment == needle 2126 | then pure (j, j + needleLen) 2127 | else go (j + ell) 2128 | kay :: Int 2129 | kay 2130 | | BS.head needle == BS.index needle 1 = 2 2131 | | otherwise = 1 2132 | ell :: Int 2133 | ell 2134 | | BS.head needle == BS.index needle 1 = 1 2135 | | otherwise = 2 2136 | needleLen :: Int 2137 | needleLen = BS.length needle 2138 | haystackLen :: Int 2139 | haystackLen = BS.length haystack 2140 | -------------------------------------------------------------------------------- /src/Text/Ascii/Char.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingVia #-} 2 | {-# LANGUAGE LambdaCase #-} 3 | {-# LANGUAGE PatternSynonyms #-} 4 | {-# LANGUAGE Trustworthy #-} 5 | 6 | -- | 7 | -- Module: Text.Ascii.Char 8 | -- Copyright: (C) 2021 Koz Ross 9 | -- License: Apache 2.0 10 | -- Maintainer: Koz Ross 11 | -- Stability: stable 12 | -- Portability: GHC only 13 | -- 14 | -- An implementation of ASCII characters, as bytes restricted to the range 0 - 15 | -- 127 inclusive. 
16 | -- 17 | -- /See also:/ [Wikipedia entry for ASCII](https://en.wikipedia.org/wiki/ASCII) 18 | module Text.Ascii.Char 19 | ( -- * ASCII characters 20 | 21 | -- ** Type 22 | AsciiChar (AsByte, AsChar), 23 | 24 | -- ** Construction 25 | char, 26 | fromChar, 27 | fromByte, 28 | 29 | -- ** Transformation 30 | upcase, 31 | downcase, 32 | 33 | -- * Categorization 34 | AsciiType (Control, Printable), 35 | charType, 36 | AsciiCategory (Other, Punctuation, Letter, Number, Symbol), 37 | categorize, 38 | categorizeGeneral, 39 | AsciiCase (Upper, Lower), 40 | caseOf, 41 | 42 | -- * Optics 43 | charWise, 44 | byteWise, 45 | ) 46 | where 47 | 48 | import Control.DeepSeq (NFData) 49 | import Control.Monad (guard) 50 | import Data.Char (GeneralCategory, chr, generalCategory, isAscii, ord) 51 | import Data.Functor (($>)) 52 | import Data.Hashable (Hashable) 53 | import Data.Word (Word8) 54 | import Optics.Prism (Prism', prism') 55 | import Text.Ascii.Internal (AsciiChar (AsciiChar), toByte, pattern AsByte, pattern AsChar) 56 | import Text.Ascii.QQ (char) 57 | 58 | -- $setup 59 | -- >>> :set -XQuasiQuotes 60 | -- >>> import Text.Ascii.Char 61 | -- >>> import Optics.AffineFold (preview) 62 | -- >>> import Optics.Review (review) 63 | 64 | -- | Try and turn a 'Char' into the equivalent 'AsciiChar'. Will return 65 | -- 'Nothing' if given a 'Char' that has no ASCII equivalent. 66 | -- 67 | -- >>> fromChar '0' 68 | -- Just '0x30' 69 | -- >>> fromChar '😺' 70 | -- Nothing 71 | -- 72 | -- @since 1.0.0 73 | fromChar :: Char -> Maybe AsciiChar 74 | fromChar c = 75 | if isAscii c 76 | then pure . AsciiChar . fromIntegral . ord $ c 77 | else Nothing 78 | 79 | -- | Try to give the 'AsciiChar' corresponding to the given byte. Will return 80 | -- 'Nothing' if given a byte that doesn't correspond to an ASCII character. 
81 | -- 82 | -- >>> fromByte 50 83 | -- Just '0x32' 84 | -- >>> fromByte 128 85 | -- Nothing 86 | -- 87 | -- @since 1.0.0 88 | fromByte :: Word8 -> Maybe AsciiChar 89 | fromByte w8 = 90 | if isAscii . chr . fromIntegral $ w8 91 | then pure . AsciiChar $ w8 92 | else Nothing 93 | 94 | -- | Give the 'AsciiChar' corresponding to the uppercase version of the 95 | -- argument. Will give 'Nothing' if given an 'AsciiChar' which has no uppercase 96 | -- version, or is uppercase already. 97 | -- 98 | -- >>> upcase [char| 'a' |] 99 | -- Just '0x41' 100 | -- >>> upcase [char| '0' |] 101 | -- Nothing 102 | -- 103 | -- @since 1.0.0 104 | upcase :: AsciiChar -> Maybe AsciiChar 105 | upcase c@(AsciiChar w8) = 106 | caseOf c >>= (\cs -> guard (cs == Lower) $> AsciiChar (w8 - 32)) 107 | 108 | -- | Give the 'AsciiChar' corresponding to the lowercase version of the 109 | -- argument. Will give 'Nothing' if given an 'AsciiChar' which has no lowercase 110 | -- version, or is lowercase already. 111 | -- 112 | -- >>> downcase [char| 'C' |] 113 | -- Just '0x63' 114 | -- >>> downcase [char| '\\' |] 115 | -- Nothing 116 | -- 117 | -- @since 1.0.0 118 | downcase :: AsciiChar -> Maybe AsciiChar 119 | downcase c@(AsciiChar w8) = 120 | caseOf c >>= (\cs -> guard (cs == Upper) $> AsciiChar (w8 + 32)) 121 | 122 | -- Categorization 123 | 124 | -- | A categorization of ASCII characters based on whether they're meant to be 125 | -- displayed ('Printable') or for control ('Control'). 126 | -- 127 | -- @since 1.0.0 128 | newtype AsciiType = AsciiType Word8 129 | deriving (Eq, Ord, Hashable, NFData) via Word8 130 | 131 | -- | @since 1.0.0 132 | instance Show AsciiType where 133 | {-# INLINEABLE show #-} 134 | show = \case 135 | Control -> "Control" 136 | Printable -> "Printable" 137 | 138 | -- | @since 1.0.0 139 | instance Bounded AsciiType where 140 | minBound = Control 141 | maxBound = Printable 142 | 143 | -- | A control character is any of the first 32 bytes (0-31), plus @DEL@ (127). 
144 | -- 145 | -- @since 1.0.0 146 | pattern Control :: AsciiType 147 | pattern Control <- 148 | AsciiType 0 149 | where 150 | Control = AsciiType 0 151 | 152 | -- | All ASCII characters whose byte is above 31 (and not 127) are printable 153 | -- characters. 154 | -- 155 | -- @since 1.0.0 156 | pattern Printable :: AsciiType 157 | pattern Printable <- 158 | AsciiType 1 159 | where 160 | Printable = AsciiType 1 161 | 162 | {-# COMPLETE Control, Printable #-} 163 | 164 | -- | Classify an 'AsciiChar' according to whether it's a control character or a 165 | -- printable character. 166 | -- 167 | -- >>> charType [char| '\0' |] 168 | -- Control 169 | -- >>> charType [char| 'w' |] 170 | -- Printable 171 | -- 172 | -- @since 1.0.0 173 | charType :: AsciiChar -> AsciiType 174 | charType (AsciiChar w8) 175 | | w8 == 127 = Control 176 | | w8 < 32 = Control 177 | | otherwise = Printable 178 | 179 | -- | A categorization of ASCII characters based on their usage. Based (loosely) 180 | -- on Unicode categories. 181 | -- 182 | -- @since 1.0.0 183 | newtype AsciiCategory = AsciiCategory Word8 184 | deriving (Eq, Ord, Hashable, NFData) via Word8 185 | 186 | -- | @since 1.0.0 187 | instance Show AsciiCategory where 188 | {-# INLINEABLE show #-} 189 | show = \case 190 | Other -> "Other" 191 | Symbol -> "Symbol" 192 | Number -> "Number" 193 | Letter -> "Letter" 194 | Punctuation -> "Punctuation" 195 | 196 | -- | @since 1.0.0 197 | instance Bounded AsciiCategory where 198 | minBound = Other 199 | maxBound = Symbol 200 | 201 | -- | Something which doesn't fit into any of the other categories. 202 | -- 203 | -- @since 1.0.0 204 | pattern Other :: AsciiCategory 205 | pattern Other <- 206 | AsciiCategory 0 207 | where 208 | Other = AsciiCategory 0 209 | 210 | -- | A punctuation character. 
211 | -- 212 | -- @since 1.0.0 213 | pattern Punctuation :: AsciiCategory 214 | pattern Punctuation <- 215 | AsciiCategory 1 216 | where 217 | Punctuation = AsciiCategory 1 218 | 219 | -- | A letter, either uppercase or lowercase. 220 | -- 221 | -- @since 1.0.0 222 | pattern Letter :: AsciiCategory 223 | pattern Letter <- 224 | AsciiCategory 2 225 | where 226 | Letter = AsciiCategory 2 227 | 228 | -- | A numerical digit. 229 | -- 230 | -- @since 1.0.0 231 | pattern Number :: AsciiCategory 232 | pattern Number <- 233 | AsciiCategory 3 234 | where 235 | Number = AsciiCategory 3 236 | 237 | -- | A symbol whose role isn't (normally) punctuation. 238 | -- 239 | -- @since 1.0.0 240 | pattern Symbol :: AsciiCategory 241 | pattern Symbol <- 242 | AsciiCategory 4 243 | where 244 | Symbol = AsciiCategory 4 245 | 246 | {-# COMPLETE Other, Punctuation, Letter, Number, Symbol #-} 247 | 248 | -- | Classify an 'AsciiChar' based on its category. 249 | -- 250 | -- >>> categorize [char| ',' |] 251 | -- Punctuation 252 | -- >>> categorize [char| '~' |] 253 | -- Symbol 254 | -- >>> categorize [char| 'w' |] 255 | -- Letter 256 | -- >>> categorize [char| '2' |] 257 | -- Number 258 | -- >>> categorize [char| '\0' |] 259 | -- Other 260 | -- 261 | -- @since 1.0.0 262 | categorize :: AsciiChar -> AsciiCategory 263 | categorize c@(AsciiChar w8) 264 | | charType c == Control = Other 265 | | w8 == 0x20 = Punctuation 266 | | w8 >= 0x21 && w8 <= 0x23 = Punctuation 267 | | w8 == 0x24 = Symbol 268 | | w8 >= 0x25 && w8 <= 0x2a = Punctuation 269 | | w8 == 0x2b = Symbol 270 | | w8 >= 0x2c && w8 <= 0x2f = Punctuation 271 | | w8 >= 0x30 && w8 <= 0x39 = Number 272 | | w8 >= 0x3a && w8 <= 0x3b = Punctuation 273 | | w8 >= 0x3c && w8 <= 0x3e = Symbol 274 | | w8 >= 0x3f && w8 <= 0x40 = Punctuation 275 | | w8 >= 0x41 && w8 <= 0x5a = Letter 276 | | w8 >= 0x5b && w8 <= 0x5d = Punctuation 277 | | w8 == 0x5e = Symbol 278 | | w8 == 0x5f = Punctuation 279 | | w8 == 0x60 = Symbol 280 | | w8 >= 0x61 && w8 <= 0x7a = 
Letter 281 | | w8 == 0x7b = Punctuation 282 | | w8 == 0x7c = Symbol 283 | | w8 == 0x7d = Punctuation 284 | | otherwise = Symbol -- This only leaves ~. - Koz 285 | 286 | -- | Compatibility method for the 'GeneralCategory' provided by 'Data.Char'. 287 | -- 288 | -- >>> categorizeGeneral [char| ',' |] 289 | -- OtherPunctuation 290 | -- >>> categorizeGeneral [char| '~' |] 291 | -- MathSymbol 292 | -- >>> categorizeGeneral [char| 'w' |] 293 | -- LowercaseLetter 294 | -- >>> categorizeGeneral [char| '2' |] 295 | -- DecimalNumber 296 | -- >>> categorizeGeneral [char| '\0' |] 297 | -- Control 298 | -- 299 | -- @since 1.0.0 300 | categorizeGeneral :: AsciiChar -> GeneralCategory 301 | categorizeGeneral (AsciiChar w8) = generalCategory . chr . fromIntegral $ w8 302 | 303 | -- | The case of an ASCII character (if it has one). 304 | -- 305 | -- @since 1.0.0 306 | newtype AsciiCase = AsciiCase Word8 307 | deriving (Eq, Ord, Hashable, NFData) via Word8 308 | 309 | -- | @since 1.0.0 310 | instance Show AsciiCase where 311 | {-# INLINEABLE show #-} 312 | show = \case 313 | Upper -> "Upper" 314 | Lower -> "Lower" 315 | 316 | -- | @since 1.0.0 317 | instance Bounded AsciiCase where 318 | minBound = Upper 319 | maxBound = Lower 320 | 321 | -- | Indicator of an uppercase character. 322 | -- 323 | -- @since 1.0.0 324 | pattern Upper :: AsciiCase 325 | pattern Upper <- 326 | AsciiCase 0 327 | where 328 | Upper = AsciiCase 0 329 | 330 | -- | Indicator of a lowercase character. 331 | -- 332 | -- @since 1.0.0 333 | pattern Lower :: AsciiCase 334 | pattern Lower <- 335 | AsciiCase 1 336 | where 337 | Lower = AsciiCase 1 338 | 339 | {-# COMPLETE Upper, Lower #-} 340 | 341 | -- | Determine the case of an 'AsciiChar'. Returns 'Nothing' if the character 342 | -- doesn't have a case. 
343 | -- 344 | -- >>> caseOf [char| 'w' |] 345 | -- Just Lower 346 | -- >>> caseOf [char| 'W' |] 347 | -- Just Upper 348 | -- >>> caseOf [char| '~' |] 349 | -- Nothing 350 | -- 351 | -- @since 1.0.0 352 | caseOf :: AsciiChar -> Maybe AsciiCase 353 | caseOf c@(AsciiChar w8) 354 | | categorize c /= Letter = Nothing 355 | | w8 <= 0x5a = Just Upper 356 | | otherwise = Just Lower 357 | 358 | -- Optics 359 | 360 | -- | A representation of the relationship between 'Char' and 'AsciiChar'. 361 | -- 362 | -- >>> preview charWise 'w' 363 | -- Just '0x77' 364 | -- >>> preview charWise '😺' 365 | -- Nothing 366 | -- >>> review charWise [char| 'w' |] 367 | -- 'w' 368 | -- 369 | -- @since 1.0.0 370 | charWise :: Prism' Char AsciiChar 371 | charWise = prism' (chr . fromIntegral . toByte) fromChar 372 | 373 | -- | A representation of the relationship between ASCII characters and bytes. 374 | -- 375 | -- >>> preview byteWise 0x20 376 | -- Just '0x20' 377 | -- >>> preview byteWise 0x81 378 | -- Nothing 379 | -- >>> review byteWise [char| 'w' |] 380 | -- 119 381 | -- 382 | -- @since 1.0.0 383 | byteWise :: Prism' Word8 AsciiChar 384 | byteWise = prism' toByte fromByte 385 | -------------------------------------------------------------------------------- /src/Text/Ascii/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingVia #-} 2 | {-# LANGUAGE PatternSynonyms #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | {-# LANGUAGE Trustworthy #-} 5 | {-# LANGUAGE TypeApplications #-} 6 | {-# LANGUAGE TypeFamilies #-} 7 | {-# LANGUAGE ViewPatterns #-} 8 | 9 | -- | 10 | -- Module: Text.Ascii.Internal 11 | -- Copyright: (C) 2021 Koz Ross 12 | -- License: Apache 2.0 13 | -- Maintainer: Koz Ross 14 | -- Stability: unstable, not subject to PVP 15 | -- Portability: GHC only 16 | -- 17 | -- This is an internal module, and is /not/ subject to the PVP. 
It can change 18 | -- in any way, at any time, and should not be depended on unless you know 19 | -- /exactly/ what you are doing. You have been warned. 20 | module Text.Ascii.Internal where 21 | 22 | import Control.DeepSeq (NFData) 23 | import Data.ByteString (ByteString) 24 | import qualified Data.ByteString as BS 25 | import Data.CaseInsensitive (FoldCase (foldCase)) 26 | import Data.Char (chr, isAscii) 27 | import Data.Coerce (coerce) 28 | import Data.Hashable (Hashable) 29 | import qualified Data.List.NonEmpty as NE 30 | import Data.Monoid.Factorial (FactorialMonoid) 31 | import Data.Monoid.GCD (LeftGCDMonoid, RightGCDMonoid) 32 | import Data.Monoid.Monus (OverlappingGCDMonoid) 33 | import Data.Monoid.Null (MonoidNull, PositiveMonoid) 34 | import Data.Semigroup.Cancellative (LeftCancellative, LeftReductive, RightCancellative, RightReductive) 35 | import Data.Semigroup.Factorial (Factorial, StableFactorial) 36 | import Data.Word (Word8) 37 | import GHC.Exts (IsList (Item, fromList, fromListN, toList)) 38 | import Numeric (showHex) 39 | import Optics.AffineTraversal (An_AffineTraversal, atraversal) 40 | import Optics.At.Core (Index, IxValue, Ixed (IxKind, ix)) 41 | import Text.Megaparsec.Stream 42 | ( Stream 43 | ( Token, 44 | Tokens, 45 | chunkLength, 46 | chunkToTokens, 47 | take1_, 48 | takeN_, 49 | takeWhile_, 50 | tokenToChunk, 51 | tokensToChunk 52 | ), 53 | TraversableStream (reachOffset), 54 | VisualStream (showTokens), 55 | ) 56 | import Type.Reflection (Typeable) 57 | 58 | -- | Represents valid ASCII characters, which are bytes from @0x00@ to @0x7f@. 
59 | -- 60 | -- @since 1.0.0 61 | newtype AsciiChar = AsciiChar {toByte :: Word8} 62 | deriving 63 | ( -- | @since 1.0.0 64 | Eq, 65 | -- | @since 1.0.0 66 | Ord, 67 | -- | @since 1.0.0 68 | Hashable, 69 | -- | @since 1.0.0 70 | NFData 71 | ) 72 | via Word8 73 | deriving stock 74 | ( -- | @since 1.0.0 75 | Typeable 76 | ) 77 | 78 | -- | @since 1.0.0 79 | instance Show AsciiChar where 80 | {-# INLINEABLE show #-} 81 | show (AsciiChar w8) = "'0x" <> showHex w8 "'" 82 | 83 | -- | @since 1.0.0 84 | instance Bounded AsciiChar where 85 | {-# INLINEABLE minBound #-} 86 | minBound = AsciiChar 0 87 | {-# INLINEABLE maxBound #-} 88 | maxBound = AsciiChar 127 89 | 90 | -- | @since 1.0.1 91 | instance FoldCase AsciiChar where 92 | {-# INLINEABLE foldCase #-} 93 | foldCase ac@(AsciiChar w8) 94 | | 65 <= w8 && w8 <= 90 = AsciiChar (w8 + 32) 95 | | otherwise = ac 96 | 97 | -- | View an 'AsciiChar' as its underlying byte. You can pattern match on this, 98 | -- but since there are more bytes than valid ASCII characters, you cannot use 99 | -- this to construct. 100 | -- 101 | -- @since 1.0.0 102 | pattern AsByte :: Word8 -> AsciiChar 103 | pattern AsByte w8 <- AsciiChar w8 104 | 105 | -- | View an 'AsciiChar' as a 'Char'. You can pattern match on this, but since 106 | -- there are more 'Char's than valid ASCII characters, you cannot use this to 107 | -- construct. 108 | -- 109 | -- @since 1.0.0 110 | pattern AsChar :: Char -> AsciiChar 111 | pattern AsChar c <- AsciiChar (isJustAscii -> Just c) 112 | 113 | {-# COMPLETE AsByte #-} 114 | 115 | {-# COMPLETE AsChar #-} 116 | 117 | -- | A string of ASCII characters, represented as a packed byte array. 
118 | -- 119 | -- @since 1.0.0 120 | newtype AsciiText = AsciiText ByteString 121 | deriving 122 | ( -- | @since 1.0.0 123 | Eq, 124 | -- | @since 1.0.0 125 | Ord, 126 | -- | @since 1.0.0 127 | NFData, 128 | -- | @since 1.0.0 129 | Semigroup, 130 | -- | @since 1.0.0 131 | Monoid, 132 | -- | @since 1.0.0 133 | Show, 134 | -- | @since 1.2 135 | Factorial, 136 | -- | @since 1.2 137 | FactorialMonoid, 138 | -- | @since 1.2 139 | LeftCancellative, 140 | -- | @since 1.2 141 | LeftGCDMonoid, 142 | -- | @since 1.2 143 | LeftReductive, 144 | -- | @since 1.2 145 | MonoidNull, 146 | -- | @since 1.2 147 | OverlappingGCDMonoid, 148 | -- | @since 1.2 149 | PositiveMonoid, 150 | -- | @since 1.2 151 | RightCancellative, 152 | -- | @since 1.2 153 | RightGCDMonoid, 154 | -- | @since 1.2 155 | RightReductive, 156 | -- | @since 1.2 157 | StableFactorial 158 | ) 159 | via ByteString 160 | 161 | -- | @since 1.0.0 162 | instance IsList AsciiText where 163 | type Item AsciiText = AsciiChar 164 | {-# INLINEABLE fromList #-} 165 | fromList = 166 | coerce @ByteString @AsciiText 167 | . fromList 168 | . coerce @[AsciiChar] @[Word8] 169 | {-# INLINEABLE fromListN #-} 170 | fromListN n = 171 | coerce @ByteString @AsciiText 172 | . fromListN n 173 | . coerce @[AsciiChar] @[Word8] 174 | {-# INLINEABLE toList #-} 175 | toList = coerce . toList . coerce @AsciiText @ByteString 176 | 177 | -- | @since 1.0.1 178 | type instance Index AsciiText = Int 179 | 180 | -- | @since 1.0.1 181 | type instance IxValue AsciiText = AsciiChar 182 | 183 | -- | @since 1.0.1 184 | instance Ixed AsciiText where 185 | type IxKind AsciiText = An_AffineTraversal 186 | {-# INLINEABLE ix #-} 187 | ix i = atraversal get put 188 | where 189 | get :: AsciiText -> Either AsciiText AsciiChar 190 | get (AsciiText at) = case at BS.!? i of 191 | Nothing -> Left . AsciiText $ at 192 | Just w8 -> Right . 
AsciiChar $ w8 193 | put :: AsciiText -> AsciiChar -> AsciiText 194 | put (AsciiText at) (AsciiChar ac) = case BS.splitAt i at of 195 | (lead, end) -> case BS.uncons end of 196 | Nothing -> AsciiText at 197 | Just (_, end') -> AsciiText (lead <> BS.singleton ac <> end') 198 | 199 | -- | @since 1.0.1 200 | instance FoldCase AsciiText where 201 | {-# INLINEABLE foldCase #-} 202 | foldCase (AsciiText bs) = AsciiText . BS.map go $ bs 203 | where 204 | go :: Word8 -> Word8 205 | go w8 206 | | 65 <= w8 && w8 <= 90 = w8 + 32 207 | | otherwise = w8 208 | 209 | -- | @since 1.0.1 210 | instance Stream AsciiText where 211 | type Token AsciiText = AsciiChar 212 | type Tokens AsciiText = AsciiText 213 | {-# INLINEABLE tokenToChunk #-} 214 | tokenToChunk _ = coerce BS.singleton 215 | {-# INLINEABLE tokensToChunk #-} 216 | tokensToChunk _ = fromList 217 | {-# INLINEABLE chunkToTokens #-} 218 | chunkToTokens _ = toList 219 | {-# INLINEABLE chunkLength #-} 220 | chunkLength _ = coerce BS.length 221 | {-# INLINEABLE take1_ #-} 222 | take1_ = coerce BS.uncons 223 | {-# INLINEABLE takeN_ #-} 224 | takeN_ n at@(AsciiText bs) 225 | | n <= 0 = Just (coerce BS.empty, at) 226 | | BS.length bs == 0 = Nothing 227 | | otherwise = Just . coerce . BS.splitAt n $ bs 228 | {-# INLINEABLE takeWhile_ #-} 229 | takeWhile_ = coerce BS.span 230 | 231 | -- | @since 1.0.1 232 | instance VisualStream AsciiText where 233 | {-# INLINEABLE showTokens #-} 234 | showTokens _ = fmap (chr . fromIntegral) . coerce @_ @[Word8] . NE.toList 235 | 236 | -- | @since 1.0.1 237 | instance TraversableStream AsciiText where 238 | {-# INLINEABLE reachOffset #-} 239 | reachOffset o ps = coerce (reachOffset o ps) 240 | 241 | -- Helpers 242 | 243 | isJustAscii :: Word8 -> Maybe Char 244 | isJustAscii w8 = 245 | if isAscii asChar 246 | then pure asChar 247 | else Nothing 248 | where 249 | asChar :: Char 250 | asChar = chr . 
fromIntegral $ w8 251 | -------------------------------------------------------------------------------- /src/Text/Ascii/QQ.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | {-# LANGUAGE Trustworthy #-} 3 | 4 | -- | 5 | -- Module: Text.Ascii.QQ 6 | -- Copyright: (C) 2021 Koz Ross 7 | -- License: Apache 2.0 8 | -- Maintainer: Koz Ross 9 | -- Stability: unstable, not subject to PVP 10 | -- Portability: GHC only 11 | -- 12 | -- This is an internal module, and is /not/ subject to the PVP. It can change 13 | -- in any way, at any time, and should not be depended on unless you know 14 | -- /exactly/ what you are doing. You have been warned. 15 | module Text.Ascii.QQ where 16 | 17 | import Data.ByteString (ByteString) 18 | import qualified Data.ByteString as BS 19 | import Data.Char 20 | ( isAlphaNum, 21 | isAscii, 22 | isPunctuation, 23 | isSymbol, 24 | ord, 25 | ) 26 | import Data.Functor (void) 27 | import Data.Void (Void) 28 | import GHC.Exts (IsList (fromList)) 29 | import Language.Haskell.TH.Quote (QuasiQuoter (QuasiQuoter)) 30 | import Language.Haskell.TH.Syntax 31 | ( Dec, 32 | Exp (AppE, ConE, ListE, LitE, VarE), 33 | Lit (IntegerL), 34 | Pat, 35 | Q, 36 | Type, 37 | ) 38 | import Text.Ascii.Internal (AsciiChar (AsciiChar), AsciiText (AsciiText)) 39 | import Text.Megaparsec 40 | ( Parsec, 41 | between, 42 | eof, 43 | lookAhead, 44 | manyTill, 45 | oneOf, 46 | parse, 47 | satisfy, 48 | single, 49 | try, 50 | ) 51 | import Text.Megaparsec.Char (space) 52 | import Text.Megaparsec.Error (errorBundlePretty) 53 | 54 | -- $setup 55 | -- >>> :set -XQuasiQuotes 56 | -- >>> import Text.Ascii.QQ 57 | 58 | -- | Allows constructing ASCII characters from literals, whose correctness is 59 | -- checked by the compiler. 60 | -- 61 | -- Currently, accepts literal syntax similar to the Haskell parser, with escape 62 | -- sequences preceded by \'\\\'. 
In particular, this includes the single quote 63 | -- (see the example below). 64 | -- 65 | -- >>> [char| '\'' |] 66 | -- '0x27' 67 | -- 68 | -- @since 1.0.0 69 | char :: QuasiQuoter 70 | char = QuasiQuoter charQQ (errPat "char") (errType "char") (errDec "char") 71 | 72 | -- | Allows constructing ASCII strings from literals, whose correctness is 73 | -- checked by the compiler. 74 | -- 75 | -- Currently accepts literal syntax similar to the Haskell parser, with escape 76 | -- sequences preceded by \'\\\'. In particular, this includes the double quote 77 | -- (see the example below). 78 | -- 79 | -- >>> [ascii| "\"Nyan!\", said the catboy." |] 80 | -- "\"Nyan!\", said the catboy." 81 | -- 82 | -- @since 1.0.0 83 | ascii :: QuasiQuoter 84 | ascii = QuasiQuoter asciiQQ (errPat "ascii") (errType "ascii") (errDec "ascii") 85 | 86 | -- Helpers 87 | 88 | asciiQQ :: String -> Q Exp 89 | asciiQQ input = case parse (between open close go) "" input of 90 | Left err -> fail . errorBundlePretty $ err 91 | Right result -> 92 | pure 93 | . AppE (ConE 'AsciiText) 94 | . AppE (VarE 'fromList) 95 | . ListE 96 | . fmap (LitE . IntegerL . fromIntegral) 97 | . BS.unpack 98 | $ result 99 | where 100 | open :: Parsec Void String () 101 | open = space *> (void . single $ '"') 102 | close :: Parsec Void String () 103 | close = single '"' *> space *> eof 104 | go :: Parsec Void String ByteString 105 | go = BS.pack <$> manyTill asciiByte (lookAhead . try . single $ '"') 106 | asciiByte = do 107 | c <- satisfy isAscii 108 | case c of 109 | '\\' -> do 110 | c' <- oneOf "0abfnrtv\\\"" 111 | pure . fromIntegral . ord $ case c' of 112 | '0' -> '\0' 113 | 'a' -> '\a' 114 | 'b' -> '\b' 115 | 'f' -> '\f' 116 | 'n' -> '\n' 117 | 'r' -> '\r' 118 | 't' -> '\t' 119 | 'v' -> '\v' 120 | '\\' -> '\\' 121 | _ -> '"' 122 | _ -> pure . fromIntegral . ord $ c 123 | 124 | charQQ :: String -> Q Exp 125 | charQQ input = case parse (between open close go) "" input of 126 | Left err -> fail . 
errorBundlePretty $ err 127 | Right result -> 128 | pure . AppE (ConE 'AsciiChar) . LitE . IntegerL . fromIntegral $ result 129 | where 130 | open :: Parsec Void String () 131 | open = space *> (void . single $ '\'') 132 | close :: Parsec Void String () 133 | close = single '\'' *> space *> eof 134 | go :: Parsec Void String Int 135 | go = do 136 | c1 <- satisfy isValidLead 137 | case c1 of 138 | '\\' -> do 139 | c2 <- oneOf "0abfnrtv\\\'" 140 | pure . ord $ case c2 of 141 | '0' -> '\0' 142 | 'a' -> '\a' 143 | 'b' -> '\b' 144 | 'f' -> '\f' 145 | 'n' -> '\n' 146 | 'r' -> '\r' 147 | 't' -> '\t' 148 | 'v' -> '\v' 149 | '\\' -> '\\' 150 | _ -> '\'' 151 | _ -> pure . ord $ c1 152 | 153 | isValidLead :: Char -> Bool 154 | isValidLead c = isAscii c && (isAlphaNum c || c == ' ' || isSymbol c || isPunctuation c) 155 | 156 | errPat :: String -> String -> Q Pat 157 | errPat name _ = fail $ "'" <> name <> "' should not be used in a pattern context." 158 | 159 | errType :: String -> String -> Q Type 160 | errType name _ = fail $ "'" <> name <> "' should not be used in a type context." 161 | 162 | errDec :: String -> String -> Q [Dec] 163 | errDec name _ = fail $ "'" <> name <> "' should not be used in a declaration context." 164 | -------------------------------------------------------------------------------- /src/Text/Ascii/Unsafe.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFunctor #-} 2 | {-# LANGUAGE DerivingVia #-} 3 | {-# LANGUAGE FlexibleInstances #-} 4 | {-# LANGUAGE LambdaCase #-} 5 | {-# LANGUAGE RoleAnnotations #-} 6 | {-# LANGUAGE ScopedTypeVariables #-} 7 | {-# LANGUAGE TypeFamilies #-} 8 | 9 | -- | 10 | -- Module: Text.Ascii.Unsafe 11 | -- Copyright: (C) 2021 Koz Ross 12 | -- License: Apache 2.0 13 | -- Maintainer: Koz Ross 14 | -- Stability: stable 15 | -- Portability: GHC only 16 | -- 17 | -- A wrapper for partial type class instances and functions. 
18 | -- 19 | -- This module is designed for qualified importing: 20 | -- 21 | -- > import qualified Text.Ascii.Unsafe as Unsafe 22 | module Text.Ascii.Unsafe 23 | ( -- * Types 24 | Unsafe (..), 25 | 26 | -- * Text functions 27 | head, 28 | last, 29 | tail, 30 | init, 31 | foldl1, 32 | foldl1', 33 | foldr1, 34 | foldr1', 35 | maximum, 36 | minimum, 37 | scanl1, 38 | scanr1, 39 | index, 40 | ) 41 | where 42 | 43 | import Control.DeepSeq (NFData) 44 | import Data.ByteString (ByteString) 45 | import qualified Data.ByteString as BS 46 | import Data.CaseInsensitive (FoldCase) 47 | import Data.Coerce (coerce) 48 | import Data.Hashable (Hashable) 49 | import Data.Kind (Type) 50 | import Data.Monoid.Factorial (FactorialMonoid) 51 | import Data.Monoid.GCD (LeftGCDMonoid, RightGCDMonoid) 52 | import Data.Monoid.Monus (OverlappingGCDMonoid) 53 | import Data.Monoid.Null (MonoidNull, PositiveMonoid) 54 | import Data.Semigroup.Cancellative (LeftCancellative, LeftReductive, RightCancellative, RightReductive) 55 | import Data.Semigroup.Factorial (Factorial, StableFactorial) 56 | import Data.Word (Word8) 57 | import GHC.Exts (IsList) 58 | import GHC.Read (expectP, lexP, parens, readPrec) 59 | import Text.Ascii.Internal (AsciiChar (AsciiChar), AsciiText (AsciiText)) 60 | import Text.Megaparsec.Stream (Stream, TraversableStream, VisualStream) 61 | import Text.ParserCombinators.ReadPrec (ReadPrec) 62 | import Text.Read (Lexeme (Char)) 63 | import Type.Reflection (Typeable) 64 | import Prelude hiding 65 | ( foldl1, 66 | foldr1, 67 | head, 68 | init, 69 | last, 70 | maximum, 71 | minimum, 72 | scanl1, 73 | scanr1, 74 | tail, 75 | ) 76 | 77 | -- | A wrapper for a type, designating that partial type class methods or other 78 | -- functions are available for it. 79 | -- 80 | -- We set the role of the type argument of 'Unsafe' to nominal. Among other 81 | -- things, it means that this type can't be coerced or derived through. 
This 82 | -- ensures clear indication when (and to what extent) non-total operations occur 83 | -- in any code using them. 84 | -- 85 | -- @since 1.0.1 86 | newtype Unsafe (a :: Type) = Unsafe {safe :: a} 87 | deriving 88 | ( -- | @since 1.0.1 89 | Eq, 90 | -- | @since 1.0.1 91 | Ord, 92 | -- | @since 1.0.1 93 | Bounded, 94 | -- | @since 1.0.1 95 | Hashable, 96 | -- | @since 1.0.1 97 | NFData, 98 | -- | @since 1.0.1 99 | FoldCase, 100 | -- | @since 1.0.1 101 | Semigroup, 102 | -- | @since 1.0.1 103 | Monoid, 104 | -- | @since 1.0.1 105 | IsList, 106 | -- | @since 1.0.1 107 | Stream, 108 | -- | @since 1.0.1 109 | VisualStream, 110 | -- | @since 1.0.1 111 | TraversableStream, 112 | -- | @since 1.0.1 113 | Show, 114 | -- | @since 1.2 115 | Factorial, 116 | -- | @since 1.2 117 | FactorialMonoid, 118 | -- | @since 1.2 119 | LeftCancellative, 120 | -- | @since 1.2 121 | LeftGCDMonoid, 122 | -- | @since 1.2 123 | LeftReductive, 124 | -- | @since 1.2 125 | MonoidNull, 126 | -- | @since 1.2 127 | OverlappingGCDMonoid, 128 | -- | @since 1.2 129 | PositiveMonoid, 130 | -- | @since 1.2 131 | RightCancellative, 132 | -- | @since 1.2 133 | RightGCDMonoid, 134 | -- | @since 1.2 135 | RightReductive, 136 | -- | @since 1.2 137 | StableFactorial 138 | ) 139 | via a 140 | deriving stock 141 | ( -- | @since 1.0.1 142 | Typeable, 143 | -- | @since 1.0.1 144 | Functor 145 | ) 146 | 147 | type role Unsafe nominal 148 | 149 | -- | @since 1.0.1 150 | instance Read (Unsafe AsciiChar) where 151 | {-# INLINEABLE readPrec #-} 152 | readPrec = parens go 153 | where 154 | go :: ReadPrec (Unsafe AsciiChar) 155 | go = 156 | Unsafe . 
AsciiChar <$> do 157 | expectP (Char '\'') 158 | expectP (Char '0') 159 | expectP (Char 'x') 160 | Char d1 <- lexP 161 | Char d2 <- lexP 162 | expectP (Char '\'') 163 | case d1 of 164 | '0' -> fromSecondDigit d2 165 | '1' -> (16 +) <$> fromSecondDigit d2 166 | '2' -> (32 +) <$> fromSecondDigit d2 167 | '3' -> (48 +) <$> fromSecondDigit d2 168 | '4' -> (64 +) <$> fromSecondDigit d2 169 | '5' -> (80 +) <$> fromSecondDigit d2 170 | '6' -> (96 +) <$> fromSecondDigit d2 171 | '7' -> (112 +) <$> fromSecondDigit d2 172 | _ -> fail $ "Expected digit from 0 to 7, instead got '" <> [d1] <> "'" 173 | 174 | -- | @since 1.0.1 175 | instance Enum (Unsafe AsciiChar) where 176 | {-# INLINEABLE succ #-} 177 | succ (Unsafe (AsciiChar w8)) 178 | | w8 < 127 = Unsafe . AsciiChar $ w8 + 1 179 | | otherwise = error "Out of range for ASCII character" 180 | {-# INLINEABLE pred #-} 181 | pred (Unsafe (AsciiChar w8)) 182 | | w8 > 0 = Unsafe . AsciiChar $ w8 - 1 183 | | otherwise = error "Out of range for ASCII character" 184 | {-# INLINEABLE toEnum #-} 185 | toEnum n 186 | | 0 <= n && n <= 127 = Unsafe . AsciiChar . fromIntegral $ n 187 | | otherwise = error "Out of range for ASCII character" 188 | {-# INLINEABLE fromEnum #-} 189 | fromEnum (Unsafe (AsciiChar w8)) = fromIntegral w8 190 | {-# INLINEABLE enumFrom #-} 191 | enumFrom (Unsafe (AsciiChar w8)) = coerce [w | w <- [w8 ..], w <= 127] 192 | {-# INLINEABLE enumFromThen #-} 193 | enumFromThen (Unsafe (AsciiChar start)) (Unsafe (AsciiChar step)) = 194 | coerce [w | w <- [start, step ..], w <= 127] 195 | {-# INLINEABLE enumFromTo #-} 196 | enumFromTo (Unsafe (AsciiChar start)) (Unsafe (AsciiChar end)) = 197 | coerce [w | w <- [start .. end], w <= 127] 198 | {-# INLINEABLE enumFromThenTo #-} 199 | enumFromThenTo (Unsafe (AsciiChar start)) (Unsafe (AsciiChar step)) (Unsafe (AsciiChar end)) = 200 | coerce [w | w <- [start, step .. 
end], w <= 127] 201 | 202 | -- | @since 1.0.1 203 | instance Read (Unsafe AsciiText) where 204 | {-# INLINEABLE readPrec #-} 205 | readPrec = Unsafe . AsciiText <$> go 206 | where 207 | go :: ReadPrec ByteString 208 | go = do 209 | bs :: ByteString <- readPrec 210 | case BS.findIndex (>= 128) bs of 211 | Nothing -> pure bs 212 | Just i -> error $ "Non-ASCII byte at index " <> show i 213 | 214 | -- Functions 215 | 216 | -- $setup 217 | -- >>> :set -XNoImplicitPrelude 218 | -- >>> :set -XQuasiQuotes 219 | -- >>> import Text.Ascii.Unsafe 220 | -- >>> import Text.Ascii.QQ 221 | -- >>> import Prelude ((.), ($)) 222 | 223 | -- | Yield the first character of the text. 224 | -- 225 | -- /Requirements:/ Text is not empty. 226 | -- 227 | -- >>> head . Unsafe $ [ascii| "catboy" |] 228 | -- '0x63' 229 | -- 230 | -- /Complexity:/ \(\Theta(1)\) 231 | -- 232 | -- @since 1.0.1 233 | head :: Unsafe AsciiText -> AsciiChar 234 | head = coerce BS.head 235 | 236 | -- | Yield the last character of the text. 237 | -- 238 | -- /Requirements:/ Text is not empty. 239 | -- 240 | -- >>> last . Unsafe $ [ascii| "catboy" |] 241 | -- '0x79' 242 | -- 243 | -- /Complexity:/ \(\Theta(1)\) 244 | -- 245 | -- @since 1.0.1 246 | last :: Unsafe AsciiText -> AsciiChar 247 | last = coerce BS.last 248 | 249 | -- | Yield the text without its first character. 250 | -- 251 | -- /Requirements:/ Text is not empty. 252 | -- 253 | -- >>> tail . Unsafe $ [ascii| "catboy" |] 254 | -- "atboy" 255 | -- 256 | -- /Complexity:/ \(\Theta(1)\) 257 | -- 258 | -- @since 1.0.1 259 | tail :: Unsafe AsciiText -> Unsafe AsciiText 260 | tail = coerce BS.tail 261 | 262 | -- | Yield the text without its last character. 263 | -- 264 | -- /Requirements:/ Text is not empty. 265 | -- 266 | -- >>> init . 
Unsafe $ [ascii| "catboy" |] 267 | -- "catbo" 268 | -- 269 | -- /Complexity:/ \(\Theta(1)\) 270 | -- 271 | -- @since 1.0.1 272 | init :: Unsafe AsciiText -> Unsafe AsciiText 273 | init = coerce BS.init 274 | 275 | -- | Left-associative fold of a text without a base case. 276 | -- 277 | -- /Requirements:/ Text is not empty. 278 | -- 279 | -- /Complexity:/ \(\Theta(n)\) 280 | -- 281 | -- @since 1.0.1 282 | foldl1 :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 283 | foldl1 = coerce BS.foldl1 284 | 285 | -- | Left-associative fold of a text without a base case, strict in the 286 | -- accumulator. 287 | -- 288 | -- /Requirements:/ Text is not empty. 289 | -- 290 | -- /Complexity:/ \(\Theta(n)\) 291 | -- 292 | -- @since 1.0.1 293 | foldl1' :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 294 | foldl1' = coerce BS.foldl1' 295 | 296 | -- | Right-associative fold of a text without a base case. 297 | -- 298 | -- /Requirements:/ Text is not empty. 299 | -- 300 | -- /Complexity:/ \(\Theta(n)\) 301 | -- 302 | -- @since 1.0.1 303 | foldr1 :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 304 | foldr1 = coerce BS.foldr1 305 | 306 | -- | Right-associative fold of a text without a base case, strict in the 307 | -- accumulator. 308 | -- 309 | -- /Requirements:/ Text is not empty. 310 | -- 311 | -- /Complexity:/ \(\Theta(n)\) 312 | -- 313 | -- @since 1.0.1 314 | foldr1' :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 315 | foldr1' = coerce BS.foldr1' 316 | 317 | -- | Yield the character in the text whose byte representation is numerically 318 | -- the largest. 319 | -- 320 | -- /Requirements:/ Text is not empty. 321 | -- 322 | -- >>> maximum . Unsafe $ [ascii| "catboy" |] 323 | -- '0x79' 324 | -- >>> maximum . 
Unsafe $ [ascii| "nyan~" |] 325 | -- '0x7e' 326 | -- 327 | -- /Complexity:/ \(\Theta(n)\) 328 | -- 329 | -- @since 1.0.1 330 | maximum :: Unsafe AsciiText -> AsciiChar 331 | maximum = coerce BS.maximum 332 | 333 | -- | Yield the character in the text whose byte representation is numerically 334 | -- the smallest. 335 | -- 336 | -- /Requirements:/ Text is not empty. 337 | -- 338 | -- >>> minimum . Unsafe $ [ascii| "catboy" |] 339 | -- '0x61' 340 | -- >>> minimum . Unsafe $ [ascii| " nyan" |] 341 | -- '0x20' 342 | -- 343 | -- /Complexity:/ \(\Theta(n)\) 344 | -- 345 | -- @since 1.0.1 346 | minimum :: Unsafe AsciiText -> AsciiChar 347 | minimum = coerce BS.minimum 348 | 349 | -- | 'scanl1' is similar to 'foldl1', but returns a text of successive values 350 | -- from the left. 351 | -- 352 | -- /Requirements:/ Text is not empty. 353 | -- 354 | -- /Complexity:/ \(\Theta(n)\) 355 | -- 356 | -- @since 1.0.1 357 | scanl1 :: 358 | -- | accumulator -> element -> new accumulator 359 | (AsciiChar -> AsciiChar -> AsciiChar) -> 360 | -- | Input of length \(n\) 361 | Unsafe AsciiText -> 362 | -- | Output of length \(n\) 363 | Unsafe AsciiText 364 | scanl1 = coerce BS.scanl1 365 | 366 | -- | 'scanr1' is similar to 'foldr1', but returns a text of successive values 367 | -- from the right. 368 | -- 369 | -- /Requirements:/ Text is not empty. 370 | -- 371 | -- /Complexity:/ \(\Theta(n)\) 372 | -- 373 | -- @since 1.0.1 374 | scanr1 :: 375 | -- | element -> accumulator -> new accumulator 376 | (AsciiChar -> AsciiChar -> AsciiChar) -> 377 | -- | Input of length \(n\) 378 | Unsafe AsciiText -> 379 | -- | Output of length \(n\) 380 | Unsafe AsciiText 381 | scanr1 = coerce BS.scanr1 382 | 383 | -- | Yield the character at the given position. 384 | -- 385 | -- /Requirements:/ The position must be at least 0, and at most the length of 386 | -- the text - 1. 
387 | -- 388 | -- >>> index (Unsafe [ascii| "catboy" |]) 0 389 | -- '0x63' 390 | -- >>> index (Unsafe $ [ascii| "catboy" |]) 4 391 | -- '0x6f' 392 | -- 393 | -- /Complexity:/ \(\Theta(1)\) 394 | -- 395 | -- @since 1.0.1 396 | index :: Unsafe AsciiText -> Int -> AsciiChar 397 | index = coerce BS.index 398 | 399 | -- Helpers 400 | 401 | fromSecondDigit :: Char -> ReadPrec Word8 402 | fromSecondDigit = \case 403 | '0' -> pure 0 404 | '1' -> pure 1 405 | '2' -> pure 2 406 | '3' -> pure 3 407 | '4' -> pure 4 408 | '5' -> pure 5 409 | '6' -> pure 6 410 | '7' -> pure 7 411 | '8' -> pure 8 412 | '9' -> pure 9 413 | 'a' -> pure 10 414 | 'b' -> pure 11 415 | 'c' -> pure 12 416 | 'd' -> pure 13 417 | 'e' -> pure 14 418 | 'f' -> pure 15 419 | d -> fail $ "Expected hex digit, instead got '" <> [d] <> "'" 420 | -------------------------------------------------------------------------------- /text-ascii.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 3.0 2 | name: text-ascii 3 | version: 1.2.1 4 | synopsis: ASCII string and character processing. 5 | description: 6 | A total-by-default, tested and documented library for 7 | working with ASCII text. Low on dependencies, high on usability. 
8 | 9 | homepage: https://github.com/haskell-text/text-ascii 10 | license: Apache-2.0 11 | license-file: LICENSE.md 12 | author: Koz Ross 13 | maintainer: koz.ross@retro-freedom.nz 14 | bug-reports: https://github.com/haskell-text/text-ascii/issues 15 | copyright: (C) Koz Ross 2021-3 16 | category: Text 17 | tested-with: GHC ==9.4.8 || ==9.6.6 || ==9.8.4 || ==9.10.1 18 | build-type: Simple 19 | extra-source-files: 20 | CHANGELOG.md 21 | README.md 22 | 23 | library 24 | exposed-modules: 25 | Text.Ascii 26 | Text.Ascii.Char 27 | Text.Ascii.Internal 28 | Text.Ascii.QQ 29 | Text.Ascii.Unsafe 30 | 31 | build-depends: 32 | , base >=4.17 && <5 33 | , bytestring ^>=0.12 34 | , case-insensitive ^>=1.2 35 | , deepseq >=1.4.8 && <1.6.0 36 | , hashable ^>=1.4 37 | , megaparsec ^>=9.6 38 | , monoid-subclasses ^>=1.2 39 | , optics-core ^>=0.4 40 | , optics-extra ^>=0.4 41 | , template-haskell >=2.19 && <3.0 42 | , text ^>=2.1 43 | 44 | ghc-options: 45 | -Wall -Wcompat -Wincomplete-record-updates 46 | -Wincomplete-uni-patterns -Wredundant-constraints 47 | -Wmissing-deriving-strategies 48 | 49 | hs-source-dirs: src 50 | default-language: Haskell2010 51 | --------------------------------------------------------------------------------