├── .github ├── dependabot.yml └── workflows │ └── haskell.yml ├── .gitignore ├── .nvimrc ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── Setup.hs ├── cabal.project ├── src └── Text │ ├── Ascii.hs │ └── Ascii │ ├── Char.hs │ ├── Internal.hs │ ├── QQ.hs │ └── Unsafe.hs └── text-ascii.cabal /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # From: 2 | # - https://github.com/haskell/hackage-server 3 | # - https://github.com/rhysd/actionlint/issues/228#issuecomment-1272493095 4 | # - https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot 5 | 6 | # Set update schedule for GitHub Actions 7 | 8 | version: 2 9 | updates: 10 | 11 | - package-ecosystem: "github-actions" 12 | directory: "/" 13 | schedule: 14 | # Check for updates to GitHub Actions every week 15 | interval: "weekly" 16 | -------------------------------------------------------------------------------- /.github/workflows/haskell.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main, dev] 7 | 8 | jobs: 9 | generate-matrix: 10 | name: "Generate matrix from cabal" 11 | outputs: 12 | matrix: ${{ steps.set-matrix.outputs.matrix }} 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Extract the tested GHC versions 16 | id: set-matrix 17 | uses: kleidukos/get-tested@v0.1.6.0 18 | with: 19 | cabal-file: text-ascii.cabal 20 | ubuntu: true 21 | macos: true 22 | windows: true 23 | version: 0.1.6.0 24 | tests: 25 | name: ${{ matrix.ghc }} on ${{ matrix.os }} 26 | needs: generate-matrix 27 | runs-on: ${{ matrix.os }} 28 | strategy: 29 | matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }} 30 | steps: 31 | - uses: actions/checkout@v4 32 | name: Checkout base repo 33 | - uses: haskell-actions/setup@v2 34 | id: haskell-setup 35 | name: Setup Haskell 36 | with: 37 | 
ghc-version: ${{ matrix.ghc }} 38 | cabal-version: 'latest' 39 | - name: Configure 40 | run: | 41 | cabal configure --enable-tests 42 | cabal freeze 43 | - name: Cache 44 | uses: actions/cache@v4.0.0 45 | with: 46 | path: ${{ steps.haskell-setup.outputs.cabal-store }} 47 | key: ${{ runner.os }}-ghc-${{ matrix.ghc }}-cabal-${{ hashFiles('**/plan.json') }} 48 | restore-keys: ${{ runner.os }}-ghc-${{ matrix.ghc }}- 49 | - name: Install base dependencies 50 | run: cabal build --only-dependencies 51 | - name: Build 52 | run: cabal build 53 | - name: Run tests 54 | run: cabal test 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | */.nvimrc 2 | .cabal 3 | dist 4 | dist-newstyle 5 | .ghc.environment.* 6 | cabal.project.local* 7 | .stack-work 8 | stack.yaml.lock 9 | cabal.project.local~ 10 | -------------------------------------------------------------------------------- /.nvimrc: -------------------------------------------------------------------------------- 1 | " Enable hlint and GHC via Cabal 2 | let g:ale_linters = {'haskell': ['hlint', 'cabal-build']} 3 | " ... only 4 | let g:ale_linters_explicit = 1 5 | " Don't lint until I save 6 | let g:ale_lint_on_text_changed = 'never' 7 | let g:ale_lint_on_insert_leave = 0 8 | let g:ale_lint_on_enter = 0 9 | 10 | call ale#Set('haskell_cabal_build_options', '--enable-tests --disable-optimization') 11 | 12 | function! GetCabalCommand(buffer) abort 13 | let l:flags = ale#Var(a:buffer, 'haskell_cabal_build_options') 14 | return 'cabal new-build ' . 
l:flags 15 | endfunction 16 | 17 | call ale#linter#Define('haskell', { 18 | \ 'name': 'cabal_build', 19 | \ 'aliases': ['cabal-build'], 20 | \ 'output_stream': 'stderr', 21 | \ 'executable': 'cabal', 22 | \ 'command': function('GetCabalCommand'), 23 | \ 'callback': 'ale#handlers#haskell#HandleGHCFormat', 24 | \}) 25 | 26 | " Configure Neoformat to use cabal-fmt for Cabal files 27 | let g:neoformat_cabal_cabalfmt = { 'exe': 'cabal-fmt', 'args': [] } 28 | let g:neoformat_enabled_cabal = ['cabalfmt'] 29 | 30 | " Configure Neoformat to use ormolu for Haskell 31 | let g:neoformat_haskell_ormolu = { 'exe': 'ormolu', 'args': [] } 32 | let g:neoformat_enabled_haskell = ['ormolu'] 33 | 34 | " Enable automagic autoformatting 35 | augroup fmt 36 | autocmd! 37 | autocmd BufWritePre * undojoin | Neoformat 38 | augroup end 39 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Revision history for text-ascii 2 | 3 | ## 1.2.1 -- 2023-01-20 4 | 5 | * Dependency bump 6 | 7 | ## 1.2 -- 2021-11-07 8 | 9 | * Add instances of `Factorial`, `FactorialMonoid`, `LeftCancellative`, 10 | `LeftGCDMonoid`, `LeftReductive`, `MonoidNull`, `OverlappingGCDMonoid`, 11 | `PositiveMonoid`, `RightCancellative`, `RightGCDMonoid`, `RightReductive` and 12 | `StableFactorial` for `AsciiText`. 13 | 14 | ## 1.1 -- 2021-10-31 15 | 16 | * Support GHC 9.2. 17 | * Remove support for GHCs below 8.10. 18 | * Add `eitherFromText` and `eitherFromByteString` for better conversion errors. 19 | 20 | ## 1.0.1 -- 2021-03-02 21 | 22 | * Support GHC 9. 23 | * Replace 8.10.3 with 8.10.4 in CI. 24 | * Expose `Text.Ascii.Internal` and `Text.Ascii.QQ`. 25 | * Add `Ixed` instance (and supporting type instances) for `AsciiText`. 26 | * Add `Stream`, `VisualStream` and `TraversableStream` instances (and supporting 27 | type instances) for `AsciiText`. 28 | * Drop Parsec in favour of Megaparsec. 
29 | * Add `FoldCase` instances for `AsciiChar` and `AsciiText`. 30 | * Implement `lines`, `unlines`, `words`, `unwords`, `replicate`, `chunksOf`, 31 | `index`, `zipWith`, `justifyLeft`, `justifyRight`, `center`, `takeEnd`, 32 | `dropEnd`, `dropAround`, `strip`, `stripStart`, `stripEnd`, `commonPrefixes` 33 | for `AsciiText`. 34 | * Implement [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html), as 35 | well as the following functions that use it: 36 | * `count` 37 | * `replace` 38 | * `splitOn` 39 | * `stripInfix` 40 | * `breakOnAll` 41 | * `breakOn` 42 | * `breakOnEnd` 43 | * Add `Unsafe` module containing an `Unsafe` wrapper, plus instances and 44 | functions. 45 | * Add a range of optics for `AsciiText`. 46 | 47 | ## 1.0.0 -- 2021-02-07 48 | 49 | * First version. Released on an unsuspecting world. 50 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribution guide 2 | 3 | ## Introduction 4 | 5 | First of all, thank you for wanting to contribute! This guide is designed to 6 | help make sure that your contribution experience is as stress-free and 7 | straightforward as possible. 8 | 9 | ## Git practices 10 | 11 | Please fork, and make PRs to, the `dev` branch. `main` is used only for 12 | releases. 13 | 14 | Ensure that your commits are individually buildable, and that all tests pass on 15 | each commit (doctests and otherwise). Each commit should have a concise, but 16 | clear description of what it fixes or adds. Refer to issues if relevant by 17 | tagging with # followed by the issue number (for example, "Fix #1234"). To check 18 | if your doctests pass, we recommend `cabal-docspec` from 19 | [`cabal-extras`](https://github.com/phadej/cabal-extras). 
20 | 21 | ## Cabal file standards 22 | 23 | The cabal file for this project must be formatted according to 24 | [`cabal-fmt`](http://hackage.haskell.org/package/cabal-fmt). All dependencies 25 | must have bounds; where possible, `^>=`-style bounds are preferable. 26 | 27 | ## Code standards 28 | 29 | We follow the [Package Versioning Policy](https://pvp.haskell.org). If your 30 | changes are significant enough to warrant a version change by the Policy, ensure 31 | that you do so, and update the changelog to match. If you are unsure, you can 32 | use [`Policeman`](http://hackage.haskell.org/package/policeman) to check. 33 | 34 | All code is to be formatted using 35 | [`ormolu`](http://hackage.haskell.org/package/ormolu), and must be free of 36 | warnings as emitted by [Hlint](http://hackage.haskell.org/package/hlint), both 37 | with default settings. If a warning is spurious, it must be silenced in the 38 | narrowest possible scope, with an explanatory comment. 39 | 40 | Imports into a module may take one of the following forms only: 41 | 42 | * `import Foo (Bar, baz, quux)`; or 43 | * `import qualified Foo as Baz` 44 | 45 | For data type imports, wildcard imports should not be used; instead, specify the 46 | constructor(s) you want explicitly: 47 | 48 | ```haskell 49 | -- Not like this: import Foo (Bar(..)) 50 | import Foo (Bar (Baz, Quux)) 51 | ``` 52 | 53 | Every publicly-facing module must have an explicit export list (internal 54 | modules can omit this). All publicly-exported identifiers should have 55 | Haddocks, indicating an `@since` with the version where they first appeared, or 56 | last changed semantically. For functions, doctests should be provided, ideally 57 | demonstrating as much of the functionality as reasonable. Edge cases are 58 | _especially_ critical: provide a clear explanation of these in the Haddocks, or 59 | show the behaviour with doctests, preferably both. 
60 | 61 | Where possible, keep to a similar style to the rest of the module (and the 62 | package). This isn't a hard-and-fast rule, but a good thing to keep in mind for 63 | consistency reasons. 64 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `text-ascii` [![Hackage](https://img.shields.io/hackage/v/text-ascii?style=flat-square)][hackage] 2 | 3 | ## What is this thing? 4 | 5 | A library for handling ASCII text. 6 | 7 | ## What are the goals of this project? 8 | 9 | ### Totality by default 10 | 11 | Partial functions (and type classes which provide them) will not be included: 12 | everything is total. When we include anything unsafe, it will be explicitly 13 | firewalled into its own module, behind a newtype. 14 | 15 | ### No boolean blindness 16 | 17 | [Boolean blindness](http://dev.stephendiehl.com/hask/#boolean-blindness) is not 18 | a good thing, for all the reasons listed in the link. Whenever possible, we'll 19 | try and give more useful information than a `Bool`. 20 | 21 | ### Compatibility with the [`text`](http://hackage.haskell.org/package/text) API 22 | 23 | We want to match the API of the `text` package exactly. 
If you know how to use `text`, 24 | you'll know how to use this package too. Exceptions are made for places where 25 | `text` is either partial or boolean-blind. 26 | 27 | ### Discoverability, documentation and user-friendliness 28 | 29 | In addition to documenting everything with Haddocks, we have over 250 doctests, 30 | which provide _executable_ examples of how the API can be used, and how it will 31 | behave. We aim to clarify _every_ corner case left by the documentation of 32 | `text`, and care strongly about making the API easy to follow, learn and 33 | understand. 34 | 35 | ### Correctness 36 | 37 | We currently use doctests, but plan to add support for more testing. No such 38 | thing as too much! 39 | 40 | ### Low dependencies 41 | 42 | As far as possible, we aim to depend on [GHC boot packages](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/libraries/version-history) only. When we 43 | introduce more dependencies, we do it only when we have to. This way, we ensure 44 | this package builds quickly and doesn't 'lag' more than necessary due to GHC 45 | version changes. 46 | 47 | ## What's with all the cat stuff? 48 | 49 | [I am a Haskell catboy.](https://twitter.com/KozRoss) 50 | 51 | ## What does this run on? 52 | 53 | We support the latest three releases of GHC. Currently, these are: 54 | 55 | * 9.4 56 | * 9.6 57 | * 9.8 58 | 59 | We check on the following platforms: 60 | 61 | * Windows 62 | * Linux 63 | * MacOS 64 | 65 | ## What can I do with this? 66 | 67 | The project is licensed Apache 2.0 (SPDX code 68 | [`Apache-2.0`](https://spdx.org/licenses/Apache-2.0.html)). For more details, 69 | please see the `LICENSE.md` file. 
70 | 71 | [hackage]: https://hackage.haskell.org/package/text-ascii 72 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | 3 | main = defaultMain 4 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | ./text-ascii.cabal 3 | -------------------------------------------------------------------------------- /src/Text/Ascii.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingVia #-} 2 | {-# LANGUAGE LambdaCase #-} 3 | {-# LANGUAGE QuasiQuotes #-} 4 | {-# LANGUAGE Trustworthy #-} 5 | {-# LANGUAGE TypeApplications #-} 6 | {-# LANGUAGE NoImplicitPrelude #-} 7 | 8 | -- | 9 | -- Module: Text.Ascii 10 | -- Copyright: (C) 2021 Koz Ross 11 | -- License: Apache 2.0 12 | -- Maintainer: Koz Ross 13 | -- Stability: stable 14 | -- Portability: GHC only 15 | -- 16 | -- An implementation of ASCII strings. 
17 | -- 18 | -- This module is designed for qualified importing: 19 | -- 20 | -- > import qualified Text.Ascii as Ascii 21 | -- 22 | -- /See also:/ [Wikipedia entry for ASCII](https://en.wikipedia.org/wiki/ASCII) 23 | module Text.Ascii 24 | ( -- * Type 25 | AsciiText, 26 | 27 | -- * Creation 28 | empty, 29 | singleton, 30 | ascii, 31 | 32 | -- * Basic interface 33 | cons, 34 | snoc, 35 | uncons, 36 | unsnoc, 37 | length, 38 | 39 | -- * Transformations 40 | map, 41 | intercalate, 42 | intersperse, 43 | transpose, 44 | reverse, 45 | replace, 46 | 47 | -- ** Justification 48 | justifyLeft, 49 | justifyRight, 50 | center, 51 | 52 | -- * Folds 53 | foldl, 54 | foldl', 55 | foldr, 56 | foldr', 57 | 58 | -- ** Special folds 59 | concat, 60 | concatMap, 61 | 62 | -- * Construction 63 | 64 | -- ** Scans 65 | scanl, 66 | scanr, 67 | 68 | -- ** Accumulating maps 69 | mapAccumL, 70 | mapAccumR, 71 | 72 | -- ** Generation and unfolding 73 | replicate, 74 | unfoldr, 75 | unfoldrN, 76 | 77 | -- * Substrings 78 | 79 | -- ** Breaking strings 80 | take, 81 | takeEnd, 82 | drop, 83 | dropEnd, 84 | takeWhile, 85 | takeWhileEnd, 86 | dropWhile, 87 | dropWhileEnd, 88 | dropAround, 89 | strip, 90 | stripStart, 91 | stripEnd, 92 | splitAt, 93 | breakOn, 94 | breakOnEnd, 95 | break, 96 | span, 97 | group, 98 | groupBy, 99 | inits, 100 | tails, 101 | 102 | -- ** Breaking into many substrings 103 | splitOn, 104 | split, 105 | chunksOf, 106 | 107 | -- ** Breaking into lines and words 108 | lines, 109 | unlines, 110 | words, 111 | unwords, 112 | 113 | -- * View patterns 114 | stripPrefix, 115 | stripSuffix, 116 | stripInfix, 117 | commonPrefixes, 118 | 119 | -- * Searching 120 | filter, 121 | breakOnAll, 122 | find, 123 | partition, 124 | 125 | -- * Indexing 126 | index, 127 | findIndex, 128 | count, 129 | 130 | -- * Zipping 131 | zip, 132 | zipWith, 133 | 134 | -- * Conversions 135 | fromText, 136 | eitherFromText, 137 | fromByteString, 138 | eitherFromByteString, 139 | toText, 140 | 
toByteString, 141 | 142 | -- * Optics 143 | textWise, 144 | byteStringWise, 145 | packedChars, 146 | chars, 147 | packedBytes, 148 | bytes, 149 | ) 150 | where 151 | 152 | import Control.Category ((.)) 153 | import Data.Bifunctor (first) 154 | import Data.Bool (Bool (False, True), otherwise, (&&)) 155 | import Data.ByteString (ByteString) 156 | import qualified Data.ByteString as BS 157 | import qualified Data.ByteString.Optics as BSO 158 | import Data.Char (isAscii) 159 | import Data.Coerce (coerce) 160 | import Data.Foldable (Foldable (foldMap)) 161 | import qualified Data.Foldable as F 162 | import Data.Int (Int64) 163 | import qualified Data.List as L 164 | import Data.Maybe (Maybe (Just, Nothing)) 165 | import Data.Text (Text) 166 | import qualified Data.Text as T 167 | import Data.Text.Encoding (decodeUtf8, encodeUtf8) 168 | import Data.Word (Word8) 169 | import Optics.Coerce (coerceA, coerceB, coerceS, coerceT) 170 | import Optics.Getter (Getter, view) 171 | import Optics.Iso (Iso') 172 | import Optics.IxFold (IxFold) 173 | import Optics.IxTraversal (IxTraversal') 174 | import Optics.Optic (castOptic) 175 | import Optics.Prism (Prism', prism') 176 | import Optics.Review (Review, review) 177 | import Text.Ascii.Internal (AsciiChar (AsciiChar), AsciiText (AsciiText)) 178 | import Text.Ascii.QQ (ascii, char) 179 | import Prelude 180 | ( Int, 181 | not, 182 | pure, 183 | ($), 184 | (+), 185 | (-), 186 | (/=), 187 | (<), 188 | (<$>), 189 | (<=), 190 | (<>), 191 | (==), 192 | (>), 193 | (>=), 194 | (||), 195 | ) 196 | import qualified Prelude as P 197 | 198 | -- Note on pragmata 199 | -- 200 | -- This is cribbed directly from bytestring, as I figure they know what they're 201 | -- doing way better than we do. When we add our own functionality, this probably 202 | -- needs to be considered more carefully. 
- Koz 203 | 204 | -- Creation 205 | 206 | -- $setup 207 | -- >>> :set -XNoImplicitPrelude 208 | -- >>> :seti -XQuasiQuotes 209 | -- >>> :seti -XOverloadedStrings 210 | -- >>> import Text.Ascii 211 | -- >>> import Text.Ascii.Char (char, upcase, AsciiCase (Lower), caseOf) 212 | -- >>> import Prelude ((.), ($), (<>), (==), (<), (/=), (-), max, even) 213 | -- >>> import qualified Prelude as Prelude 214 | -- >>> import Data.Maybe (Maybe (Just), fromMaybe) 215 | -- >>> import qualified Data.ByteString as BS 216 | -- >>> import Optics.AffineFold (preview) 217 | -- >>> import Optics.Review (review) 218 | -- >>> import Optics.Getter (view) 219 | -- >>> import Optics.IxTraversal (elementOf) 220 | -- >>> import Optics.IxSetter (iover) 221 | -- >>> import Data.Bool (bool) 222 | -- >>> import Optics.IxFold (itoListOf) 223 | 224 | -- | The empty text. 225 | -- 226 | -- >>> empty 227 | -- "" 228 | -- 229 | -- /Complexity:/ \(\Theta(1)\) 230 | -- 231 | -- @since 1.0.0 232 | empty :: AsciiText 233 | empty = coerce BS.empty 234 | 235 | -- | A text consisting of a single ASCII character. 236 | -- 237 | -- >>> singleton [char| 'w' |] 238 | -- "w" 239 | -- 240 | -- /Complexity:/ \(\Theta(1)\) 241 | -- 242 | -- @since 1.0.0 243 | {-# INLINE [1] singleton #-} 244 | singleton :: AsciiChar -> AsciiText 245 | singleton = coerce BS.singleton 246 | 247 | -- Basic interface 248 | 249 | -- | Adds a character to the front of a text. This requires copying, which gives 250 | -- its complexity. 251 | -- 252 | -- >>> cons [char| 'n' |] [ascii| "eko" |] 253 | -- "neko" 254 | -- 255 | -- /Complexity:/ \(\Theta(n)\) 256 | -- 257 | -- @since 1.0.0 258 | {-# INLINE cons #-} 259 | cons :: AsciiChar -> AsciiText -> AsciiText 260 | cons = coerce BS.cons 261 | 262 | -- | Adds a character to the back of a text. This requires copying, which gives 263 | -- its complexity. 
264 | -- 265 | -- >>> snoc [ascii| "nek" |] [char| 'o' |] 266 | -- "neko" 267 | -- 268 | -- /Complexity:/ \(\Theta(n)\) 269 | -- 270 | -- @since 1.0.0 271 | {-# INLINE snoc #-} 272 | snoc :: AsciiText -> AsciiChar -> AsciiText 273 | snoc = coerce BS.snoc 274 | 275 | -- | If the argument is non-empty, gives 'Just' the first character and the 276 | -- rest, and 'Nothing' otherwise. 277 | -- 278 | -- >>> uncons empty 279 | -- Nothing 280 | -- >>> uncons . singleton $ [char| 'w' |] 281 | -- Just ('0x77',"") 282 | -- >>> uncons [ascii| "nekomimi" |] 283 | -- Just ('0x6e',"ekomimi") 284 | -- 285 | -- /Complexity:/ \(\Theta(1)\) 286 | -- 287 | -- @since 1.0.0 288 | {-# INLINE uncons #-} 289 | uncons :: AsciiText -> Maybe (AsciiChar, AsciiText) 290 | uncons = coerce BS.uncons 291 | 292 | -- | If the argument is non-empty, gives 'Just' the initial segment and the last 293 | -- character, and 'Nothing' otherwise. 294 | -- 295 | -- >>> unsnoc empty 296 | -- Nothing 297 | -- >>> unsnoc . singleton $ [char| 'w' |] 298 | -- Just ("",'0x77') 299 | -- >>> unsnoc [ascii| "catboy" |] 300 | -- Just ("catbo",'0x79') 301 | -- 302 | -- /Complexity:/ \(\Theta(1)\) 303 | -- 304 | -- @since 1.0.0 305 | {-# INLINE unsnoc #-} 306 | unsnoc :: AsciiText -> Maybe (AsciiText, AsciiChar) 307 | unsnoc = coerce BS.unsnoc 308 | 309 | -- | The number of characters (and, since this is ASCII, bytes) in the text. 310 | -- 311 | -- >>> length . singleton $ [char| 'w' |] 312 | -- 1 313 | -- >>> length [ascii| "nyan nyan" |] 314 | -- 9 315 | -- 316 | -- /Complexity:/ \(\Theta(1)\) 317 | -- 318 | -- @since 1.0.0 319 | {-# INLINE length #-} 320 | length :: AsciiText -> Int 321 | length = coerce BS.length 322 | 323 | -- Transformations 324 | 325 | -- | Copy, and apply the function to each element of, the text. 326 | -- 327 | -- >>> map (\c -> fromMaybe c . upcase $ c) [ascii| "nyan!" |] 328 | -- "NYAN!" 
329 | -- 330 | -- /Complexity:/ \(\Theta(n)\) 331 | -- 332 | -- @since 1.0.0 333 | {-# INLINE map #-} 334 | map :: (AsciiChar -> AsciiChar) -> AsciiText -> AsciiText 335 | map = coerce BS.map 336 | 337 | -- | Takes a text and a list of texts, and concatenates the list after 338 | -- interspersing the first argument between each element of the list. 339 | -- 340 | -- >>> intercalate [ascii| " ~ " |] [] 341 | -- "" 342 | -- >>> intercalate [ascii| " ~ " |] [[ascii| "nyan" |]] 343 | -- "nyan" 344 | -- >>> intercalate [ascii| " ~ " |] . Prelude.replicate 3 $ [ascii| "nyan" |] 345 | -- "nyan ~ nyan ~ nyan" 346 | -- >>> intercalate empty . Prelude.replicate 3 $ [ascii| "nyan" |] 347 | -- "nyannyannyan" 348 | -- 349 | -- /Complexity:/ \(\Theta(n)\) 350 | -- 351 | -- @since 1.0.0 352 | {-# INLINE [1] intercalate #-} 353 | intercalate :: AsciiText -> [AsciiText] -> AsciiText 354 | intercalate = coerce BS.intercalate 355 | 356 | -- | Takes a character, and places it between the characters of a text. 357 | -- 358 | -- >>> intersperse [char| '~' |] empty 359 | -- "" 360 | -- >>> intersperse [char| '~' |] . singleton $ [char| 'w' |] 361 | -- "w" 362 | -- >>> intersperse [char| '~' |] [ascii| "nyan" |] 363 | -- "n~y~a~n" 364 | -- 365 | -- /Complexity:/ \(\Theta(n)\) 366 | -- 367 | -- @since 1.0.0 368 | intersperse :: AsciiChar -> AsciiText -> AsciiText 369 | intersperse = coerce BS.intersperse 370 | 371 | -- | Transpose the rows and columns of the argument. This uses 372 | -- 'Data.List.transpose' internally, and thus, isn't very efficient. 373 | -- 374 | -- >>> transpose [] 375 | -- [] 376 | -- >>> transpose [[ascii| "w" |]] 377 | -- ["w"] 378 | -- >>> transpose [[ascii| "nyan" |]] 379 | -- ["n","y","a","n"] 380 | -- >>> transpose . 
Prelude.replicate 3 $ [ascii| "nyan" |] 381 | -- ["nnn","yyy","aaa","nnn"] 382 | -- >>> transpose [[ascii| "cat" |], [ascii| "boy" |], [ascii| "nyan" |]] 383 | -- ["cbn","aoy","tya","n"] 384 | -- 385 | -- /Complexity:/ \(\Theta(n)\) 386 | -- 387 | -- @since 1.0.0 388 | transpose :: [AsciiText] -> [AsciiText] 389 | transpose = coerce BS.transpose 390 | 391 | -- | Reverse the text. 392 | -- 393 | -- >>> reverse empty 394 | -- "" 395 | -- >>> reverse . singleton $ [char| 'w' |] 396 | -- "w" 397 | -- >>> reverse [ascii| "catboy goes nyan" |] 398 | -- "nayn seog yobtac" 399 | -- 400 | -- /Complexity:/ \(\Theta(n)\) 401 | -- 402 | -- @since 1.0.0 403 | reverse :: AsciiText -> AsciiText 404 | reverse = coerce BS.reverse 405 | 406 | -- | @replace needle replacement haystack@, given a @needle@ of length \(n\) and 407 | -- a haystack of length \(h\), replaces each non-overlapping occurrence of 408 | -- @needle@ in @haystack@ with @replacement@. If the @needle@ is empty, no 409 | -- replacement will be performed. Equivalent to @'intercalate' replacement '.' 410 | -- 'splitOn' needle '$' haystack@. 411 | -- 412 | -- >>> replace empty [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |] 413 | -- "catboy goes nyan nyan" 414 | -- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] empty 415 | -- "" 416 | -- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "catboy goes nyan nyan" |] 417 | -- "catboy goes NYAN~ NYAN~" 418 | -- >>> replace [ascii| "nyan" |] [ascii| "NYAN~" |] [ascii| "nyanyan" |] 419 | -- "NYAN~yan" 420 | -- 421 | -- = On complexity 422 | -- 423 | -- This function is based on a variant of the 424 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 425 | -- except it does not detect overlapping needles. 
Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- The analysis below also doesn't factor in the cost of performing the
-- replacement, as this is (among other things) proportional to the number of
-- matches of the needle (and thus is hard to quantify).
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
456 | -- 457 | -- @since 1.0.1 458 | replace :: 459 | -- | @needle@ to search for 460 | AsciiText -> 461 | -- | @replacement@ to replace @needle@ with 462 | AsciiText -> 463 | -- | @haystack@ in which to search 464 | AsciiText -> 465 | AsciiText 466 | replace needle replacement haystack 467 | | length needle == 0 || length haystack == 0 = haystack 468 | | length needle > length haystack = haystack 469 | | otherwise = intercalate replacement . splitOn needle $ haystack 470 | 471 | -- | @justifyLeft n c t@ produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\), 472 | -- consisting of a copy of @t@ followed by (zero or more) copies 473 | -- of @c@. 474 | -- 475 | -- >>> justifyLeft (-100) [char| '~' |] [ascii| "nyan" |] 476 | -- "nyan" 477 | -- >>> justifyLeft 4 [char| '~' |] [ascii| "nyan" |] 478 | -- "nyan" 479 | -- >>> justifyLeft 10 [char| '~' |] [ascii| "nyan" |] 480 | -- "nyan~~~~~~" 481 | -- 482 | -- /Complexity:/ \(\Theta(n)\) 483 | -- 484 | -- @since 1.0.1 485 | justifyLeft :: Int -> AsciiChar -> AsciiText -> AsciiText 486 | justifyLeft n c t = t <> replicate (n - length t) (singleton c) 487 | 488 | -- | @justifyRight n c t@ produces a result of length \(\max \{ {\tt n }, {\tt length} \; {\tt t} \}\), 489 | -- consisting of (zero or more) copies of @c@ followed by a copy of @t@. 
--
-- >>> justifyRight (-100) [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyRight 4 [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> justifyRight 10 [char| '~' |] [ascii| "nyan" |]
-- "~~~~~~nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
justifyRight :: Int -> AsciiChar -> AsciiText -> AsciiText
justifyRight n c t = replicate (n - length t) (singleton c) <> t

-- | @center n c t@ produces a result of length \({\tt k } = \max \{ {\tt n }, {\tt length} \; {\tt t} \}\),
-- consisting of:
--
-- * \(\lceil \frac{{\tt k} - {\tt length} \; {\tt t}}{2} \rceil\) copies of @c@;
-- followed by
-- * A copy of @t@; followed by
-- * Zero or more copies of @c@
--
-- This means that the centering is \'left-biased\'. This mimics the behaviour
-- of the function of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:center),
-- although that function's documentation does not describe this behaviour.
--
-- >>> center (-100) [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> center 4 [char| '~' |] [ascii| "nyan" |]
-- "nyan"
-- >>> center 5 [char| '~' |] [ascii| "nyan" |]
-- "~nyan"
-- >>> center 6 [char| '~' |] [ascii| "nyan" |]
-- "~nyan~"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
center :: Int -> AsciiChar -> AsciiText -> AsciiText
center n c t
  | n <= length t = t
  -- Even padding splits equally; odd padding places the extra copy of c on
  -- the left ('left-biased'), matching the doctests above.
  | P.even (n - length t) = copied <> t <> copied
  | otherwise = copied <> singleton c <> t <> copied
  where
    copied :: AsciiText
    copied = replicate ((n - length t) `P.div` 2) (singleton c)

-- Folds

-- | Left-associative fold of a text.
541 | -- 542 | -- >>> foldl (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 543 | -- "f(f(f(f(f(f(ac)a)t)b)o)y)" 544 | -- 545 | -- /Complexity:/ \(\Theta(n)\) 546 | -- 547 | -- @since 1.0.0 548 | {-# INLINE foldl #-} 549 | foldl :: (a -> AsciiChar -> a) -> a -> AsciiText -> a 550 | foldl f x (AsciiText bs) = BS.foldl (coerce f) x bs 551 | 552 | -- | Left-associative fold of a text, strict in the accumulator. 553 | -- 554 | -- >>> foldl' (\acc c -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 555 | -- "f(f(f(f(f(f(ac)a)t)b)o)y)" 556 | -- 557 | -- /Complexity:/ \(\Theta(n)\) 558 | -- 559 | -- @since 1.0.0 560 | {-# INLINE foldl' #-} 561 | foldl' :: (a -> AsciiChar -> a) -> a -> AsciiText -> a 562 | foldl' f x (AsciiText bs) = BS.foldl' (coerce f) x bs 563 | 564 | -- | Right-associative fold of a text. 565 | -- 566 | -- >>> foldr (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 567 | -- "f(f(f(f(f(f(ay)o)b)t)a)c)" 568 | -- 569 | -- /Complexity:/ \(\Theta(n)\) 570 | -- 571 | -- @since 1.0.0 572 | {-# INLINE foldr #-} 573 | foldr :: (AsciiChar -> a -> a) -> a -> AsciiText -> a 574 | foldr f x (AsciiText bs) = BS.foldr (coerce f) x bs 575 | 576 | -- | Right-associative fold of a text, strict in the accumulator. 577 | -- 578 | -- >>> foldr' (\c acc -> [ascii| "f(" |] <> acc <> singleton c <> [ascii| ")" |]) [ascii| "a" |] [ascii| "catboy" |] 579 | -- "f(f(f(f(f(f(ay)o)b)t)a)c)" 580 | -- 581 | -- /Complexity:/ \(\Theta(n)\) 582 | -- 583 | -- @since 1.0.0 584 | {-# INLINE foldr' #-} 585 | foldr' :: (AsciiChar -> a -> a) -> a -> AsciiText -> a 586 | foldr' f x (AsciiText bs) = BS.foldr' (coerce f) x bs 587 | 588 | -- Special folds 589 | 590 | -- | Concatenate a list of texts. 591 | -- 592 | -- >>> concat [] 593 | -- "" 594 | -- >>> concat [[ascii| "catboy" |]] 595 | -- "catboy" 596 | -- >>> concat . 
Prelude.replicate 4 $ [ascii| "nyan" |] 597 | -- "nyannyannyannyan" 598 | -- 599 | -- /Complexity:/ \(\Theta(n)\) 600 | -- 601 | -- @since 1.0.0 602 | concat :: [AsciiText] -> AsciiText 603 | concat = coerce BS.concat 604 | 605 | -- | Map a text-producing function over a text, then concatenate the results. 606 | -- 607 | -- >>> concatMap singleton empty 608 | -- "" 609 | -- >>> concatMap singleton [ascii| "nyan" |] 610 | -- "nyan" 611 | -- >>> concatMap (\c -> singleton c <> singleton c) [ascii| "nekomimi" |] 612 | -- "nneekkoommiimmii" 613 | -- 614 | -- /Complexity:/ \(\Theta(n)\) 615 | -- 616 | -- @since 1.0.0 617 | concatMap :: (AsciiChar -> AsciiText) -> AsciiText -> AsciiText 618 | concatMap = coerce BS.concatMap 619 | 620 | -- | 'scanl' is similar to 'foldl', but returns a list of successive values from 621 | -- the left. 622 | -- 623 | -- /Complexity:/ \(\Theta(n)\) 624 | -- 625 | -- @since 1.0.0 626 | {-# INLINE scanl #-} 627 | scanl :: 628 | -- | accumulator -> element -> new accumulator 629 | (AsciiChar -> AsciiChar -> AsciiChar) -> 630 | -- | Starting accumulator value 631 | AsciiChar -> 632 | -- | Input of length \(n\) 633 | AsciiText -> 634 | -- | Output of length \(n + 1\) 635 | AsciiText 636 | scanl = coerce BS.scanl 637 | 638 | -- | 'scanr' is similar to 'foldr', but returns a list of successive values from 639 | -- the right. 640 | -- 641 | -- /Complexity:/ \(\Theta(n)\) 642 | -- 643 | -- @since 1.0.0 644 | {-# INLINE scanr #-} 645 | scanr :: 646 | -- | element -> accumulator -> new accumulator 647 | (AsciiChar -> AsciiChar -> AsciiChar) -> 648 | -- | Starting accumulator value 649 | AsciiChar -> 650 | -- | Input of length \(n\) 651 | AsciiText -> 652 | -- | Output of length \(n + 1\) 653 | AsciiText 654 | scanr = coerce BS.scanr 655 | 656 | -- Accumulating maps 657 | 658 | -- | Like a combination of 'map' and 'foldl''. 
Applies a function to each
-- element of an 'AsciiText', passing an accumulating parameter from left to
-- right, and returns a final 'AsciiText' along with the accumulating
-- parameter's final value.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE mapAccumL #-}
mapAccumL :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
mapAccumL f x (AsciiText bs) = AsciiText <$> BS.mapAccumL (coerce f) x bs

-- | Like a combination of 'map' and 'foldr'. Applies a function to each element
-- of an 'AsciiText', passing an accumulating parameter from right to left, and
-- returns a final 'AsciiText' along with the accumulating parameter's final
-- value.
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE mapAccumR #-}
mapAccumR :: (a -> AsciiChar -> (a, AsciiChar)) -> a -> AsciiText -> (a, AsciiText)
-- NOTE: this previously delegated to 'BS.mapAccumL', which traverses
-- left-to-right, contradicting the documented right-to-left semantics.
mapAccumR f x (AsciiText bs) = AsciiText <$> BS.mapAccumR (coerce f) x bs

-- Generation and unfolding

-- | @replicate n t@ consists of @t@ repeated \(\max \{ 0, {\tt n } \}\) times.
--
-- >>> replicate (-100) [ascii| "nyan" |]
-- ""
-- >>> replicate 0 [ascii| "nyan" |]
-- ""
-- >>> replicate 3 [ascii| "nyan" |]
-- "nyannyannyan"
--
-- /Complexity:/ \(\Theta(n \cdot m)\)
--
-- @since 1.0.1
replicate :: Int -> AsciiText -> AsciiText
replicate n t
  | n <= 0 = empty
  | otherwise = concat . P.replicate n $ t

-- | Similar to 'Data.List.unfoldr'. The function parameter takes a seed value,
-- and produces either 'Nothing' (indicating that we're done) or 'Just' an
-- 'AsciiChar' and a new seed value.
'unfoldr' then, given a starting seed, will 704 | -- repeatedly call the function parameter on successive seed values, returning 705 | -- the resulting 'AsciiText', based on the 'AsciiChar's produced, in the same 706 | -- order. 707 | -- 708 | -- /Complexity:/ \(\Theta(n)\) 709 | -- 710 | -- @since 1.0.0 711 | {-# INLINE unfoldr #-} 712 | unfoldr :: (a -> Maybe (AsciiChar, a)) -> a -> AsciiText 713 | unfoldr f = AsciiText . BS.unfoldr (coerce f) 714 | 715 | -- | Similar to 'unfoldr', but also takes a maximum length parameter. The second 716 | -- element of the result tuple will be 'Nothing' if we finished with the 717 | -- function argument returning 'Nothing', and 'Just' the final seed value if we 718 | -- reached the maximum length before that happened. 719 | -- 720 | -- /Complexity:/ \(\Theta(n)\) 721 | -- 722 | -- @since 1.0.0 723 | {-# INLINE unfoldrN #-} 724 | unfoldrN :: Int -> (a -> Maybe (AsciiChar, a)) -> a -> (AsciiText, Maybe a) 725 | unfoldrN n f = first AsciiText . BS.unfoldrN n (coerce f) 726 | 727 | -- | @take n t@ returns the prefix of @t@ with length 728 | -- \(\min \{ \max \{ 0, {\tt n}\}, {\tt length} \; {\tt t} \}\). 729 | -- 730 | -- >>> take (-100) [ascii| "catboy" |] 731 | -- "" 732 | -- >>> take 0 [ascii| "catboy" |] 733 | -- "" 734 | -- >>> take 4 [ascii| "catboy" |] 735 | -- "catb" 736 | -- >>> take 1000 [ascii| "catboy" |] 737 | -- "catboy" 738 | -- 739 | -- /Complexity:/ \(\Theta(1)\) 740 | -- 741 | -- @since 1.0.0 742 | {-# INLINE take #-} 743 | take :: Int -> AsciiText -> AsciiText 744 | take = coerce BS.take 745 | 746 | -- | @takeEnd n t@ returns the suffix of @t@ with length 747 | -- \(\min \{ \max \{0, {\tt n} \}, {\tt length} \; {\tt t} \}\). 
748 | -- 749 | -- >>> takeEnd (-100) [ascii| "catboy" |] 750 | -- "" 751 | -- >>> takeEnd 0 [ascii| "catboy" |] 752 | -- "" 753 | -- >>> takeEnd 4 [ascii| "catboy" |] 754 | -- "tboy" 755 | -- >>> takeEnd 1000 [ascii| "catboy" |] 756 | -- "catboy" 757 | -- 758 | -- /Complexity:/ \(\Theta(1)\) 759 | -- 760 | -- @since 1.0.1 761 | takeEnd :: Int -> AsciiText -> AsciiText 762 | takeEnd n t = drop (length t - n) t 763 | 764 | -- | @drop n t@ returns the suffix of @t@ with length 765 | -- \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\). 766 | -- 767 | -- >>> drop (-100) [ascii| "catboy" |] 768 | -- "catboy" 769 | -- >>> drop 0 [ascii| "catboy" |] 770 | -- "catboy" 771 | -- >>> drop 4 [ascii| "catboy" |] 772 | -- "oy" 773 | -- >>> drop 1000 [ascii| "catboy" |] 774 | -- "" 775 | -- 776 | -- /Complexity:/ \(\Theta(1)\) 777 | -- 778 | -- @since 1.0.0 779 | {-# INLINE drop #-} 780 | drop :: Int -> AsciiText -> AsciiText 781 | drop = coerce BS.drop 782 | 783 | -- | @dropEnd n t@ returns the prefix of @t@ with length 784 | -- \(\max \{ 0, \min \{ {\tt length} \; {\tt t}, {\tt length} \; {\tt t} - {\tt n} \} \}\). 785 | -- 786 | -- >>> dropEnd (-100) [ascii| "catboy" |] 787 | -- "catboy" 788 | -- >>> dropEnd 0 [ascii| "catboy" |] 789 | -- "catboy" 790 | -- >>> dropEnd 4 [ascii| "catboy" |] 791 | -- "ca" 792 | -- >>> dropEnd 1000 [ascii| "catboy" |] 793 | -- "" 794 | -- 795 | -- /Complexity:/ \(\Theta(1)\) 796 | -- 797 | -- @since 1.0.1 798 | dropEnd :: Int -> AsciiText -> AsciiText 799 | dropEnd n t = take (length t - n) t 800 | 801 | -- | @takeWhile p t@ returns the longest prefix of @t@ of characters that 802 | -- satisfy @p@. 803 | -- 804 | -- >>> takeWhile ((Just Lower ==) . caseOf) empty 805 | -- "" 806 | -- >>> takeWhile ((Just Lower ==) . 
caseOf) [ascii| "catboy goes nyan" |] 807 | -- "catboy" 808 | -- 809 | -- /Complexity:/ \(\Theta(n)\) 810 | -- 811 | -- @since 1.0.0 812 | {-# INLINE [1] takeWhile #-} 813 | takeWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 814 | takeWhile f (AsciiText at) = AsciiText . BS.takeWhile (coerce f) $ at 815 | 816 | -- | @takeWhileEnd p t@ returns the longest suffix of @t@ of characters that 817 | -- satisfy @p@. Equivalent to @'reverse' . 'takeWhile' p . 'reverse'@. 818 | -- 819 | -- >>> takeWhileEnd ((Just Lower ==) . caseOf) empty 820 | -- "" 821 | -- >>> takeWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 822 | -- "nyan" 823 | -- 824 | -- /Complexity:/ \(\Theta(n)\) 825 | -- 826 | -- @since 1.0.0 827 | {-# INLINE takeWhileEnd #-} 828 | takeWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 829 | takeWhileEnd f = AsciiText . BS.takeWhileEnd (coerce f) . coerce 830 | 831 | -- | @dropWhile p t@ returns the suffix remaining after @'takeWhile' p t@. 832 | -- 833 | -- >>> dropWhile ((Just Lower ==) . caseOf) empty 834 | -- "" 835 | -- >>> dropWhile ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 836 | -- " goes nyan" 837 | -- 838 | -- /Complexity:/ \(\Theta(n)\) 839 | -- 840 | -- @since 1.0.0 841 | {-# INLINE [1] dropWhile #-} 842 | dropWhile :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 843 | dropWhile f (AsciiText at) = AsciiText . BS.dropWhile (coerce f) $ at 844 | 845 | -- | @dropWhileEnd p t@ returns the prefix remaining after @'takeWhileEnd' p t@. 846 | -- Equivalent to @'reverse' . 'dropWhile' p . 'reverse'@. 847 | -- 848 | -- >>> dropWhileEnd ((Just Lower ==) . caseOf) empty 849 | -- "" 850 | -- >>> dropWhileEnd ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 851 | -- "catboy goes " 852 | -- 853 | -- /Complexity:/ \(\Theta(n)\) 854 | -- 855 | -- @since 1.0.0 856 | {-# INLINE dropWhileEnd #-} 857 | dropWhileEnd :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 858 | dropWhileEnd f = AsciiText . 
BS.dropWhileEnd (coerce f) . coerce 859 | 860 | -- | @dropAround p@ is equivalent to @'dropWhile' p '.' 'dropWhileEnd' p@. 861 | -- 862 | -- >>> dropAround ((Just Lower ==) . caseOf) empty 863 | -- "" 864 | -- >>> dropAround ((Just Lower ==) . caseOf) [ascii| "catboy goes nyan" |] 865 | -- " goes " 866 | -- 867 | -- /Complexity:/ \(\Theta(n)\) 868 | -- 869 | -- @since 1.0.1 870 | dropAround :: (AsciiChar -> Bool) -> AsciiText -> AsciiText 871 | dropAround p = dropWhile p . dropWhileEnd p 872 | 873 | -- | Remove the longest prefix /and/ suffix of the input comprised entirely of 874 | -- whitespace characters. We define a \'whitespace character\' as any of the 875 | -- following: 876 | -- 877 | -- * TAB (0x09) 878 | -- * LF (0x0a) 879 | -- * VT (0x0b) 880 | -- * FF (0x0c) 881 | -- * CR (0x0d) 882 | -- * Space (0x20) 883 | -- 884 | -- >>> strip empty 885 | -- "" 886 | -- >>> strip [ascii| "catboy goes nyan" |] 887 | -- "catboy goes nyan" 888 | -- >>> strip [ascii| "\n\n \tcatboy goes nyan" |] 889 | -- "catboy goes nyan" 890 | -- >>> strip [ascii| "catboy goes nyan \t\t\n" |] 891 | -- "catboy goes nyan" 892 | -- >>> strip [ascii| "\n\n \tcatboy goes nyan \t\t\n" |] 893 | -- "catboy goes nyan" 894 | -- 895 | -- /Complexity:/ \(\Theta(n)\) 896 | -- 897 | -- @since 1.0.1 898 | strip :: AsciiText -> AsciiText 899 | strip = dropAround isSpace 900 | 901 | -- | Remove the longest prefix of the input comprised entirely of whitespace 902 | -- characters. 
We define a \'whitespace character\' as any of the following: 903 | -- 904 | -- * TAB (0x09) 905 | -- * LF (0x0a) 906 | -- * VT (0x0b) 907 | -- * FF (0x0c) 908 | -- * CR (0x0d) 909 | -- * Space (0x20) 910 | -- 911 | -- >>> stripStart empty 912 | -- "" 913 | -- >>> stripStart [ascii| "catboy goes nyan" |] 914 | -- "catboy goes nyan" 915 | -- >>> stripStart [ascii| "\n\n \tcatboy goes nyan" |] 916 | -- "catboy goes nyan" 917 | -- >>> stripStart [ascii| "catboy goes nyan \t\t\n" |] 918 | -- "catboy goes nyan \t\t\n" 919 | -- >>> stripStart [ascii| "\n\n \tcatboy goes nyan \t\t\n" |] 920 | -- "catboy goes nyan \t\t\n" 921 | -- 922 | -- /Complexity:/ \(\Theta(n)\) 923 | -- 924 | -- @since 1.0.1 925 | stripStart :: AsciiText -> AsciiText 926 | stripStart = dropWhile isSpace 927 | 928 | -- | Remove the longest suffix of the input comprised entirely of whitespace 929 | -- characters. We define a \'whitespace character\' as any of the following: 930 | -- 931 | -- * TAB (0x09) 932 | -- * LF (0x0a) 933 | -- * VT (0x0b) 934 | -- * FF (0x0c) 935 | -- * CR (0x0d) 936 | -- * Space (0x20) 937 | -- 938 | -- >>> stripEnd empty 939 | -- "" 940 | -- >>> stripEnd [ascii| "catboy goes nyan" |] 941 | -- "catboy goes nyan" 942 | -- >>> stripEnd [ascii| "\n\n \tcatboy goes nyan" |] 943 | -- "\n\n \tcatboy goes nyan" 944 | -- >>> stripEnd [ascii| "catboy goes nyan \t\t\n" |] 945 | -- "catboy goes nyan" 946 | -- >>> stripEnd [ascii| "\n\n \tcatboy goes nyan \t\t\n" |] 947 | -- "\n\n \tcatboy goes nyan" 948 | -- 949 | -- /Complexity:/ \(\Theta(n)\) 950 | -- 951 | -- @since 1.0.1 952 | stripEnd :: AsciiText -> AsciiText 953 | stripEnd = dropWhileEnd isSpace 954 | 955 | -- | @splitAt n t@ is equivalent to @('take' n t, 'drop' n t)@. 
956 | -- 957 | -- >>> splitAt (-3) [ascii| "catboy" |] 958 | -- ("","catboy") 959 | -- >>> splitAt 0 [ascii| "catboy" |] 960 | -- ("","catboy") 961 | -- >>> splitAt 3 [ascii| "catboy" |] 962 | -- ("cat","boy") 963 | -- >>> splitAt 1000 [ascii| "catboy" |] 964 | -- ("catboy","") 965 | -- 966 | -- /Complexity:/ \(\Theta(1)\) 967 | -- 968 | -- @since 1.0.0 969 | {-# INLINE splitAt #-} 970 | splitAt :: Int -> AsciiText -> (AsciiText, AsciiText) 971 | splitAt = coerce BS.splitAt 972 | 973 | -- | @breakOn needle haystack@, given a @needle@ of length \(n\) and a 974 | -- @haystack@ of length \(h\), attempts to find the first instance of @needle@ 975 | -- in @haystack@. If successful, return a tuple consisting of: 976 | -- 977 | -- * The prefix of @haystack@ before the match; and 978 | -- * The rest of @haystack@, starting with the match. 979 | -- 980 | -- If the needle is empty, this returns @('empty', haystack)@. If no match can 981 | -- be found, this instead returns @(haystack, 'empty')@. 982 | -- 983 | -- If you need to repeatedly split on the same needle, consider 'breakOnAll', as 984 | -- this will be more efficient due to only having to run the matching algorithm 985 | -- once. 986 | -- 987 | -- >>> breakOn empty [ascii| "catboy goes nyan" |] 988 | -- ("","catboy goes nyan") 989 | -- >>> breakOn [ascii| "nyan" |] empty 990 | -- ("","") 991 | -- >>> breakOn [ascii| "goes" |] [ascii| "catboy goes nyan" |] 992 | -- ("catboy ","goes nyan") 993 | -- >>> breakOn [ascii| "catboy" |] [ascii| "nyan nyan nyan" |] 994 | -- ("nyan nyan nyan","") 995 | -- 996 | -- = On complexity 997 | -- 998 | -- This function is based on a variant of the 999 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 1000 | -- except it does not detect overlapping needles. 
Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
1027 | -- 1028 | -- @since 1.0.1 1029 | breakOn :: AsciiText -> AsciiText -> (AsciiText, AsciiText) 1030 | breakOn needle@(AsciiText n) haystack@(AsciiText h) 1031 | | length needle == 0 = (empty, haystack) 1032 | | otherwise = case indices n h of 1033 | [] -> (haystack, empty) 1034 | ix : _ -> splitAt ix haystack 1035 | 1036 | -- | @breakOnEnd needle haystack@, given a @needle@ of length \(n\) and a 1037 | -- @haystack@ of length \(h\), attempts to find the last instance of @needle@ in 1038 | -- @haystack@. If successful, return a tuple consisting of: 1039 | -- 1040 | -- * The prefix of @haystack@ up to, and including, the match; and 1041 | -- * The rest of @haystack@. 1042 | -- 1043 | -- If the needle is empty, this returns @(haystack, 'empty')@. If no match can 1044 | -- be found, this instead returns @('empty', haystack)@. 1045 | -- 1046 | -- This function is similar to 'breakOn'. If you need to repeatedly split on the 1047 | -- same needle, consider 'breakOnAll', as this will be more efficient due to 1048 | -- only having to run the matching algorithm once. 1049 | -- 1050 | -- >>> breakOnEnd empty [ascii| "catboy goes nyan" |] 1051 | -- ("catboy goes nyan","") 1052 | -- >>> breakOnEnd [ascii| "nyan" |] empty 1053 | -- ("","") 1054 | -- >>> breakOnEnd [ascii| "goes" |] [ascii| "catboy goes nyan" |] 1055 | -- ("catboy goes"," nyan") 1056 | -- >>> breakOnEnd [ascii| "catboy" |] [ascii| "nyan nyan nyan" |] 1057 | -- ("","nyan nyan nyan") 1058 | -- 1059 | -- = On complexity 1060 | -- 1061 | -- This function is based on a variant of the 1062 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 1063 | -- except it does not detect overlapping needles. 
Its average-case analysis is 1064 | -- based on the assumption that: 1065 | -- 1066 | -- * All ASCII symbols are equally likely to occur in both the needle and the 1067 | -- haystack; and 1068 | -- * The needle has length at least two; and 1069 | -- * Both the needle and the haystack contain at least four unique symbols. 1070 | -- 1071 | -- We fall back to 'split' for singleton needles, and there is no work to be 1072 | -- done on empty needles, which means the second assumption always holds. 1073 | -- 1074 | -- Worst-case behaviour becomes more likely the more your input satisfies the 1075 | -- following conditions: 1076 | -- 1077 | -- * The needle and/or haystack use few unique symbols (less than four is the 1078 | -- worst); or 1079 | -- * The haystack contains many instances of the second symbol of the needle 1080 | -- which don't lead to full matches. 1081 | -- 1082 | -- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n\)\) worst-case. 1083 | -- 1084 | -- /See also:/ Note that all the below are references for the original 1085 | -- algorithm, which includes searching for overlapping needles. Thus, our 1086 | -- implementation will perform better than the analysis suggests. 1087 | -- 1088 | -- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) 1089 | -- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings). 1090 | -- 1091 | -- @since 1.0.1 1092 | breakOnEnd :: AsciiText -> AsciiText -> (AsciiText, AsciiText) 1093 | breakOnEnd needle@(AsciiText n) haystack@(AsciiText h) 1094 | | length needle == 0 = (haystack, empty) 1095 | | otherwise = case go . 
indices n $ h of 1096 | Nothing -> (empty, haystack) 1097 | Just ix -> splitAt (ix + length needle) haystack 1098 | where 1099 | go :: [Int] -> Maybe Int 1100 | go = \case 1101 | [] -> Nothing 1102 | [i] -> Just i 1103 | (_ : is) -> go is 1104 | 1105 | -- | @break p t@ is equivalent to @('takeWhile' ('not' p) t, 'dropWhile' ('not' 1106 | -- p) t)@. 1107 | -- 1108 | -- >>> break ([char| ' ' |] ==) [ascii| "catboy goes nyan" |] 1109 | -- ("catboy"," goes nyan") 1110 | -- 1111 | -- /Complexity:/ \(\Theta(n)\) 1112 | -- 1113 | -- @since 1.0.0 1114 | break :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) 1115 | break = coerce BS.break 1116 | 1117 | -- | @span p t@ is equivalent to @('takeWhile' p t, 'dropWhile' p t)@. 1118 | -- 1119 | -- >>> span ([char| 'c' |] ==) [ascii| "catboy goes nyan" |] 1120 | -- ("c","atboy goes nyan") 1121 | -- 1122 | -- /Complexity:/ \(\Theta(n)\) 1123 | -- 1124 | -- @since 1.0.0 1125 | {-# INLINE [1] span #-} 1126 | span :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText) 1127 | span = coerce BS.span 1128 | 1129 | -- | Separate a text into a list of texts such that: 1130 | -- 1131 | -- * Their concatenation is equal to the original argument; and 1132 | -- * Equal adjacent characters in the original argument are in the same text in 1133 | -- the result. 1134 | -- 1135 | -- This is a specialized form of 'groupBy', and is about 40% faster than 1136 | -- @'groupBy' '=='@. 1137 | -- 1138 | -- >>> group empty 1139 | -- [] 1140 | -- >>> group . 
singleton $ [char| 'w' |] 1141 | -- ["w"] 1142 | -- >>> group [ascii| "nyan" |] 1143 | -- ["n","y","a","n"] 1144 | -- >>> group [ascii| "nyaaaan" |] 1145 | -- ["n","y","aaaa","n"] 1146 | -- 1147 | -- /Complexity:/ \(\Theta(n)\) 1148 | -- 1149 | -- @since 1.0.0 1150 | group :: AsciiText -> [AsciiText] 1151 | group = coerce BS.group 1152 | 1153 | -- | Separate a text into a list of texts such that: 1154 | -- 1155 | -- * Their concatenation is equal to the original argument; and 1156 | -- * Adjacent characters for which the function argument returns @True@ are in 1157 | -- the same text in the result. 1158 | -- 1159 | -- 'group' is a special case for the function argument '=='; it is also about 1160 | -- 40% faster. 1161 | -- 1162 | -- >>> groupBy (<) empty 1163 | -- [] 1164 | -- >>> groupBy (<) . singleton $ [char| 'w' |] 1165 | -- ["w"] 1166 | -- >>> groupBy (<) [ascii| "catboy goes nyan" |] 1167 | -- ["c","atboy"," goes"," nyan"] 1168 | -- 1169 | -- /Complexity:/ \(\Theta(n)\) 1170 | -- 1171 | -- @since 1.0.0 1172 | groupBy :: (AsciiChar -> AsciiChar -> Bool) -> AsciiText -> [AsciiText] 1173 | groupBy = coerce BS.groupBy 1174 | 1175 | -- | All prefixes of the argument, from shortest to longest. 1176 | -- 1177 | -- >>> inits empty 1178 | -- [""] 1179 | -- >>> inits . singleton $ [char| 'w' |] 1180 | -- ["","w"] 1181 | -- >>> inits [ascii| "nyan" |] 1182 | -- ["","n","ny","nya","nyan"] 1183 | -- 1184 | -- /Complexity:/ \(\Theta(n)\) 1185 | -- 1186 | -- @since 1.0.0 1187 | inits :: AsciiText -> [AsciiText] 1188 | inits = coerce BS.inits 1189 | 1190 | -- | All suffixes of the argument, from shortest to longest. 1191 | -- 1192 | -- >>> tails empty 1193 | -- [""] 1194 | -- >>> tails . 
singleton $ [char| 'w' |] 1195 | -- ["w",""] 1196 | -- >>> tails [ascii| "nyan" |] 1197 | -- ["nyan","yan","an","n",""] 1198 | -- 1199 | -- /Complexity:/ \(\Theta(n)\) 1200 | -- 1201 | -- @since 1.0.0 1202 | tails :: AsciiText -> [AsciiText] 1203 | tails = coerce BS.tails 1204 | 1205 | -- Breaking into many substrings 1206 | 1207 | -- | @splitOn needle haystack@, given a @needle@ of length \(n\) and a haystack 1208 | -- of length \(h\), breaks @haystack@ into pieces, separated by @needle@. Any 1209 | -- occurrences of @needle@ in @haystack@ are consumed. 1210 | -- 1211 | -- >>> splitOn empty [ascii| "catboy goes nyan and goes nyan" |] 1212 | -- ["catboy goes nyan and goes nyan"] 1213 | -- >>> splitOn [ascii| "nyan" |] empty 1214 | -- [""] 1215 | -- >>> splitOn [ascii| "nyan" |] [ascii| "catboy goes nyan and goes nyan" |] 1216 | -- ["catboy goes "," and goes ",""] 1217 | -- >>> splitOn [ascii| "nyan" |] [ascii| "nyan" |] 1218 | -- ["",""] 1219 | -- >>> splitOn [ascii| "nyan" |] [ascii| "catboy" |] 1220 | -- ["catboy"] 1221 | -- 1222 | -- = On complexity 1223 | -- 1224 | -- This function is based on a variant of the 1225 | -- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm, 1226 | -- except it does not detect overlapping needles. Its average-case analysis is 1227 | -- based on the assumption that: 1228 | -- 1229 | -- * All ASCII symbols are equally likely to occur in both the needle and the 1230 | -- haystack; and 1231 | -- * The needle has length at least two; and 1232 | -- * Both the needle and the haystack contain at least four unique symbols. 1233 | -- 1234 | -- We fall back to 'split' for singleton needles, and there is no work to be 1235 | -- done on empty needles, which means the second assumption always holds. 
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
splitOn :: AsciiText -> AsciiText -> [AsciiText]
splitOn needle@(AsciiText n) haystack@(AsciiText h)
  | needleLen == 0 = [haystack]
  | length haystack == 0 = [empty]
  | needleLen == 1 = split (== (AsciiChar . BS.head $ n)) haystack
  | otherwise = go 0 (indices n h)
  where
    needleLen :: Int
    needleLen = length needle
    -- Walk the (ascending) list of match positions, emitting the chunk
    -- between the previous cut point and each match, then skipping past the
    -- needle itself so its bytes are consumed.
    go :: Int -> [Int] -> [AsciiText]
    go pos = \case
      [] -> [drop pos haystack]
      (ix : ixes) ->
        let chunkLen = ix - pos
            segment = take chunkLen . drop pos $ haystack
         in segment : go (pos + chunkLen + needleLen) ixes

-- | @split p t@ separates @t@ into components delimited by separators, for
-- which @p@ returns @True@. The results do not contain the separators.
--
-- \(n\) adjacent separators result in \(n - 1\) empty components in the result.
--
-- >>> split ([char| '~' |] ==) empty
-- []
-- >>> split ([char| '~' |] ==) . singleton $ [char| '~' |]
-- ["",""]
-- >>> split ([char| '~' |] ==) [ascii| "nyan" |]
-- ["nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "~nyan" |]
-- ["","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~" |]
-- ["nyan",""]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~nyan" |]
-- ["nyan","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~~nyan" |]
-- ["nyan","","nyan"]
-- >>> split ([char| '~' |] ==) [ascii| "nyan~~~nyan" |]
-- ["nyan","","","nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE split #-}
split :: (AsciiChar -> Bool) -> AsciiText -> [AsciiText]
split = coerce BS.splitWith

-- | Splits a text into chunks of the specified length. Equivalent to repeatedly
-- 'take'ing the specified length until exhaustion. The last item in the result
-- may thus be shorter than requested.
--
-- For any @n <= 0@ and any @t@, @chunksOf n t@ yields the empty list. This is
-- identical to the behaviour of the function of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:chunksOf),
-- although it doesn't document this fact.
--
-- >>> chunksOf (-100) [ascii| "I am a catboy" |]
-- []
-- >>> chunksOf (-100) empty
-- []
-- >>> chunksOf 0 [ascii| "I am a catboy" |]
-- []
-- >>> chunksOf 0 empty
-- []
-- >>> chunksOf 1 [ascii| "I am a catboy" |]
-- ["I"," ","a","m"," ","a"," ","c","a","t","b","o","y"]
-- >>> chunksOf 1 empty
-- []
-- >>> chunksOf 2 [ascii| "I am a catboy" |]
-- ["I ","am"," a"," c","at","bo","y"]
-- >>> chunksOf 300 [ascii| "I am a catboy" |]
-- ["I am a catboy"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
chunksOf :: Int -> AsciiText -> [AsciiText]
chunksOf n t
  | n <= 0 = []
  | t == empty = []
  | otherwise = case splitAt n t of
      (h, t') -> h : chunksOf n t'

-- Breaking into lines and words

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:lines),
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:lines).
-- Specifically, separates the argument into pieces, with LF characters (0x0a) as
-- separators. A single trailing LF is ignored. None of the final results
-- contain LF.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has some consequences,
-- which the documentation of both the text package and the Prelude does not
-- properly explain. We list them here - bear these in mind when using this
-- function, as well as 'unlines':
--
-- * No platform-specific concept of a \'newline\' is ever used by this
-- function. Separation is done on LF, and /only/ LF, regardless of platform.
-- The documentation in both the text package and the Prelude confusingly refers
-- to \'newline characters\', which is a category error. We thus specify that LF
-- is the character being split on, rather than mentioning \'newlines\' in any
-- way, shape or form.
-- * @'unlines' '.' 'lines'@ is /not/ the same as @'Prelude.id'@. This is
-- misleadingly described in the Prelude, which claims that (its version of)
-- @unlines@ is \'an inverse operation\' to (its version of) @lines@. For a
-- precise explanation of why this is the case, please see the documentation for
-- 'unlines'.
-- * @'lines'@ is not the same as @'split' (['char'| \'\n\' |] '==')@. See the
-- doctests below for a demonstration of how they differ.
--
-- >>> lines empty
-- []
-- >>> split ([char| '\n' |] ==) empty
-- []
-- >>> lines [ascii| "catboy goes nyan" |]
-- ["catboy goes nyan"]
-- >>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan" |]
-- ["catboy goes nyan"]
-- >>> lines [ascii| "catboy goes nyan\n" |]
-- ["catboy goes nyan"]
-- >>> split ([char| '\n' |] ==) [ascii| "catboy goes nyan\n" |]
-- ["catboy goes nyan",""]
-- >>> lines [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
-- ["","catboy","","","goes","","nyan",""]
-- >>> split ([char| '\n' |] ==) [ascii| "\ncatboy\n\n\ngoes\n\nnyan\n\n" |]
-- ["","catboy","","","goes","","nyan","",""]
-- >>> lines [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
-- ["\r","catboy\r","goes\r","nyan\r"]
-- >>> split ([char| '\n' |] ==) [ascii| "\r\ncatboy\r\ngoes\r\nnyan\r\n" |]
-- ["\r","catboy\r","goes\r","nyan\r",""]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- /See also:/ [Wikipedia on newlines](https://en.wikipedia.org/wiki/Newline)
--
-- @since 1.0.1
lines :: AsciiText -> [AsciiText]
lines (AsciiText bs) = coerce . go $ bs
  where
    -- Split before each LF byte (0x0a). A single trailing LF produces no
    -- trailing empty piece, because the empty remainder fails the initial
    -- uncons on the next recursive step.
    go :: ByteString -> [ByteString]
    go rest = case BS.uncons rest of
      Nothing -> []
      Just _ -> case BS.break (0x0a ==) rest of
        (h, t) ->
          h : case BS.uncons t of
            Nothing -> []
            Just (_, t') -> go t'

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:words)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:words).
-- Specifically, separates the argument into pieces, with (non-empty sequences
-- of) word separator characters as separators. A \'word separator character\'
-- is any of the following:
--
-- * TAB (0x09)
-- * LF (0x0a)
-- * VT (0x0b)
-- * FF (0x0c)
-- * CR (0x0d)
-- * Space (0x20)
--
-- None of the final results contain any word separator characters. Any sequence
-- of leading, or trailing, word separator characters will be ignored.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has the consequence that @'unwords' '.' 'words'@ is
-- /not/ the same as 'Prelude.id', although the documentation for the Prelude
-- confusingly describes (its version of) @unwords@ as an \'inverse operation\'
-- to (its version of) @words@. See the documentation for 'unwords' for an
-- explanation of why this is the case.
--
-- >>> words empty
-- []
-- >>> words [ascii| "catboy" |]
-- ["catboy"]
-- >>> words [ascii| " \r\r\r\rcatboy \n\rgoes\t\t\t\t\tnyan\n " |]
-- ["catboy","goes","nyan"]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
words :: AsciiText -> [AsciiText]
words (AsciiText bs) = coerce . go $ bs
  where
    -- Drop any leading run of separators, then peel off the next word;
    -- stop once only separators (or nothing) remained.
    go :: ByteString -> [ByteString]
    go rest =
      let rest' = BS.dropWhile isSep rest
       in case BS.length rest' of
            0 -> []
            _ -> case BS.break isSep rest' of
              (h, t) -> h : go t
    -- Word separators: Space (0x20) plus the C0 whitespace range
    -- TAB .. CR (0x09 - 0x0d), exactly as documented above.
    isSep :: Word8 -> Bool
    isSep w8
      | w8 == 32 = True
      | 9 <= w8 && w8 <= 13 = True
      | otherwise = False

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:unlines)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:unlines).
-- Specifically, appends an LF character to each of the texts, then concatenates. Equivalent
-- to @'foldMap' (`'snoc'` [char| '\n' |])@.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has some consequences, which the documentation of both
-- the text package and the Prelude does not properly explain. We list them here
-- - bear these in mind when using this function, as well as 'lines':
--
-- * No platform-specific concept of a \'newline\' is ever used by this
-- function. The documentation in both the text package and the Prelude
-- confusingly refers to appending a \'terminating newline\', which is only a
-- correct statement on platforms where a newline is LF. We thus specify that we
-- append LF, rather than mentioning \'newlines\' in any way, shape or form.
-- * @'unlines' '.' 'lines'@ is /not/ the same as @'Prelude.id'@. This is
-- misleadingly described in the Prelude, which claims that (its version of)
-- @unlines@ is \'an inverse operation\' to (its version of) @lines@. See the
-- doctests below for a demonstration of this.
--
-- >>> unlines []
-- ""
-- >>> unlines [[ascii| "nyan" |]]
-- "nyan\n"
-- >>> unlines . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyan\nnyan\nnyan\n"
-- >>> unlines . lines $ [ascii| "catboy goes nyan" |]
-- "catboy goes nyan\n"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- /See also:/ [Wikipedia on newlines](https://en.wikipedia.org/wiki/Newline)
--
-- @since 1.0.1
unlines :: (Foldable f) => f AsciiText -> AsciiText
unlines = foldMap (`snoc` [char| '\n' |])

-- | Identical to the functions of the same name in the [text
-- package](http://hackage.haskell.org/package/text-1.2.4.1/docs/Data-Text.html#v:unwords)
-- and [the
-- Prelude](https://hackage.haskell.org/package/base-4.14.1.0/docs/Prelude.html#v:unwords).
-- Specifically, links together adjacent texts with a Space character. Equivalent to
-- @'intercalate' [ascii| " " |]@.
--
-- We chose to follow the same semantics for this function as the text package
-- and the Prelude. This has the consequence that @'unwords' '.' 'words'@ is
-- /not/ the same as 'Prelude.id', although the documentation for the Prelude
-- confusingly describes (its version of) @unwords@ as an \'inverse operation\'
-- to (its version of) @words@. See the doctests below for a demonstration of
-- this.
--
-- >>> unwords []
-- ""
-- >>> unwords [[ascii| "nyan" |]]
-- "nyan"
-- >>> unwords . Prelude.replicate 3 $ [ascii| "nyan" |]
-- "nyan nyan nyan"
-- >>> unwords . words $ [ascii| "nyan\nnyan\nnyan" |]
-- "nyan nyan nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
unwords :: [AsciiText] -> AsciiText
unwords = intercalate [ascii| " " |]

-- View patterns

-- | Return 'Just' the suffix of the second text if it has the first text as
-- a prefix, 'Nothing' otherwise.
--
-- >>> stripPrefix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripPrefix empty [ascii| "catboy" |]
-- Just "catboy"
-- >>> stripPrefix [ascii| "nyan" |] [ascii| "nyan" |]
-- Just ""
-- >>> stripPrefix [ascii| "nyan" |] [ascii| "catboy" |]
-- Nothing
-- >>> stripPrefix [ascii| "catboy" |] [ascii| "catboy goes nyan" |]
-- Just " goes nyan"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
stripPrefix :: AsciiText -> AsciiText -> Maybe AsciiText
stripPrefix = coerce BS.stripPrefix

-- | Return 'Just' the prefix of the second text if it has the first text as
-- a suffix, 'Nothing' otherwise.
--
-- >>> stripSuffix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripSuffix empty [ascii| "catboy" |]
-- Just "catboy"
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "nyan" |]
-- Just ""
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "catboy" |]
-- Nothing
-- >>> stripSuffix [ascii| "nyan" |] [ascii| "catboy goes nyan" |]
-- Just "catboy goes "
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
stripSuffix :: AsciiText -> AsciiText -> Maybe AsciiText
stripSuffix = coerce BS.stripSuffix

-- | @stripInfix needle haystack@, given a needle of length \(n\) and a haystack
-- of length \(h\), attempts to find the first instance of @needle@ in
-- @haystack@. If successful, it returns 'Just' the pair consisting of:
--
-- * All the text in @haystack@ before the first instance of @needle@; and
-- * All the text in @haystack@ after, but not including, the first instance of
-- @needle@.
--
-- If there is no instance of @needle@ in @haystack@, this returns 'Nothing'.
--
-- >>> stripInfix [ascii| "catboy" |] empty
-- Nothing
-- >>> stripInfix empty [ascii| "nyan catboy nyan nyan" |]
-- Nothing
-- >>> stripInfix [ascii| "catboy" |] [ascii| "catboy" |]
-- Just ("","")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy" |]
-- Just ("nyan ","")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "catboy nyan" |]
-- Just (""," nyan")
-- >>> stripInfix [ascii| "catboy" |] [ascii| "nyan catboy nyan nyan" |]
-- Just ("nyan "," nyan nyan")
-- >>> stripInfix [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- Just ("","yanyan")
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
stripInfix :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText)
stripInfix needle@(AsciiText n) haystack@(AsciiText h)
  | P.min (length needle) (length haystack) == 0 = Nothing
  | otherwise = case indices n h of
      [] -> Nothing
      -- Only the first match matters; everything past its end is the
      -- remainder.
      (ix : _) -> Just (take ix haystack, drop (ix + length needle) haystack)

-- | Find the longest non-empty common prefix of the arguments and return it,
-- along with the remaining suffixes of both arguments. If the arguments lack a
-- common, non-empty prefix, returns 'Nothing'.
--
-- >>> commonPrefixes empty [ascii| "catboy" |]
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] empty
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] [ascii| "nyan" |]
-- Nothing
-- >>> commonPrefixes [ascii| "catboy" |] [ascii| "catboy" |]
-- Just ("catboy","","")
-- >>> commonPrefixes [ascii| "nyan" |] [ascii| "nyan nyan" |]
-- Just ("nyan",""," nyan")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
commonPrefixes :: AsciiText -> AsciiText -> Maybe (AsciiText, AsciiText, AsciiText)
commonPrefixes (AsciiText t1) (AsciiText t2) = case matchLen of
  -- No shared leading byte means no non-empty common prefix.
  0 -> Nothing
  len -> Just (coerce (BS.take len t1, BS.drop len t1, BS.drop len t2))
  where
    -- Length of the longest common prefix: scan forward from index 0 and stop
    -- at the first mismatch (or when the shorter argument runs out). The
    -- previous fold-based implementation recorded the /last/ matching index
    -- even after an earlier mismatch, so inputs like "ac" and "bc" (which
    -- share no prefix, only a later byte) were wrongly reported as sharing a
    -- two-character prefix.
    matchLen :: Int
    matchLen = go 0
    limit :: Int
    limit = P.min (BS.length t1) (BS.length t2)
    go :: Int -> Int
    go i
      | i < limit && BS.index t1 i == BS.index t2 i = go (i + 1)
      | otherwise = i

-- Searching

-- | Return the text comprised of all the characters that satisfy the function
-- argument (that is, for which it returns 'True'), in the same order as in the
-- original.
--
-- >>> filter ([char| 'n' |] ==) empty
-- ""
-- >>> filter ([char| 'n' |] ==) [ascii| "catboy" |]
-- ""
-- >>> filter ([char| 'n' |] ==) [ascii| "nyan" |]
-- "nn"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE filter #-}
filter :: (AsciiChar -> Bool) -> AsciiText -> AsciiText
filter = coerce BS.filter

-- | @breakOnAll needle haystack@, given a @needle@ of length \(n\) and a
-- @haystack@ of length \(h\), finds all non-overlapping instances of @needle@
-- in @haystack@. Each result consists of the following elements:
--
-- * The prefix prior to the match; and
-- * The match, followed by the rest of the string.
--
-- If given an empty needle, the result is a singleton list containing a pair of
-- the entire haystack and the empty text. If given an empty haystack, the
-- result is an empty list.
--
-- >>> breakOnAll empty [ascii| "nyan nyan nyan" |]
-- [("nyan nyan nyan","")]
-- >>> breakOnAll [ascii| "nyan" |] empty
-- []
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyan" |]
-- [("","nyan")]
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
-- [("","nyan nyan nyan"),("nyan ","nyan nyan"),("nyan nyan ","nyan")]
-- >>> breakOnAll [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- [("","nyanyanyan"),("nyanya","nyan")]
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
breakOnAll :: AsciiText -> AsciiText -> [(AsciiText, AsciiText)]
breakOnAll needle@(AsciiText n) haystack@(AsciiText h)
  | length needle == 0 = [(haystack, empty)]
  | length haystack == 0 = []
  -- Each match index becomes a (prefix, match-and-rest) split of the
  -- haystack.
  | otherwise = (`splitAt` haystack) <$> indices n h

-- | Returns 'Just' the first character in the text satisfying the predicate,
-- 'Nothing' otherwise.
--
-- >>> find ([char| 'n' |] ==) empty
-- Nothing
-- >>> find ([char| 'n' |] ==) [ascii| "catboy" |]
-- Nothing
-- >>> find ([char| 'n' |] ==) [ascii| "nyan" |]
-- Just '0x6e'
-- >>> find ([char| 'n' |] /=) [ascii| "nyan" |]
-- Just '0x79'
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE find #-}
find :: (AsciiChar -> Bool) -> AsciiText -> Maybe AsciiChar
find = coerce BS.find

-- | @partition p t@ is equivalent to @('filter' p t, 'filter' ('not' p) t)@.
--
-- >>> partition ([char| 'n' |] ==) empty
-- ("","")
-- >>> partition ([char| 'n' |] ==) . singleton $ [char| 'n' |]
-- ("n","")
-- >>> partition ([char| 'n' |] ==) . singleton $ [char| 'w' |]
-- ("","w")
-- >>> partition ([char| 'n' |] ==) [ascii| "nyan!" |]
-- ("nn","ya!")
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
partition :: (AsciiChar -> Bool) -> AsciiText -> (AsciiText, AsciiText)
partition = coerce BS.partition

-- Indexing

-- | Retrieve the ASCII character at the given position in the text. Indexes
-- begin from 0. If the index provided is invalid (that is, less than 0, equal
-- to the length of the text, or greater), return 'Nothing'; otherwise, return
-- 'Just' the character at that position.
--
-- >>> index [ascii| "nyan nyan nyan" |] (-100)
-- Nothing
-- >>> index [ascii| "nyan nyan nyan" |] 0
-- Just '0x6e'
-- >>> index [ascii| "nyan nyan nyan" |] 5
-- Just '0x6e'
-- >>> index [ascii| "nyan nyan nyan" |] 2000
-- Nothing
--
-- /Complexity:/ \(\Theta(1)\)
--
-- @since 1.0.1
index :: AsciiText -> Int -> Maybe AsciiChar
index at i
  -- Bounds check first: BS.index is partial, so only call it when safe.
  | i < 0 || i >= length at = Nothing
  | otherwise = Just . coerce BS.index at $ i

-- | Returns 'Just' the first index in the text such that the character at that
-- index satisfies the predicate, 'Nothing' otherwise.
--
-- >>> findIndex ([char| 'n' |] ==) empty
-- Nothing
-- >>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'n' |]
-- Just 0
-- >>> findIndex ([char| 'n' |] ==) . singleton $ [char| 'w' |]
-- Nothing
-- >>> findIndex ([char| 'n' |] ==) [ascii| "nyan" |]
-- Just 0
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
{-# INLINE [1] findIndex #-}
findIndex :: (AsciiChar -> Bool) -> AsciiText -> Maybe Int
findIndex = coerce BS.findIndex

-- | @count needle haystack@, given a @needle@ of length \(n\) and a haystack of
-- length \(h\), counts the number of non-overlapping occurrences of @needle@ in
-- @haystack@. If @needle@ is empty, the count will be 0.
--
-- >>> count empty [ascii| "nyan nyan nyan" |]
-- 0
-- >>> count [ascii| "nyan" |] empty
-- 0
-- >>> count [ascii| "nyan" |] [ascii| "nyan" |]
-- 1
-- >>> count [ascii| "nyan" |] [ascii| "nyan nyan nyan" |]
-- 3
-- >>> count [ascii| "nyan" |] [ascii| "nyanyanyan" |]
-- 2
--
-- = On complexity
--
-- This function is based on a variant of the
-- [NSN](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html) algorithm,
-- except it does not detect overlapping needles. Its average-case analysis is
-- based on the assumption that:
--
-- * All ASCII symbols are equally likely to occur in both the needle and the
-- haystack; and
-- * The needle has length at least two; and
-- * Both the needle and the haystack contain at least four unique symbols.
--
-- We fall back to 'split' for singleton needles, and there is no work to be
-- done on empty needles, which means the second assumption always holds.
--
-- Worst-case behaviour becomes more likely the more your input satisfies the
-- following conditions:
--
-- * The needle and/or haystack use few unique symbols (less than four is the
-- worst); or
-- * The haystack contains many instances of the second symbol of the needle
-- which don't lead to full matches.
--
-- /Complexity:/ \(\Theta(h)\) average case, \(\Theta(h \cdot n)\) worst-case.
--
-- /See also:/ Note that all the below are references for the original
-- algorithm, which includes searching for overlapping needles. Thus, our
-- implementation will perform better than the analysis suggests.
--
-- * [Description and pseudocode](https://www-igm.univ-mlv.fr/~lecroq/string/node13.html)
-- * ["Algorithms on Strings"](https://www.cambridge.org/core/books/algorithms-on-strings/19049704C876795D95D8882C73257C70) by Crochemore, Hancart and Lecroq. PDF available [here](https://www.researchgate.net/publication/220693689_Algorithms_on_Strings).
--
-- @since 1.0.1
count :: AsciiText -> AsciiText -> Int
count needle@(AsciiText n) haystack@(AsciiText h)
  | P.min (length needle) (length haystack) == 0 = 0
  -- Single-byte needles can use bytestring's specialized byte counter.
  | length needle == 1 = BS.count (BS.head n) h
  | otherwise = P.length . indices n $ h

-- Zipping

-- | \'Pair off\' characters in both texts at corresponding indices. The result
-- will be limited to the shorter of the two arguments.
--
-- >>> zip empty [ascii| "catboy" |]
-- []
-- >>> zip [ascii| "catboy" |] empty
-- []
-- >>> zip [ascii| "catboy" |] [ascii| "nyan" |]
-- [('0x63','0x6e'),('0x61','0x79'),('0x74','0x61'),('0x62','0x6e')]
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
zip :: AsciiText -> AsciiText -> [(AsciiChar, AsciiChar)]
zip = coerce BS.zip

-- | Combine two texts together in lockstep to produce a new text, using the
-- provided function to combine ASCII characters at each step. The length of the
-- result will be the minimum of the lengths of the two text arguments.
--
-- >>> zipWith max [ascii| "I am a catboy" |] empty
-- ""
-- >>> zipWith max empty [ascii| "I am a catboy" |]
-- ""
-- >>> zipWith max [ascii| "I am a catboy" |] [ascii| "Nyan nyan nyan nyan nyan" |]
-- "Nyan nycntnyy"
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.1
zipWith ::
  (AsciiChar -> AsciiChar -> AsciiChar) -> AsciiText -> AsciiText -> AsciiText
zipWith f t1 t2 = unfoldr go (t1, t2)
  where
    -- The unfold stops as soon as either text is exhausted (uncons fails),
    -- which gives the minimum-length behaviour documented above.
    go :: (AsciiText, AsciiText) -> Maybe (AsciiChar, (AsciiText, AsciiText))
    go (acc1, acc2) = do
      (h1, t1') <- uncons acc1
      (h2, t2') <- uncons acc2
      pure (f h1 h2, (t1', t2'))

-- Conversions

-- | Try and convert a 'Text' into an 'AsciiText'. Gives 'Nothing' if the 'Text'
-- contains any symbols which lack an ASCII equivalent.
--
-- >>> fromText "catboy"
-- Just "catboy"
-- >>> fromText "😺😺😺😺😺"
-- Nothing
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
fromText :: Text -> Maybe AsciiText
fromText t = case T.find (not . isAscii) t of
  -- Every code point is ASCII, so UTF-8 encoding yields exactly one byte
  -- per character.
  Nothing -> pure . AsciiText . encodeUtf8 $ t
  Just _ -> Nothing

-- | Try and convert a 'Text' into an 'AsciiText'. Gives @'Prelude.Left' c@ if the 'Text'
-- contains a 'Prelude.Char' @c@ that lacks an ASCII representation.
--
-- >>> eitherFromText "catboy"
-- Right "catboy"
-- >>> eitherFromText "😺😺😺😺😺"
-- Left '\128570'
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.1
eitherFromText :: Text -> P.Either P.Char AsciiText
eitherFromText t = case T.find (not . isAscii) t of
  Nothing -> pure . AsciiText . encodeUtf8 $ t
  Just c -> P.Left c

-- | Try and convert a 'ByteString' into an 'AsciiText'. Gives 'Nothing' if the
-- 'ByteString' contains any bytes outside the ASCII range (that is, from 0 to
-- 127 inclusive).
--
-- >>> fromByteString "catboy"
-- Just "catboy"
-- >>> fromByteString . BS.pack $ [128]
-- Nothing
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.0.0
fromByteString :: ByteString -> Maybe AsciiText
fromByteString bs = case BS.find (> 127) bs of
  Nothing -> pure . AsciiText $ bs
  Just _ -> Nothing

-- | Try and convert a 'ByteString' into an 'AsciiText'. Gives @'Prelude.Left' w8@ if
-- the 'ByteString' contains a byte @w8@ that is outside the ASCII range (that
-- is, from 0 to 127 inclusive).
--
-- >>> eitherFromByteString "catboy"
-- Right "catboy"
-- >>> eitherFromByteString . BS.pack $ [128]
-- Left 128
--
-- /Complexity:/ \(\Theta(n)\)
--
-- @since 1.1
eitherFromByteString :: ByteString -> P.Either Word8 AsciiText
eitherFromByteString bs = case BS.find (> 127) bs of
  Nothing -> pure . AsciiText $ bs
  Just w8 -> P.Left w8

-- | Convert an 'AsciiText' into a 'Text' (by copying).
1986 | -- 1987 | -- >>> toText empty 1988 | -- "" 1989 | -- >>> toText . singleton $ [char| 'w' |] 1990 | -- "w" 1991 | -- >>> toText [ascii| "nyan" |] 1992 | -- "nyan" 1993 | -- 1994 | -- /Complexity:/ \(\Theta(n)\) 1995 | -- 1996 | -- @since 1.0.0 1997 | toText :: AsciiText -> Text 1998 | toText (AsciiText bs) = decodeUtf8 bs 1999 | 2000 | -- | Reinterpret an 'AsciiText' as a 'ByteString' (without copying). 2001 | -- 2002 | -- >>> toByteString empty 2003 | -- "" 2004 | -- >>> toByteString . singleton $ [char| 'w' |] 2005 | -- "w" 2006 | -- >>> toByteString [ascii| "nyan" |] 2007 | -- "nyan" 2008 | -- 2009 | -- /Complexity:/ \(\Theta(1)\) 2010 | -- 2011 | -- @since 1.0.0 2012 | toByteString :: AsciiText -> ByteString 2013 | toByteString = coerce 2014 | 2015 | -- Optics 2016 | 2017 | -- | A convenient demonstration of the relationship between 'toText' and 2018 | -- 'fromText'. 2019 | -- 2020 | -- >>> preview textWise "catboy goes nyan" 2021 | -- Just "catboy goes nyan" 2022 | -- >>> preview textWise "😺😺😺😺😺" 2023 | -- Nothing 2024 | -- >>> review textWise [ascii| "catboys are amazing" |] 2025 | -- "catboys are amazing" 2026 | -- 2027 | -- @since 1.0.0 2028 | textWise :: Prism' Text AsciiText 2029 | textWise = prism' toText fromText 2030 | 2031 | -- | A convenient demonstration of the relationship between 'toByteString' and 2032 | -- 'fromByteString'. 2033 | -- 2034 | -- >>> preview byteStringWise "catboy goes nyan" 2035 | -- Just "catboy goes nyan" 2036 | -- >>> preview byteStringWise . BS.pack $ [0xff, 0xff] 2037 | -- Nothing 2038 | -- >>> review byteStringWise [ascii| "I love catboys" |] 2039 | -- "I love catboys" 2040 | -- 2041 | -- @since 1.0.0 2042 | byteStringWise :: Prism' ByteString AsciiText 2043 | byteStringWise = prism' toByteString fromByteString 2044 | 2045 | -- | Pack (or unpack) a list of ASCII characters into a text. 
2046 | -- 2047 | -- >>> view packedChars [[char| 'n' |], [char| 'y' |], [char| 'a' |], [char| 'n' |]] 2048 | -- "nyan" 2049 | -- >>> review packedChars [ascii| "nyan" |] 2050 | -- ['0x6e','0x79','0x61','0x6e'] 2051 | -- 2052 | -- @since 1.0.1 2053 | packedChars :: Iso' [AsciiChar] AsciiText 2054 | packedChars = 2055 | coerceS . coerceT . coerceA . coerceB $ BSO.packedBytes @ByteString 2056 | 2057 | -- | Traverse the individual ASCII characters in a text. 2058 | -- 2059 | -- >>> preview (elementOf chars 0) [ascii| "I am a catboy" |] 2060 | -- Just '0x49' 2061 | -- >>> preview (elementOf chars 100) [ascii| "I am a catboy" |] 2062 | -- Nothing 2063 | -- >>> iover chars (\i x -> bool x [char| 'w' |] . even $ i) [ascii| "I am a catboy" |] 2064 | -- "w wmwawcwtwow" 2065 | -- 2066 | -- @since 1.0.1 2067 | chars :: IxTraversal' Int64 AsciiText AsciiChar 2068 | chars = coerceS . coerceT . coerceA . coerceB $ BSO.bytes @ByteString 2069 | 2070 | -- | Pack (or unpack) a list of bytes into a text. This isn't as capable as 2071 | -- 'packedChars', as that would allow construction of invalid texts. 2072 | -- 2073 | -- >>> preview packedBytes [0x6e, 0x79, 0x61, 0x6e] 2074 | -- Just "nyan" 2075 | -- >>> preview packedBytes [0xff, 0xfe] 2076 | -- Nothing 2077 | -- >>> review packedBytes [ascii| "nyan" |] 2078 | -- [110,121,97,110] 2079 | -- 2080 | -- @since 1.0.1 2081 | packedBytes :: Prism' [Word8] AsciiText 2082 | packedBytes = prism' (review go) (P.fmap (view go2) . P.traverse asciify) 2083 | where 2084 | go :: Review [Word8] AsciiText 2085 | go = castOptic . coerceA . coerceB $ BSO.packedBytes @ByteString 2086 | go2 :: Getter [Word8] AsciiText 2087 | go2 = castOptic . coerceA . coerceB $ BSO.packedBytes @ByteString 2088 | 2089 | -- | Access the individual bytes in a text. This isn't as capable as 'chars', as 2090 | -- that would allow modifications of the bytes in ways that aren't valid as 2091 | -- ASCII. 
2092 | -- 2093 | -- >>> itoListOf bytes [ascii| "I am a catboy" |] 2094 | -- [(0,73),(1,32),(2,97),(3,109),(4,32),(5,97),(6,32),(7,99),(8,97),(9,116),(10,98),(11,111),(12,121)] 2095 | -- 2096 | -- @since 1.0.1 2097 | bytes :: IxFold Int64 AsciiText Word8 2098 | bytes = castOptic . coerceS . coerceT $ BSO.bytes @ByteString 2099 | 2100 | -- Helpers 2101 | 2102 | isSpace :: AsciiChar -> Bool 2103 | isSpace (AsciiChar w8) 2104 | | w8 == 32 = True 2105 | | 9 <= w8 && w8 <= 13 = True 2106 | | otherwise = False 2107 | 2108 | asciify :: Word8 -> Maybe Word8 2109 | asciify w8 2110 | | w8 <= 127 = Just w8 2111 | | otherwise = Nothing 2112 | 2113 | indices :: ByteString -> ByteString -> [Int] 2114 | indices needle haystack 2115 | | P.min needleLen haystackLen == 0 = [] 2116 | | needleLen == 1 = BS.elemIndices (BS.head needle) haystack 2117 | | otherwise = L.unfoldr go 0 2118 | where 2119 | go :: Int -> Maybe (Int, Int) 2120 | go j 2121 | | j > (haystackLen - needleLen) = Nothing 2122 | | BS.index needle 1 /= BS.index haystack (j + 1) = go (j + kay) 2123 | | otherwise = do 2124 | let fragment = BS.take needleLen . 
BS.drop j $ haystack 2125 | if fragment == needle 2126 | then pure (j, j + needleLen) 2127 | else go (j + ell) 2128 | kay :: Int 2129 | kay 2130 | | BS.head needle == BS.index needle 1 = 2 2131 | | otherwise = 1 2132 | ell :: Int 2133 | ell 2134 | | BS.head needle == BS.index needle 1 = 1 2135 | | otherwise = 2 2136 | needleLen :: Int 2137 | needleLen = BS.length needle 2138 | haystackLen :: Int 2139 | haystackLen = BS.length haystack 2140 | -------------------------------------------------------------------------------- /src/Text/Ascii/Char.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingVia #-} 2 | {-# LANGUAGE LambdaCase #-} 3 | {-# LANGUAGE PatternSynonyms #-} 4 | {-# LANGUAGE Trustworthy #-} 5 | 6 | -- | 7 | -- Module: Text.Ascii.Char 8 | -- Copyright: (C) 2021 Koz Ross 9 | -- License: Apache 2.0 10 | -- Maintainer: Koz Ross 11 | -- Stability: stable 12 | -- Portability: GHC only 13 | -- 14 | -- An implementation of ASCII characters, as bytes restricted to the range 0 - 15 | -- 127 inclusive. 
16 | -- 17 | -- /See also:/ [Wikipedia entry for ASCII](https://en.wikipedia.org/wiki/ASCII) 18 | module Text.Ascii.Char 19 | ( -- * ASCII characters 20 | 21 | -- ** Type 22 | AsciiChar (AsByte, AsChar), 23 | 24 | -- ** Construction 25 | char, 26 | fromChar, 27 | fromByte, 28 | 29 | -- ** Transformation 30 | upcase, 31 | downcase, 32 | 33 | -- * Categorization 34 | AsciiType (Control, Printable), 35 | charType, 36 | AsciiCategory (Other, Punctuation, Letter, Number, Symbol), 37 | categorize, 38 | categorizeGeneral, 39 | AsciiCase (Upper, Lower), 40 | caseOf, 41 | 42 | -- * Optics 43 | charWise, 44 | byteWise, 45 | ) 46 | where 47 | 48 | import Control.DeepSeq (NFData) 49 | import Control.Monad (guard) 50 | import Data.Char (GeneralCategory, chr, generalCategory, isAscii, ord) 51 | import Data.Functor (($>)) 52 | import Data.Hashable (Hashable) 53 | import Data.Word (Word8) 54 | import Optics.Prism (Prism', prism') 55 | import Text.Ascii.Internal (AsciiChar (AsciiChar), toByte, pattern AsByte, pattern AsChar) 56 | import Text.Ascii.QQ (char) 57 | 58 | -- $setup 59 | -- >>> :set -XQuasiQuotes 60 | -- >>> import Text.Ascii.Char 61 | -- >>> import Optics.AffineFold (preview) 62 | -- >>> import Optics.Review (review) 63 | 64 | -- | Try and turn a 'Char' into the equivalent 'AsciiChar'. Will return 65 | -- 'Nothing' if given a 'Char' that has no ASCII equivalent. 66 | -- 67 | -- >>> fromChar '0' 68 | -- Just '0x30' 69 | -- >>> fromChar '😺' 70 | -- Nothing 71 | -- 72 | -- @since 1.0.0 73 | fromChar :: Char -> Maybe AsciiChar 74 | fromChar c = 75 | if isAscii c 76 | then pure . AsciiChar . fromIntegral . ord $ c 77 | else Nothing 78 | 79 | -- | Try to give the 'AsciiChar' corresponding to the given byte. Will return 80 | -- 'Nothing' if given a byte that doesn't correspond to an ASCII character. 
81 | -- 82 | -- >>> fromByte 50 83 | -- Just '0x32' 84 | -- >>> fromByte 128 85 | -- Nothing 86 | -- 87 | -- @since 1.0.0 88 | fromByte :: Word8 -> Maybe AsciiChar 89 | fromByte w8 = 90 | if isAscii . chr . fromIntegral $ w8 91 | then pure . AsciiChar $ w8 92 | else Nothing 93 | 94 | -- | Give the 'AsciiChar' corresponding to the uppercase version of the 95 | -- argument. Will give 'Nothing' if given an 'AsciiChar' which has no uppercase 96 | -- version, or is uppercase already. 97 | -- 98 | -- >>> upcase [char| 'a' |] 99 | -- Just '0x41' 100 | -- >>> upcase [char| '0' |] 101 | -- Nothing 102 | -- 103 | -- @since 1.0.0 104 | upcase :: AsciiChar -> Maybe AsciiChar 105 | upcase c@(AsciiChar w8) = 106 | caseOf c >>= (\cs -> guard (cs == Lower) $> AsciiChar (w8 - 32)) 107 | 108 | -- | Give the 'AsciiChar' corresponding to the lowercase version of the 109 | -- argument. Will give 'Nothing' if given an 'AsciiChar' which has no lowercase 110 | -- version, or is lowercase already. 111 | -- 112 | -- >>> downcase [char| 'C' |] 113 | -- Just '0x63' 114 | -- >>> downcase [char| '\\' |] 115 | -- Nothing 116 | -- 117 | -- @since 1.0.0 118 | downcase :: AsciiChar -> Maybe AsciiChar 119 | downcase c@(AsciiChar w8) = 120 | caseOf c >>= (\cs -> guard (cs == Upper) $> AsciiChar (w8 + 32)) 121 | 122 | -- Categorization 123 | 124 | -- | A categorization of ASCII characters based on whether they're meant to be 125 | -- displayed ('Printable') or for control ('Control'). 126 | -- 127 | -- @since 1.0.0 128 | newtype AsciiType = AsciiType Word8 129 | deriving (Eq, Ord, Hashable, NFData) via Word8 130 | 131 | -- | @since 1.0.0 132 | instance Show AsciiType where 133 | {-# INLINEABLE show #-} 134 | show = \case 135 | Control -> "Control" 136 | Printable -> "Printable" 137 | 138 | -- | @since 1.0.0 139 | instance Bounded AsciiType where 140 | minBound = Control 141 | maxBound = Printable 142 | 143 | -- | A control character is any of the first 32 bytes (0-31), plus @DEL@ (127). 
144 | -- 145 | -- @since 1.0.0 146 | pattern Control :: AsciiType 147 | pattern Control <- 148 | AsciiType 0 149 | where 150 | Control = AsciiType 0 151 | 152 | -- | All ASCII characters whose byte is above 31 (and not 127) are printable 153 | -- characters. 154 | -- 155 | -- @since 1.0.0 156 | pattern Printable :: AsciiType 157 | pattern Printable <- 158 | AsciiType 1 159 | where 160 | Printable = AsciiType 1 161 | 162 | {-# COMPLETE Control, Printable #-} 163 | 164 | -- | Classify an 'AsciiChar' according to whether it's a control character or a 165 | -- printable character. 166 | -- 167 | -- >>> charType [char| '\0' |] 168 | -- Control 169 | -- >>> charType [char| 'w' |] 170 | -- Printable 171 | -- 172 | -- @since 1.0.0 173 | charType :: AsciiChar -> AsciiType 174 | charType (AsciiChar w8) 175 | | w8 == 127 = Control 176 | | w8 < 32 = Control 177 | | otherwise = Printable 178 | 179 | -- | A categorization of ASCII characters based on their usage. Based (loosely) 180 | -- on Unicode categories. 181 | -- 182 | -- @since 1.0.0 183 | newtype AsciiCategory = AsciiCategory Word8 184 | deriving (Eq, Ord, Hashable, NFData) via Word8 185 | 186 | -- | @since 1.0.0 187 | instance Show AsciiCategory where 188 | {-# INLINEABLE show #-} 189 | show = \case 190 | Other -> "Other" 191 | Symbol -> "Symbol" 192 | Number -> "Number" 193 | Letter -> "Letter" 194 | Punctuation -> "Punctuation" 195 | 196 | -- | @since 1.0.0 197 | instance Bounded AsciiCategory where 198 | minBound = Other 199 | maxBound = Symbol 200 | 201 | -- | Something which doesn't fit into any of the other categories. 202 | -- 203 | -- @since 1.0.0 204 | pattern Other :: AsciiCategory 205 | pattern Other <- 206 | AsciiCategory 0 207 | where 208 | Other = AsciiCategory 0 209 | 210 | -- | A punctuation character. 
211 | -- 212 | -- @since 1.0.0 213 | pattern Punctuation :: AsciiCategory 214 | pattern Punctuation <- 215 | AsciiCategory 1 216 | where 217 | Punctuation = AsciiCategory 1 218 | 219 | -- | A letter, either uppercase or lowercase. 220 | -- 221 | -- @since 1.0.0 222 | pattern Letter :: AsciiCategory 223 | pattern Letter <- 224 | AsciiCategory 2 225 | where 226 | Letter = AsciiCategory 2 227 | 228 | -- | A numerical digit. 229 | -- 230 | -- @since 1.0.0 231 | pattern Number :: AsciiCategory 232 | pattern Number <- 233 | AsciiCategory 3 234 | where 235 | Number = AsciiCategory 3 236 | 237 | -- | A symbol whose role isn't (normally) punctuation. 238 | -- 239 | -- @since 1.0.0 240 | pattern Symbol :: AsciiCategory 241 | pattern Symbol <- 242 | AsciiCategory 4 243 | where 244 | Symbol = AsciiCategory 4 245 | 246 | {-# COMPLETE Other, Punctuation, Letter, Number, Symbol #-} 247 | 248 | -- | Classify an 'AsciiChar' based on its category. 249 | -- 250 | -- >>> categorize [char| ',' |] 251 | -- Punctuation 252 | -- >>> categorize [char| '~' |] 253 | -- Symbol 254 | -- >>> categorize [char| 'w' |] 255 | -- Letter 256 | -- >>> categorize [char| '2' |] 257 | -- Number 258 | -- >>> categorize [char| '\0' |] 259 | -- Other 260 | -- 261 | -- @since 1.0.0 262 | categorize :: AsciiChar -> AsciiCategory 263 | categorize c@(AsciiChar w8) 264 | | charType c == Control = Other 265 | | w8 == 0x20 = Punctuation 266 | | w8 >= 0x21 && w8 <= 0x23 = Punctuation 267 | | w8 == 0x24 = Symbol 268 | | w8 >= 0x25 && w8 <= 0x2a = Punctuation 269 | | w8 == 0x2b = Symbol 270 | | w8 >= 0x2c && w8 <= 0x2f = Punctuation 271 | | w8 >= 0x30 && w8 <= 0x39 = Number 272 | | w8 >= 0x3a && w8 <= 0x3b = Punctuation 273 | | w8 >= 0x3c && w8 <= 0x3e = Symbol 274 | | w8 >= 0x3f && w8 <= 0x40 = Punctuation 275 | | w8 >= 0x41 && w8 <= 0x5a = Letter 276 | | w8 >= 0x5b && w8 <= 0x5d = Punctuation 277 | | w8 == 0x5e = Symbol 278 | | w8 == 0x5f = Punctuation 279 | | w8 == 0x60 = Symbol 280 | | w8 >= 0x61 && w8 <= 0x7a = 
Letter 281 | | w8 == 0x7b = Punctuation 282 | | w8 == 0x7c = Symbol 283 | | w8 == 0x7d = Punctuation 284 | | otherwise = Symbol -- This only leaves ~. - Koz 285 | 286 | -- | Compatibility method for the 'GeneralCategory' provided by 'Data.Char'. 287 | -- 288 | -- >>> categorizeGeneral [char| ',' |] 289 | -- OtherPunctuation 290 | -- >>> categorizeGeneral [char| '~' |] 291 | -- MathSymbol 292 | -- >>> categorizeGeneral [char| 'w' |] 293 | -- LowercaseLetter 294 | -- >>> categorizeGeneral [char| '2' |] 295 | -- DecimalNumber 296 | -- >>> categorizeGeneral [char| '\0' |] 297 | -- Control 298 | -- 299 | -- @since 1.0.0 300 | categorizeGeneral :: AsciiChar -> GeneralCategory 301 | categorizeGeneral (AsciiChar w8) = generalCategory . chr . fromIntegral $ w8 302 | 303 | -- | The case of an ASCII character (if it has one). 304 | -- 305 | -- @since 1.0.0 306 | newtype AsciiCase = AsciiCase Word8 307 | deriving (Eq, Ord, Hashable, NFData) via Word8 308 | 309 | -- | @since 1.0.0 310 | instance Show AsciiCase where 311 | {-# INLINEABLE show #-} 312 | show = \case 313 | Upper -> "Upper" 314 | Lower -> "Lower" 315 | 316 | -- | @since 1.0.0 317 | instance Bounded AsciiCase where 318 | minBound = Upper 319 | maxBound = Lower 320 | 321 | -- | Indicator of an uppercase character. 322 | -- 323 | -- @since 1.0.0 324 | pattern Upper :: AsciiCase 325 | pattern Upper <- 326 | AsciiCase 0 327 | where 328 | Upper = AsciiCase 0 329 | 330 | -- | Indicator of a lowercase character. 331 | -- 332 | -- @since 1.0.0 333 | pattern Lower :: AsciiCase 334 | pattern Lower <- 335 | AsciiCase 1 336 | where 337 | Lower = AsciiCase 1 338 | 339 | {-# COMPLETE Upper, Lower #-} 340 | 341 | -- | Determine the case of an 'AsciiChar'. Returns 'Nothing' if the character 342 | -- doesn't have a case. 
343 | -- 344 | -- >>> caseOf [char| 'w' |] 345 | -- Just Lower 346 | -- >>> caseOf [char| 'W' |] 347 | -- Just Upper 348 | -- >>> caseOf [char| '~' |] 349 | -- Nothing 350 | -- 351 | -- @since 1.0.0 352 | caseOf :: AsciiChar -> Maybe AsciiCase 353 | caseOf c@(AsciiChar w8) 354 | | categorize c /= Letter = Nothing 355 | | w8 <= 0x5a = Just Upper 356 | | otherwise = Just Lower 357 | 358 | -- Optics 359 | 360 | -- | A representation of the relationship between 'Char' and 'AsciiChar'. 361 | -- 362 | -- >>> preview charWise 'w' 363 | -- Just '0x77' 364 | -- >>> preview charWise '😺' 365 | -- Nothing 366 | -- >>> review charWise [char| 'w' |] 367 | -- 'w' 368 | -- 369 | -- @since 1.0.0 370 | charWise :: Prism' Char AsciiChar 371 | charWise = prism' (chr . fromIntegral . toByte) fromChar 372 | 373 | -- | A representation of the relationship between ASCII characters and bytes. 374 | -- 375 | -- >>> preview byteWise 0x20 376 | -- Just '0x20' 377 | -- >>> preview byteWise 0x81 378 | -- Nothing 379 | -- >>> review byteWise [char| 'w' |] 380 | -- 119 381 | -- 382 | -- @since 1.0.0 383 | byteWise :: Prism' Word8 AsciiChar 384 | byteWise = prism' toByte fromByte 385 | -------------------------------------------------------------------------------- /src/Text/Ascii/Internal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DerivingVia #-} 2 | {-# LANGUAGE PatternSynonyms #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | {-# LANGUAGE Trustworthy #-} 5 | {-# LANGUAGE TypeApplications #-} 6 | {-# LANGUAGE TypeFamilies #-} 7 | {-# LANGUAGE ViewPatterns #-} 8 | 9 | -- | 10 | -- Module: Text.Ascii.Internal 11 | -- Copyright: (C) 2021 Koz Ross 12 | -- License: Apache 2.0 13 | -- Maintainer: Koz Ross 14 | -- Stability: unstable, not subject to PVP 15 | -- Portability: GHC only 16 | -- 17 | -- This is an internal module, and is /not/ subject to the PVP. 
It can change 18 | -- in any way, at any time, and should not be depended on unless you know 19 | -- /exactly/ what you are doing. You have been warned. 20 | module Text.Ascii.Internal where 21 | 22 | import Control.DeepSeq (NFData) 23 | import Data.ByteString (ByteString) 24 | import qualified Data.ByteString as BS 25 | import Data.CaseInsensitive (FoldCase (foldCase)) 26 | import Data.Char (chr, isAscii) 27 | import Data.Coerce (coerce) 28 | import Data.Hashable (Hashable) 29 | import qualified Data.List.NonEmpty as NE 30 | import Data.Monoid.Factorial (FactorialMonoid) 31 | import Data.Monoid.GCD (LeftGCDMonoid, RightGCDMonoid) 32 | import Data.Monoid.Monus (OverlappingGCDMonoid) 33 | import Data.Monoid.Null (MonoidNull, PositiveMonoid) 34 | import Data.Semigroup.Cancellative (LeftCancellative, LeftReductive, RightCancellative, RightReductive) 35 | import Data.Semigroup.Factorial (Factorial, StableFactorial) 36 | import Data.Word (Word8) 37 | import GHC.Exts (IsList (Item, fromList, fromListN, toList)) 38 | import Numeric (showHex) 39 | import Optics.AffineTraversal (An_AffineTraversal, atraversal) 40 | import Optics.At.Core (Index, IxValue, Ixed (IxKind, ix)) 41 | import Text.Megaparsec.Stream 42 | ( Stream 43 | ( Token, 44 | Tokens, 45 | chunkLength, 46 | chunkToTokens, 47 | take1_, 48 | takeN_, 49 | takeWhile_, 50 | tokenToChunk, 51 | tokensToChunk 52 | ), 53 | TraversableStream (reachOffset), 54 | VisualStream (showTokens), 55 | ) 56 | import Type.Reflection (Typeable) 57 | 58 | -- | Represents valid ASCII characters, which are bytes from @0x00@ to @0x7f@. 
59 | -- 60 | -- @since 1.0.0 61 | newtype AsciiChar = AsciiChar {toByte :: Word8} 62 | deriving 63 | ( -- | @since 1.0.0 64 | Eq, 65 | -- | @since 1.0.0 66 | Ord, 67 | -- | @since 1.0.0 68 | Hashable, 69 | -- | @since 1.0.0 70 | NFData 71 | ) 72 | via Word8 73 | deriving stock 74 | ( -- | @since 1.0.0 75 | Typeable 76 | ) 77 | 78 | -- | @since 1.0.0 79 | instance Show AsciiChar where 80 | {-# INLINEABLE show #-} 81 | show (AsciiChar w8) = "'0x" <> showHex w8 "'" 82 | 83 | -- | @since 1.0.0 84 | instance Bounded AsciiChar where 85 | {-# INLINEABLE minBound #-} 86 | minBound = AsciiChar 0 87 | {-# INLINEABLE maxBound #-} 88 | maxBound = AsciiChar 127 89 | 90 | -- | @since 1.0.1 91 | instance FoldCase AsciiChar where 92 | {-# INLINEABLE foldCase #-} 93 | foldCase ac@(AsciiChar w8) 94 | | 65 <= w8 && w8 <= 90 = AsciiChar (w8 + 32) 95 | | otherwise = ac 96 | 97 | -- | View an 'AsciiChar' as its underlying byte. You can pattern match on this, 98 | -- but since there are more bytes than valid ASCII characters, you cannot use 99 | -- this to construct. 100 | -- 101 | -- @since 1.0.0 102 | pattern AsByte :: Word8 -> AsciiChar 103 | pattern AsByte w8 <- AsciiChar w8 104 | 105 | -- | View an 'AsciiChar' as a 'Char'. You can pattern match on this, but since 106 | -- there are more 'Char's than valid ASCII characters, you cannot use this to 107 | -- construct. 108 | -- 109 | -- @since 1.0.0 110 | pattern AsChar :: Char -> AsciiChar 111 | pattern AsChar c <- AsciiChar (isJustAscii -> Just c) 112 | 113 | {-# COMPLETE AsByte #-} 114 | 115 | {-# COMPLETE AsChar #-} 116 | 117 | -- | A string of ASCII characters, represented as a packed byte array. 
118 | -- 119 | -- @since 1.0.0 120 | newtype AsciiText = AsciiText ByteString 121 | deriving 122 | ( -- | @since 1.0.0 123 | Eq, 124 | -- | @since 1.0.0 125 | Ord, 126 | -- | @since 1.0.0 127 | NFData, 128 | -- | @since 1.0.0 129 | Semigroup, 130 | -- | @since 1.0.0 131 | Monoid, 132 | -- | @since 1.0.0 133 | Show, 134 | -- | @since 1.2 135 | Factorial, 136 | -- | @since 1.2 137 | FactorialMonoid, 138 | -- | @since 1.2 139 | LeftCancellative, 140 | -- | @since 1.2 141 | LeftGCDMonoid, 142 | -- | @since 1.2 143 | LeftReductive, 144 | -- | @since 1.2 145 | MonoidNull, 146 | -- | @since 1.2 147 | OverlappingGCDMonoid, 148 | -- | @since 1.2 149 | PositiveMonoid, 150 | -- | @since 1.2 151 | RightCancellative, 152 | -- | @since 1.2 153 | RightGCDMonoid, 154 | -- | @since 1.2 155 | RightReductive, 156 | -- | @since 1.2 157 | StableFactorial 158 | ) 159 | via ByteString 160 | 161 | -- | @since 1.0.0 162 | instance IsList AsciiText where 163 | type Item AsciiText = AsciiChar 164 | {-# INLINEABLE fromList #-} 165 | fromList = 166 | coerce @ByteString @AsciiText 167 | . fromList 168 | . coerce @[AsciiChar] @[Word8] 169 | {-# INLINEABLE fromListN #-} 170 | fromListN n = 171 | coerce @ByteString @AsciiText 172 | . fromListN n 173 | . coerce @[AsciiChar] @[Word8] 174 | {-# INLINEABLE toList #-} 175 | toList = coerce . toList . coerce @AsciiText @ByteString 176 | 177 | -- | @since 1.0.1 178 | type instance Index AsciiText = Int 179 | 180 | -- | @since 1.0.1 181 | type instance IxValue AsciiText = AsciiChar 182 | 183 | -- | @since 1.0.1 184 | instance Ixed AsciiText where 185 | type IxKind AsciiText = An_AffineTraversal 186 | {-# INLINEABLE ix #-} 187 | ix i = atraversal get put 188 | where 189 | get :: AsciiText -> Either AsciiText AsciiChar 190 | get (AsciiText at) = case at BS.!? i of 191 | Nothing -> Left . AsciiText $ at 192 | Just w8 -> Right . 
AsciiChar $ w8 193 | put :: AsciiText -> AsciiChar -> AsciiText 194 | put (AsciiText at) (AsciiChar ac) = case BS.splitAt i at of 195 | (lead, end) -> case BS.uncons end of 196 | Nothing -> AsciiText at 197 | Just (_, end') -> AsciiText (lead <> BS.singleton ac <> end') 198 | 199 | -- | @since 1.0.1 200 | instance FoldCase AsciiText where 201 | {-# INLINEABLE foldCase #-} 202 | foldCase (AsciiText bs) = AsciiText . BS.map go $ bs 203 | where 204 | go :: Word8 -> Word8 205 | go w8 206 | | 65 <= w8 && w8 <= 90 = w8 + 32 207 | | otherwise = w8 208 | 209 | -- | @since 1.0.1 210 | instance Stream AsciiText where 211 | type Token AsciiText = AsciiChar 212 | type Tokens AsciiText = AsciiText 213 | {-# INLINEABLE tokenToChunk #-} 214 | tokenToChunk _ = coerce BS.singleton 215 | {-# INLINEABLE tokensToChunk #-} 216 | tokensToChunk _ = fromList 217 | {-# INLINEABLE chunkToTokens #-} 218 | chunkToTokens _ = toList 219 | {-# INLINEABLE chunkLength #-} 220 | chunkLength _ = coerce BS.length 221 | {-# INLINEABLE take1_ #-} 222 | take1_ = coerce BS.uncons 223 | {-# INLINEABLE takeN_ #-} 224 | takeN_ n at@(AsciiText bs) 225 | | n <= 0 = Just (coerce BS.empty, at) 226 | | BS.length bs == 0 = Nothing 227 | | otherwise = Just . coerce . BS.splitAt n $ bs 228 | {-# INLINEABLE takeWhile_ #-} 229 | takeWhile_ = coerce BS.span 230 | 231 | -- | @since 1.0.1 232 | instance VisualStream AsciiText where 233 | {-# INLINEABLE showTokens #-} 234 | showTokens _ = fmap (chr . fromIntegral) . coerce @_ @[Word8] . NE.toList 235 | 236 | -- | @since 1.0.1 237 | instance TraversableStream AsciiText where 238 | {-# INLINEABLE reachOffset #-} 239 | reachOffset o ps = coerce (reachOffset o ps) 240 | 241 | -- Helpers 242 | 243 | isJustAscii :: Word8 -> Maybe Char 244 | isJustAscii w8 = 245 | if isAscii asChar 246 | then pure asChar 247 | else Nothing 248 | where 249 | asChar :: Char 250 | asChar = chr . 
fromIntegral $ w8 251 | -------------------------------------------------------------------------------- /src/Text/Ascii/QQ.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | {-# LANGUAGE Trustworthy #-} 3 | 4 | -- | 5 | -- Module: Text.Ascii.QQ 6 | -- Copyright: (C) 2021 Koz Ross 7 | -- License: Apache 2.0 8 | -- Maintainer: Koz Ross 9 | -- Stability: unstable, not subject to PVP 10 | -- Portability: GHC only 11 | -- 12 | -- This is an internal module, and is /not/ subject to the PVP. It can change 13 | -- in any way, at any time, and should not be depended on unless you know 14 | -- /exactly/ what you are doing. You have been warned. 15 | module Text.Ascii.QQ where 16 | 17 | import Data.ByteString (ByteString) 18 | import qualified Data.ByteString as BS 19 | import Data.Char 20 | ( isAlphaNum, 21 | isAscii, 22 | isPunctuation, 23 | isSymbol, 24 | ord, 25 | ) 26 | import Data.Functor (void) 27 | import Data.Void (Void) 28 | import GHC.Exts (IsList (fromList)) 29 | import Language.Haskell.TH.Quote (QuasiQuoter (QuasiQuoter)) 30 | import Language.Haskell.TH.Syntax 31 | ( Dec, 32 | Exp (AppE, ConE, ListE, LitE, VarE), 33 | Lit (IntegerL), 34 | Pat, 35 | Q, 36 | Type, 37 | ) 38 | import Text.Ascii.Internal (AsciiChar (AsciiChar), AsciiText (AsciiText)) 39 | import Text.Megaparsec 40 | ( Parsec, 41 | between, 42 | eof, 43 | lookAhead, 44 | manyTill, 45 | oneOf, 46 | parse, 47 | satisfy, 48 | single, 49 | try, 50 | ) 51 | import Text.Megaparsec.Char (space) 52 | import Text.Megaparsec.Error (errorBundlePretty) 53 | 54 | -- $setup 55 | -- >>> :set -XQuasiQuotes 56 | -- >>> import Text.Ascii.QQ 57 | 58 | -- | Allows constructing ASCII characters from literals, whose correctness is 59 | -- checked by the compiler. 60 | -- 61 | -- Currently, accepts literal syntax similar to the Haskell parser, with escape 62 | -- sequences preceded by \'\\\'. 
In particular, this includes the single quote 63 | -- (see the example below). 64 | -- 65 | -- >>> [char| '\'' |] 66 | -- '0x27' 67 | -- 68 | -- @since 1.0.0 69 | char :: QuasiQuoter 70 | char = QuasiQuoter charQQ (errPat "char") (errType "char") (errDec "char") 71 | 72 | -- | Allows constructing ASCII strings from literals, whose correctness is 73 | -- checked by the compiler. 74 | -- 75 | -- Currently accepts literal syntax similar to the Haskell parser, with escape 76 | -- sequences preceded by \'\\\'. In particular, this includes the double quote 77 | -- (see the example below). 78 | -- 79 | -- >>> [ascii| "\"Nyan!\", said the catboy." |] 80 | -- "\"Nyan!\", said the catboy." 81 | -- 82 | -- @since 1.0.0 83 | ascii :: QuasiQuoter 84 | ascii = QuasiQuoter asciiQQ (errPat "ascii") (errType "ascii") (errDec "ascii") 85 | 86 | -- Helpers 87 | 88 | asciiQQ :: String -> Q Exp 89 | asciiQQ input = case parse (between open close go) "" input of 90 | Left err -> fail . errorBundlePretty $ err 91 | Right result -> 92 | pure 93 | . AppE (ConE 'AsciiText) 94 | . AppE (VarE 'fromList) 95 | . ListE 96 | . fmap (LitE . IntegerL . fromIntegral) 97 | . BS.unpack 98 | $ result 99 | where 100 | open :: Parsec Void String () 101 | open = space *> (void . single $ '"') 102 | close :: Parsec Void String () 103 | close = single '"' *> space *> eof 104 | go :: Parsec Void String ByteString 105 | go = BS.pack <$> manyTill asciiByte (lookAhead . try . single $ '"') 106 | asciiByte = do 107 | c <- satisfy isAscii 108 | case c of 109 | '\\' -> do 110 | c' <- oneOf "0abfnrtv\\\"" 111 | pure . fromIntegral . ord $ case c' of 112 | '0' -> '\0' 113 | 'a' -> '\a' 114 | 'b' -> '\b' 115 | 'f' -> '\f' 116 | 'n' -> '\n' 117 | 'r' -> '\r' 118 | 't' -> '\t' 119 | 'v' -> '\v' 120 | '\\' -> '\\' 121 | _ -> '"' 122 | _ -> pure . fromIntegral . ord $ c 123 | 124 | charQQ :: String -> Q Exp 125 | charQQ input = case parse (between open close go) "" input of 126 | Left err -> fail . 
errorBundlePretty $ err 127 | Right result -> 128 | pure . AppE (ConE 'AsciiChar) . LitE . IntegerL . fromIntegral $ result 129 | where 130 | open :: Parsec Void String () 131 | open = space *> (void . single $ '\'') 132 | close :: Parsec Void String () 133 | close = single '\'' *> space *> eof 134 | go :: Parsec Void String Int 135 | go = do 136 | c1 <- satisfy isValidLead 137 | case c1 of 138 | '\\' -> do 139 | c2 <- oneOf "0abfnrtv\\\'" 140 | pure . ord $ case c2 of 141 | '0' -> '\0' 142 | 'a' -> '\a' 143 | 'b' -> '\b' 144 | 'f' -> '\f' 145 | 'n' -> '\n' 146 | 'r' -> '\r' 147 | 't' -> '\t' 148 | 'v' -> '\v' 149 | '\\' -> '\\' 150 | _ -> '\'' 151 | _ -> pure . ord $ c1 152 | 153 | isValidLead :: Char -> Bool 154 | isValidLead c = isAscii c && (isAlphaNum c || c == ' ' || isSymbol c || isPunctuation c) 155 | 156 | errPat :: String -> String -> Q Pat 157 | errPat name _ = fail $ "'" <> name <> "' should not be used in a pattern context." 158 | 159 | errType :: String -> String -> Q Type 160 | errType name _ = fail $ "'" <> name <> "' should not be used in a type context." 161 | 162 | errDec :: String -> String -> Q [Dec] 163 | errDec name _ = fail $ "'" <> name <> "' should not be used in a declaration context." 164 | -------------------------------------------------------------------------------- /src/Text/Ascii/Unsafe.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFunctor #-} 2 | {-# LANGUAGE DerivingVia #-} 3 | {-# LANGUAGE FlexibleInstances #-} 4 | {-# LANGUAGE LambdaCase #-} 5 | {-# LANGUAGE RoleAnnotations #-} 6 | {-# LANGUAGE ScopedTypeVariables #-} 7 | {-# LANGUAGE TypeFamilies #-} 8 | 9 | -- | 10 | -- Module: Text.Ascii.Unsafe 11 | -- Copyright: (C) 2021 Koz Ross 12 | -- License: Apache 2.0 13 | -- Maintainer: Koz Ross 14 | -- Stability: stable 15 | -- Portability: GHC only 16 | -- 17 | -- A wrapper for partial type class instances and functions. 
18 | -- 19 | -- This module is designed for qualified importing: 20 | -- 21 | -- > import qualified Text.Ascii.Unsafe as Unsafe 22 | module Text.Ascii.Unsafe 23 | ( -- * Types 24 | Unsafe (..), 25 | 26 | -- * Text functions 27 | head, 28 | last, 29 | tail, 30 | init, 31 | foldl1, 32 | foldl1', 33 | foldr1, 34 | foldr1', 35 | maximum, 36 | minimum, 37 | scanl1, 38 | scanr1, 39 | index, 40 | ) 41 | where 42 | 43 | import Control.DeepSeq (NFData) 44 | import Data.ByteString (ByteString) 45 | import qualified Data.ByteString as BS 46 | import Data.CaseInsensitive (FoldCase) 47 | import Data.Coerce (coerce) 48 | import Data.Hashable (Hashable) 49 | import Data.Kind (Type) 50 | import Data.Monoid.Factorial (FactorialMonoid) 51 | import Data.Monoid.GCD (LeftGCDMonoid, RightGCDMonoid) 52 | import Data.Monoid.Monus (OverlappingGCDMonoid) 53 | import Data.Monoid.Null (MonoidNull, PositiveMonoid) 54 | import Data.Semigroup.Cancellative (LeftCancellative, LeftReductive, RightCancellative, RightReductive) 55 | import Data.Semigroup.Factorial (Factorial, StableFactorial) 56 | import Data.Word (Word8) 57 | import GHC.Exts (IsList) 58 | import GHC.Read (expectP, lexP, parens, readPrec) 59 | import Text.Ascii.Internal (AsciiChar (AsciiChar), AsciiText (AsciiText)) 60 | import Text.Megaparsec.Stream (Stream, TraversableStream, VisualStream) 61 | import Text.ParserCombinators.ReadPrec (ReadPrec) 62 | import Text.Read (Lexeme (Char)) 63 | import Type.Reflection (Typeable) 64 | import Prelude hiding 65 | ( foldl1, 66 | foldr1, 67 | head, 68 | init, 69 | last, 70 | maximum, 71 | minimum, 72 | scanl1, 73 | scanr1, 74 | tail, 75 | ) 76 | 77 | -- | A wrapper for a type, designating that partial type class methods or other 78 | -- functions are available for it. 79 | -- 80 | -- We set the role of the type argument of 'Unsafe' to nominal. Among other 81 | -- things, it means that this type can't be coerced or derived through. 
This 82 | -- ensures clear indication when (and to what extent) non-total operations occur 83 | -- in any code using them. 84 | -- 85 | -- @since 1.0.1 86 | newtype Unsafe (a :: Type) = Unsafe {safe :: a} 87 | deriving 88 | ( -- | @since 1.0.1 89 | Eq, 90 | -- | @since 1.0.1 91 | Ord, 92 | -- | @since 1.0.1 93 | Bounded, 94 | -- | @since 1.0.1 95 | Hashable, 96 | -- | @since 1.0.1 97 | NFData, 98 | -- | @since 1.0.1 99 | FoldCase, 100 | -- | @since 1.0.1 101 | Semigroup, 102 | -- | @since 1.0.1 103 | Monoid, 104 | -- | @since 1.0.1 105 | IsList, 106 | -- | @since 1.0.1 107 | Stream, 108 | -- | @since 1.0.1 109 | VisualStream, 110 | -- | @since 1.0.1 111 | TraversableStream, 112 | -- | @since 1.0.1 113 | Show, 114 | -- | @since 1.2 115 | Factorial, 116 | -- | @since 1.2 117 | FactorialMonoid, 118 | -- | @since 1.2 119 | LeftCancellative, 120 | -- | @since 1.2 121 | LeftGCDMonoid, 122 | -- | @since 1.2 123 | LeftReductive, 124 | -- | @since 1.2 125 | MonoidNull, 126 | -- | @since 1.2 127 | OverlappingGCDMonoid, 128 | -- | @since 1.2 129 | PositiveMonoid, 130 | -- | @since 1.2 131 | RightCancellative, 132 | -- | @since 1.2 133 | RightGCDMonoid, 134 | -- | @since 1.2 135 | RightReductive, 136 | -- | @since 1.2 137 | StableFactorial 138 | ) 139 | via a 140 | deriving stock 141 | ( -- | @since 1.0.1 142 | Typeable, 143 | -- | @since 1.0.1 144 | Functor 145 | ) 146 | 147 | type role Unsafe nominal 148 | 149 | -- | @since 1.0.1 150 | instance Read (Unsafe AsciiChar) where 151 | {-# INLINEABLE readPrec #-} 152 | readPrec = parens go 153 | where 154 | go :: ReadPrec (Unsafe AsciiChar) 155 | go = 156 | Unsafe . 
AsciiChar <$> do 157 | expectP (Char '\'') 158 | expectP (Char '0') 159 | expectP (Char 'x') 160 | Char d1 <- lexP 161 | Char d2 <- lexP 162 | expectP (Char '\'') 163 | case d1 of 164 | '0' -> fromSecondDigit d2 165 | '1' -> (16 +) <$> fromSecondDigit d2 166 | '2' -> (32 +) <$> fromSecondDigit d2 167 | '3' -> (48 +) <$> fromSecondDigit d2 168 | '4' -> (64 +) <$> fromSecondDigit d2 169 | '5' -> (80 +) <$> fromSecondDigit d2 170 | '6' -> (96 +) <$> fromSecondDigit d2 171 | '7' -> (112 +) <$> fromSecondDigit d2 172 | _ -> fail $ "Expected digit from 0 to 7, instead got '" <> [d1] <> "'" 173 | 174 | -- | @since 1.0.1 175 | instance Enum (Unsafe AsciiChar) where 176 | {-# INLINEABLE succ #-} 177 | succ (Unsafe (AsciiChar w8)) 178 | | w8 < 127 = Unsafe . AsciiChar $ w8 + 1 179 | | otherwise = error "Out of range for ASCII character" 180 | {-# INLINEABLE pred #-} 181 | pred (Unsafe (AsciiChar w8)) 182 | | w8 > 0 = Unsafe . AsciiChar $ w8 - 1 183 | | otherwise = error "Out of range for ASCII character" 184 | {-# INLINEABLE toEnum #-} 185 | toEnum n 186 | | 0 <= n && n <= 127 = Unsafe . AsciiChar . fromIntegral $ n 187 | | otherwise = error "Out of range for ASCII character" 188 | {-# INLINEABLE fromEnum #-} 189 | fromEnum (Unsafe (AsciiChar w8)) = fromIntegral w8 190 | {-# INLINEABLE enumFrom #-} 191 | enumFrom (Unsafe (AsciiChar w8)) = coerce [w | w <- [w8 ..], w <= 127] 192 | {-# INLINEABLE enumFromThen #-} 193 | enumFromThen (Unsafe (AsciiChar start)) (Unsafe (AsciiChar step)) = 194 | coerce [w | w <- [start, step ..], w <= 127] 195 | {-# INLINEABLE enumFromTo #-} 196 | enumFromTo (Unsafe (AsciiChar start)) (Unsafe (AsciiChar end)) = 197 | coerce [w | w <- [start .. end], w <= 127] 198 | {-# INLINEABLE enumFromThenTo #-} 199 | enumFromThenTo (Unsafe (AsciiChar start)) (Unsafe (AsciiChar step)) (Unsafe (AsciiChar end)) = 200 | coerce [w | w <- [start, step .. 
end], w <= 127] 201 | 202 | -- | @since 1.0.1 203 | instance Read (Unsafe AsciiText) where 204 | {-# INLINEABLE readPrec #-} 205 | readPrec = Unsafe . AsciiText <$> go 206 | where 207 | go :: ReadPrec ByteString 208 | go = do 209 | bs :: ByteString <- readPrec 210 | case BS.findIndex (>= 128) bs of 211 | Nothing -> pure bs 212 | Just i -> error $ "Non-ASCII byte at index " <> show i 213 | 214 | -- Functions 215 | 216 | -- $setup 217 | -- >>> :set -XNoImplicitPrelude 218 | -- >>> :set -XQuasiQuotes 219 | -- >>> import Text.Ascii.Unsafe 220 | -- >>> import Text.Ascii.QQ 221 | -- >>> import Prelude ((.), ($)) 222 | 223 | -- | Yield the first character of the text. 224 | -- 225 | -- /Requirements:/ Text is not empty. 226 | -- 227 | -- >>> head . Unsafe $ [ascii| "catboy" |] 228 | -- '0x63' 229 | -- 230 | -- /Complexity:/ \(\Theta(1)\) 231 | -- 232 | -- @since 1.0.1 233 | head :: Unsafe AsciiText -> AsciiChar 234 | head = coerce BS.head 235 | 236 | -- | Yield the last character of the text. 237 | -- 238 | -- /Requirements:/ Text is not empty. 239 | -- 240 | -- >>> last . Unsafe $ [ascii| "catboy" |] 241 | -- '0x79' 242 | -- 243 | -- /Complexity:/ \(\Theta(1)\) 244 | -- 245 | -- @since 1.0.1 246 | last :: Unsafe AsciiText -> AsciiChar 247 | last = coerce BS.last 248 | 249 | -- | Yield the text without its first character. 250 | -- 251 | -- /Requirements:/ Text is not empty. 252 | -- 253 | -- >>> tail . Unsafe $ [ascii| "catboy" |] 254 | -- "atboy" 255 | -- 256 | -- /Complexity:/ \(\Theta(1)\) 257 | -- 258 | -- @since 1.0.1 259 | tail :: Unsafe AsciiText -> Unsafe AsciiText 260 | tail = coerce BS.tail 261 | 262 | -- | Yield the text without its last character. 263 | -- 264 | -- /Requirements:/ Text is not empty. 265 | -- 266 | -- >>> init . 
Unsafe $ [ascii| "catboy" |] 267 | -- "catbo" 268 | -- 269 | -- /Complexity:/ \(\Theta(1)\) 270 | -- 271 | -- @since 1.0.1 272 | init :: Unsafe AsciiText -> Unsafe AsciiText 273 | init = coerce BS.init 274 | 275 | -- | Left-associative fold of a text without a base case. 276 | -- 277 | -- /Requirements:/ Text is not empty. 278 | -- 279 | -- /Complexity:/ \(\Theta(n)\) 280 | -- 281 | -- @since 1.0.1 282 | foldl1 :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 283 | foldl1 = coerce BS.foldl1 284 | 285 | -- | Left-associative fold of a text without a base case, strict in the 286 | -- accumulator. 287 | -- 288 | -- /Requirements:/ Text is not empty. 289 | -- 290 | -- /Complexity:/ \(\Theta(n)\) 291 | -- 292 | -- @since 1.0.1 293 | foldl1' :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 294 | foldl1' = coerce BS.foldl1' 295 | 296 | -- | Right-associative fold of a text without a base case. 297 | -- 298 | -- /Requirements:/ Text is not empty. 299 | -- 300 | -- /Complexity:/ \(\Theta(n)\) 301 | -- 302 | -- @since 1.0.1 303 | foldr1 :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 304 | foldr1 = coerce BS.foldr1 305 | 306 | -- | Right-associative fold of a text without a base case, strict in the 307 | -- accumulator. 308 | -- 309 | -- /Requirements:/ Text is not empty. 310 | -- 311 | -- /Complexity:/ \(\Theta(n)\) 312 | -- 313 | -- @since 1.0.1 314 | foldr1' :: (AsciiChar -> AsciiChar -> AsciiChar) -> Unsafe AsciiText -> AsciiChar 315 | foldr1' = coerce BS.foldr1' 316 | 317 | -- | Yield the character in the text whose byte representation is numerically 318 | -- the largest. 319 | -- 320 | -- /Requirements:/ Text is not empty. 321 | -- 322 | -- >>> maximum . Unsafe $ [ascii| "catboy" |] 323 | -- '0x79' 324 | -- >>> maximum . 
Unsafe $ [ascii| "nyan~" |] 325 | -- '0x7e' 326 | -- 327 | -- /Complexity:/ \(\Theta(n)\) 328 | -- 329 | -- @since 1.0.1 330 | maximum :: Unsafe AsciiText -> AsciiChar 331 | maximum = coerce BS.maximum 332 | 333 | -- | Yield the character in the text whose byte representation is numerically 334 | -- the smallest. 335 | -- 336 | -- /Requirements:/ Text is not empty. 337 | -- 338 | -- >>> minimum . Unsafe $ [ascii| "catboy" |] 339 | -- '0x61' 340 | -- >>> minimum . Unsafe $ [ascii| " nyan" |] 341 | -- '0x20' 342 | -- 343 | -- /Complexity:/ \(\Theta(n)\) 344 | -- 345 | -- @since 1.0.1 346 | minimum :: Unsafe AsciiText -> AsciiChar 347 | minimum = coerce BS.minimum 348 | 349 | -- | 'scanl1' is similar to 'foldl1', but returns a text of successive values 350 | -- from the left. 351 | -- 352 | -- /Requirements:/ Text is not empty. 353 | -- 354 | -- /Complexity:/ \(\Theta(n)\) 355 | -- 356 | -- @since 1.0.1 357 | scanl1 :: 358 | -- | accumulator -> element -> new accumulator 359 | (AsciiChar -> AsciiChar -> AsciiChar) -> 360 | -- | Input of length \(n\) 361 | Unsafe AsciiText -> 362 | -- | Output of length \(n\) 363 | Unsafe AsciiText 364 | scanl1 = coerce BS.scanl1 365 | 366 | -- | 'scanr1' is similar to 'foldr1', but returns a text of successive values 367 | -- from the right. 368 | -- 369 | -- /Requirements:/ Text is not empty. 370 | -- 371 | -- /Complexity:/ \(\Theta(n)\) 372 | -- 373 | -- @since 1.0.1 374 | scanr1 :: 375 | -- | element -> accumulator -> new accumulator 376 | (AsciiChar -> AsciiChar -> AsciiChar) -> 377 | -- | Input of length \(n\) 378 | Unsafe AsciiText -> 379 | -- | Output of length \(n\) 380 | Unsafe AsciiText 381 | scanr1 = coerce BS.scanr1 382 | 383 | -- | Yield the character at the given position. 384 | -- 385 | -- /Requirements:/ The position must be at least 0, and at most the length of 386 | -- the text - 1. 
387 | -- 388 | -- >>> index (Unsafe [ascii| "catboy" |]) 0 389 | -- '0x63' 390 | -- >>> index (Unsafe $ [ascii| "catboy" |]) 4 391 | -- '0x6f' 392 | -- 393 | -- /Complexity:/ \(\Theta(1)\) 394 | -- 395 | -- @since 1.0.1 396 | index :: Unsafe AsciiText -> Int -> AsciiChar 397 | index = coerce BS.index 398 | 399 | -- Helpers 400 | 401 | fromSecondDigit :: Char -> ReadPrec Word8 402 | fromSecondDigit = \case 403 | '0' -> pure 0 404 | '1' -> pure 1 405 | '2' -> pure 2 406 | '3' -> pure 3 407 | '4' -> pure 4 408 | '5' -> pure 5 409 | '6' -> pure 6 410 | '7' -> pure 7 411 | '8' -> pure 8 412 | '9' -> pure 9 413 | 'a' -> pure 10 414 | 'b' -> pure 11 415 | 'c' -> pure 12 416 | 'd' -> pure 13 417 | 'e' -> pure 14 418 | 'f' -> pure 15 419 | d -> fail $ "Expected hex digit, instead got '" <> [d] <> "'" 420 | -------------------------------------------------------------------------------- /text-ascii.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 3.0 2 | name: text-ascii 3 | version: 1.2.1 4 | synopsis: ASCII string and character processing. 5 | description: 6 | A total-by-default, tested and documented library for 7 | working with ASCII text. Low on dependencies, high on usability. 
8 | 9 | homepage: https://github.com/haskell-text/text-ascii 10 | license: Apache-2.0 11 | license-file: LICENSE.md 12 | author: Koz Ross 13 | maintainer: koz.ross@retro-freedom.nz 14 | bug-reports: https://github.com/haskell-text/text-ascii/issues 15 | copyright: (C) Koz Ross 2021-3 16 | category: Text 17 | tested-with: GHC ==9.4.8 || ==9.6.6 || ==9.8.4 || ==9.10.1 18 | build-type: Simple 19 | extra-source-files: 20 | CHANGELOG.md 21 | README.md 22 | 23 | library 24 | exposed-modules: 25 | Text.Ascii 26 | Text.Ascii.Char 27 | Text.Ascii.Internal 28 | Text.Ascii.QQ 29 | Text.Ascii.Unsafe 30 | 31 | build-depends: 32 | , base >=4.17 && <5 33 | , bytestring ^>=0.12 34 | , case-insensitive ^>=1.2 35 | , deepseq >=1.4.8 && <1.6.0 36 | , hashable ^>=1.4 37 | , megaparsec ^>=9.6 38 | , monoid-subclasses ^>=1.2 39 | , optics-core ^>=0.4 40 | , optics-extra ^>=0.4 41 | , template-haskell >=2.19 && <3.0 42 | , text ^>=2.1 43 | 44 | ghc-options: 45 | -Wall -Wcompat -Wincomplete-record-updates 46 | -Wincomplete-uni-patterns -Wredundant-constraints 47 | -Wmissing-deriving-strategies 48 | 49 | hs-source-dirs: src 50 | default-language: Haskell2010 51 | --------------------------------------------------------------------------------