├── .github
    └── workflows
    │   └── go.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── common.go
├── convert.go
├── convert_test.go
├── count.go
├── count_test.go
├── doc.go
├── format.go
├── format_test.go
├── go.mod
├── manipulate.go
├── manipulate_test.go
├── stringbuilder.go
├── stringbuilder_go110.go
├── translate.go
├── translate_test.go
└── util_test.go


/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [master]
 6 |   pull_request:
 7 |     branches: [master]
 8 | 
 9 | jobs:
10 |   build:
11 |     name: Build
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - name: Set up Go 1.x
15 |         uses: actions/setup-go@v2
16 |         with:
17 |           go-version: 1.17
18 | 
19 |       - name: Check out code into the Go module directory
20 |         uses: actions/checkout@v2
21 | 
22 |       - name: Get dependencies
23 |         run: |
24 |           go mod download
25 |           go get
26 | 
27 |       - name: Test
28 |         run: go test -v -coverprofile=covprofile.cov ./...
29 | 
30 |       - name: Send coverage
31 |         env:
32 |           COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
33 |         run: |
34 |           go get github.com/mattn/goveralls
35 |           go install github.com/mattn/goveralls
36 |           goveralls -coverprofile=covprofile.cov -service=github
37 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects)
 2 | *.o
 3 | *.a
 4 | *.so
 5 | 
 6 | # Folders
 7 | _obj
 8 | _test
 9 | 
10 | # Architecture specific extensions/prefixes
11 | *.[568vq]
12 | [568vq].out
13 | 
14 | *.cgo1.go
15 | *.cgo2.c
16 | _cgo_defun.c
17 | _cgo_gotypes.go
18 | _cgo_export.*
19 | 
20 | _testmain.go
21 | 
22 | *.exe
23 | *.test
24 | *.prof
25 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing #
 2 | 
 3 | Thanks in advance for your contribution. Whatever you contribute to this project, be it a pull request, a bug report or a feature discussion, it is always highly appreciated.
 4 | 
 5 | ## New API or feature ##
 6 | 
 7 | I want to speak more about how to add new functions to this package.
 8 | 
 9 | Package `xstrings` is a collection of useful string functions which should be implemented in Go. It's a bit subjective to say which functions should be included and which should not. I have set up the following rules to make the decision as clear and objective as possible.
10 | 
11 | * Rule 1: Only string algorithms that take a string as input can be included.
12 | * Rule 2: If a function has been implemented in package `strings`, it must not be included.
13 | * Rule 3: If a function is not language neutral, it must not be included.
14 | * Rule 4: If a function is a part of standard library in other languages, it can be included.
15 | * Rule 5: If a function is quite useful in some famous framework or library, it can be included.
16 | 
17 | New functions must be discussed in the project issues before any code is submitted. A pull request that adds new functions without referencing an issue will be rejected.
18 | 
19 | ## Pull request ##
20 | 
21 | Pull requests are always welcome. Just make sure you have run `go fmt` and that all test cases pass before submitting.
22 | 
23 | If the pull request adds a new API or feature, don't forget to update README.md and add the new API to the function list.
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Huan Du
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # xstrings
  2 | 
  3 | [![Build Status](https://github.com/huandu/xstrings/workflows/Go/badge.svg)](https://github.com/huandu/xstrings/actions)
  4 | [![Go Doc](https://godoc.org/github.com/huandu/xstrings?status.svg)](https://pkg.go.dev/github.com/huandu/xstrings)
  5 | [![Go Report](https://goreportcard.com/badge/github.com/huandu/xstrings)](https://goreportcard.com/report/github.com/huandu/xstrings)
  6 | [![Coverage Status](https://coveralls.io/repos/github/huandu/xstrings/badge.svg?branch=master)](https://coveralls.io/github/huandu/xstrings?branch=master)
  7 | 
  8 | Go package [xstrings](https://godoc.org/github.com/huandu/xstrings) is a collection of string functions, which are widely used in other languages but absent in Go package [strings](http://golang.org/pkg/strings).
  9 | 
 10 | All functions are well tested and carefully tuned for performance.
 11 | 
 12 | ## Propose a new function
 13 | 
 14 | Please review [contributing guideline](CONTRIBUTING.md) and [create new issue](https://github.com/huandu/xstrings/issues) to state why it should be included.
 15 | 
 16 | ## Install
 17 | 
 18 | Use `go get` to install this library.
 19 | 
 20 |     go get github.com/huandu/xstrings
 21 | 
 22 | ## API document
 23 | 
 24 | See [GoDoc](https://godoc.org/github.com/huandu/xstrings) for the full documentation.
 25 | 
 26 | ## Function list
 27 | 
 28 | Go functions have a unique naming style. Someone who has experience in other languages but is new to Go may have difficulty finding the right string function to use.
 29 | 
 30 | Here is a list of functions in [strings](http://golang.org/pkg/strings) and [xstrings](https://godoc.org/github.com/huandu/xstrings) with enough extra information about how to map these functions to their friends in other languages. Hopefully this list is helpful for new gophers.
 31 | 
 32 | ### Package `xstrings` functions
 33 | 
 34 | _Keep this table sorted by Function in ascending order._
 35 | 
 36 | | Function                                                                          | Friends                                                                         | #                                                   |
 37 | | --------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- | --------------------------------------------------- |
 38 | | [Center](https://godoc.org/github.com/huandu/xstrings#Center)                     | `str.center` in Python; `String#center` in Ruby                                 | [#30](https://github.com/huandu/xstrings/issues/30) |
 39 | | [Count](https://godoc.org/github.com/huandu/xstrings#Count)                       | `String#count` in Ruby                                                          | [#16](https://github.com/huandu/xstrings/issues/16) |
 40 | | [Delete](https://godoc.org/github.com/huandu/xstrings#Delete)                     | `String#delete` in Ruby                                                         | [#17](https://github.com/huandu/xstrings/issues/17) |
 41 | | [ExpandTabs](https://godoc.org/github.com/huandu/xstrings#ExpandTabs)             | `str.expandtabs` in Python                                                      | [#27](https://github.com/huandu/xstrings/issues/27) |
 42 | | [FirstRuneToLower](https://godoc.org/github.com/huandu/xstrings#FirstRuneToLower) | `lcfirst` in PHP or Perl                                                         | [#15](https://github.com/huandu/xstrings/issues/15) |
 43 | | [FirstRuneToUpper](https://godoc.org/github.com/huandu/xstrings#FirstRuneToUpper) | `String#capitalize` in Ruby; `ucfirst` in PHP or Perl                            | [#15](https://github.com/huandu/xstrings/issues/15) |
 44 | | [Insert](https://godoc.org/github.com/huandu/xstrings#Insert)                     | `String#insert` in Ruby                                                         | [#18](https://github.com/huandu/xstrings/issues/18) |
 45 | | [LastPartition](https://godoc.org/github.com/huandu/xstrings#LastPartition)       | `str.rpartition` in Python; `String#rpartition` in Ruby                         | [#19](https://github.com/huandu/xstrings/issues/19) |
 46 | | [LeftJustify](https://godoc.org/github.com/huandu/xstrings#LeftJustify)           | `str.ljust` in Python; `String#ljust` in Ruby                                   | [#28](https://github.com/huandu/xstrings/issues/28) |
 47 | | [Len](https://godoc.org/github.com/huandu/xstrings#Len)                           | `mb_strlen` in PHP                                                              | [#23](https://github.com/huandu/xstrings/issues/23) |
 48 | | [Partition](https://godoc.org/github.com/huandu/xstrings#Partition)               | `str.partition` in Python; `String#partition` in Ruby                           | [#10](https://github.com/huandu/xstrings/issues/10) |
 49 | | [Reverse](https://godoc.org/github.com/huandu/xstrings#Reverse)                   | `String#reverse` in Ruby; `strrev` in PHP; `reverse` in Perl                    | [#7](https://github.com/huandu/xstrings/issues/7)   |
 50 | | [RightJustify](https://godoc.org/github.com/huandu/xstrings#RightJustify)         | `str.rjust` in Python; `String#rjust` in Ruby                                   | [#29](https://github.com/huandu/xstrings/issues/29) |
 51 | | [RuneWidth](https://godoc.org/github.com/huandu/xstrings#RuneWidth)               | -                                                                               | [#27](https://github.com/huandu/xstrings/issues/27) |
 52 | | [Scrub](https://godoc.org/github.com/huandu/xstrings#Scrub)                       | `String#scrub` in Ruby                                                          | [#20](https://github.com/huandu/xstrings/issues/20) |
 53 | | [Shuffle](https://godoc.org/github.com/huandu/xstrings#Shuffle)                     | `str_shuffle` in PHP                                                             | [#13](https://github.com/huandu/xstrings/issues/13) |
 54 | | [ShuffleSource](https://godoc.org/github.com/huandu/xstrings#ShuffleSource)         | `str_shuffle` in PHP                                                             | [#13](https://github.com/huandu/xstrings/issues/13) |
 55 | | [Slice](https://godoc.org/github.com/huandu/xstrings#Slice)                       | `mb_substr` in PHP                                                              | [#9](https://github.com/huandu/xstrings/issues/9)   |
 56 | | [Squeeze](https://godoc.org/github.com/huandu/xstrings#Squeeze)                   | `String#squeeze` in Ruby                                                        | [#11](https://github.com/huandu/xstrings/issues/11) |
 57 | | [Successor](https://godoc.org/github.com/huandu/xstrings#Successor)               | `String#succ` or `String#next` in Ruby                                          | [#22](https://github.com/huandu/xstrings/issues/22) |
 58 | | [SwapCase](https://godoc.org/github.com/huandu/xstrings#SwapCase)                 | `str.swapcase` in Python; `String#swapcase` in Ruby                             | [#12](https://github.com/huandu/xstrings/issues/12) |
 59 | | [ToCamelCase](https://godoc.org/github.com/huandu/xstrings#ToCamelCase)           | `String#camelize` in RoR                                                        | [#1](https://github.com/huandu/xstrings/issues/1)   |
 60 | | [ToKebabCase](https://godoc.org/github.com/huandu/xstrings#ToKebabCase)           | -                                                                               | [#41](https://github.com/huandu/xstrings/issues/41) |
 61 | | [ToPascalCase](https://godoc.org/github.com/huandu/xstrings#ToPascalCase)         | -                                                                               | [#1](https://github.com/huandu/xstrings/issues/1)   |
 62 | | [ToSnakeCase](https://godoc.org/github.com/huandu/xstrings#ToSnakeCase)           | `String#underscore` in RoR                                                      | [#1](https://github.com/huandu/xstrings/issues/1)   |
 63 | | [Translate](https://godoc.org/github.com/huandu/xstrings#Translate)               | `str.translate` in Python; `String#tr` in Ruby; `strtr` in PHP; `tr///` in Perl | [#21](https://github.com/huandu/xstrings/issues/21) |
 64 | | [Width](https://godoc.org/github.com/huandu/xstrings#Width)                       | `mb_strwidth` in PHP                                                            | [#26](https://github.com/huandu/xstrings/issues/26) |
 65 | | [WordCount](https://godoc.org/github.com/huandu/xstrings#WordCount)               | `str_word_count` in PHP                                                         | [#14](https://github.com/huandu/xstrings/issues/14) |
 66 | | [WordSplit](https://godoc.org/github.com/huandu/xstrings#WordSplit)               | -                                                                               | [#14](https://github.com/huandu/xstrings/issues/14) |
 67 | 
 68 | ### Package `strings` functions
 69 | 
 70 | _Keep this table sorted by Function in ascending order._
 71 | 
 72 | | Function                                                        | Friends                                                                             |
 73 | | --------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
 74 | | [Contains](http://golang.org/pkg/strings/#Contains)             | `String#include?` in Ruby                                                           |
 75 | | [ContainsAny](http://golang.org/pkg/strings/#ContainsAny)       | -                                                                                   |
 76 | | [ContainsRune](http://golang.org/pkg/strings/#ContainsRune)     | -                                                                                   |
 77 | | [Count](http://golang.org/pkg/strings/#Count)                   | `str.count` in Python; `substr_count` in PHP                                        |
 78 | | [EqualFold](http://golang.org/pkg/strings/#EqualFold)           | `stricmp` in PHP; `String#casecmp` in Ruby                                          |
 79 | | [Fields](http://golang.org/pkg/strings/#Fields)                 | `str.split` in Python; `split` in Perl; `String#split` in Ruby                      |
 80 | | [FieldsFunc](http://golang.org/pkg/strings/#FieldsFunc)         | -                                                                                   |
 81 | | [HasPrefix](http://golang.org/pkg/strings/#HasPrefix)           | `str.startswith` in Python; `String#start_with?` in Ruby                            |
 82 | | [HasSuffix](http://golang.org/pkg/strings/#HasSuffix)           | `str.endswith` in Python; `String#end_with?` in Ruby                                |
 83 | | [Index](http://golang.org/pkg/strings/#Index)                   | `str.index` in Python; `String#index` in Ruby; `strpos` in PHP; `index` in Perl     |
 84 | | [IndexAny](http://golang.org/pkg/strings/#IndexAny)             | -                                                                                   |
 85 | | [IndexByte](http://golang.org/pkg/strings/#IndexByte)           | -                                                                                   |
 86 | | [IndexFunc](http://golang.org/pkg/strings/#IndexFunc)           | -                                                                                   |
 87 | | [IndexRune](http://golang.org/pkg/strings/#IndexRune)           | -                                                                                   |
 88 | | [Join](http://golang.org/pkg/strings/#Join)                     | `str.join` in Python; `Array#join` in Ruby; `implode` in PHP; `join` in Perl        |
 89 | | [LastIndex](http://golang.org/pkg/strings/#LastIndex)           | `str.rindex` in Python; `String#rindex` in Ruby; `strrpos` in PHP; `rindex` in Perl |
 90 | | [LastIndexAny](http://golang.org/pkg/strings/#LastIndexAny)     | -                                                                                   |
 91 | | [LastIndexFunc](http://golang.org/pkg/strings/#LastIndexFunc)   | -                                                                                   |
 92 | | [Map](http://golang.org/pkg/strings/#Map)                       | `String#each_codepoint` in Ruby                                                     |
 93 | | [Repeat](http://golang.org/pkg/strings/#Repeat)                 | operator `*` in Python and Ruby; `str_repeat` in PHP                                |
 94 | | [Replace](http://golang.org/pkg/strings/#Replace)               | `str.replace` in Python; `String#sub` in Ruby; `str_replace` in PHP                 |
 95 | | [Split](http://golang.org/pkg/strings/#Split)                   | `str.split` in Python; `String#split` in Ruby; `explode` in PHP; `split` in Perl    |
 96 | | [SplitAfter](http://golang.org/pkg/strings/#SplitAfter)         | -                                                                                   |
 97 | | [SplitAfterN](http://golang.org/pkg/strings/#SplitAfterN)       | -                                                                                   |
 98 | | [SplitN](http://golang.org/pkg/strings/#SplitN)                 | `str.split` in Python; `String#split` in Ruby; `explode` in PHP; `split` in Perl    |
 99 | | [Title](http://golang.org/pkg/strings/#Title)                   | `str.title` in Python                                                               |
100 | | [ToLower](http://golang.org/pkg/strings/#ToLower)               | `str.lower` in Python; `String#downcase` in Ruby; `strtolower` in PHP; `lc` in Perl |
101 | | [ToLowerSpecial](http://golang.org/pkg/strings/#ToLowerSpecial) | -                                                                                   |
102 | | [ToTitle](http://golang.org/pkg/strings/#ToTitle)               | -                                                                                   |
103 | | [ToTitleSpecial](http://golang.org/pkg/strings/#ToTitleSpecial) | -                                                                                   |
104 | | [ToUpper](http://golang.org/pkg/strings/#ToUpper)               | `str.upper` in Python; `String#upcase` in Ruby; `strtoupper` in PHP; `uc` in Perl   |
105 | | [ToUpperSpecial](http://golang.org/pkg/strings/#ToUpperSpecial) | -                                                                                   |
106 | | [Trim](http://golang.org/pkg/strings/#Trim)                     | `str.strip` in Python; `String#strip` in Ruby; `trim` in PHP                        |
107 | | [TrimFunc](http://golang.org/pkg/strings/#TrimFunc)             | -                                                                                   |
108 | | [TrimLeft](http://golang.org/pkg/strings/#TrimLeft)             | `str.lstrip` in Python; `String#lstrip` in Ruby; `ltrim` in PHP                     |
109 | | [TrimLeftFunc](http://golang.org/pkg/strings/#TrimLeftFunc)     | -                                                                                   |
110 | | [TrimPrefix](http://golang.org/pkg/strings/#TrimPrefix)         | -                                                                                   |
111 | | [TrimRight](http://golang.org/pkg/strings/#TrimRight)           | `str.rstrip` in Python; `String#rstrip` in Ruby; `rtrim` in PHP                     |
112 | | [TrimRightFunc](http://golang.org/pkg/strings/#TrimRightFunc)   | -                                                                                   |
113 | | [TrimSpace](http://golang.org/pkg/strings/#TrimSpace)           | `str.strip` in Python; `String#strip` in Ruby; `trim` in PHP                        |
114 | | [TrimSuffix](http://golang.org/pkg/strings/#TrimSuffix)         | `String#chomp` in Ruby; `chomp` in Perl                                             |
115 | 
116 | ## License
117 | 
118 | This library is licensed under the MIT license. See LICENSE for details.
119 | 


--------------------------------------------------------------------------------
/common.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 Huan Du. All rights reserved.
 2 | // Licensed under the MIT license that can be found in the LICENSE file.
 3 | 
 4 | package xstrings
 5 | 
 6 | const bufferMaxInitGrowSize = 2048 // cap, in bytes, on the capacity allocBuffer reserves up front.
 7 | 
 8 | // allocBuffer returns a new builder that already holds orig minus its last len(cur) bytes.
 9 | func allocBuffer(orig, cur string) *stringBuilder {
10 | 	// Reserve four bytes per input byte, but never more than the cap in one go.
11 | 	reserve := 4 * len(orig)
12 | 	if reserve > bufferMaxInitGrowSize {
13 | 		reserve = bufferMaxInitGrowSize
14 | 	}
15 | 	// Presumably cur is the still-unprocessed tail of orig (callers are not visible here).
16 | 	consumed := orig[:len(orig)-len(cur)]
17 | 	builder := new(stringBuilder)
18 | 	builder.Grow(reserve)
19 | 	builder.WriteString(consumed)
20 | 	return builder
21 | }
22 | 


--------------------------------------------------------------------------------
/convert.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"math/rand"
  8 | 	"unicode"
  9 | 	"unicode/utf8"
 10 | )
 11 | 
 12 | // ToCamelCase converts words separated by white space, underscores or hyphens to camel case.
 13 | //
 14 | // Leading connectors are kept; the connector right before a later word is dropped. Some samples.
 15 | //
 16 | //	"some_words"      => "someWords"
 17 | //	"http_server"     => "httpServer"
 18 | //	"no_https"        => "noHttps"
 19 | //	"_complex__case_" => "_complex_Case_"
 20 | //	"some words"      => "someWords"
 21 | //	"GOLANG_IS_GREAT" => "golangIsGreat"
 22 | func ToCamelCase(str string) string {
 23 | 	return toCamelCase(str, false) // false: the first word starts with a lower-case rune.
 24 | }
 25 | 
 26 | // ToPascalCase converts words separated by white space, underscores or hyphens to pascal case.
 27 | //
 28 | // Leading connectors are kept; the connector right before a later word is dropped. Some samples.
 29 | //
 30 | //	"some_words"      => "SomeWords"
 31 | //	"http_server"     => "HttpServer"
 32 | //	"no_https"        => "NoHttps"
 33 | //	"_complex__case_" => "_Complex_Case_"
 34 | //	"some words"      => "SomeWords"
 35 | //	"GOLANG_IS_GREAT" => "GolangIsGreat"
 36 | func ToPascalCase(str string) string {
 37 | 	return toCamelCase(str, true) // true: the first word starts with an upper-case rune.
 38 | }
 39 | 
 40 | func toCamelCase(str string, isBig bool) string { // shared worker for ToCamelCase (isBig == false) and ToPascalCase (isBig == true).
 41 | 	if len(str) == 0 {
 42 | 		return ""
 43 | 	}
 44 | 
 45 | 	buf := &stringBuilder{}
 46 | 	var isFirstRuneUpper bool // true while inside the run of upper-case runes that opened the current word.
 47 | 	var r0, r1 rune           // r0 is the rune just decoded, r1 the one before it; runes are written one step late.
 48 | 	var size int
 49 | 
 50 | 	// Leading connectors are copied to the output as they are; stop at the first other rune.
 51 | 	for len(str) > 0 {
 52 | 		r0, size = utf8.DecodeRuneInString(str)
 53 | 		str = str[size:]
 54 | 
 55 | 		if !isConnector(r0) {
 56 | 			isFirstRuneUpper = unicode.IsUpper(r0)
 57 | 
 58 | 			if isBig {
 59 | 				r0 = unicode.ToUpper(r0)
 60 | 			} else {
 61 | 				r0 = unicode.ToLower(r0)
 62 | 			}
 63 | 
 64 | 			break
 65 | 		}
 66 | 
 67 | 		buf.WriteRune(r0)
 68 | 	}
 69 | 
 70 | 	if len(str) == 0 {
 71 | 		// Input exhausted. A connector was already written by the loop above; writing it again would turn "_" into "__".
 72 | 		if !isConnector(r0) {
 73 | 			buf.WriteRune(r0)
 74 | 		}
 75 | 
 76 | 		return buf.String()
 77 | 	}
 78 | 
 79 | 	for len(str) > 0 {
 80 | 		r1 = r0
 81 | 		r0, size = utf8.DecodeRuneInString(str)
 82 | 		str = str[size:]
 83 | 
 84 | 		if isConnector(r0) && isConnector(r1) { // two connectors in a row: the earlier one is kept.
 85 | 			buf.WriteRune(r1)
 86 | 			continue
 87 | 		}
 88 | 
 89 | 		if isConnector(r1) { // r0 opens a new word: the connector r1 is not written and r0 is upper-cased.
 90 | 			isFirstRuneUpper = unicode.IsUpper(r0)
 91 | 			r0 = unicode.ToUpper(r0)
 92 | 		} else {
 93 | 			if isFirstRuneUpper { // lower-case the rest of an upper-case run, e.g. "GOLANG" => "Golang".
 94 | 				if unicode.IsUpper(r0) {
 95 | 					r0 = unicode.ToLower(r0)
 96 | 				} else {
 97 | 					isFirstRuneUpper = false
 98 | 				}
 99 | 			}
100 | 
101 | 			buf.WriteRune(r1)
102 | 		}
103 | 	}
104 | 
105 | 	if isFirstRuneUpper && !isBig { // NOTE(review): only changes r0 when the last rune directly follows a connector and was upper case ("a_B" => "ab"); confirm this is intended.
106 | 		r0 = unicode.ToLower(r0)
107 | 	}
108 | 
109 | 	buf.WriteRune(r0)
110 | 	return buf.String()
111 | }
112 | 
113 | // ToSnakeCase splits str into words, lower-cases the words that start with
114 | // an upper-case letter and joins the words with underscores.
115 | //
116 | // Some samples.
117 | //
118 | //	"FirstName"    => "first_name"
119 | //	"HTTPServer"   => "http_server"
120 | //	"NoHTTPS"      => "no_https"
121 | //	"GO_PATH"      => "go_path"
122 | //	"GO PATH"      => "go_path"  // space is converted to underscore.
123 | //	"GO-PATH"      => "go_path"  // hyphen is converted to underscore.
124 | //	"http2xx"      => "http_2xx" // insert an underscore before a number and after a letter.
125 | //	"HTTP20xOK"    => "http_20x_ok"
126 | //	"Duration2m3s" => "duration_2m3s"
127 | //	"Bld4Floor3rd" => "bld4_floor_3rd"
128 | func ToSnakeCase(str string) string {
129 | 	return camelCaseToLowerCase(str, '_')
130 | }
131 | 
132 | // ToKebabCase splits str into words, lower-cases the words that start with
133 | // an upper-case letter and joins the words with hyphens.
134 | //
135 | // Some samples.
136 | //
137 | //	"FirstName"    => "first-name"
138 | //	"HTTPServer"   => "http-server"
139 | //	"NoHTTPS"      => "no-https"
140 | //	"GO_PATH"      => "go-path"
141 | //	"GO PATH"      => "go-path"  // space is converted to '-'.
142 | //	"GO-PATH"      => "go-path"  // hyphen is kept as '-'.
143 | //	"http2xx"      => "http-2xx" // insert a hyphen before a number and after a letter.
144 | //	"HTTP20xOK"    => "http-20x-ok"
145 | //	"Duration2m3s" => "duration-2m3s"
146 | //	"Bld4Floor3rd" => "bld4-floor-3rd"
147 | func ToKebabCase(str string) string {
148 | 	return camelCaseToLowerCase(str, '-')
149 | }
150 | 
151 | func camelCaseToLowerCase(str string, connector rune) string { // shared worker for ToSnakeCase and ToKebabCase; connector joins the words.
152 | 	if len(str) == 0 {
153 | 		return ""
154 | 	}
155 | 
156 | 	buf := &stringBuilder{}
157 | 	wt, word, remaining := nextWord(str) // wt and word describe the word to be handled next.
158 | 
159 | 	for len(remaining) > 0 {
160 | 		if wt != connectorWord { // connector runs are written in the switch below instead.
161 | 			toLower(buf, wt, word, connector)
162 | 		}
163 | 
164 | 		prev := wt
165 | 		last := word
166 | 		wt, word, remaining = nextWord(remaining)
167 | 
168 | 		switch prev { // decide what goes between the word just handled (prev) and the next one (wt).
169 | 		case numberWord:
170 | 			for wt == alphabetWord || wt == numberWord { // glue alphabet/number words that follow a number, e.g. "2m3s".
171 | 				toLower(buf, wt, word, connector)
172 | 				wt, word, remaining = nextWord(remaining)
173 | 			}
174 | 
175 | 			if wt != invalidWord && wt != punctWord && wt != connectorWord {
176 | 				buf.WriteRune(connector)
177 | 			}
178 | 
179 | 		case connectorWord:
180 | 			toLower(buf, prev, last, connector) // every rune of the connector run is written as connector.
181 | 
182 | 		case punctWord:
183 | 			// Punctuation was already written at the top of the loop; no connector is added after it.
184 | 
185 | 		default:
186 | 			if wt != numberWord {
187 | 				if wt != connectorWord && wt != punctWord {
188 | 					buf.WriteRune(connector)
189 | 				}
190 | 
191 | 				break
192 | 			}
193 | 
194 | 			if len(remaining) == 0 { // the number is the last word: the final toLower below appends it without a connector.
195 | 				break
196 | 			}
197 | 
198 | 			last := word // shadows the outer last: this is the number word being looked past.
199 | 			wt, word, remaining = nextWord(remaining)
200 | 
201 | 			// If no alphabetWord follows the number, the number is appended to the previous word.
202 | 			// e.g. "Bld4Floor" => "bld4_floor"
203 | 			if wt != alphabetWord {
204 | 				toLower(buf, numberWord, last, connector)
205 | 
206 | 				if wt != connectorWord && wt != punctWord {
207 | 					buf.WriteRune(connector)
208 | 				}
209 | 
210 | 				break
211 | 			}
212 | 
213 | 			// An alphabetWord follows the number, so the number starts a new word:
214 | 			// write the connector first, then the number.
215 | 			// e.g. "HTTP2xx" => "http_2xx"
216 | 			buf.WriteRune(connector)
217 | 			toLower(buf, numberWord, last, connector)
218 | 
219 | 			for wt == alphabetWord || wt == numberWord {
220 | 				toLower(buf, wt, word, connector)
221 | 				wt, word, remaining = nextWord(remaining)
222 | 			}
223 | 
224 | 			if wt != invalidWord && wt != connectorWord && wt != punctWord {
225 | 				buf.WriteRune(connector)
226 | 			}
227 | 		}
228 | 	}
229 | 
230 | 	toLower(buf, wt, word, connector) // the loop leaves the final word unwritten.
231 | 	return buf.String()
232 | }
233 | 
234 | func isConnector(r rune) bool { // reports whether r separates words: Unicode white space, '_' or '-'.
235 | 	return unicode.IsSpace(r) || r == '_' || r == '-'
236 | }
237 | 
238 | type wordType int // wordType is the category nextWord assigns to a word.
239 | 
240 | const ( // in order: undecodable or empty input, number runs, words led by an upper-case rune, isAlphabet runs, connector runs, punctuation runs, anything else.
241 | 	invalidWord wordType = iota
242 | 	numberWord
243 | 	upperCaseWord
244 | 	alphabetWord
245 | 	connectorWord
246 | 	punctWord
247 | 	otherWord
248 | )
249 | 
250 | func nextWord(str string) (wt wordType, word, remaining string) { // cuts the first word off str: word is that prefix, wt its category, remaining the rest.
251 | 	if len(str) == 0 {
252 | 		return // all results keep their zero values: invalidWord and two empty strings.
253 | 	}
254 | 
255 | 	var offset int // number of leading bytes of str that belong to word.
256 | 	remaining = str
257 | 	r, size := nextValidRune(remaining, utf8.RuneError) // size also counts any undecodable bytes skipped before r.
258 | 	offset += size
259 | 
260 | 	if r == utf8.RuneError { // nothing decodable was found: the whole of str is one invalid word.
261 | 		wt = invalidWord
262 | 		word = str[:offset]
263 | 		remaining = str[offset:]
264 | 		return
265 | 	}
266 | 
267 | 	switch {
268 | 	case isConnector(r): // a run of connector runes.
269 | 		wt = connectorWord
270 | 		remaining = remaining[size:]
271 | 
272 | 		for len(remaining) > 0 {
273 | 			r, size = nextValidRune(remaining, r) // passing r as the fallback lets trailing undecodable bytes join the current run.
274 | 
275 | 			if !isConnector(r) {
276 | 				break
277 | 			}
278 | 
279 | 			offset += size
280 | 			remaining = remaining[size:]
281 | 		}
282 | 
283 | 	case unicode.IsPunct(r): // a run of punctuation; '-' and '_' never get here because the connector case comes first.
284 | 		wt = punctWord
285 | 		remaining = remaining[size:]
286 | 
287 | 		for len(remaining) > 0 {
288 | 			r, size = nextValidRune(remaining, r)
289 | 
290 | 			if !unicode.IsPunct(r) {
291 | 				break
292 | 			}
293 | 
294 | 			offset += size
295 | 			remaining = remaining[size:]
296 | 		}
297 | 
298 | 	case unicode.IsUpper(r): // one upper-case rune, followed by more upper-case runes or by a non-upper-case isAlphabet tail.
299 | 		wt = upperCaseWord
300 | 		remaining = remaining[size:]
301 | 
302 | 		if len(remaining) == 0 { // a single upper-case rune at the end of str.
303 | 			break
304 | 		}
305 | 
306 | 		r, size = nextValidRune(remaining, r)
307 | 
308 | 		switch {
309 | 		case unicode.IsUpper(r):
310 | 			prevSize := size // byte size of the most recent upper-case rune, in case it has to be handed back.
311 | 			offset += size
312 | 			remaining = remaining[size:]
313 | 
314 | 			for len(remaining) > 0 { // consume the rest of an all-caps run such as "HTTP".
315 | 				r, size = nextValidRune(remaining, r)
316 | 
317 | 				if !unicode.IsUpper(r) {
318 | 					break
319 | 				}
320 | 
321 | 				prevSize = size
322 | 				offset += size
323 | 				remaining = remaining[size:]
324 | 			}
325 | 
326 | 			// A case like "HTTPStatus" needs care: it is expected to be
327 | 			// split into "HTTP" and "Status", so the final upper-case rune
328 | 			// "S" must stay in remaining instead of joining word.
329 | 			if len(remaining) > 0 && isAlphabet(r) { // r is the rune that ended the upper-case run.
330 | 				offset -= prevSize
331 | 				remaining = str[offset:]
332 | 			}
333 | 
334 | 		case isAlphabet(r): // capitalized word such as "Status": consume the tail until isAlphabet fails or an upper-case rune appears.
335 | 			offset += size
336 | 			remaining = remaining[size:]
337 | 
338 | 			for len(remaining) > 0 {
339 | 				r, size = nextValidRune(remaining, r)
340 | 
341 | 				if !isAlphabet(r) || unicode.IsUpper(r) {
342 | 					break
343 | 				}
344 | 
345 | 				offset += size
346 | 				remaining = remaining[size:]
347 | 			}
348 | 		}
349 | 
350 | 	case isAlphabet(r): // a run of isAlphabet runes that contains no upper-case rune.
351 | 		wt = alphabetWord
352 | 		remaining = remaining[size:]
353 | 
354 | 		for len(remaining) > 0 {
355 | 			r, size = nextValidRune(remaining, r)
356 | 
357 | 			if !isAlphabet(r) || unicode.IsUpper(r) {
358 | 				break
359 | 			}
360 | 
361 | 			offset += size
362 | 			remaining = remaining[size:]
363 | 		}
364 | 
365 | 	case unicode.IsNumber(r): // a run of runes satisfying unicode.IsNumber.
366 | 		wt = numberWord
367 | 		remaining = remaining[size:]
368 | 
369 | 		for len(remaining) > 0 {
370 | 			r, size = nextValidRune(remaining, r)
371 | 
372 | 			if !unicode.IsNumber(r) {
373 | 				break
374 | 			}
375 | 
376 | 			offset += size
377 | 			remaining = remaining[size:]
378 | 		}
379 | 
380 | 	default: // anything else; the run ends at the first connector, isAlphabet rune, number or punctuation.
381 | 		wt = otherWord
382 | 		remaining = remaining[size:]
383 | 
384 | 		for len(remaining) > 0 {
385 | 			r, size = nextValidRune(remaining, r)
386 | 
387 | 			if size == 0 || isConnector(r) || isAlphabet(r) || unicode.IsNumber(r) || unicode.IsPunct(r) {
388 | 				break
389 | 			}
390 | 
391 | 			offset += size
392 | 			remaining = remaining[size:]
393 | 		}
394 | 	}
395 | 
396 | 	word = str[:offset]
397 | 	return
398 | }
399 | 
// nextValidRune scans str from the beginning and returns the first decoded
// rune that is not utf8.RuneError, together with the number of bytes consumed
// up to and including that rune (skipped bytes are counted as well).
//
// If str is exhausted without finding such a rune, prev is returned as the
// rune and size is the number of bytes that were skipped.
func nextValidRune(str string, prev rune) (r rune, size int) {
	for size < len(str) {
		decoded, width := utf8.DecodeRuneInString(str[size:])
		size += width

		if decoded != utf8.RuneError {
			return decoded, size
		}
	}

	// Nothing usable was found; fall back to the caller-supplied rune.
	return prev, size
}
417 | 
418 | func toLower(buf *stringBuilder, wt wordType, str string, connector rune) {
419 | 	buf.Grow(buf.Len() + len(str))
420 | 
421 | 	if wt != upperCaseWord && wt != connectorWord {
422 | 		buf.WriteString(str)
423 | 		return
424 | 	}
425 | 
426 | 	for len(str) > 0 {
427 | 		r, size := utf8.DecodeRuneInString(str)
428 | 		str = str[size:]
429 | 
430 | 		if isConnector(r) {
431 | 			buf.WriteRune(connector)
432 | 		} else if unicode.IsUpper(r) {
433 | 			buf.WriteRune(unicode.ToLower(r))
434 | 		} else {
435 | 			buf.WriteRune(r)
436 | 		}
437 | 	}
438 | }
439 | 
440 | // SwapCase will swap characters case from upper to lower or lower to upper.
441 | func SwapCase(str string) string {
442 | 	var r rune
443 | 	var size int
444 | 
445 | 	buf := &stringBuilder{}
446 | 
447 | 	for len(str) > 0 {
448 | 		r, size = utf8.DecodeRuneInString(str)
449 | 
450 | 		switch {
451 | 		case unicode.IsUpper(r):
452 | 			buf.WriteRune(unicode.ToLower(r))
453 | 
454 | 		case unicode.IsLower(r):
455 | 			buf.WriteRune(unicode.ToUpper(r))
456 | 
457 | 		default:
458 | 			buf.WriteRune(r)
459 | 		}
460 | 
461 | 		str = str[size:]
462 | 	}
463 | 
464 | 	return buf.String()
465 | }
466 | 
467 | // FirstRuneToUpper converts first rune to upper case if necessary.
468 | func FirstRuneToUpper(str string) string {
469 | 	if str == "" {
470 | 		return str
471 | 	}
472 | 
473 | 	r, size := utf8.DecodeRuneInString(str)
474 | 
475 | 	if !unicode.IsLower(r) {
476 | 		return str
477 | 	}
478 | 
479 | 	buf := &stringBuilder{}
480 | 	buf.WriteRune(unicode.ToUpper(r))
481 | 	buf.WriteString(str[size:])
482 | 	return buf.String()
483 | }
484 | 
485 | // FirstRuneToLower converts first rune to lower case if necessary.
486 | func FirstRuneToLower(str string) string {
487 | 	if str == "" {
488 | 		return str
489 | 	}
490 | 
491 | 	r, size := utf8.DecodeRuneInString(str)
492 | 
493 | 	if !unicode.IsUpper(r) {
494 | 		return str
495 | 	}
496 | 
497 | 	buf := &stringBuilder{}
498 | 	buf.WriteRune(unicode.ToLower(r))
499 | 	buf.WriteString(str[size:])
500 | 	return buf.String()
501 | }
502 | 
// Shuffle returns a string holding the runes of str in random order.
// Random numbers come from the package-level functions of `math/rand`.
// An empty str is returned as is.
func Shuffle(str string) string {
	if len(str) == 0 {
		return str
	}

	runes := []rune(str)

	// Walk from the last rune towards the front, swapping each position with
	// a randomly chosen position at or before it.
	for last := len(runes) - 1; last > 0; last-- {
		pick := rand.Intn(last + 1)
		runes[last], runes[pick] = runes[pick], runes[last]
	}

	return string(runes)
}
523 | 
// ShuffleSource returns a string holding the runes of str in random order,
// drawing random numbers from a generator built on src.
// An empty str is returned as is, without touching src.
func ShuffleSource(str string, src rand.Source) string {
	if len(str) == 0 {
		return str
	}

	runes := []rune(str)
	rng := rand.New(src)

	// Walk from the last rune towards the front, swapping each position with
	// a randomly chosen position at or before it.
	for last := len(runes) - 1; last > 0; last-- {
		pick := rng.Intn(last + 1)
		runes[last], runes[pick] = runes[pick], runes[last]
	}

	return string(runes)
}
544 | 
// Successor returns the successor to string.
//
// The rightmost alphanumeric rune is increased by 1. If the increment
// generates a "carry" (z -> a, Z -> A, 9 -> 0), the next alphanumeric rune to
// the left is incremented as well. This process repeats until there is no
// carry. If the carry runs off the left end, an additional rune is inserted
// right before the leftmost alphanumeric rune: 'a' or 'A' when that rune is a
// letter, '1' when it is a digit.
//
// If there is no alphanumeric rune, the rightmost rune will be increased by 1
// regardless whether the result is a valid rune or not.
//
// An empty string is returned as is.
//
// Only following characters are alphanumeric.
//   - a - z
//   - A - Z
//   - 0 - 9
//
// Samples (borrowed from ruby's String#succ document):
//
//	"abcd"      => "abce"
//	"THX1138"   => "THX1139"
//	"<<koala>>" => "<<koalb>>"
//	"1999zzz"   => "2000aaa"
//	"ZZZ9999"   => "AAAA0000"
//	"99"        => "100"
//	"***"       => "**+"
func Successor(str string) string {
	if str == "" {
		return str
	}

	// A space means "no pending carry"; any other value is the rune that has
	// to be inserted if the carry is still pending when the scan ends.
	carry := ' '
	runes := []rune(str)
	l := len(runes)

	// Rune index of the leftmost alphanumeric rune modified so far.
	// l means that no alphanumeric rune has been seen.
	lastAlphanumeric := l

	var i int

	for i = l - 1; i >= 0; i-- {
		r := runes[i]

		// No overflow: increment in place and stop scanning.
		if ('a' <= r && r < 'z') ||
			('A' <= r && r < 'Z') ||
			('0' <= r && r < '9') {
			runes[i]++
			carry = ' '
			lastAlphanumeric = i
			break
		}

		// Overflow: wrap around and keep carrying to the left.
		// Non-alphanumeric runes are skipped without touching the carry.
		switch r {
		case 'z':
			runes[i] = 'a'
			carry = 'a'
			lastAlphanumeric = i

		case 'Z':
			runes[i] = 'A'
			carry = 'A'
			lastAlphanumeric = i

		case '9':
			runes[i] = '0'
			carry = '1' // "9" must become "10", not "00".
			lastAlphanumeric = i
		}
	}

	// The scan ran off the left end with a pending carry: insert one rune.
	// lastAlphanumeric is a rune index, so the result is assembled from the
	// rune slice rather than by slicing the bytes of str.
	if i < 0 && carry != ' ' {
		out := make([]rune, 0, l+1)
		out = append(out, runes[:lastAlphanumeric]...)
		out = append(out, carry)
		out = append(out, runes[lastAlphanumeric:]...)
		return string(out)
	}

	// No alphanumeric character. Simply increase last rune's value.
	if lastAlphanumeric == l {
		runes[l-1]++
	}

	return string(runes)
}
634 | 


--------------------------------------------------------------------------------
/convert_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"sort"
  8 | 	"strings"
  9 | 	"testing"
 10 | )
 11 | 
 12 | func TestToSnakeCaseAndToKebabCase(t *testing.T) {
 13 | 	cases := _M{
 14 | 		"HTTPServer":         "http_server",
 15 | 		"_camelCase":         "_camel_case",
 16 | 		"NoHTTPS":            "no_https",
 17 | 		"Wi_thF":             "wi_th_f",
 18 | 		"_AnotherTES_TCaseP": "_another_tes_t_case_p",
 19 | 		"ALL":                "all",
 20 | 		"_HELLO_WORLD_":      "_hello_world_",
 21 | 		"HELLO_WORLD":        "hello_world",
 22 | 		"HELLO____WORLD":     "hello____world",
 23 | 		"TW":                 "tw",
 24 | 		"_C":                 "_c",
 25 | 		"http2xx":            "http_2xx",
 26 | 		"HTTP2XX":            "http2_xx",
 27 | 		"HTTP20xOK":          "http_20x_ok",
 28 | 		"HTTP20xStatus":      "http_20x_status",
 29 | 		"HTTP-20xStatus":     "http_20x_status",
 30 | 		"a":                  "a",
 31 | 		"Duration2m3s":       "duration_2m3s",
 32 | 		"Bld4Floor3rd":       "bld4_floor_3rd",
 33 | 		" _-_ ":              "_____",
 34 | 		"a1b2c3d":            "a_1b2c3d",
 35 | 		"A//B%%2c":           "a//b%%2c",
 36 | 
 37 | 		"HTTP状态码404/502Error": "http_状态码404/502_error",
 38 | 		"中文(字符)":              "中文(字符)",
 39 | 		"混合ABCWords与123数字456": "混合_abc_words_与123_数字456",
 40 | 
 41 | 		"  sentence case  ": "__sentence_case__",
 42 | 		" Mixed-hyphen case _and SENTENCE_case and UPPER-case": "_mixed_hyphen_case__and_sentence_case_and_upper_case",
 43 | 		"FROM CamelCase to snake/kebab-case":                   "from_camel_case_to_snake/kebab_case",
 44 | 
 45 | 		"": "",
 46 | 		"Abc\uFFFDE\uFFFDf\uFFFDd\uFFFD2\uFFFD00z\uFFFDZZ\uFFFDZZ": "abc_\uFFFDe\uFFFDf\uFFFDd_\uFFFD2\uFFFD00z_\uFFFDzz\uFFFDzz",
 47 | 		"\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD":                           "\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD",
 48 | 
 49 | 		"abc_123_def": "abc_123_def",
 50 | 	}
 51 | 
 52 | 	runTestCases(t, ToSnakeCase, cases)
 53 | 
 54 | 	for k, v := range cases {
 55 | 		cases[k] = strings.Replace(v, "_", "-", -1)
 56 | 	}
 57 | 
 58 | 	runTestCases(t, ToKebabCase, cases)
 59 | }
 60 | 
 61 | func TestToCamelCase(t *testing.T) {
 62 | 	runTestCases(t, ToCamelCase, _M{
 63 | 		"http_server":     "httpServer",
 64 | 		"_camel_case":     "_camelCase",
 65 | 		"no_https":        "noHttps",
 66 | 		"_complex__case_": "_complex_Case_",
 67 | 		" complex -case ": " complex Case ",
 68 | 		"all":             "all",
 69 | 		"GOLANG_IS_GREAT": "golangIsGreat",
 70 | 		"GOLANG":          "golang",
 71 | 		"a":               "a",
 72 | 		"好":               "好",
 73 | 
 74 | 		"FROM CamelCase to snake/kebab-case": "fromCamelCaseToSnake/kebabCase",
 75 | 
 76 | 		"": "",
 77 | 	})
 78 | }
 79 | 
 80 | func TestToPascalCase(t *testing.T) {
 81 | 	runTestCases(t, ToPascalCase, _M{
 82 | 		"http_server":     "HttpServer",
 83 | 		"_camel_case":     "_CamelCase",
 84 | 		"no_https":        "NoHttps",
 85 | 		"_complex__case_": "_Complex_Case_",
 86 | 		" complex -case ": " Complex Case ",
 87 | 		"all":             "All",
 88 | 		"GOLANG_IS_GREAT": "GolangIsGreat",
 89 | 		"GOLANG":          "Golang",
 90 | 		"a":               "A",
 91 | 		"好":               "好",
 92 | 
 93 | 		"FROM CamelCase to snake/kebab-case": "FromCamelCaseToSnake/kebabCase",
 94 | 
 95 | 		"": "",
 96 | 	})
 97 | }
 98 | 
 99 | func TestSwapCase(t *testing.T) {
100 | 	runTestCases(t, SwapCase, _M{
101 | 		"swapCase": "SWAPcASE",
102 | 		"Θ~λa云Ξπ":  "θ~ΛA云ξΠ",
103 | 		"a":        "A",
104 | 
105 | 		"": "",
106 | 	})
107 | }
108 | 
109 | func TestFirstRuneToUpper(t *testing.T) {
110 | 	runTestCases(t, FirstRuneToUpper, _M{
111 | 		"hello, world!": "Hello, world!",
112 | 		"Hello, world!": "Hello, world!",
113 | 		"你好，世界！":        "你好，世界！",
114 | 		"a":             "A",
115 | 
116 | 		"": "",
117 | 	})
118 | }
119 | 
120 | func TestFirstRuneToLower(t *testing.T) {
121 | 	runTestCases(t, FirstRuneToLower, _M{
122 | 		"hello, world!": "hello, world!",
123 | 		"Hello, world!": "hello, world!",
124 | 		"你好，世界！":        "你好，世界！",
125 | 		"a":             "a",
126 | 		"A":             "a",
127 | 
128 | 		"": "",
129 | 	})
130 | }
131 | 
132 | func TestShuffle(t *testing.T) {
133 | 	// It seems there is no reliable way to test shuffled string.
134 | 	// Runner just make sure shuffled string has the same runes as origin string.
135 | 	runner := func(str string) string {
136 | 		s := Shuffle(str)
137 | 		slice := sort.StringSlice(strings.Split(s, ""))
138 | 		slice.Sort()
139 | 		return strings.Join(slice, "")
140 | 	}
141 | 
142 | 	runTestCases(t, runner, _M{
143 | 		"":            "",
144 | 		"facgbheidjk": "abcdefghijk",
145 | 		"尝试中文":        "中尝文试",
146 | 		"zh英文hun排":    "hhnuz排文英",
147 | 	})
148 | }
149 | 
// testShuffleSource is a deterministic random source for tests. Its value is
// the number of values handed out so far.
type testShuffleSource int

// A generated random number sequence just for testing.
var testShuffleTable = []int64{
	1874068156324778273,
	3328451335138149956,
	5263531936693774911,
	7955079406183515637,
	2703501726821866378,
	2740103009342231109,
	6941261091797652072,
	1905388747193831650,
	7981306761429961588,
	6426100070888298971,
	4831389563158288344,
	261049867304784443,
	1460320609597786623,
	5600924393587988459,
	8995016276575641803,
	732830328053361739,
	5486140987150761883,
	545291762129038907,
	6382800227808658932,
	2781055864473387780,
	1598098976185383115,
	4990765271833742716,
	5018949295715050020,
	2568779411109623071,
	3902890183311134652,
	4893789450120281907,
	2338498362660772719,
	2601737961087659062,
	7273596521315663110,
	3337066551442961397,
	8121576815539813105,
	2740376916591569721,
	8249030965139585917,
	898860202204764712,
	9010467728050264449,
	685213522303989579,
	2050257992909156333,
	6281838661429879825,
	2227583514184312746,
	2873287401706343734,
}

// Int63 returns the next entry of testShuffleTable, wrapping around at the
// end of the table, and advances the source by one.
func (src *testShuffleSource) Int63() int64 {
	idx := int(*src) % len(testShuffleTable)
	*src++
	return testShuffleTable[idx]
}

// Seed does nothing; the sequence is fixed.
func (*testShuffleSource) Seed(int64) {}
203 | 
204 | func TestShuffleSource(t *testing.T) {
205 | 	runner := func(str string) string {
206 | 		var src testShuffleSource
207 | 		return ShuffleSource(str, &src)
208 | 	}
209 | 
210 | 	runTestCases(t, runner, _M{
211 | 		"":            "",
212 | 		"facgbheidjk": "bkgfijached",
213 | 		"尝试中文怎么样":     "怎试么中样尝文",
214 | 		"zh英文hun排":    "zuhh文n英排",
215 | 	})
216 | }
217 | 
218 | func TestSuccessor(t *testing.T) {
219 | 	runTestCases(t, Successor, _M{
220 | 		"":          "",
221 | 		"abcd":      "abce",
222 | 		"THX1138":   "THX1139",
223 | 		"<<koala>>": "<<koalb>>",
224 | 		"1999zzz":   "2000aaa",
225 | 		"ZZZ9999":   "AAAA0000",
226 | 		"***":       "**+",
227 | 
228 | 		"来点中文试试":               "来点中文试诖",
229 | 		"中cZ英ZZ文zZ混9zZ9杂99进z位": "中dA英AA文aA混0aA0杂00进a位",
230 | 	})
231 | }
232 | 


--------------------------------------------------------------------------------
/count.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"unicode"
  8 | 	"unicode/utf8"
  9 | )
 10 | 
 11 | // Len returns str's utf8 rune length.
 12 | func Len(str string) int {
 13 | 	return utf8.RuneCountInString(str)
 14 | }
 15 | 
 16 | // WordCount returns number of words in a string.
 17 | //
 18 | // Word is defined as a locale dependent string containing alphabetic characters,
 19 | // which may also contain but not start with `'` and `-` characters.
 20 | func WordCount(str string) int {
 21 | 	var r rune
 22 | 	var size, n int
 23 | 
 24 | 	inWord := false
 25 | 
 26 | 	for len(str) > 0 {
 27 | 		r, size = utf8.DecodeRuneInString(str)
 28 | 
 29 | 		switch {
 30 | 		case isAlphabet(r):
 31 | 			if !inWord {
 32 | 				inWord = true
 33 | 				n++
 34 | 			}
 35 | 
 36 | 		case inWord && (r == '\'' || r == '-'):
 37 | 			// Still in word.
 38 | 
 39 | 		default:
 40 | 			inWord = false
 41 | 		}
 42 | 
 43 | 		str = str[size:]
 44 | 	}
 45 | 
 46 | 	return n
 47 | }
 48 | 
 49 | const minCJKCharacter = '\u3400'
 50 | 
 51 | // Checks r is a letter but not CJK character.
 52 | func isAlphabet(r rune) bool {
 53 | 	if !unicode.IsLetter(r) {
 54 | 		return false
 55 | 	}
 56 | 
 57 | 	switch {
 58 | 	// Quick check for non-CJK character.
 59 | 	case r < minCJKCharacter:
 60 | 		return true
 61 | 
 62 | 	// Common CJK characters.
 63 | 	case r >= '\u4E00' && r <= '\u9FCC':
 64 | 		return false
 65 | 
 66 | 	// Rare CJK characters.
 67 | 	case r >= '\u3400' && r <= '\u4D85':
 68 | 		return false
 69 | 
 70 | 	// Rare and historic CJK characters.
 71 | 	case r >= '\U00020000' && r <= '\U0002B81D':
 72 | 		return false
 73 | 	}
 74 | 
 75 | 	return true
 76 | }
 77 | 
 78 | // Width returns string width in monotype font.
 79 | // Multi-byte characters are usually twice the width of single byte characters.
 80 | //
 81 | // Algorithm comes from `mb_strwidth` in PHP.
 82 | // http://php.net/manual/en/function.mb-strwidth.php
 83 | func Width(str string) int {
 84 | 	var r rune
 85 | 	var size, n int
 86 | 
 87 | 	for len(str) > 0 {
 88 | 		r, size = utf8.DecodeRuneInString(str)
 89 | 		n += RuneWidth(r)
 90 | 		str = str[size:]
 91 | 	}
 92 | 
 93 | 	return n
 94 | }
 95 | 
 96 | // RuneWidth returns character width in monotype font.
 97 | // Multi-byte characters are usually twice the width of single byte characters.
 98 | //
 99 | // Algorithm comes from `mb_strwidth` in PHP.
100 | // http://php.net/manual/en/function.mb-strwidth.php
101 | func RuneWidth(r rune) int {
102 | 	switch {
103 | 	case r == utf8.RuneError || r < '\x20':
104 | 		return 0
105 | 
106 | 	case '\x20' <= r && r < '\u2000':
107 | 		return 1
108 | 
109 | 	case '\u2000' <= r && r < '\uFF61':
110 | 		return 2
111 | 
112 | 	case '\uFF61' <= r && r < '\uFFA0':
113 | 		return 1
114 | 
115 | 	case '\uFFA0' <= r:
116 | 		return 2
117 | 	}
118 | 
119 | 	return 0
120 | }
121 | 


--------------------------------------------------------------------------------
/count_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 Huan Du. All rights reserved.
 2 | // Licensed under the MIT license that can be found in the LICENSE file.
 3 | 
 4 | package xstrings
 5 | 
 6 | import (
 7 | 	"fmt"
 8 | 	"testing"
 9 | )
10 | 
11 | func TestLen(t *testing.T) {
12 | 	runner := func(str string) string {
13 | 		return fmt.Sprint(Len(str))
14 | 	}
15 | 
16 | 	runTestCases(t, runner, _M{
17 | 		"abcdef":    "6",
18 | 		"中文":        "2",
19 | 		"中yin文hun排": "9",
20 | 		"":          "0",
21 | 	})
22 | }
23 | 
24 | func TestWordCount(t *testing.T) {
25 | 	runner := func(str string) string {
26 | 		return fmt.Sprint(WordCount(str))
27 | 	}
28 | 
29 | 	runTestCases(t, runner, _M{
30 | 		"one word: λ":             "3",
31 | 		"中文":                      "0",
32 | 		"你好，sekai！":               "1",
33 | 		"oh, it's super-fancy!!a": "4",
34 | 		"":                        "0",
35 | 		"-":                       "0",
36 | 		"it's-'s":                 "1",
37 | 	})
38 | }
39 | 
40 | func TestWidth(t *testing.T) {
41 | 	runner := func(str string) string {
42 | 		return fmt.Sprint(Width(str))
43 | 	}
44 | 
45 | 	runTestCases(t, runner, _M{
46 | 		"abcd\t0123\n7890": "12",
47 | 		"中zh英eng文混排":       "15",
48 | 		"":                 "0",
49 | 	})
50 | }
51 | 
52 | func TestRuneWidth(t *testing.T) {
53 | 	runner := func(str string) string {
54 | 		return fmt.Sprint(RuneWidth([]rune(str)[0]))
55 | 	}
56 | 
57 | 	runTestCases(t, runner, _M{
58 | 		"a":    "1",
59 | 		"中":    "2",
60 | 		"\x11": "0",
61 | 	})
62 | }
63 | 


--------------------------------------------------------------------------------
/doc.go:
--------------------------------------------------------------------------------
1 | // Copyright 2015 Huan Du. All rights reserved.
2 | // Licensed under the MIT license that can be found in the LICENSE file.
3 | 
// Package xstrings provides string algorithms that are useful but not included in the standard `strings` package.
5 | // See project home page for details. https://github.com/huandu/xstrings
6 | //
7 | // Package xstrings assumes all strings are encoded in utf8.
8 | package xstrings
9 | 


--------------------------------------------------------------------------------
/format.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"unicode/utf8"
  8 | )
  9 | 
 10 | // ExpandTabs can expand tabs ('\t') rune in str to one or more spaces dpending on
 11 | // current column and tabSize.
 12 | // The column number is reset to zero after each newline ('\n') occurring in the str.
 13 | //
 14 | // ExpandTabs uses RuneWidth to decide rune's width.
 15 | // For example, CJK characters will be treated as two characters.
 16 | //
 17 | // If tabSize <= 0, ExpandTabs panics with error.
 18 | //
 19 | // Samples:
 20 | //
 21 | //	ExpandTabs("a\tbc\tdef\tghij\tk", 4) => "a   bc  def ghij    k"
 22 | //	ExpandTabs("abcdefg\thij\nk\tl", 4)  => "abcdefg hij\nk   l"
 23 | //	ExpandTabs("z中\t文\tw", 4)           => "z中 文  w"
 24 | func ExpandTabs(str string, tabSize int) string {
 25 | 	if tabSize <= 0 {
 26 | 		panic("tab size must be positive")
 27 | 	}
 28 | 
 29 | 	var r rune
 30 | 	var i, size, column, expand int
 31 | 	var output *stringBuilder
 32 | 
 33 | 	orig := str
 34 | 
 35 | 	for len(str) > 0 {
 36 | 		r, size = utf8.DecodeRuneInString(str)
 37 | 
 38 | 		if r == '\t' {
 39 | 			expand = tabSize - column%tabSize
 40 | 
 41 | 			if output == nil {
 42 | 				output = allocBuffer(orig, str)
 43 | 			}
 44 | 
 45 | 			for i = 0; i < expand; i++ {
 46 | 				output.WriteRune(' ')
 47 | 			}
 48 | 
 49 | 			column += expand
 50 | 		} else {
 51 | 			if r == '\n' {
 52 | 				column = 0
 53 | 			} else {
 54 | 				column += RuneWidth(r)
 55 | 			}
 56 | 
 57 | 			if output != nil {
 58 | 				output.WriteRune(r)
 59 | 			}
 60 | 		}
 61 | 
 62 | 		str = str[size:]
 63 | 	}
 64 | 
 65 | 	if output == nil {
 66 | 		return orig
 67 | 	}
 68 | 
 69 | 	return output.String()
 70 | }
 71 | 
 72 | // LeftJustify returns a string with pad string at right side if str's rune length is smaller than length.
 73 | // If str's rune length is larger than length, str itself will be returned.
 74 | //
 75 | // If pad is an empty string, str will be returned.
 76 | //
 77 | // Samples:
 78 | //
 79 | //	LeftJustify("hello", 4, " ")    => "hello"
 80 | //	LeftJustify("hello", 10, " ")   => "hello     "
 81 | //	LeftJustify("hello", 10, "123") => "hello12312"
 82 | func LeftJustify(str string, length int, pad string) string {
 83 | 	l := Len(str)
 84 | 
 85 | 	if l >= length || pad == "" {
 86 | 		return str
 87 | 	}
 88 | 
 89 | 	remains := length - l
 90 | 	padLen := Len(pad)
 91 | 
 92 | 	output := &stringBuilder{}
 93 | 	output.Grow(len(str) + (remains/padLen+1)*len(pad))
 94 | 	output.WriteString(str)
 95 | 	writePadString(output, pad, padLen, remains)
 96 | 	return output.String()
 97 | }
 98 | 
 99 | // RightJustify returns a string with pad string at left side if str's rune length is smaller than length.
100 | // If str's rune length is larger than length, str itself will be returned.
101 | //
102 | // If pad is an empty string, str will be returned.
103 | //
104 | // Samples:
105 | //
106 | //	RightJustify("hello", 4, " ")    => "hello"
107 | //	RightJustify("hello", 10, " ")   => "     hello"
108 | //	RightJustify("hello", 10, "123") => "12312hello"
109 | func RightJustify(str string, length int, pad string) string {
110 | 	l := Len(str)
111 | 
112 | 	if l >= length || pad == "" {
113 | 		return str
114 | 	}
115 | 
116 | 	remains := length - l
117 | 	padLen := Len(pad)
118 | 
119 | 	output := &stringBuilder{}
120 | 	output.Grow(len(str) + (remains/padLen+1)*len(pad))
121 | 	writePadString(output, pad, padLen, remains)
122 | 	output.WriteString(str)
123 | 	return output.String()
124 | }
125 | 
126 | // Center returns a string with pad string at both side if str's rune length is smaller than length.
127 | // If str's rune length is larger than length, str itself will be returned.
128 | //
129 | // If pad is an empty string, str will be returned.
130 | //
131 | // Samples:
132 | //
133 | //	Center("hello", 4, " ")    => "hello"
134 | //	Center("hello", 10, " ")   => "  hello   "
135 | //	Center("hello", 10, "123") => "12hello123"
136 | func Center(str string, length int, pad string) string {
137 | 	l := Len(str)
138 | 
139 | 	if l >= length || pad == "" {
140 | 		return str
141 | 	}
142 | 
143 | 	remains := length - l
144 | 	padLen := Len(pad)
145 | 
146 | 	output := &stringBuilder{}
147 | 	output.Grow(len(str) + (remains/padLen+1)*len(pad))
148 | 	writePadString(output, pad, padLen, remains/2)
149 | 	output.WriteString(str)
150 | 	writePadString(output, pad, padLen, (remains+1)/2)
151 | 	return output.String()
152 | }
153 | 
154 | func writePadString(output *stringBuilder, pad string, padLen, remains int) {
155 | 	var r rune
156 | 	var size int
157 | 
158 | 	repeats := remains / padLen
159 | 
160 | 	for i := 0; i < repeats; i++ {
161 | 		output.WriteString(pad)
162 | 	}
163 | 
164 | 	remains = remains % padLen
165 | 
166 | 	if remains != 0 {
167 | 		for i := 0; i < remains; i++ {
168 | 			r, size = utf8.DecodeRuneInString(pad)
169 | 			output.WriteRune(r)
170 | 			pad = pad[size:]
171 | 		}
172 | 	}
173 | }
174 | 


--------------------------------------------------------------------------------
/format_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"strconv"
  8 | 	"strings"
  9 | 	"testing"
 10 | )
 11 | 
 12 | func TestExpandTabs(t *testing.T) {
 13 | 	runner := func(str string) (result string) {
 14 | 		defer func() {
 15 | 			if e := recover(); e != nil {
 16 | 				result = e.(string)
 17 | 			}
 18 | 		}()
 19 | 
 20 | 		input := strings.Split(str, separator)
 21 | 		n, _ := strconv.Atoi(input[1])
 22 | 		return ExpandTabs(input[0], n)
 23 | 	}
 24 | 
 25 | 	runTestCases(t, runner, _M{
 26 | 		sep("a\tbc\tdef\tghij\tk", "4"): "a   bc  def ghij    k",
 27 | 		sep("abcdefg\thij\nk\tl", "4"):  "abcdefg hij\nk   l",
 28 | 		sep("z中\t文\tw", "4"):            "z中 文  w",
 29 | 		sep("abcdef", "4"):              "abcdef",
 30 | 
 31 | 		sep("abc\td\tef\tghij\nk\tl", "3"): "abc   d  ef ghij\nk  l",
 32 | 		sep("abc\td\tef\tghij\nk\tl", "1"): "abc d ef ghij\nk l",
 33 | 
 34 | 		sep("abc", "0"):  "tab size must be positive",
 35 | 		sep("abc", "-1"): "tab size must be positive",
 36 | 	})
 37 | }
 38 | 
 39 | func TestLeftJustify(t *testing.T) {
 40 | 	runner := func(str string) string {
 41 | 		input := strings.Split(str, separator)
 42 | 		n, _ := strconv.Atoi(input[1])
 43 | 		return LeftJustify(input[0], n, input[2])
 44 | 	}
 45 | 
 46 | 	runTestCases(t, runner, _M{
 47 | 		sep("hello", "4", " "):    "hello",
 48 | 		sep("hello", "10", " "):   "hello     ",
 49 | 		sep("hello", "10", "123"): "hello12312",
 50 | 
 51 | 		sep("hello中文test", "4", " "):    "hello中文test",
 52 | 		sep("hello中文test", "12", " "):   "hello中文test ",
 53 | 		sep("hello中文test", "18", "测试！"): "hello中文test测试！测试！测",
 54 | 
 55 | 		sep("hello中文test", "0", "123"): "hello中文test",
 56 | 		sep("hello中文test", "18", ""):   "hello中文test",
 57 | 	})
 58 | }
 59 | 
 60 | func TestRightJustify(t *testing.T) {
 61 | 	runner := func(str string) string {
 62 | 		input := strings.Split(str, separator)
 63 | 		n, _ := strconv.Atoi(input[1])
 64 | 		return RightJustify(input[0], n, input[2])
 65 | 	}
 66 | 
 67 | 	runTestCases(t, runner, _M{
 68 | 		sep("hello", "4", " "):    "hello",
 69 | 		sep("hello", "10", " "):   "     hello",
 70 | 		sep("hello", "10", "123"): "12312hello",
 71 | 
 72 | 		sep("hello中文test", "4", " "):    "hello中文test",
 73 | 		sep("hello中文test", "12", " "):   " hello中文test",
 74 | 		sep("hello中文test", "18", "测试！"): "测试！测试！测hello中文test",
 75 | 
 76 | 		sep("hello中文test", "0", "123"): "hello中文test",
 77 | 		sep("hello中文test", "18", ""):   "hello中文test",
 78 | 	})
 79 | }
 80 | 
 81 | func TestCenter(t *testing.T) {
 82 | 	runner := func(str string) string {
 83 | 		input := strings.Split(str, separator)
 84 | 		n, _ := strconv.Atoi(input[1])
 85 | 		return Center(input[0], n, input[2])
 86 | 	}
 87 | 
 88 | 	runTestCases(t, runner, _M{
 89 | 		sep("hello", "4", " "):    "hello",
 90 | 		sep("hello", "10", " "):   "  hello   ",
 91 | 		sep("hello", "10", "123"): "12hello123",
 92 | 
 93 | 		sep("hello中文test", "4", " "):    "hello中文test",
 94 | 		sep("hello中文test", "12", " "):   "hello中文test ",
 95 | 		sep("hello中文test", "18", "测试！"): "测试！hello中文test测试！测",
 96 | 
 97 | 		sep("hello中文test", "0", "123"): "hello中文test",
 98 | 		sep("hello中文test", "18", ""):   "hello中文test",
 99 | 	})
100 | }
101 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/huandu/xstrings
2 | 
3 | go 1.12
4 | 


--------------------------------------------------------------------------------
/manipulate.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"strings"
  8 | 	"unicode/utf8"
  9 | )
 10 | 
 11 | // Reverse a utf8 encoded string.
 12 | func Reverse(str string) string {
 13 | 	var size int
 14 | 
 15 | 	tail := len(str)
 16 | 	buf := make([]byte, tail)
 17 | 	s := buf
 18 | 
 19 | 	for len(str) > 0 {
 20 | 		_, size = utf8.DecodeRuneInString(str)
 21 | 		tail -= size
 22 | 		s = append(s[:tail], []byte(str[:size])...)
 23 | 		str = str[size:]
 24 | 	}
 25 | 
 26 | 	return string(buf)
 27 | }
 28 | 
 29 | // Slice a string by rune.
 30 | //
 31 | // Start must satisfy 0 <= start <= rune length.
 32 | //
 33 | // End can be positive, zero or negative.
 34 | // If end >= 0, start and end must satisfy start <= end <= rune length.
 35 | // If end < 0, it means slice to the end of string.
 36 | //
 37 | // Otherwise, Slice will panic as out of range.
 38 | func Slice(str string, start, end int) string {
 39 | 	var size, startPos, endPos int
 40 | 
 41 | 	origin := str
 42 | 
 43 | 	if start < 0 || end > len(str) || (end >= 0 && start > end) {
 44 | 		panic("out of range")
 45 | 	}
 46 | 
 47 | 	if end >= 0 {
 48 | 		end -= start
 49 | 	}
 50 | 
 51 | 	for start > 0 && len(str) > 0 {
 52 | 		_, size = utf8.DecodeRuneInString(str)
 53 | 		start--
 54 | 		startPos += size
 55 | 		str = str[size:]
 56 | 	}
 57 | 
 58 | 	if end < 0 {
 59 | 		return origin[startPos:]
 60 | 	}
 61 | 
 62 | 	endPos = startPos
 63 | 
 64 | 	for end > 0 && len(str) > 0 {
 65 | 		_, size = utf8.DecodeRuneInString(str)
 66 | 		end--
 67 | 		endPos += size
 68 | 		str = str[size:]
 69 | 	}
 70 | 
 71 | 	if len(str) == 0 && (start > 0 || end > 0) {
 72 | 		panic("out of range")
 73 | 	}
 74 | 
 75 | 	return origin[startPos:endPos]
 76 | }
 77 | 
 78 | // Partition splits a string by sep into three parts.
 79 | // The return value is a slice of strings with head, match and tail.
 80 | //
 81 | // If str contains sep, for example "hello" and "l", Partition returns
 82 | //
 83 | //	"he", "l", "lo"
 84 | //
 85 | // If str doesn't contain sep, for example "hello" and "x", Partition returns
 86 | //
 87 | //	"hello", "", ""
 88 | func Partition(str, sep string) (head, match, tail string) {
 89 | 	index := strings.Index(str, sep)
 90 | 
 91 | 	if index == -1 {
 92 | 		head = str
 93 | 		return
 94 | 	}
 95 | 
 96 | 	head = str[:index]
 97 | 	match = str[index : index+len(sep)]
 98 | 	tail = str[index+len(sep):]
 99 | 	return
100 | }
101 | 
// LastPartition splits a string by last instance of sep into three parts.
// The return values are head, match and tail.
//
// If str contains sep, for example "hello" and "l", LastPartition returns
//
//	"hel", "l", "o"
//
// If str doesn't contain sep, for example "hello" and "x", LastPartition returns
//
//	"", "", "hello"
func LastPartition(str, sep string) (head, match, tail string) {
	if at := strings.LastIndex(str, sep); at >= 0 {
		after := at + len(sep)
		return str[:at], str[at:after], str[after:]
	}

	return "", "", str
}
125 | 
126 | // Insert src into dst at given rune index.
127 | // Index is counted by runes instead of bytes.
128 | //
129 | // If index is out of range of dst, panic with out of range.
130 | func Insert(dst, src string, index int) string {
131 | 	return Slice(dst, 0, index) + src + Slice(dst, index, -1)
132 | }
133 | 
134 | // Scrub scrubs invalid utf8 bytes with repl string.
135 | // Adjacent invalid bytes are replaced only once.
136 | func Scrub(str, repl string) string {
137 | 	var buf *stringBuilder
138 | 	var r rune
139 | 	var size, pos int
140 | 	var hasError bool
141 | 
142 | 	origin := str
143 | 
144 | 	for len(str) > 0 {
145 | 		r, size = utf8.DecodeRuneInString(str)
146 | 
147 | 		if r == utf8.RuneError {
148 | 			if !hasError {
149 | 				if buf == nil {
150 | 					buf = &stringBuilder{}
151 | 				}
152 | 
153 | 				buf.WriteString(origin[:pos])
154 | 				hasError = true
155 | 			}
156 | 		} else if hasError {
157 | 			hasError = false
158 | 			buf.WriteString(repl)
159 | 
160 | 			origin = origin[pos:]
161 | 			pos = 0
162 | 		}
163 | 
164 | 		pos += size
165 | 		str = str[size:]
166 | 	}
167 | 
168 | 	if buf != nil {
169 | 		buf.WriteString(origin)
170 | 		return buf.String()
171 | 	}
172 | 
173 | 	// No invalid byte.
174 | 	return origin
175 | }
176 | 
177 | // WordSplit splits a string into words. Returns a slice of words.
178 | // If there is no word in a string, return nil.
179 | //
180 | // Word is defined as a locale dependent string containing alphabetic characters,
181 | // which may also contain but not start with `'` and `-` characters.
182 | func WordSplit(str string) []string {
183 | 	var word string
184 | 	var words []string
185 | 	var r rune
186 | 	var size, pos int
187 | 
188 | 	inWord := false
189 | 
190 | 	for len(str) > 0 {
191 | 		r, size = utf8.DecodeRuneInString(str)
192 | 
193 | 		switch {
194 | 		case isAlphabet(r):
195 | 			if !inWord {
196 | 				inWord = true
197 | 				word = str
198 | 				pos = 0
199 | 			}
200 | 
201 | 		case inWord && (r == '\'' || r == '-'):
202 | 			// Still in word.
203 | 
204 | 		default:
205 | 			if inWord {
206 | 				inWord = false
207 | 				words = append(words, word[:pos])
208 | 			}
209 | 		}
210 | 
211 | 		pos += size
212 | 		str = str[size:]
213 | 	}
214 | 
215 | 	if inWord {
216 | 		words = append(words, word[:pos])
217 | 	}
218 | 
219 | 	return words
220 | }
221 | 


--------------------------------------------------------------------------------
/manipulate_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"strconv"
  8 | 	"strings"
  9 | 	"testing"
 10 | )
 11 | 
 12 | func TestReverse(t *testing.T) {
 13 | 	runTestCases(t, Reverse, _M{
 14 | 		"reverse string": "gnirts esrever",
 15 | 		"中文如何？":          "？何如文中",
 16 | 		"中en文混~排怎样？a":    "a？样怎排~混文ne中",
 17 | 	})
 18 | }
 19 | 
 20 | func TestSlice(t *testing.T) {
 21 | 	runner := func(str string) (result string) {
 22 | 		defer func() {
 23 | 			if e := recover(); e != nil {
 24 | 				result = e.(string)
 25 | 			}
 26 | 		}()
 27 | 
 28 | 		strs := split(str)
 29 | 		start, _ := strconv.ParseInt(strs[1], 10, 0)
 30 | 		end, _ := strconv.ParseInt(strs[2], 10, 0)
 31 | 
 32 | 		result = Slice(strs[0], int(start), int(end))
 33 | 		return
 34 | 	}
 35 | 
 36 | 	runTestCases(t, runner, _M{
 37 | 		sep("abcdefghijk", "3", "8"):      "defgh",
 38 | 		sep("来点中文如何？", "2", "7"):          "中文如何？",
 39 | 		sep("中en文混~排总是少不了的a", "2", "8"):   "n文混~排总",
 40 | 		sep("中en文混~排总是少不了的a", "0", "0"):   "",
 41 | 		sep("中en文混~排总是少不了的a", "14", "14"): "",
 42 | 		sep("中en文混~排总是少不了的a", "5", "-1"):  "~排总是少不了的a",
 43 | 		sep("中en文混~排总是少不了的a", "14", "-1"): "",
 44 | 
 45 | 		sep("let us slice out of range", "-3", "3"): "out of range",
 46 | 		sep("超出范围哦", "2", "6"):                      "out of range",
 47 | 		sep("don't do this", "3", "2"):              "out of range",
 48 | 		sep("千gan万de不piao要liang", "19", "19"):       "out of range",
 49 | 	})
 50 | }
 51 | 
 52 | func TestPartition(t *testing.T) {
 53 | 	runner := func(str string) string {
 54 | 		input := strings.Split(str, separator)
 55 | 		head, match, tail := Partition(input[0], input[1])
 56 | 		return sep(head, match, tail)
 57 | 	}
 58 | 
 59 | 	runTestCases(t, runner, _M{
 60 | 		sep("hello", "l"):           sep("he", "l", "lo"),
 61 | 		sep("中文总少不了", "少"):          sep("中文总", "少", "不了"),
 62 | 		sep("z这个zh英文混排hao不", "h英文"): sep("z这个z", "h英文", "混排hao不"),
 63 | 		sep("边界tiao件zen能忘", "边界"):   sep("", "边界", "tiao件zen能忘"),
 64 | 		sep("尾巴ye别忘le", "忘le"):      sep("尾巴ye别", "忘le", ""),
 65 | 
 66 | 		sep("hello", "x"):     sep("hello", "", ""),
 67 | 		sep("不是晩香玉", "晚"):     sep("不是晩香玉", "", ""), // Hint: 晩 is not 晚 :)
 68 | 		sep("来ge混排ba", "e 混"): sep("来ge混排ba", "", ""),
 69 | 	})
 70 | }
 71 | 
 72 | func TestLastPartition(t *testing.T) {
 73 | 	runner := func(str string) string {
 74 | 		input := strings.Split(str, separator)
 75 | 		head, match, tail := LastPartition(input[0], input[1])
 76 | 		return sep(head, match, tail)
 77 | 	}
 78 | 
 79 | 	runTestCases(t, runner, _M{
 80 | 		sep("hello", "l"):               sep("hel", "l", "o"),
 81 | 		sep("少量中文总少不了", "少"):            sep("少量中文总", "少", "不了"),
 82 | 		sep("z这个zh英文ch英文混排hao不", "h英文"): sep("z这个zh英文c", "h英文", "混排hao不"),
 83 | 		sep("边界tiao件zen能忘边界", "边界"):     sep("边界tiao件zen能忘", "边界", ""),
 84 | 		sep("尾巴ye别忘le", "尾巴"):           sep("", "尾巴", "ye别忘le"),
 85 | 
 86 | 		sep("hello", "x"):     sep("", "", "hello"),
 87 | 		sep("不是晩香玉", "晚"):     sep("", "", "不是晩香玉"), // Hint: 晩 is not 晚 :)
 88 | 		sep("来ge混排ba", "e 混"): sep("", "", "来ge混排ba"),
 89 | 	})
 90 | }
 91 | 
 92 | func TestInsert(t *testing.T) {
 93 | 	runner := func(str string) (result string) {
 94 | 		defer func() {
 95 | 			if e := recover(); e != nil {
 96 | 				result = e.(string)
 97 | 			}
 98 | 		}()
 99 | 
100 | 		strs := split(str)
101 | 		index, _ := strconv.ParseInt(strs[2], 10, 0)
102 | 		result = Insert(strs[0], strs[1], int(index))
103 | 		return
104 | 	}
105 | 
106 | 	runTestCases(t, runner, _M{
107 | 		sep("abcdefg", "hi", "3"):    "abchidefg",
108 | 		sep("少量中文是必须的", "混pai", "4"): "少量中文混pai是必须的",
109 | 		sep("zh英文hun排", "~！", "5"):   "zh英文h~！un排",
110 | 		sep("插在beginning", "我", "0"): "我插在beginning",
111 | 		sep("插在ending", "我", "8"):    "插在ending我",
112 | 
113 | 		sep("超tian出yuan边tu界po", "foo", "-1"): "out of range",
114 | 		sep("超tian出yuan边tu界po", "foo", "17"): "out of range",
115 | 	})
116 | }
117 | 
118 | func TestScrub(t *testing.T) {
119 | 	runner := func(str string) string {
120 | 		strs := split(str)
121 | 		return Scrub(strs[0], strs[1])
122 | 	}
123 | 
124 | 	runTestCases(t, runner, _M{
125 | 		sep("ab\uFFFDcd\xFF\xCEefg\xFF\xFC\xFD\xFAhijk", "*"): "ab*cd*efg*hijk",
126 | 		sep("no错误です", "*"):                                    "no错误です",
127 | 		sep("", "*"):                                          "",
128 | 	})
129 | }
130 | 
131 | func TestWordSplit(t *testing.T) {
132 | 	runner := func(str string) string {
133 | 		return sep(WordSplit(str)...)
134 | 	}
135 | 
136 | 	runTestCases(t, runner, _M{
137 | 		"one word":                   sep("one", "word"),
138 | 		"一个字：把他给我拿下！":                "",
139 | 		"it's a super-fancy one!!!a": sep("it's", "a", "super-fancy", "one", "a"),
140 | 		"a -b-c' 'd'e":               sep("a", "b-c'", "d'e"),
141 | 	})
142 | }
143 | 


--------------------------------------------------------------------------------
/stringbuilder.go:
--------------------------------------------------------------------------------
1 | //go:build go1.10
2 | // +build go1.10
3 | 
4 | package xstrings
5 | 
6 | import "strings"
7 | 
// stringBuilder is the buffer type the package uses to assemble strings.
// This file is compiled under the go1.10 build constraint, where the type is
// simply an alias of strings.Builder.
type stringBuilder = strings.Builder
9 | 


--------------------------------------------------------------------------------
/stringbuilder_go110.go:
--------------------------------------------------------------------------------
 1 | //go:build !go1.10
 2 | // +build !go1.10
 3 | 
 4 | package xstrings
 5 | 
 6 | import "bytes"
 7 | 
// stringBuilder is the buffer type the package uses to assemble strings.
// This file is compiled when the go1.10 build constraint is NOT satisfied;
// the type then embeds bytes.Buffer and exposes its promoted methods.
type stringBuilder struct {
	bytes.Buffer
}
11 | 


--------------------------------------------------------------------------------
/translate.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2015 Huan Du. All rights reserved.
  2 | // Licensed under the MIT license that can be found in the LICENSE file.
  3 | 
  4 | package xstrings
  5 | 
  6 | import (
  7 | 	"unicode"
  8 | 	"unicode/utf8"
  9 | )
 10 | 
// runeRangeMap maps the inclusive rune range [FromLo, FromHi] onto a target
// range. FromLo is never greater than FromHi when the value is built by
// addRuneRange.
type runeRangeMap struct {
	FromLo rune // Lower bound of range map.
	FromHi rune // An inclusive higher bound of range map.
	ToLo   rune // Target of FromLo. May be greater than ToHi when the mapping runs from higher to lower.
	ToHi   rune // Target of FromHi. Equal to ToLo when the whole range maps to a single rune.
}

// runeDict is a lookup table indexed by ASCII rune value.
// A zero entry means that the rune has no mapping.
type runeDict struct {
	Dict [unicode.MaxASCII + 1]rune
}

// runeMap maps individual source runes to their translated runes.
type runeMap map[rune]rune
 23 | 
// Translator can translate string with pre-compiled from and to patterns.
// If a from/to pattern pair needs to be used more than once, it's recommended
// to create a Translator and reuse it.
type Translator struct {
	quickDict  *runeDict       // A quick dictionary to look up rune by index. Only available for latin runes.
	runeMap    runeMap         // Rune map for translation.
	ranges     []*runeRangeMap // Ranges of runes.
	mappedRune rune            // If mappedRune >= 0, all matched runes are translated to the mappedRune.
	reverted   bool            // True if the from pattern starts with '^', i.e. runes NOT listed in the pattern are the ones that match.
	hasPattern bool            // True if the Translator was created from a non-empty from pattern.
}
 35 | 
// NewTranslator creates new Translator through a from/to pattern pair.
//
// If from is empty, the returned Translator has no pattern. A leading '^' in
// from reverts the pattern, and an empty to pattern puts the Translator in
// deletion mode, in which matched runes are mapped to utf8.RuneError.
func NewTranslator(from, to string) *Translator {
	tr := &Translator{}

	if from == "" {
		return tr
	}

	reverted := from[0] == '^'
	deletion := len(to) == 0

	if reverted {
		from = from[1:]
	}

	// The start/end/step triples describe the rune range currently being
	// consumed from the from and to patterns. A step of 0 means the current
	// item is a single rune rather than a range.
	var fromStart, fromEnd, fromRangeStep rune
	var toStart, toEnd, toRangeStep rune
	var fromRangeSize, toRangeSize rune
	var singleRunes []rune

	// Update the to rune range.
	updateRange := func() {
		// No more rune to read in the to rune pattern.
		if toEnd == utf8.RuneError {
			return
		}

		if toRangeStep == 0 {
			to, toStart, toEnd, toRangeStep = nextRuneRange(to, toEnd)
			return
		}

		// Current range is not empty. Consume 1 rune from start.
		if toStart != toEnd {
			toStart += toRangeStep
			return
		}

		// No more rune. Repeat the last rune.
		if to == "" {
			toEnd = utf8.RuneError
			return
		}

		// Both start and end are used. Read two more runes from the to pattern.
		to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)
	}

	if deletion {
		toStart = utf8.RuneError
		toEnd = utf8.RuneError
	} else {
		// If from pattern is reverted, only the last rune in the to pattern will be used.
		if reverted {
			var size int

			for len(to) > 0 {
				toStart, size = utf8.DecodeRuneInString(to)
				to = to[size:]
			}

			toEnd = utf8.RuneError
		} else {
			to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError)
		}
	}

	fromEnd = utf8.RuneError

	for len(from) > 0 {
		from, fromStart, fromEnd, fromRangeStep = nextRuneRange(from, fromEnd)

		// fromStart is a single character. Just map it with a rune in the to pattern.
		if fromRangeStep == 0 {
			singleRunes = tr.addRune(fromStart, toStart, singleRunes)
			updateRange()
			continue
		}

		for toEnd != utf8.RuneError && fromStart != fromEnd {
			// If mapped rune is a single character instead of a range, simply shift first
			// rune in the range.
			if toRangeStep == 0 {
				singleRunes = tr.addRune(fromStart, toStart, singleRunes)
				updateRange()
				fromStart += fromRangeStep
				continue
			}

			fromRangeSize = (fromEnd - fromStart) * fromRangeStep
			toRangeSize = (toEnd - toStart) * toRangeStep

			// Not enough runes in the to pattern. Need to read more.
			if fromRangeSize > toRangeSize {
				fromStart, toStart = tr.addRuneRange(fromStart, fromStart+toRangeSize*fromRangeStep, toStart, toEnd, singleRunes)
				fromStart += fromRangeStep
				updateRange()

				// Edge case: If fromRangeSize == toRangeSize + 1, the last fromStart value needs be considered
				// as a single rune.
				if fromStart == fromEnd {
					singleRunes = tr.addRune(fromStart, toStart, singleRunes)
					updateRange()
				}

				continue
			}

			fromStart, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart+fromRangeSize*toRangeStep, singleRunes)
			updateRange()
			break
		}

		// The from range has been consumed completely.
		if fromStart == fromEnd {
			fromEnd = utf8.RuneError
			continue
		}

		// Whatever is left of the from range maps to the single rune toStart.
		_, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart, singleRunes)
		fromEnd = utf8.RuneError
	}

	// A rune read as the end of the last pair is still pending; map it too.
	if fromEnd != utf8.RuneError {
		tr.addRune(fromEnd, toStart, singleRunes)
	}

	tr.reverted = reverted
	tr.mappedRune = -1
	tr.hasPattern = true

	// Translate RuneError only if in deletion or reverted mode.
	if deletion || reverted {
		tr.mappedRune = toStart
	}

	return tr
}
173 | 
174 | func (tr *Translator) addRune(from, to rune, singleRunes []rune) []rune {
175 | 	if from <= unicode.MaxASCII {
176 | 		if tr.quickDict == nil {
177 | 			tr.quickDict = &runeDict{}
178 | 		}
179 | 
180 | 		tr.quickDict.Dict[from] = to
181 | 	} else {
182 | 		if tr.runeMap == nil {
183 | 			tr.runeMap = make(runeMap)
184 | 		}
185 | 
186 | 		tr.runeMap[from] = to
187 | 	}
188 | 
189 | 	singleRunes = append(singleRunes, from)
190 | 	return singleRunes
191 | }
192 | 
193 | func (tr *Translator) addRuneRange(fromLo, fromHi, toLo, toHi rune, singleRunes []rune) (rune, rune) {
194 | 	var r rune
195 | 	var rrm *runeRangeMap
196 | 
197 | 	if fromLo < fromHi {
198 | 		rrm = &runeRangeMap{
199 | 			FromLo: fromLo,
200 | 			FromHi: fromHi,
201 | 			ToLo:   toLo,
202 | 			ToHi:   toHi,
203 | 		}
204 | 	} else {
205 | 		rrm = &runeRangeMap{
206 | 			FromLo: fromHi,
207 | 			FromHi: fromLo,
208 | 			ToLo:   toHi,
209 | 			ToHi:   toLo,
210 | 		}
211 | 	}
212 | 
213 | 	// If there is any single rune conflicts with this rune range, clear single rune record.
214 | 	for _, r = range singleRunes {
215 | 		if rrm.FromLo <= r && r <= rrm.FromHi {
216 | 			if r <= unicode.MaxASCII {
217 | 				tr.quickDict.Dict[r] = 0
218 | 			} else {
219 | 				delete(tr.runeMap, r)
220 | 			}
221 | 		}
222 | 	}
223 | 
224 | 	tr.ranges = append(tr.ranges, rrm)
225 | 	return fromHi, toHi
226 | }
227 | 
// nextRuneRange reads the next item of a pattern from str. last is the rune
// carried over from the previous call, or utf8.RuneError if there is none.
//
// It returns the unread rest of str together with a start/end pair:
//   - when two runes are available, start and end are those runes; rangeStep
//     is 1 or -1 if they were joined by an unescaped '-' and 0 otherwise;
//   - when the pattern runs out first, start is the last rune seen (possibly
//     the carried-over one), end is utf8.RuneError and rangeStep is 0.
//
// A backslash escapes the rune after it.
func nextRuneRange(str string, last rune) (remaining string, start, end rune, rangeStep rune) {
	remaining = str
	escaped := false
	dashSeen := false

	for remaining != "" {
		cur, width := utf8.DecodeRuneInString(remaining)
		remaining = remaining[width:]

		// Parse special characters.
		if !escaped {
			switch cur {
			case '\\':
				escaped = true
				continue

			case '-':
				// A dash with no rune in front of it is ignored.
				if last != utf8.RuneError {
					dashSeen = true
				}

				continue
			}
		}

		escaped = false

		if last == utf8.RuneError {
			// First rune of the pair; keep reading for its partner.
			last = cur
			continue
		}

		// This is a range whose start and end are the same.
		// Consider it as a normal character.
		if dashSeen && cur == last {
			dashSeen = false
			continue
		}

		if dashSeen {
			if last < cur {
				rangeStep = 1
			} else {
				rangeStep = -1
			}
		}

		return remaining, last, cur, rangeStep
	}

	return remaining, last, utf8.RuneError, 0
}
290 | 
291 | // Translate str with a from/to pattern pair.
292 | //
293 | // See comment in Translate function for usage and samples.
294 | func (tr *Translator) Translate(str string) string {
295 | 	if !tr.hasPattern || str == "" {
296 | 		return str
297 | 	}
298 | 
299 | 	var r rune
300 | 	var size int
301 | 	var needTr bool
302 | 
303 | 	orig := str
304 | 
305 | 	var output *stringBuilder
306 | 
307 | 	for len(str) > 0 {
308 | 		r, size = utf8.DecodeRuneInString(str)
309 | 		r, needTr = tr.TranslateRune(r)
310 | 
311 | 		if needTr && output == nil {
312 | 			output = allocBuffer(orig, str)
313 | 		}
314 | 
315 | 		if r != utf8.RuneError && output != nil {
316 | 			output.WriteRune(r)
317 | 		}
318 | 
319 | 		str = str[size:]
320 | 	}
321 | 
322 | 	// No character is translated.
323 | 	if output == nil {
324 | 		return orig
325 | 	}
326 | 
327 | 	return output.String()
328 | }
329 | 
330 | // TranslateRune return translated rune and true if r matches the from pattern.
331 | // If r doesn't match the pattern, original r is returned and translated is false.
332 | func (tr *Translator) TranslateRune(r rune) (result rune, translated bool) {
333 | 	switch {
334 | 	case tr.quickDict != nil:
335 | 		if r <= unicode.MaxASCII {
336 | 			result = tr.quickDict.Dict[r]
337 | 
338 | 			if result != 0 {
339 | 				translated = true
340 | 
341 | 				if tr.mappedRune >= 0 {
342 | 					result = tr.mappedRune
343 | 				}
344 | 
345 | 				break
346 | 			}
347 | 		}
348 | 
349 | 		fallthrough
350 | 
351 | 	case tr.runeMap != nil:
352 | 		var ok bool
353 | 
354 | 		if result, ok = tr.runeMap[r]; ok {
355 | 			translated = true
356 | 
357 | 			if tr.mappedRune >= 0 {
358 | 				result = tr.mappedRune
359 | 			}
360 | 
361 | 			break
362 | 		}
363 | 
364 | 		fallthrough
365 | 
366 | 	default:
367 | 		var rrm *runeRangeMap
368 | 		ranges := tr.ranges
369 | 
370 | 		for i := len(ranges) - 1; i >= 0; i-- {
371 | 			rrm = ranges[i]
372 | 
373 | 			if rrm.FromLo <= r && r <= rrm.FromHi {
374 | 				translated = true
375 | 
376 | 				if tr.mappedRune >= 0 {
377 | 					result = tr.mappedRune
378 | 					break
379 | 				}
380 | 
381 | 				if rrm.ToLo < rrm.ToHi {
382 | 					result = rrm.ToLo + r - rrm.FromLo
383 | 				} else if rrm.ToLo > rrm.ToHi {
384 | 					// ToHi can be smaller than ToLo if range is from higher to lower.
385 | 					result = rrm.ToLo - r + rrm.FromLo
386 | 				} else {
387 | 					result = rrm.ToLo
388 | 				}
389 | 
390 | 				break
391 | 			}
392 | 		}
393 | 	}
394 | 
395 | 	if tr.reverted {
396 | 		if !translated {
397 | 			result = tr.mappedRune
398 | 		}
399 | 
400 | 		translated = !translated
401 | 	}
402 | 
403 | 	if !translated {
404 | 		result = r
405 | 	}
406 | 
407 | 	return
408 | }
409 | 
// HasPattern returns true if Translator has one pattern at least,
// i.e. it was created by NewTranslator with a non-empty from pattern.
func (tr *Translator) HasPattern() bool {
	return tr.hasPattern
}
414 | 
415 | // Translate str with the characters defined in from replaced by characters defined in to.
416 | //
417 | // From and to are patterns representing a set of characters. Pattern is defined as following.
418 | //
419 | // Special characters:
420 | //
421 | //  1. '-' means a range of runes, e.g.
422 | //     "a-z" means all characters from 'a' to 'z' inclusive;
423 | //     "z-a" means all characters from 'z' to 'a' inclusive.
424 | //  2. '^' as first character means a set of all runes excepted listed, e.g.
425 | //     "^a-z" means all characters except 'a' to 'z' inclusive.
426 | //  3. '\' escapes special characters.
427 | //
428 | // Normal character represents itself, e.g. "abc" is a set including 'a', 'b' and 'c'.
429 | //
430 | // Translate will try to find a 1:1 mapping from from to to.
431 | // If to is smaller than from, last rune in to will be used to map "out of range" characters in from.
432 | //
433 | // Note that '^' only works in the from pattern. It will be considered as a normal character in the to pattern.
434 | //
435 | // If the to pattern is an empty string, Translate works exactly the same as Delete.
436 | //
437 | // Samples:
438 | //
439 | //	Translate("hello", "aeiou", "12345")    => "h2ll4"
440 | //	Translate("hello", "a-z", "A-Z")        => "HELLO"
441 | //	Translate("hello", "z-a", "a-z")        => "svool"
442 | //	Translate("hello", "aeiou", "*")        => "h*ll*"
443 | //	Translate("hello", "^l", "*")           => "**ll*"
444 | //	Translate("hello ^ world", `\^lo`, "*") => "he*** * w*r*d"
445 | func Translate(str, from, to string) string {
446 | 	tr := NewTranslator(from, to)
447 | 	return tr.Translate(str)
448 | }
449 | 
450 | // Delete runes in str matching the pattern.
451 | // Pattern is defined in Translate function.
452 | //
453 | // Samples:
454 | //
455 | //	Delete("hello", "aeiou") => "hll"
456 | //	Delete("hello", "a-k")   => "llo"
457 | //	Delete("hello", "^a-k")  => "he"
458 | func Delete(str, pattern string) string {
459 | 	tr := NewTranslator(pattern, "")
460 | 	return tr.Translate(str)
461 | }
462 | 
463 | // Count how many runes in str match the pattern.
464 | // Pattern is defined in Translate function.
465 | //
466 | // Samples:
467 | //
468 | //	Count("hello", "aeiou") => 3
469 | //	Count("hello", "a-k")   => 3
470 | //	Count("hello", "^a-k")  => 2
471 | func Count(str, pattern string) int {
472 | 	if pattern == "" || str == "" {
473 | 		return 0
474 | 	}
475 | 
476 | 	var r rune
477 | 	var size int
478 | 	var matched bool
479 | 
480 | 	tr := NewTranslator(pattern, "")
481 | 	cnt := 0
482 | 
483 | 	for len(str) > 0 {
484 | 		r, size = utf8.DecodeRuneInString(str)
485 | 		str = str[size:]
486 | 
487 | 		if _, matched = tr.TranslateRune(r); matched {
488 | 			cnt++
489 | 		}
490 | 	}
491 | 
492 | 	return cnt
493 | }
494 | 
495 | // Squeeze deletes adjacent repeated runes in str.
496 | // If pattern is not empty, only runes matching the pattern will be squeezed.
497 | //
498 | // Samples:
499 | //
500 | //	Squeeze("hello", "")             => "helo"
501 | //	Squeeze("hello", "m-z")          => "hello"
502 | //	Squeeze("hello   world", " ")    => "hello world"
503 | func Squeeze(str, pattern string) string {
504 | 	var last, r rune
505 | 	var size int
506 | 	var skipSqueeze, matched bool
507 | 	var tr *Translator
508 | 	var output *stringBuilder
509 | 
510 | 	orig := str
511 | 	last = -1
512 | 
513 | 	if len(pattern) > 0 {
514 | 		tr = NewTranslator(pattern, "")
515 | 	}
516 | 
517 | 	for len(str) > 0 {
518 | 		r, size = utf8.DecodeRuneInString(str)
519 | 
520 | 		// Need to squeeze the str.
521 | 		if last == r && !skipSqueeze {
522 | 			if tr != nil {
523 | 				if _, matched = tr.TranslateRune(r); !matched {
524 | 					skipSqueeze = true
525 | 				}
526 | 			}
527 | 
528 | 			if output == nil {
529 | 				output = allocBuffer(orig, str)
530 | 			}
531 | 
532 | 			if skipSqueeze {
533 | 				output.WriteRune(r)
534 | 			}
535 | 		} else {
536 | 			if output != nil {
537 | 				output.WriteRune(r)
538 | 			}
539 | 
540 | 			last = r
541 | 			skipSqueeze = false
542 | 		}
543 | 
544 | 		str = str[size:]
545 | 	}
546 | 
547 | 	if output == nil {
548 | 		return orig
549 | 	}
550 | 
551 | 	return output.String()
552 | }
553 | 


--------------------------------------------------------------------------------
/translate_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 Huan Du. All rights reserved.
 2 | // Licensed under the MIT license that can be found in the LICENSE file.
 3 | 
 4 | package xstrings
 5 | 
 6 | import (
 7 | 	"fmt"
 8 | 	"strings"
 9 | 	"testing"
10 | )
11 | 
12 | func TestTranslate(t *testing.T) {
13 | 	runner := func(str string) string {
14 | 		input := strings.Split(str, separator)
15 | 		return Translate(input[0], input[1], input[2])
16 | 	}
17 | 
18 | 	runTestCases(t, runner, _M{
19 | 		sep("hello", "aeiou", "12345"):    "h2ll4",
20 | 		sep("hello", "aeiou", ""):         "hll",
21 | 		sep("hello", "a-z", "A-Z"):        "HELLO",
22 | 		sep("hello", "z-a", "a-z"):        "svool",
23 | 		sep("hello", "aeiou", "*"):        "h*ll*",
24 | 		sep("hello", "^l", "*"):           "**ll*",
25 | 		sep("hello", "p-z", "*"):          "hello",
26 | 		sep("hello ^ world", `\^lo`, "*"): "he*** * w*r*d",
27 | 
28 | 		sep("中文字符测试", "文中谁敢试？", "123456"):  "21字符测5",
29 | 		sep("中文字符测试", "^文中谁敢试？", "123456"): "中文666试",
30 | 		sep("中文字符测试", "字-试", "0-9"):        "中90999",
31 | 
32 | 		sep("h1e2l3l4o, w5o6r7l8d", "a-z,0-9", `A-Z\-a-czk-p`):       "HbEcLzLkO- WlOmRnLoD",
33 | 		sep("h1e2l3l4o, w5o6r7l8d", "a-zoh-n", "b-zakt-z"):           "t1f2x3x4k, x5k6s7x8e",
34 | 		sep("h1e2l3l4o, w5o6r7l8d", "helloa-zoh-n", "99999b-zakt-z"): "t1f2x3x4k, x5k6s7x8e",
35 | 
36 | 		sep("hello", "e-", "p"):        "hpllo",
37 | 		sep("hello", "-e-", "p"):       "hpllo",
38 | 		sep("hello", "----e---", "p"):  "hpllo",
39 | 		sep("hello", "^---e----", "p"): "peppp",
40 | 
41 | 		sep("hel\uFFFDlo", "\uFFFD", "H"):    "helHlo",
42 | 		sep("hel\uFFFDlo", "^\uFFFD", "H"):   "HHHHH",
43 | 		sep("hel\uFFFDlo", "o-\uFFFDh", "H"): "HelHlH",
44 | 	})
45 | }
46 | 
47 | func TestDelete(t *testing.T) {
48 | 	runner := func(str string) string {
49 | 		input := strings.Split(str, separator)
50 | 		return Delete(input[0], input[1])
51 | 	}
52 | 
53 | 	runTestCases(t, runner, _M{
54 | 		sep("hello", "aeiou"): "hll",
55 | 		sep("hello", "a-k"):   "llo",
56 | 		sep("hello", "^a-k"):  "he",
57 | 
58 | 		sep("中文字符测试", "文中谁敢试？"): "字符测",
59 | 	})
60 | }
61 | 
62 | func TestCount(t *testing.T) {
63 | 	runner := func(str string) string {
64 | 		input := strings.Split(str, separator)
65 | 		return fmt.Sprint(Count(input[0], input[1]))
66 | 	}
67 | 
68 | 	runTestCases(t, runner, _M{
69 | 		sep("hello", "aeiou"): "2",
70 | 		sep("hello", "a-k"):   "2",
71 | 		sep("hello", "^a-k"):  "3",
72 | 
73 | 		sep("中文字符测试", "文中谁敢试？"): "3",
74 | 	})
75 | }
76 | 
77 | func TestSqueeze(t *testing.T) {
78 | 	runner := func(str string) string {
79 | 		input := strings.Split(str, separator)
80 | 		return Squeeze(input[0], input[1])
81 | 	}
82 | 
83 | 	runTestCases(t, runner, _M{
84 | 		sep("hello", ""):             "helo",
85 | 		sep("hello     world", ""):   "helo world",
86 | 		sep("hello     world", " "):  "hello world",
87 | 		sep("hello     world", "  "): "hello world",
88 | 		sep("hello", "a-k"):          "hello",
89 | 		sep("hello", "^a-k"):         "helo",
90 | 		sep("hello", "^a-l"):         "hello",
91 | 		sep("foooo baaaaar", "a"):    "foooo bar",
92 | 
93 | 		sep("打打打打个劫！！", ""):  "打个劫！",
94 | 		sep("打打打打个劫！！", "打"): "打个劫！！",
95 | 	})
96 | }
97 | 


--------------------------------------------------------------------------------
/util_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2015 Huan Du. All rights reserved.
 2 | // Licensed under the MIT license that can be found in the LICENSE file.
 3 | 
 4 | package xstrings
 5 | 
 6 | import (
 7 | 	"strings"
 8 | 	"testing"
 9 | )
10 | 
// _M is a table of test cases: each key is the input handed to the function
// under test and each value is the expected result.
type _M map[string]string

const (
	// separator joins several arguments into a single test-case string
	// (see sep) and is used to split them apart again (see split).
	separator = " ¶ "
)
16 | 
// runTestCases feeds every key of cases to converter and fails the test
// immediately, reporting the case, the expected and the actual value, as soon
// as a result differs from the value stored for that key.
func runTestCases(t *testing.T, converter func(string) string, cases map[string]string) {
	for input, want := range cases {
		if got := converter(input); got != want {
			t.Fatalf("case fails. [case:%v]\nshould => %#v\nactual => %#v", input, want, got)
		}
	}
}
26 | 
// sep joins strs into a single string with separator between the elements.
// Calling it without arguments yields the empty string.
func sep(strs ...string) string {
	return strings.Join(strs, separator)
}
30 | 
// split is the inverse of sep: it cuts str at every occurrence of separator
// and returns the pieces.
func split(str string) []string {
	return strings.Split(str, separator)
}
34 | 


--------------------------------------------------------------------------------