├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── _fixtures ├── test_file ├── test_file.expected.txt ├── test_file.txt ├── test_file_large.txt ├── test_file_no_eof_newline.expected.txt ├── test_file_no_eof_newline.txt └── 測試.txt ├── benchmark_test.go ├── doc.go ├── example_test.go ├── pangu-axe ├── doc.go ├── pangu-axe.go └── pangu-axe_test.go ├── pangu.go └── pangu_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | .DS_Store 27 | *.out 28 | *.pangu 29 | *.pangu.* 30 | pangu.* 31 | readable.* 32 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.4.2 5 | - 1.4.1 6 | - 1.4 7 | - 1.3.3 8 | - 1.3.2 9 | - 1.3.1 10 | - 1.3 11 | 12 | before_install: 13 | - go get github.com/axw/gocov/gocov 14 | - go get github.com/mattn/goveralls 15 | - go get golang.org/x/tools/cmd/cover 16 | 17 | script: 18 | - go get -d -t -v ./... 19 | - go build -v ./... 20 | - go test -v ./... 
21 | - go test -coverprofile=coverage.txt -covermode=atomic 22 | 23 | after_success: 24 | - bash <(curl -s https://codecov.io/bash) 25 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2.5.6 (2015-05-17) 4 | 5 | - Synchronize version number with [paranoid-auto-spacing](https://github.com/vinta/paranoid-auto-spacing) 6 | 7 | ## 1.2.0 (2015-05-17) 8 | 9 | - Improve Paranoid Text Spacing algorithm 10 | - Improve spacing performance 11 | 12 | ## 1.1.0 (2015-05-11) 13 | 14 | - Improve Paranoid Text Spacing algorithm 15 | - `pangu-axe` accepts multiple files 16 | 17 | ## 1.0.0 (2015-05-03) 18 | 19 | - Bang! Release! 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Vinta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pangu.go 2 | ======== 3 | 4 | [![](https://img.shields.io/badge/made%20with-%e2%9d%a4-ff69b4.svg?style=flat-square)](https://vinta.ws/code/) 5 | 6 | Paranoid text spacing for good readability, to automatically insert whitespace between CJK (Chinese, Japanese, Korean) and half-width characters (alphabetical letters, numerical digits and symbols). 7 | 8 | - [pangu.go](https://github.com/vinta/pangu) (Go) 9 | - [pangu.java](https://github.com/vinta/pangu.java) (Java) 10 | - [pangu.js](https://github.com/vinta/pangu.js) (JavaScript) 11 | - [pangu.py](https://github.com/vinta/pangu.py) (Python) 12 | - [pangu.space](https://github.com/vinta/pangu.space) (Web API) 13 | 14 | ## Installation 15 | 16 | To install the package, `pangu`, for use in your Go programs: 17 | 18 | ```console 19 | $ go get -u github.com/vinta/pangu 20 | ``` 21 | 22 | To install the command-line tool, `pangu-axe`: 23 | 24 | ```console 25 | $ go get -u github.com/vinta/pangu/pangu-axe 26 | ``` 27 | 28 | ## Usage 29 | 30 | ### Package 31 | 32 | ```go 33 | package main 34 | 35 | import ( 36 | "fmt" 37 | "github.com/vinta/pangu" 38 | ) 39 | 40 | func main() { 41 | s := pangu.SpacingText("當你凝視著bug,bug也凝視著你") 42 | fmt.Println(s) 43 | // Output: 44 | // 當你凝視著 bug,bug 也凝視著你 45 | } 46 | ``` 47 | 48 | ### Command-line Interface 49 | 50 | ```console 51 | $ pangu-axe text "與PM戰鬥的人,應當小心自己不要成為PM" 52 | 與 PM 戰鬥的人,應當小心自己不要成為 PM 53 | 54 | $ pangu-axe file 銀河便車指南.txt 55 | $ pangu-axe file 宇宙盡頭的餐廳.txt -o 宇宙盡頭的餐廳(好讀版).txt 56 | $ pangu-axe file 生命、宇宙及萬事萬物.txt 
再見,謝謝你的魚.txt 基本無害.txt 57 | ``` 58 | 59 | ## Documentation 60 | 61 | - `pangu` on [GoDoc](https://godoc.org/github.com/vinta/pangu) 62 | - `pangu-axe` on [GoDoc](https://godoc.org/github.com/vinta/pangu/pangu-axe) 63 | 64 | Have a question? Ask it on the [GitHub issues](https://github.com/vinta/pangu/issues)! 65 | -------------------------------------------------------------------------------- /_fixtures/test_file: -------------------------------------------------------------------------------- 1 | Sephiroth見到他這等神情,也是悚然一驚:「此人來歷不小啊!不知我這太極拳是否對付得了?」 2 | 3 | 張無忌道:「Tifa,你待孩兒恩重如山,孩兒便粉身碎骨,也不足以報太師父和Red XIII的大恩。我武當派功夫雖不敢說天下無敵,但也不致輸於西域少林的手下。太師父儘管放心。」 4 | 5 | 123 6 | -------------------------------------------------------------------------------- /_fixtures/test_file.expected.txt: -------------------------------------------------------------------------------- 1 | Sephiroth 見到他這等神情,也是悚然一驚:「此人來歷不小啊!不知我這太極拳是否對付得了?」 2 | 3 | 張無忌道:「Tifa,你待孩兒恩重如山,孩兒便粉身碎骨,也不足以報太師父和 Red XIII 的大恩。我武當派功夫雖不敢說天下無敵,但也不致輸於西域少林的手下。太師父儘管放心。」 4 | 5 | 123 6 | -------------------------------------------------------------------------------- /_fixtures/test_file.txt: -------------------------------------------------------------------------------- 1 | Sephiroth見到他這等神情,也是悚然一驚:「此人來歷不小啊!不知我這太極拳是否對付得了?」 2 | 3 | 張無忌道:「Tifa,你待孩兒恩重如山,孩兒便粉身碎骨,也不足以報太師父和Red XIII的大恩。我武當派功夫雖不敢說天下無敵,但也不致輸於西域少林的手下。太師父儘管放心。」 4 | 5 | 123 6 | -------------------------------------------------------------------------------- /_fixtures/test_file_large.txt: -------------------------------------------------------------------------------- 1 | TODO 2 | -------------------------------------------------------------------------------- /_fixtures/test_file_no_eof_newline.expected.txt: -------------------------------------------------------------------------------- 1 | Sephiroth 見到他這等神情,也是悚然一驚:「此人來歷不小啊!不知我這太極拳是否對付得了?」 2 | 3 | 張無忌道:「Tifa,你待孩兒恩重如山,孩兒便粉身碎骨,也不足以報太師父和 Red XIII 的大恩。我武當派功夫雖不敢說天下無敵,但也不致輸於西域少林的手下。太師父儘管放心。」 4 | 5 | 123 
-------------------------------------------------------------------------------- /_fixtures/test_file_no_eof_newline.txt: -------------------------------------------------------------------------------- 1 | Sephiroth見到他這等神情,也是悚然一驚:「此人來歷不小啊!不知我這太極拳是否對付得了?」 2 | 3 | 張無忌道:「Tifa,你待孩兒恩重如山,孩兒便粉身碎骨,也不足以報太師父和Red XIII的大恩。我武當派功夫雖不敢說天下無敵,但也不致輸於西域少林的手下。太師父儘管放心。」 4 | 5 | 123 -------------------------------------------------------------------------------- /_fixtures/測試.txt: -------------------------------------------------------------------------------- 1 | Sephiroth見到他這等神情,也是悚然一驚:「此人來歷不小啊!不知我這太極拳是否對付得了?」 2 | 3 | 張無忌道:「Tifa,你待孩兒恩重如山,孩兒便粉身碎骨,也不足以報太師父和Red XIII的大恩。我武當派功夫雖不敢說天下無敵,但也不致輸於西域少林的手下。太師父儘管放心。」 4 | 5 | 123 6 | -------------------------------------------------------------------------------- /benchmark_test.go: -------------------------------------------------------------------------------- 1 | package pangu_test 2 | 3 | import ( 4 | "github.com/vinta/pangu" 5 | "testing" 6 | ) 7 | 8 | func BenchmarkSpacingText(b *testing.B) { 9 | for i := 0; i < b.N; i++ { 10 | pangu.SpacingText("所以,請問Jackey的鼻子有幾個?3.14個!") 11 | } 12 | } 13 | 14 | func BenchmarkSpacingFile(b *testing.B) { 15 | for i := 0; i < b.N; i++ { 16 | ExampleSpacingFile() 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package pangu provides paranoid text spacing for good readability, 2 | // to automatically insert whitespace between 3 | // CJK (Chinese, Japanese, Korean) and half-width characters 4 | // (alphabetical letters, numerical digits and symbols). 5 | // 6 | // These whitespaces between Chinese and English characters are 7 | // called "Pangu's Space" by sinologists, since they separate the chaos 8 | // between full-width and half-width characters. 
Studies showed that 9 | // those who dislike adding whitespace between English and Chinese characters 10 | // also have relationship problems. Almost 70 percent of them will get 11 | // married to the one they don't love; the rest can only leave their 12 | // heritage to their cats. Indeed, love and writing need some space in 13 | // good time. 14 | // 15 | // For more information about pangu, see 16 | // https://github.com/vinta/paranoid-auto-spacing 17 | package pangu 18 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package pangu_test 2 | 3 | import ( 4 | "fmt" 5 | "github.com/vinta/pangu" 6 | "os" 7 | ) 8 | 9 | func ExampleSpacingText() { 10 | s := pangu.SpacingText("所以,請問Jackey的鼻子有幾個?3.14個!") 11 | fmt.Println(s) 12 | // Output: 13 | // 所以, 請問 Jackey 的鼻子有幾個? 3.14 個! 14 | } 15 | 16 | func ExampleSpacingFile() { 17 | input := "_fixtures/test_file.txt" 18 | output := "_fixtures/test_file.pangu.txt" 19 | 20 | fw, err := os.Create(output) 21 | if err != nil { 22 | fmt.Println(err) 23 | } 24 | defer fw.Close() 25 | 26 | pangu.SpacingFile(input, fw) 27 | } 28 | -------------------------------------------------------------------------------- /pangu-axe/doc.go: -------------------------------------------------------------------------------- 1 | // pangu-axe (盤古斧) is a command-line interface for pangu. 2 | // 3 | // It separates the chaos between CJK (Chinese, Japanese, Korean) and half-width characters. 
4 | // 5 | // If you want to use pangu in your Go programs, see 6 | // https://github.com/vinta/pangu 7 | package main 8 | -------------------------------------------------------------------------------- /pangu-axe/pangu-axe.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/codegangsta/cli" 6 | "github.com/fatih/color" 7 | "github.com/vinta/pangu" 8 | "os" 9 | "path/filepath" 10 | ) 11 | 12 | const ( 13 | NAME = "pangu-axe" 14 | USAGE = "Paranoid text spacing" 15 | VERSION = "2.5.6" 16 | AUTHOR = "Vinta Chen" 17 | EMAIL = "vinta.chen@gmail.com" 18 | ) 19 | 20 | // PREFIX is the prefix of the output filename 21 | // TODO 22 | var PREFIX = "readable." 23 | 24 | func prefixFilename(path, specified string) string { 25 | if len(specified) > 0 { 26 | return specified 27 | } 28 | 29 | filename := filepath.Base(path) 30 | newFilename := PREFIX + filename 31 | 32 | return newFilename 33 | } 34 | 35 | func processFile(errc chan error, filename, o string) { 36 | var fw *os.File 37 | var err error 38 | 39 | _, err = os.Stat(filename) 40 | if err != nil { 41 | errc <- err 42 | return 43 | } 44 | 45 | switch o { 46 | case "stdout", "STDOUT": 47 | fw = os.Stdout 48 | case "stderr", "STDERR": 49 | fw = os.Stderr 50 | default: 51 | newFilename := prefixFilename(filename, o) 52 | fw, err = os.Create(newFilename) 53 | if err != nil { 54 | errc <- err 55 | return 56 | } 57 | defer fw.Close() 58 | } 59 | 60 | err = pangu.SpacingFile(filename, fw) 61 | errc <- err 62 | } 63 | 64 | func main() { 65 | app := cli.NewApp() 66 | app.Name = NAME 67 | app.Usage = USAGE 68 | app.Version = VERSION 69 | app.Author = AUTHOR 70 | app.Email = EMAIL 71 | app.Commands = []cli.Command{ 72 | { 73 | Name: "text", 74 | Usage: "Performs paranoid text spacing on text", 75 | Aliases: []string{"t"}, 76 | Action: func(c *cli.Context) { 77 | if len(c.Args()) == 0 { 78 | cli.ShowSubcommandHelp(c) 79 | return 80 | } 81 | 82 | 
text := c.Args().First() 83 | fmt.Println(pangu.SpacingText(text)) 84 | }, 85 | }, 86 | { 87 | Name: "file", 88 | Usage: "Performs paranoid text spacing on files", 89 | Aliases: []string{"f"}, 90 | Flags: []cli.Flag{ 91 | cli.StringFlag{ 92 | Name: "output, o", 93 | Value: "", 94 | Usage: fmt.Sprintf(`Specifies the output file name. If not specified, the output file name will be "%sfilename.ext"`, PREFIX), 95 | }, 96 | }, 97 | Action: func(c *cli.Context) { 98 | if len(c.Args()) == 0 { 99 | cli.ShowSubcommandHelp(c) 100 | return 101 | } 102 | 103 | o := c.String("output") 104 | 105 | if len(c.Args()) > 1 && len(o) > 0 { 106 | color.Red(`can't use the "-output" flag with multiple files`) 107 | os.Exit(1) 108 | } 109 | 110 | errc := make(chan error) 111 | 112 | for _, filename := range c.Args() { 113 | go processFile(errc, filename, o) 114 | } 115 | 116 | for _ = range c.Args() { 117 | err := <-errc 118 | if err != nil { 119 | color.Red("%s", err) 120 | } 121 | } 122 | }, 123 | }, 124 | } 125 | 126 | app.Run(os.Args) 127 | } 128 | -------------------------------------------------------------------------------- /pangu-axe/pangu-axe_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/stretchr/testify/suite" 6 | "io/ioutil" 7 | "os" 8 | "testing" 9 | ) 10 | 11 | type PanguAxeTestSuite struct { 12 | suite.Suite 13 | realStdout *os.File 14 | r, w *os.File 15 | } 16 | 17 | // The SetupTest method will be run before every test in the suite. 18 | func (suite *PanguAxeTestSuite) SetupTest() { 19 | suite.realStdout = os.Stdout 20 | suite.r, suite.w, _ = os.Pipe() 21 | os.Stdout = suite.w 22 | } 23 | 24 | // The TearDownTest method will be run after every test in the suite. 
25 | func (suite *PanguAxeTestSuite) TearDownTest() { 26 | os.Stdout = suite.realStdout 27 | } 28 | 29 | // In order for 'go test' to run this suite, we need to create 30 | // a normal test function and pass our suite to suite.Run 31 | func TestPanguTestSuite(t *testing.T) { 32 | suite.Run(t, new(PanguAxeTestSuite)) 33 | } 34 | 35 | func (suite *PanguAxeTestSuite) getOutput() string { 36 | suite.w.Close() 37 | out, _ := ioutil.ReadAll(suite.r) 38 | outs := fmt.Sprintf("%s", out) 39 | 40 | return outs 41 | } 42 | 43 | func (suite *PanguAxeTestSuite) TestTextCmd() { 44 | os.Args = []string{NAME, "text", "新八的構造成分有95%是眼鏡、3%是水、2%是垃圾"} 45 | main() 46 | 47 | suite.Equal("新八的構造成分有 95% 是眼鏡、3% 是水、2% 是垃圾\n", suite.getOutput()) 48 | } 49 | 50 | func (suite *PanguAxeTestSuite) TestFileCmd() { 51 | os.Args = []string{NAME, "file", "../_fixtures/test_file.txt"} 52 | main() 53 | 54 | suite.Equal("", suite.getOutput()) 55 | } 56 | -------------------------------------------------------------------------------- /pangu.go: -------------------------------------------------------------------------------- 1 | package pangu 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "os" 9 | "regexp" 10 | "text/template" 11 | ) 12 | 13 | const VERSION = "3.0.0" 14 | 15 | // CJK is short for Chinese, Japanese and Korean. 
16 | // 17 | // The constant cjk contains the following Unicode blocks: 18 | // \u2e80-\u2eff CJK Radicals Supplement 19 | // \u2f00-\u2fdf Kangxi Radicals 20 | // \u3040-\u309f Hiragana 21 | // \u30a0-\u30ff Katakana 22 | // \u3100-\u312f Bopomofo 23 | // \u3200-\u32ff Enclosed CJK Letters and Months 24 | // \u3400-\u4dbf CJK Unified Ideographs Extension A 25 | // \u4e00-\u9fff CJK Unified Ideographs 26 | // \uf900-\ufaff CJK Compatibility Ideographs 27 | // 28 | // For more information about Unicode blocks, see 29 | // http://unicode-table.com/en/ 30 | const cjk = "" + 31 | "\u2e80-\u2eff" + 32 | "\u2f00-\u2fdf" + 33 | "\u3040-\u309f" + 34 | "\u30a0-\u30ff" + 35 | "\u3100-\u312f" + 36 | "\u3200-\u32ff" + 37 | "\u3400-\u4dbf" + 38 | "\u4e00-\u9fff" + 39 | "\uf900-\ufaff" 40 | 41 | // ANS is short for Alphabets, Numbers 42 | // and Symbols (`~!@#$%^&*()-_=+[]{}\|;:'",<.>/?). 43 | // 44 | // The constant ans doesn't contain all the symbols above. 45 | const ans = "A-Za-z0-9`\\$%\\^&\\*\\-=\\+\\\\|/\u00a1-\u00ff\u2022\u2027\u2150-\u218f" 46 | 47 | var cjk_quote = regexp.MustCompile(re("([{{ .CJK }}])" + "([\"'])")) 48 | var quote_cjk = regexp.MustCompile(re("([\"'])" + "([{{ .CJK }}])")) 49 | var fix_quote = regexp.MustCompile(re("([\"'\\(\\[\\{<\u201c])" + "(\\s*)" + "(.+?)" + "(\\s*)" + "([\"'\\)\\]\\}>\u201d])")) 50 | var fix_single_quote = regexp.MustCompile(re("([{{ .CJK }}])" + "( )" + "(')" + "([A-Za-z])")) 51 | 52 | var cjk_hash = regexp.MustCompile(re("([{{ .CJK }}])" + "(#(\\S+))")) 53 | var hash_cjk = regexp.MustCompile(re("((\\S+)#)" + "([{{ .CJK }}])")) 54 | 55 | var cjk_operator_ans = regexp.MustCompile(re("([{{ .CJK }}])" + "([\\+\\-\\*/=&\\|<>])" + "([A-Za-z0-9])")) 56 | var ans_operator_cjk = regexp.MustCompile(re("([A-Za-z0-9])" + "([\\+\\-\\*/=&\\|<>])" + "([{{ .CJK }}])")) 57 | 58 | var cjk_bracket_cjk = regexp.MustCompile(re("([{{ .CJK }}])" + "([\\(\\[\\{<\u201c]+(.*?)[\\)\\]\\}>\u201d]+)" + "([{{ .CJK }}])")) 59 | var cjk_bracket = 
regexp.MustCompile(re("([{{ .CJK }}])" + "([\\(\\[\\{<\u201c>])")) 60 | var bracket_cjk = regexp.MustCompile(re("([\\)\\]\\}>\u201d<])" + "([{{ .CJK }}])")) 61 | var fix_bracket = regexp.MustCompile(re("([\\(\\[\\{<\u201c]+)" + "(\\s*)" + "(.+?)" + "(\\s*)" + "([\\)\\]\\}>\u201d]+)")) 62 | 63 | var fix_symbol = regexp.MustCompile(re("([{{ .CJK }}])" + "([~!;:,\\.\\?\u2026])" + "([A-Za-z0-9])")) 64 | 65 | var cjk_ans = regexp.MustCompile(re("([{{ .CJK }}])([{{ .ANS }}@])")) 66 | var ans_cjk = regexp.MustCompile(re("([{{ .ANS }}~!;:,\\.\\?\u2026])([{{ .CJK }}])")) 67 | 68 | var context = map[string]string{ 69 | "CJK": cjk, 70 | "ANS": ans, 71 | } 72 | 73 | func re(exp string) string { 74 | var buf bytes.Buffer 75 | 76 | var tmpl = template.New("pangu") 77 | tmpl, _ = tmpl.Parse(exp) 78 | tmpl.Execute(&buf, context) 79 | expr := buf.String() 80 | 81 | return expr 82 | } 83 | 84 | // SpacingText performs paranoid text spacing on text. 85 | // It returns the processed text, with love. 86 | func SpacingText(text string) string { 87 | if len(text) < 2 { 88 | return text 89 | } 90 | 91 | text = cjk_quote.ReplaceAllString(text, "$1 $2") 92 | text = quote_cjk.ReplaceAllString(text, "$1 $2") 93 | text = fix_quote.ReplaceAllString(text, "$1$3$5") 94 | text = fix_single_quote.ReplaceAllString(text, "$1$3$4") 95 | 96 | text = cjk_hash.ReplaceAllString(text, "$1 $2") 97 | text = hash_cjk.ReplaceAllString(text, "$1 $3") 98 | 99 | text = cjk_operator_ans.ReplaceAllString(text, "$1 $2 $3") 100 | text = ans_operator_cjk.ReplaceAllString(text, "$1 $2 $3") 101 | 102 | oldText := text 103 | newText := cjk_bracket_cjk.ReplaceAllString(oldText, "$1 $2 $4") 104 | text = newText 105 | if oldText == newText { 106 | text = cjk_bracket.ReplaceAllString(text, "$1 $2") 107 | text = bracket_cjk.ReplaceAllString(text, "$1 $2") 108 | } 109 | text = fix_bracket.ReplaceAllString(text, "$1$3$5") 110 | 111 | text = fix_symbol.ReplaceAllString(text, "$1$2 $3") 112 | 113 | text = 
cjk_ans.ReplaceAllString(text, "$1 $2") 114 | text = ans_cjk.ReplaceAllString(text, "$1 $2") 115 | 116 | return text 117 | } 118 | 119 | // SpacingFile reads the file named by filename, performs paranoid text 120 | // spacing on its contents and writes the processed content to w. 121 | // A successful call returns err == nil. 122 | func SpacingFile(filename string, w io.Writer) (err error) { 123 | fr, err := os.Open(filename) 124 | if err != nil { 125 | return err 126 | } 127 | defer fr.Close() 128 | 129 | br := bufio.NewReader(fr) 130 | bw := bufio.NewWriter(w) 131 | 132 | for { 133 | line, err := br.ReadString('\n') 134 | if err == nil { 135 | fmt.Fprint(bw, SpacingText(line)) 136 | } else { 137 | if err == io.EOF { 138 | fmt.Fprint(bw, SpacingText(line)) 139 | break 140 | } 141 | return err 142 | } 143 | } 144 | defer bw.Flush() 145 | 146 | return nil 147 | } 148 | -------------------------------------------------------------------------------- /pangu_test.go: -------------------------------------------------------------------------------- 1 | package pangu_test 2 | 3 | import ( 4 | "crypto/md5" 5 | "encoding/hex" 6 | "fmt" 7 | "github.com/stretchr/testify/suite" 8 | "github.com/vinta/pangu" 9 | "io" 10 | "io/ioutil" 11 | "os" 12 | "testing" 13 | ) 14 | 15 | type PanguTestSuite struct { 16 | suite.Suite 17 | } 18 | 19 | // In order for 'go test' to run this suite, we need to create 20 | // a normal test function and pass our suite to suite.Run 21 | func TestPanguTestSuite(t *testing.T) { 22 | suite.Run(t, new(PanguTestSuite)) 23 | } 24 | 25 | func checkError(err error) { 26 | if err != nil { 27 | fmt.Println(err) 28 | } 29 | } 30 | 31 | func md5Of(filename string) string { 32 | var result []byte 33 | 34 | file, err := os.Open(filename) 35 | checkError(err) 36 | defer file.Close() 37 | 38 | hash := md5.New() 39 | _, err = io.Copy(hash, file) 40 | checkError(err) 41 | 42 | checksum := hex.EncodeToString(hash.Sum(result)) 43 | 44 | return checksum 45 | } 46 | 47 | 
func (suite *PanguTestSuite) TestSpacingText() { 48 | suite.Equal(pangu.SpacingText(`新八的構造成分有95%是眼鏡、3%是水、2%是垃圾`), `新八的構造成分有 95% 是眼鏡、3% 是水、2% 是垃圾`) 49 | suite.Equal(pangu.SpacingText(`所以,請問Jackey的鼻子有幾個?3.14個!`), `所以, 請問 Jackey 的鼻子有幾個? 3.14 個!`) 50 | suite.Equal(pangu.SpacingText(`JUST WE就是JUST WE,既不偉大也不卑微!`), `JUST WE 就是 JUST WE,既不偉大也不卑微!`) 51 | suite.Equal(pangu.SpacingText(`搭載MP3播放器,連續播放時數最長達到124小時的超強利刃……菊一文字RX-7!`), `搭載 MP3 播放器,連續播放時數最長達到 124 小時的超強利刃…… 菊一文字 RX-7!`) 52 | suite.Equal(pangu.SpacingText(`V`), `V`) 53 | } 54 | 55 | func (suite *PanguTestSuite) TestLatin1Supplement() { 56 | suite.Equal(pangu.SpacingText(`中文Ø漢字`), `中文 Ø 漢字`) 57 | suite.Equal(pangu.SpacingText(`中文 Ø 漢字`), `中文 Ø 漢字`) 58 | } 59 | 60 | func (suite *PanguTestSuite) TestGeneralPunctuation() { 61 | suite.Equal(pangu.SpacingText(`中文•漢字`), `中文 • 漢字`) 62 | suite.Equal(pangu.SpacingText(`中文 • 漢字`), `中文 • 漢字`) 63 | } 64 | 65 | func (suite *PanguTestSuite) TestNumberForms() { 66 | suite.Equal(pangu.SpacingText(`中文Ⅶ漢字`), `中文 Ⅶ 漢字`) 67 | suite.Equal(pangu.SpacingText(`中文 Ⅶ 漢字`), `中文 Ⅶ 漢字`) 68 | } 69 | 70 | func (suite *PanguTestSuite) TestCJKRadicalsSupplement() { 71 | suite.Equal(pangu.SpacingText(`abc⻤123`), `abc ⻤ 123`) 72 | suite.Equal(pangu.SpacingText(`abc ⻤ 123`), `abc ⻤ 123`) 73 | } 74 | 75 | func (suite *PanguTestSuite) TestKangxiRadicals() { 76 | suite.Equal(pangu.SpacingText(`abc⾗123`), `abc ⾗ 123`) 77 | suite.Equal(pangu.SpacingText(`abc ⾗ 123`), `abc ⾗ 123`) 78 | } 79 | 80 | func (suite *PanguTestSuite) TestHiragana() { 81 | suite.Equal(pangu.SpacingText(`abcあ123`), `abc あ 123`) 82 | suite.Equal(pangu.SpacingText(`abc あ 123`), `abc あ 123`) 83 | } 84 | 85 | func (suite *PanguTestSuite) TestKatakana() { 86 | suite.Equal(pangu.SpacingText(`abcア123`), `abc ア 123`) 87 | suite.Equal(pangu.SpacingText(`abc ア 123`), `abc ア 123`) 88 | } 89 | 90 | func (suite *PanguTestSuite) TestBopomofo() { 91 | suite.Equal(pangu.SpacingText(`abcㄅ123`), `abc ㄅ 123`) 92 | suite.Equal(pangu.SpacingText(`abc ㄅ 
123`), `abc ㄅ 123`) 93 | } 94 | 95 | func (suite *PanguTestSuite) TestEnclosedCJKLettersAndMonths() { 96 | suite.Equal(pangu.SpacingText(`abc㈱123`), `abc ㈱ 123`) 97 | suite.Equal(pangu.SpacingText(`abc ㈱ 123`), `abc ㈱ 123`) 98 | } 99 | 100 | func (suite *PanguTestSuite) TestCJKUnifiedIdeographsExtensionA() { 101 | suite.Equal(pangu.SpacingText(`abc㐂123`), `abc 㐂 123`) 102 | suite.Equal(pangu.SpacingText(`abc 㐂 123`), `abc 㐂 123`) 103 | } 104 | 105 | func (suite *PanguTestSuite) TestCJKUnifiedIdeographs() { 106 | suite.Equal(pangu.SpacingText(`abc丁123`), `abc 丁 123`) 107 | suite.Equal(pangu.SpacingText(`abc 丁 123`), `abc 丁 123`) 108 | } 109 | 110 | func (suite *PanguTestSuite) TestCJKCompatibilityIdeographs() { 111 | suite.Equal(pangu.SpacingText(`abc車123`), `abc 車 123`) 112 | suite.Equal(pangu.SpacingText(`abc 車 123`), `abc 車 123`) 113 | } 114 | 115 | func (suite *PanguTestSuite) TestTilde() { 116 | suite.Equal(pangu.SpacingText(`前面~後面`), `前面~ 後面`) 117 | suite.Equal(pangu.SpacingText(`前面 ~ 後面`), `前面 ~ 後面`) 118 | suite.Equal(pangu.SpacingText(`前面~ 後面`), `前面~ 後面`) 119 | } 120 | 121 | func (suite *PanguTestSuite) TestBackQuote() { 122 | suite.Equal("前面 ` 後面", pangu.SpacingText("前面`後面")) 123 | suite.Equal("前面 ` 後面", pangu.SpacingText("前面 ` 後面")) 124 | suite.Equal("前面 ` 後面", pangu.SpacingText("前面` 後面")) 125 | } 126 | 127 | func (suite *PanguTestSuite) TestExclamationMark() { 128 | suite.Equal(pangu.SpacingText(`前面!後面`), `前面! 後面`) 129 | suite.Equal(pangu.SpacingText(`前面 ! 後面`), `前面 ! 後面`) 130 | suite.Equal(pangu.SpacingText(`前面! 後面`), `前面! 
後面`) 131 | } 132 | 133 | func (suite *PanguTestSuite) TestAt() { 134 | // https://twitter.com/vinta 135 | suite.Equal(pangu.SpacingText(`前面@vinta後面`), `前面 @vinta 後面`) 136 | suite.Equal(pangu.SpacingText(`前面 @vinta 後面`), `前面 @vinta 後面`) 137 | 138 | // http://weibo.com/vintalines 139 | suite.Equal(pangu.SpacingText(`前面@陳上進 後面`), `前面 @陳上進 後面`) 140 | suite.Equal(pangu.SpacingText(`前面 @陳上進 後面`), `前面 @陳上進 後面`) 141 | suite.Equal(pangu.SpacingText(`前面 @陳上進tail`), `前面 @陳上進 tail`) 142 | 143 | // TODO 144 | // suite.Equal(pangu.SpacingText(`陳上進@地球`), `陳上進@地球`) 145 | } 146 | 147 | func (suite *PanguTestSuite) TestHash() { 148 | suite.Equal(pangu.SpacingText(`前面#H2G2後面`), `前面 #H2G2 後面`) 149 | suite.Equal(pangu.SpacingText(`前面#銀河便車指南 後面`), `前面 #銀河便車指南 後面`) 150 | suite.Equal(pangu.SpacingText(`前面#銀河便車指南tail`), `前面 #銀河便車指南 tail`) 151 | suite.Equal(pangu.SpacingText(`前面#銀河公車指南 #銀河拖吊車指南 後面`), `前面 #銀河公車指南 #銀河拖吊車指南 後面`) 152 | 153 | suite.Equal(pangu.SpacingText(`前面#H2G2#後面`), `前面 #H2G2# 後面`) 154 | suite.Equal(pangu.SpacingText(`前面#銀河閃電霹靂車指南#後面`), `前面 #銀河閃電霹靂車指南# 後面`) 155 | } 156 | 157 | func (suite *PanguTestSuite) TestDollar() { 158 | suite.Equal(pangu.SpacingText(`前面$後面`), `前面 $ 後面`) 159 | suite.Equal(pangu.SpacingText(`前面 $ 後面`), `前面 $ 後面`) 160 | 161 | suite.Equal(pangu.SpacingText(`前面$100後面`), `前面 $100 後面`) 162 | 163 | // TODO 164 | // suite.Equal(pangu.SpacingText(`前面$一百塊 後面`), `前面 $一百塊 後面`) 165 | } 166 | 167 | func (suite *PanguTestSuite) TestPercent() { 168 | suite.Equal(pangu.SpacingText(`前面%後面`), `前面 % 後面`) 169 | suite.Equal(pangu.SpacingText(`前面 % 後面`), `前面 % 後面`) 170 | 171 | suite.Equal(pangu.SpacingText(`前面100%後面`), `前面 100% 後面`) 172 | } 173 | 174 | func (suite *PanguTestSuite) TestCarat() { 175 | suite.Equal(pangu.SpacingText(`前面^後面`), `前面 ^ 後面`) 176 | suite.Equal(pangu.SpacingText(`前面 ^ 後面`), `前面 ^ 後面`) 177 | } 178 | 179 | func (suite *PanguTestSuite) TestAmpersand() { 180 | suite.Equal(pangu.SpacingText(`前面&後面`), `前面 & 後面`) 181 | suite.Equal(pangu.SpacingText(`前面 & 
後面`), `前面 & 後面`) 182 | 183 | suite.Equal(pangu.SpacingText(`Vinta&Mollie`), `Vinta&Mollie`) 184 | suite.Equal(pangu.SpacingText(`Vinta&陳上進`), `Vinta & 陳上進`) 185 | suite.Equal(pangu.SpacingText(`陳上進&Vinta`), `陳上進 & Vinta`) 186 | 187 | suite.Equal(pangu.SpacingText(`得到一個A&B的結果`), `得到一個 A&B 的結果`) 188 | } 189 | 190 | func (suite *PanguTestSuite) TestAsterisk() { 191 | suite.Equal(pangu.SpacingText(`前面*後面`), `前面 * 後面`) 192 | suite.Equal(pangu.SpacingText(`前面 * 後面`), `前面 * 後面`) 193 | 194 | suite.Equal(pangu.SpacingText(`Vinta*Mollie`), `Vinta*Mollie`) 195 | suite.Equal(pangu.SpacingText(`Vinta*陳上進`), `Vinta * 陳上進`) 196 | suite.Equal(pangu.SpacingText(`陳上進*Vinta`), `陳上進 * Vinta`) 197 | 198 | suite.Equal(pangu.SpacingText(`得到一個A*B的結果`), `得到一個 A*B 的結果`) 199 | } 200 | 201 | func (suite *PanguTestSuite) TestParenthesis() { 202 | // suite.Equal(pangu.SpacingText(`前面(後面`), `前面 ( 後面`) 203 | // suite.Equal(pangu.SpacingText(`前面 ( 後面`), `前面 ( 後面`) 204 | 205 | // suite.Equal(pangu.SpacingText(`前面)後面`), `前面 ) 後面`) 206 | // suite.Equal(pangu.SpacingText(`前面 ) 後面`), `前面 ) 後面`) 207 | 208 | suite.Equal(pangu.SpacingText(`前面(中文123漢字)後面`), `前面 (中文 123 漢字) 後面`) 209 | suite.Equal(pangu.SpacingText(`前面(中文123)後面`), `前面 (中文 123) 後面`) 210 | suite.Equal(pangu.SpacingText(`前面(123漢字)後面`), `前面 (123 漢字) 後面`) 211 | suite.Equal(pangu.SpacingText(`前面(中文123漢字) tail`), `前面 (中文 123 漢字) tail`) 212 | suite.Equal(pangu.SpacingText(`head (中文123漢字)後面`), `head (中文 123 漢字) 後面`) 213 | suite.Equal(pangu.SpacingText(`head (中文123漢字) tail`), `head (中文 123 漢字) tail`) 214 | } 215 | 216 | func (suite *PanguTestSuite) TestMinus() { 217 | suite.Equal(pangu.SpacingText(`前面-後面`), `前面 - 後面`) 218 | suite.Equal(pangu.SpacingText(`前面 - 後面`), `前面 - 後面`) 219 | 220 | suite.Equal(pangu.SpacingText(`Vinta-Mollie`), `Vinta-Mollie`) 221 | suite.Equal(pangu.SpacingText(`Vinta-陳上進`), `Vinta - 陳上進`) 222 | suite.Equal(pangu.SpacingText(`陳上進-Vinta`), `陳上進 - Vinta`) 223 | 224 | suite.Equal(pangu.SpacingText(`得到一個A-B的結果`), `得到一個 A-B 的結果`) 
225 | } 226 | 227 | func (suite *PanguTestSuite) TestUnderscore() { 228 | suite.Equal(pangu.SpacingText(`前面_後面`), `前面_後面`) 229 | suite.Equal(pangu.SpacingText(`前面 _ 後面`), `前面 _ 後面`) 230 | } 231 | 232 | func (suite *PanguTestSuite) TestPlus() { 233 | suite.Equal(pangu.SpacingText(`前面+後面`), `前面 + 後面`) 234 | suite.Equal(pangu.SpacingText(`前面 + 後面`), `前面 + 後面`) 235 | 236 | suite.Equal(pangu.SpacingText(`Vinta+Mollie`), `Vinta+Mollie`) 237 | suite.Equal(pangu.SpacingText(`Vinta+陳上進`), `Vinta + 陳上進`) 238 | suite.Equal(pangu.SpacingText(`陳上進+Vinta`), `陳上進 + Vinta`) 239 | 240 | suite.Equal(pangu.SpacingText(`得到一個A+B的結果`), `得到一個 A+B 的結果`) 241 | 242 | suite.Equal(pangu.SpacingText(`得到一個C++的結果`), `得到一個 C++ 的結果`) 243 | 244 | // TODO 245 | // suite.Equal(pangu.SpacingText(`得到一個A+的結果`), `得到一個 A+ 的結果`) 246 | } 247 | 248 | func (suite *PanguTestSuite) TestEqual() { 249 | suite.Equal(pangu.SpacingText(`前面=後面`), `前面 = 後面`) 250 | suite.Equal(pangu.SpacingText(`前面 = 後面`), `前面 = 後面`) 251 | 252 | suite.Equal(pangu.SpacingText(`Vinta=Mollie`), `Vinta=Mollie`) 253 | suite.Equal(pangu.SpacingText(`Vinta=陳上進`), `Vinta = 陳上進`) 254 | suite.Equal(pangu.SpacingText(`陳上進=Vinta`), `陳上進 = Vinta`) 255 | 256 | suite.Equal(pangu.SpacingText(`得到一個A=B的結果`), `得到一個 A=B 的結果`) 257 | } 258 | 259 | func (suite *PanguTestSuite) TestBrace() { 260 | // suite.Equal(pangu.SpacingText(`前面{後面`), `前面 { 後面`) 261 | // suite.Equal(pangu.SpacingText(`前面 { 後面`), `前面 { 後面`) 262 | 263 | // suite.Equal(pangu.SpacingText(`前面}後面`), `前面 } 後面`) 264 | // suite.Equal(pangu.SpacingText(`前面 } 後面`), `前面 } 後面`) 265 | 266 | suite.Equal(pangu.SpacingText(`前面{中文123漢字}後面`), `前面 {中文 123 漢字} 後面`) 267 | suite.Equal(pangu.SpacingText(`前面{中文123}後面`), `前面 {中文 123} 後面`) 268 | suite.Equal(pangu.SpacingText(`前面{123漢字}後面`), `前面 {123 漢字} 後面`) 269 | suite.Equal(pangu.SpacingText(`前面{中文123漢字} tail`), `前面 {中文 123 漢字} tail`) 270 | suite.Equal(pangu.SpacingText(`head {中文123漢字}後面`), `head {中文 123 漢字} 後面`) 271 | suite.Equal(pangu.SpacingText(`head 
{中文123漢字} tail`), `head {中文 123 漢字} tail`) 272 | } 273 | 274 | func (suite *PanguTestSuite) TestBracket() { 275 | // suite.Equal(pangu.SpacingText(`前面[後面`), `前面 [ 後面`) 276 | // suite.Equal(pangu.SpacingText(`前面 [ 後面`), `前面 [ 後面`) 277 | 278 | // suite.Equal(pangu.SpacingText(`前面]後面`), `前面 ] 後面`) 279 | // suite.Equal(pangu.SpacingText(`前面 ] 後面`), `前面 ] 後面`) 280 | 281 | suite.Equal(pangu.SpacingText(`前面[中文123漢字]後面`), `前面 [中文 123 漢字] 後面`) 282 | suite.Equal(pangu.SpacingText(`前面[中文123]後面`), `前面 [中文 123] 後面`) 283 | suite.Equal(pangu.SpacingText(`前面[123漢字]後面`), `前面 [123 漢字] 後面`) 284 | suite.Equal(pangu.SpacingText(`前面[中文123漢字] tail`), `前面 [中文 123 漢字] tail`) 285 | suite.Equal(pangu.SpacingText(`head [中文123漢字]後面`), `head [中文 123 漢字] 後面`) 286 | suite.Equal(pangu.SpacingText(`head [中文123漢字] tail`), `head [中文 123 漢字] tail`) 287 | } 288 | 289 | func (suite *PanguTestSuite) TestPipe() { 290 | suite.Equal(pangu.SpacingText(`前面|後面`), `前面 | 後面`) 291 | suite.Equal(pangu.SpacingText(`前面 | 後面`), `前面 | 後面`) 292 | 293 | suite.Equal(pangu.SpacingText(`Vinta|Mollie`), `Vinta|Mollie`) 294 | suite.Equal(pangu.SpacingText(`Vinta|陳上進`), `Vinta | 陳上進`) 295 | suite.Equal(pangu.SpacingText(`陳上進|Vinta`), `陳上進 | Vinta`) 296 | 297 | suite.Equal(pangu.SpacingText(`得到一個A|B的結果`), `得到一個 A|B 的結果`) 298 | } 299 | 300 | func (suite *PanguTestSuite) TestBackslash() { 301 | suite.Equal(pangu.SpacingText(`前面\後面`), `前面 \ 後面`) 302 | } 303 | 304 | func (suite *PanguTestSuite) TestColon() { 305 | suite.Equal(pangu.SpacingText(`前面:後面`), `前面: 後面`) 306 | suite.Equal(pangu.SpacingText(`前面 : 後面`), `前面 : 後面`) 307 | suite.Equal(pangu.SpacingText(`前面: 後面`), `前面: 後面`) 308 | } 309 | 310 | func (suite *PanguTestSuite) TestSemicolon() { 311 | suite.Equal(pangu.SpacingText(`前面;後面`), `前面; 後面`) 312 | suite.Equal(pangu.SpacingText(`前面 ; 後面`), `前面 ; 後面`) 313 | suite.Equal(pangu.SpacingText(`前面; 後面`), `前面; 後面`) 314 | } 315 | 316 | func (suite *PanguTestSuite) TestQuote() { 317 | // suite.Equal(pangu.SpacingText(`前面"後面`), `前面 " 
後面`) 318 | // suite.Equal(pangu.SpacingText(`前面""後面`), `前面 "" 後面`) 319 | // suite.Equal(pangu.SpacingText(`前面" "後面`), `前面 " " 後面`) 320 | 321 | suite.Equal(pangu.SpacingText(`前面"中文123漢字"後面`), `前面 "中文 123 漢字" 後面`) 322 | suite.Equal(pangu.SpacingText(`前面"中文123"後面`), `前面 "中文 123" 後面`) 323 | suite.Equal(pangu.SpacingText(`前面"123漢字"後面`), `前面 "123 漢字" 後面`) 324 | suite.Equal(pangu.SpacingText(`前面"中文123漢字" tail`), `前面 "中文 123 漢字" tail`) 325 | suite.Equal(pangu.SpacingText(`head "中文123漢字"後面`), `head "中文 123 漢字" 後面`) 326 | suite.Equal(pangu.SpacingText(`head "中文123漢字" tail`), `head "中文 123 漢字" tail`) 327 | 328 | // \u201c and \u201d 329 | suite.Equal(pangu.SpacingText(`前面“中文123漢字”後面`), `前面 “中文 123 漢字” 後面`) 330 | } 331 | 332 | func (suite *PanguTestSuite) TestSingleQuote() { 333 | // suite.Equal(pangu.SpacingText(`前面'後面`), `前面 ' 後面`) 334 | // suite.Equal(pangu.SpacingText(`前面''後面`), `前面 '' 後面`) 335 | // suite.Equal(pangu.SpacingText(`前面' '後面`), `前面 ' ' 後面`) 336 | 337 | suite.Equal(pangu.SpacingText(`前面'中文123漢字'後面`), `前面 '中文 123 漢字' 後面`) 338 | suite.Equal(pangu.SpacingText(`前面'中文123'後面`), `前面 '中文 123' 後面`) 339 | suite.Equal(pangu.SpacingText(`前面'123漢字'後面`), `前面 '123 漢字' 後面`) 340 | suite.Equal(pangu.SpacingText(`前面'中文123漢字' tail`), `前面 '中文 123 漢字' tail`) 341 | suite.Equal(pangu.SpacingText(`head '中文123漢字'後面`), `head '中文 123 漢字' 後面`) 342 | suite.Equal(pangu.SpacingText(`head '中文123漢字' tail`), `head '中文 123 漢字' tail`) 343 | 344 | suite.Equal(pangu.SpacingText(`陳上進 likes 林依諾's status.`), `陳上進 likes 林依諾's status.`) 345 | } 346 | 347 | func (suite *PanguTestSuite) TestLessThan() { 348 | suite.Equal(pangu.SpacingText(`前面<後面`), `前面 < 後面`) 349 | suite.Equal(pangu.SpacingText(`前面 < 後面`), `前面 < 後面`) 350 | 351 | suite.Equal(pangu.SpacingText(`Vinta後面`), `前面 <中文 123 漢字> 後面`) 358 | suite.Equal(pangu.SpacingText(`前面<中文123>後面`), `前面 <中文 123> 後面`) 359 | suite.Equal(pangu.SpacingText(`前面<123漢字>後面`), `前面 <123 漢字> 後面`) 360 | suite.Equal(pangu.SpacingText(`前面<中文123漢字> tail`), `前面 <中文 123 漢字> tail`) 
361 | suite.Equal(pangu.SpacingText(`head <中文123漢字>後面`), `head <中文 123 漢字> 後面`) 362 | suite.Equal(pangu.SpacingText(`head <中文123漢字> tail`), `head <中文 123 漢字> tail`) 363 | } 364 | 365 | func (suite *PanguTestSuite) TestComma() { 366 | suite.Equal(pangu.SpacingText(`前面,後面`), `前面, 後面`) 367 | suite.Equal(pangu.SpacingText(`前面 , 後面`), `前面 , 後面`) 368 | suite.Equal(pangu.SpacingText(`前面, 後面`), `前面, 後面`) 369 | } 370 | 371 | func (suite *PanguTestSuite) TestGreaterThan() { 372 | suite.Equal(pangu.SpacingText(`前面>後面`), `前面 > 後面`) 373 | suite.Equal(pangu.SpacingText(`前面 > 後面`), `前面 > 後面`) 374 | 375 | suite.Equal(pangu.SpacingText(`Vinta>Mollie`), `Vinta>Mollie`) 376 | suite.Equal(pangu.SpacingText(`Vinta>陳上進`), `Vinta > 陳上進`) 377 | suite.Equal(pangu.SpacingText(`陳上進>Vinta`), `陳上進 > Vinta`) 378 | 379 | suite.Equal(pangu.SpacingText(`得到一個A>B的結果`), `得到一個 A>B 的結果`) 380 | } 381 | 382 | func (suite *PanguTestSuite) TestPeriod() { 383 | suite.Equal(pangu.SpacingText(`前面.後面`), `前面. 後面`) 384 | suite.Equal(pangu.SpacingText(`前面 . 後面`), `前面 . 後面`) 385 | suite.Equal(pangu.SpacingText(`前面. 後面`), `前面. 後面`) 386 | 387 | // … is \u2026 388 | suite.Equal(pangu.SpacingText(`前面…後面`), `前面… 後面`) 389 | suite.Equal(pangu.SpacingText(`前面……後面`), `前面…… 後面`) 390 | } 391 | 392 | func (suite *PanguTestSuite) TestQuestionMark() { 393 | suite.Equal(pangu.SpacingText(`前面?後面`), `前面? 後面`) 394 | suite.Equal(pangu.SpacingText(`前面 ? 後面`), `前面 ? 後面`) 395 | suite.Equal(pangu.SpacingText(`前面? 後面`), `前面? 
後面`) 396 | } 397 | 398 | func (suite *PanguTestSuite) TestSlash() { 399 | suite.Equal(pangu.SpacingText(`前面/後面`), `前面 / 後面`) 400 | suite.Equal(pangu.SpacingText(`前面 / 後面`), `前面 / 後面`) 401 | 402 | suite.Equal(pangu.SpacingText(`Vinta/Mollie`), `Vinta/Mollie`) 403 | suite.Equal(pangu.SpacingText(`Vinta/陳上進`), `Vinta / 陳上進`) 404 | suite.Equal(pangu.SpacingText(`陳上進/Vinta`), `陳上進 / Vinta`) 405 | 406 | suite.Equal(pangu.SpacingText(`得到一個A/B的結果`), `得到一個 A/B 的結果`) 407 | 408 | // TODO 409 | // suite.Equal(pangu.SpacingText(`陳上進/Vinta/Mollie`), `陳上進 / Vinta / Mollie`) 410 | } 411 | 412 | func (suite *PanguTestSuite) TestSpacingFile() { 413 | input := "_fixtures/test_file.txt" 414 | output := "_fixtures/test_file.pangu.txt" 415 | 416 | fw, err := os.Create(output) 417 | checkError(err) 418 | defer fw.Close() 419 | 420 | err = pangu.SpacingFile(input, fw) 421 | suite.Nil(err) 422 | suite.Equal(md5Of(output), md5Of("_fixtures/test_file.expected.txt")) 423 | } 424 | 425 | func (suite *PanguTestSuite) TestSpacingFileNoNewlineAtEOF() { 426 | input := "_fixtures/test_file_no_eof_newline.txt" 427 | output := "_fixtures/test_file_no_eof_newline.pangu.txt" 428 | 429 | fw, err := os.Create(output) 430 | checkError(err) 431 | defer fw.Close() 432 | 433 | err = pangu.SpacingFile(input, fw) 434 | suite.Nil(err) 435 | suite.Equal(md5Of(output), md5Of("_fixtures/test_file_no_eof_newline.expected.txt")) 436 | } 437 | 438 | func (suite *PanguTestSuite) TestSpacingFileNoSuchFile() { 439 | input := "_fixtures/none.exist" 440 | 441 | err := pangu.SpacingFile(input, ioutil.Discard) 442 | suite.EqualError(err, "open _fixtures/none.exist: no such file or directory") 443 | } 444 | --------------------------------------------------------------------------------