├── .gitignore ├── LICENSE ├── README.md ├── cmd └── autocorrect │ ├── dir.go │ └── main.go ├── correct.go ├── correct_test.go ├── dict.go ├── go.mod └── go.sum /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # autocorrect 2 | 自动给中英文之间加入合理的空格并纠正专用名词大小写 3 | -------------------------------------------------------------------------------- /cmd/autocorrect/dir.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | 7 | "github.com/urfave/cli" 8 | ) 9 | 10 | func processFiles(ctx *cli.Context, path string, callback func(string) string) error { 11 | if IsDir(path) { 12 | names := ScanDir(path) 13 | for _, name := range names { 14 | processFiles(ctx, path+string(os.PathSeparator)+name, callback) 15 | } 16 | 17 | return nil 18 | } 19 | 20 | b, err := ioutil.ReadFile(path) 21 | if err != nil { 22 | return err 23 | } 24 | 25 | content := callback(string(b)) 26 | return ioutil.WriteFile(path, []byte(content), 0666) 27 | } 28 | 29 | // ScanDir 列出指定路径中的文件和目录 30 | // 如果目录不存在,则返回空slice 31 | func ScanDir(directory string) []string { 32 | file, err := os.Open(directory) 33 | if err != nil { 34 | return []string{} 35 | } 36 | names, err := file.Readdirnames(-1) 37 | if err != nil { 38 | return []string{} 39 | } 40 | return names 41 | } 42 | 43 | // IsDir 判断给定文件名是否是一个目录 44 | // 如果文件名存在并且为目录则返回 true。如果 filename 是一个相对路径,则按照当前工作目录检查其相对路径。 45 | func IsDir(filename string) bool { 46 | return isFileOrDir(filename, true) 47 | } 48 | 49 | // IsFile 判断给定文件名是否为一个正常的文件 50 | // 如果文件存在且为正常的文件则返回 true 51 | func IsFile(filename string) bool { 52 | return isFileOrDir(filename, false) 53 | } 54 | 55 | // isFileOrDir 判断是文件还是目录,根据decideDir为true表示判断是否为目录;否则判断是否为文件 56 | func isFileOrDir(filename string, decideDir bool) bool { 57 | fileInfo, err := os.Stat(filename) 58 | if err != nil { 59 | return false 60 | } 61 | isDir := fileInfo.IsDir() 62 | if decideDir { 63 | return isDir 64 | } 65 | return !isDir 66 | } 67 | -------------------------------------------------------------------------------- /cmd/autocorrect/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | 9 | "github.com/studygolang/autocorrect" 10 | 11 | "github.com/urfave/cli" 12 | ) 13 | 14 | var ( 15 | version = "1.0.0" 16 | build = "100" 17 | ) 18 | 19 | func main() { 20 | app := cli.NewApp() 21 | app.Name = "autocorrect" 22 | app.Usage = "自动给中英文之间加入合理的空格并纠正专用名词大小写。支持处理某个目录下所有文件。" 23 | app.Version = fmt.Sprintf("%s (%s)", version, build) 24 | app.Copyright = "(c) 2018 studygolang.com" 25 | app.Authors = []cli.Author{ 26 | { 27 | Name: "Xu Xinhua", 28 | Email: "polaris@studygolang.com", 29 | }, 30 | } 31 | app.EnableBashCompletion = true 32 | app.Flags = globalFlags 33 | app.Action = action 34 | app.Commands = commands 35 | app.Run(os.Args) 36 | } 37 | 38 | var globalFlags = []cli.Flag{} 39 | 40 | var cmdCommonflags = []cli.Flag{ 41 | cli.StringFlag{ 42 | Name: "outfile, f", 43 | Value: "", 44 | Usage: "输出到哪个文件 `FILE`", 45 | }, 46 | cli.BoolFlag{ 47 | Name: "w", 48 | Usage: "将结果直接写入源文件,而不是输出到标准输出,如果提供了 outfile 选项,忽略此选项;如果要处理的是一个目录,该选项无效,直接写入源文件", 49 | }, 50 | } 51 | 52 | var action = func(c *cli.Context) error { 53 | return cli.ShowAppHelp(c) 54 | } 55 | 56 | var bashComplete = func(c *cli.Context) { 57 | // This will complete if no args are passed 58 | if c.NArg() > 0 { 59 | return 60 | } 61 | for _, name := range commandNames { 62 | fmt.Println(name) 63 | } 64 | } 65 | 66 | var commandNames = []string{"space", "correct", "convert"} 67 | 68 | var commands = []cli.Command{ 69 | { 70 | Name: "space", 71 | Aliases: []string{"s"}, 72 | // Category: "", 73 | Usage: "给中英文之间加入合理的空格", 74 | Action: processSpace, 75 | Flags: cmdCommonflags, 76 | BashComplete: bashComplete, 77 | }, 78 | { 79 | Name: "correct", 80 | Aliases: []string{"c"}, 81 | // Category: "", 82 | Usage: "纠正专用名词大小写", 83 | Action: processCorrect, 84 | Flags: cmdCommonflags, 85 | BashComplete: bashComplete, 86 | }, 87 | { 88 | Name: "convert", 89 | Aliases: []string{"a"}, 90 | // Category: "", 91 | Usage: "同时做 space 和 correct 的事情", 92 | Action: processConvert, 93 | Flags: cmdCommonflags, 94 | BashComplete: bashComplete, 95 | }, 96 | } 97 | 98 | func process(ctx *cli.Context, callback func(string) string) error { 99 | content := "" 100 | 101 | if ctx.NArg() > 0 { 102 | arg := ctx.Args().First() 103 | if !exits(arg) { 104 | content = arg 105 | } else { 106 | if IsDir(arg) { 107 | return processFiles(ctx, arg, callback) 108 | } else { 109 | b, err := ioutil.ReadFile(arg) 110 | if err != nil { 111 | return err 112 | } 113 | content = string(b) 114 | } 115 | } 116 | } else { 117 | scanner := bufio.NewScanner(os.Stdin) 118 | for scanner.Scan() { 119 | line := scanner.Text() 120 | content += line + "\n" 121 | } 122 | } 123 | 124 | content = callback(content) 125 | return output(ctx, content) 126 | } 127 | 128 | func processSpace(ctx *cli.Context) error { 129 | return process(ctx, autocorrect.AutoSpace) 130 | } 131 | 132 | func processCorrect(ctx *cli.Context) error { 133 | return process(ctx, autocorrect.AutoCorrect) 134 | } 135 | 136 | func processConvert(ctx *cli.Context) error { 137 | return process(ctx, autocorrect.Convert) 138 | } 139 | 140 | func output(ctx *cli.Context, content string) error { 141 | if ctx.IsSet("outfile") { 142 | outfile := ctx.String("outfile") 143 | file, err := os.OpenFile(outfile, os.O_RDWR|os.O_CREATE, 0666) 144 | if err != nil { 145 | return err 146 | } 147 | defer file.Close() 148 | 149 | _, err = file.WriteString(content) 150 | 151 | return err 152 | } 153 | 154 | if ctx.IsSet("w") { 155 | if ctx.NArg() > 0 { 156 | arg := ctx.Args().First() 157 | if !exits(arg) { 158 | fmt.Println(content) 159 | return nil 160 | } 161 | 162 | ioutil.WriteFile(arg, []byte(content), 0666) 163 | 164 | return nil 165 | } 166 | } 167 | 168 | fmt.Println(content) 169 | 170 | return nil 171 | } 172 | 173 | func exits(filename string) bool { 174 | _, err := os.Stat(filename) 175 | return err == nil || os.IsExist(err) 176 | } 177 | -------------------------------------------------------------------------------- /correct.go: -------------------------------------------------------------------------------- 1 | // BUG(polaris): 一个段落英文开头的大小写转换有问题,比如 go中文网 中的 go 不会转为 Go。 2 | 3 | package autocorrect 4 | 5 | import ( 6 | "strings" 7 | "unicode" 8 | "unicode/utf8" 9 | ) 10 | 11 | var otherDicts = make(map[string]string) 12 | 13 | // AddDict 支持自定义添加字典 14 | func AddDict(dict map[string]string) { 15 | for k, v := range dict { 16 | otherDicts[k] = v 17 | } 18 | } 19 | 20 | // AutoSpace 自动给中英文之间加上空格 21 | func AutoSpace(str string) string { 22 | out := "" 23 | for _, r := range str { 24 | out = addSpaceAtBoundary(out, r) 25 | } 26 | 27 | return out 28 | } 29 | 30 | // AutoCorrect 对常见英文单词进行大家一般写法的纠正,如 go -> Go 31 | func AutoCorrect(str string) string { 32 | oldNews := make([]string, 2*(len(dicts)+len(otherDicts))) 33 | for from, to := range dicts { 34 | oldNews = append(oldNews, " "+from+" ") 35 | oldNews = append(oldNews, " "+to+" ") 36 | } 37 | 38 | replacer := strings.NewReplacer(oldNews...) 39 | return replacer.Replace(str) 40 | } 41 | 42 | // Convert 先执行 AutoSpace,然后执行 AutoCorrect 43 | func Convert(str string) string { 44 | return AutoSpace(AutoCorrect(str)) 45 | } 46 | 47 | func addSpaceAtBoundary(prefix string, nextChar rune) string { 48 | if len(prefix) == 0 { 49 | return string(nextChar) 50 | } 51 | 52 | r, size := utf8.DecodeLastRuneInString(prefix) 53 | if isLatin(size) != isLatin(utf8.RuneLen(nextChar)) && 54 | isAllowSpace(nextChar) && isAllowSpace(r) { 55 | return prefix + " " + string(nextChar) 56 | } 57 | 58 | return prefix + string(nextChar) 59 | } 60 | 61 | func isLatin(size int) bool { 62 | return size == 1 63 | } 64 | 65 | func isAllowSpace(r rune) bool { 66 | return !unicode.IsSpace(r) && !unicode.IsPunct(r) 67 | } 68 | -------------------------------------------------------------------------------- /correct_test.go: -------------------------------------------------------------------------------- 1 | package autocorrect_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/studygolang/autocorrect" 7 | ) 8 | 9 | func TestAutoSpace(t *testing.T) { 10 | str := autocorrect.AutoSpace("Go语言中文网,Welcome you,gopher们") 11 | if str != "Go 语言中文网,Welcome you,gopher 们" { 12 | t.Error("error:", str) 13 | } 14 | } 15 | 16 | func TestAutoCorrect(t *testing.T) { 17 | str := autocorrect.AutoCorrect(" go语言中文网,Welcome you, gopher们,```go func") 18 | if str != " Go语言中文网,Welcome you, Gopher们,```go func" { 19 | t.Error("error:", str) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /dict.go: -------------------------------------------------------------------------------- 1 | package autocorrect 2 | 3 | var dicts = map[string]string{ 4 | // Ruby 5 | "ruby": "Ruby", 6 | "mri": "MRI", 7 | "rails": "Rails", 8 | "gem": "Gem", 9 | "rubygems": "RubyGems", 10 | "rubyonrails": "Ruby on Rails", 11 | "ror": "Ruby on Rails", 12 | "rubyconf": "RubyConf", 13 | "railsconf": "RailsConf", 14 | "rubytuesday": "Ruby Tuesday", 15 | "jruby": "JRuby", 16 | "mruby": "mRuby", 17 | "rvm": "RVM", 18 | "rbenv": "rbenv", 19 | "yard": "YARD", 20 | "rdoc": "RDoc", 21 | "rspec": "RSpec", 22 | "minitest": "MiniTest", 23 | "coffeescript": "CoffeeScript", 24 | "scss": "SCSS", 25 | "sass": "Sass", 26 | "railscasts": "RailsCasts", 27 | "execjs": "ExecJS", 28 | "cocoapods": "CocoaPods", 29 | "capybara": "Capybara", 30 | "rack": "Rack", 31 | "sinatra": "Sinatra", 32 | "lotus": "Lotus", 33 | "grape": "Grape", 34 | "unicorn": "Unicorn", 35 | "thin": "Thin", 36 | "puma": "Puma", 37 | "passenger": "Passenger", 38 | "activerecord": "ActiveRecord", 39 | "active-record": "ActiveRecord", 40 | "activemodal": "ActiveModel", 41 | "activesupport": "ActiveSupport", 42 | "datamapper": "DataMapper", 43 | "devise": "Devise", 44 | "cancancan": "CanCanCan", 45 | "resque": "Resque", 46 | "sidekiq": "Sidekiq", 47 | "turbolinks": "Turbolinks", 48 | "sprockets": "Sprockets", 49 | "redcarpet": "Redcarpet", 50 | "sunspot": "Sunspot", 51 | "carrierwave": "CarrierWave", 52 | "paperclip": "PaperClip", 53 | "simpleform": "Simple Form", 54 | "kaminari": "Kaminari", 55 | "will_paginate": "will_paginate", 56 | "minimagick": "MiniMagick", 57 | "rmagick": "RMagick", 58 | "nokogiri": "Nokogiri", 59 | "god": "God", 60 | "eventmachine": "EventMachine", 61 | "simplecov": "SimpleCov", 62 | "brakeman": "Brakeman", 63 | "activeadmin": "ActiveAdmin", 64 | "railsadmin": "RailsAdmin", 65 | "capistrano": "Capistrano", 66 | "mina": "Mina", 67 | "puppet": "Puppet", 68 | "vagrant": "Vagrant", 69 | "chef": "Chef", 70 | "rubymotion": "RubyMotion", 71 | "irb": "IRB", 72 | "pry": "Pry", 73 | 74 | // Python 75 | 76 | // Node.js 77 | "nodejs": "Node.js", 78 | "npm": "NPM", 79 | 80 | // Go 81 | "golang": "Golang", 82 | "go": "Go", 83 | "gopher": "Gopher", 84 | "goroutine": "Goroutine", 85 | "gctt": "GCTT", 86 | "beego": "Beego", 87 | "echo": "Echo", 88 | "gin": "Gin", 89 | "iris": "Iris", 90 | 91 | // PHP 92 | "php": "PHP", 93 | "pear": "Pear", 94 | 95 | // Cocoa 96 | "afnetworking": "AFNetworking", 97 | "reactivecocoa": "ReactiveCocoa", 98 | "three20": "Three20", 99 | 100 | // Java 101 | 102 | // Programming 103 | "ssh": "SSH", 104 | "web": "Web", 105 | "api": "API", 106 | "css": "CSS", 107 | "html": "HTML", 108 | "json": "JSON", 109 | "jsonp": "JSONP", 110 | "xml": "xml", 111 | "yaml": "YAML", 112 | "yml": "YAML", 113 | "ini": "INI", 114 | "csv": "CSV", 115 | "soap": "SOAP", 116 | "ajax": "Ajax", 117 | "messagepack": "MessagePack", 118 | "javascript": "JavaScript", 119 | "js": "JS", 120 | "png": "PNG", 121 | "dsl": "DSL", 122 | "tdd": "TDD", 123 | "bdd": "BDD", 124 | "cgi": "CGI", 125 | "asp.net": "ASP.NET", 126 | ".net": ".NET", 127 | "rest": "REST", 128 | "orm": "ORM", 129 | "oauth": "OAuth", 130 | "oauth2": "OAuth2", 131 | "i18n": "I18N", 132 | "markdown": "Markdown", 133 | 134 | // Sites 135 | "amazon": "Amazon", 136 | "aws": "AWS", 137 | "facebook": "Facebook", 138 | "github": "GitHub", 139 | "gist": "Gist", 140 | "ruby_china": "Ruby China", 141 | "ruby-china": "Ruby China", 142 | "rubychina": "Ruby China", 143 | "v2ex": "V2EX", 144 | "hackernews": "Hacker News", 145 | "heroku": "Heroku", 146 | "stackoverflow": "Stack Overflow", 147 | "stackexchange": "StackExchange", 148 | "twitter": "Twitter", 149 | "youtube": "YouTube", 150 | 151 | // Databases 152 | "dynamodb": "DynamoDB", 153 | "mysql": "MySQL", 154 | "postgresql": "PostgreSQL", 155 | "sqlite": "SQLite", 156 | "memcached": "Memcached", 157 | "mongodb": "MongoDB", 158 | "redis": "Redis", 159 | "rethinkdb": "RethinkDB", 160 | "elasticsearch": "Elasticsearch", 161 | "solr": "Solr", 162 | "sphinx": "Sphinx", 163 | 164 | // System 165 | "window": "Windows", 166 | "linux": "Linux", 167 | "mac": "Mac", 168 | "osx": "OS X", 169 | "ubuntu": "Ubuntu", 170 | "rhel": "RHEL", 171 | "centos": "CentOS", 172 | "archlinux": "Arch Linux", 173 | "redhat": "RedHat", 174 | 175 | // OpenSource Projects 176 | "gitlab": "GitLab", 177 | "gitlabci": "GitLab CI", 178 | "fontawesome": "Font Awesome", 179 | "bootstrap": "Bootstrap", 180 | "less": "Less", 181 | "jquery": "jQuery", 182 | "requirejs": "RequireJS", 183 | "underscore": "Underscore", 184 | "angularjs": "AngularJS", 185 | "backbone": "Backbone", 186 | "seajs": "SeaJS", 187 | "imagemagick": "ImageMagick", 188 | "fluentd": "Fluentd", 189 | "ffmpeg": "FFMPEG", 190 | 191 | // Tools 192 | "git": "Git", 193 | "svn": "SVN", 194 | "vim": "VIM", 195 | "emacs": "Emacs", 196 | "textmate": "TextMate", 197 | "sublime": "Sublime", 198 | "rubymine": "RubyMine", 199 | "sequelpro": "Sequel Pro", 200 | "virtualbox": "VirtualBox", 201 | "safari": "Safari", 202 | "chrome": "Chrome", 203 | "ie": "IE", 204 | "firefox": "Firefox", 205 | "dash": "Dash", 206 | "termal": "Termal", 207 | "iterm": "iTerm", 208 | "iterm2": "iTerm2", 209 | "iwork": "iWork", 210 | "itunes": "iTunes", 211 | "iphoto": "iPhoto", 212 | "ibook": "iBook", 213 | "imessage": "iMessage", 214 | "tweetbot": "TweetBot", 215 | "sparrow": "Sparrow", 216 | "photoshop": "Photoshop", 217 | "office": "Office", 218 | "word": "Word", 219 | "excel": "Excel", 220 | "powerpoint": "PowerPoint", 221 | 222 | // Misc 223 | "ios": "iOS", 224 | "iphone": "iPhone", 225 | "ipad": "iPad", 226 | "android": "Android", 227 | "imac": "iMac", 228 | "macbookpro": "MacBook Pro", 229 | "macbook": "MacBook", 230 | "rmbp": "rMBP", 231 | "wi-fi": "Wi-Fi", 232 | "wifi": "Wi-Fi", 233 | "vps": "VPS", 234 | "vpn": "VPN", 235 | "arm": "ARM", 236 | "cpu": "CPU", 237 | } 238 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/studygolang/autocorrect 2 | 3 | go 1.12 4 | 5 | require github.com/urfave/cli v1.20.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw= 2 | github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= 3 | --------------------------------------------------------------------------------