├── .circleci └── config.yml ├── .github └── dependabot.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── address ├── reader.go ├── row.go ├── row_test.go ├── rows.go └── rows_test.go ├── cmd ├── address.go ├── address_test.go ├── office.go └── root.go ├── example ├── zip_code.mysql.sql └── zip_code.sqlite.sql ├── go.mod ├── go.sum ├── main.go ├── office ├── reader.go └── row.go ├── script └── make-zip ├── testdata ├── README ├── test_001.csv └── test_001_out.json ├── util ├── uniq.go ├── uniq_test.go ├── util.go └── util_test.go └── writer ├── csv.go ├── json.go └── writer.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | jobs: 3 | build: 4 | environment: 5 | - GOPATH: /home/circleci/go 6 | docker: 7 | - image: cimg/go:1.18 8 | auth: 9 | username: $DOCKERHUB_USER 10 | password: $DOCKERHUB_PASSWORD 11 | working_directory: /home/circleci/go/src/github.com/inouet/ken-all 12 | steps: 13 | - run: echo 'export PATH=${GOPATH}/bin/:${PATH}' >> $BASH_ENV 14 | - checkout 15 | - run: go get -v -t -d ./... 16 | - run: 17 | name: Run lint 18 | command: | 19 | curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.48.0 20 | golangci-lint run ./... 21 | - run: go vet ./... 22 | - run: 23 | name: Run test 24 | command: go test -v -race ./... 25 | deploy: 26 | environment: 27 | - GOPATH: /home/circleci/go 28 | docker: 29 | - image: cimg/go:1.18 30 | auth: 31 | username: $DOCKERHUB_USER 32 | password: $DOCKERHUB_PASSWORD 33 | steps: 34 | - run: echo 'export PATH=${GOPATH}/bin/:${PATH}' >> $BASH_ENV 35 | - checkout 36 | - run: go get -v -t -d ./... 37 | - run: go get github.com/mitchellh/gox 38 | - run: go get github.com/tcnksm/ghr 39 | - run: gox -verbose --osarch "darwin/amd64 linux/amd64 windows/amd64" -output "${GOPATH}/pkg/ken-all/ken-all_{{.OS}}_{{.Arch}}" ./ ./... 
40 | - run: ghr -u $CIRCLE_PROJECT_USERNAME $CIRCLE_TAG $GOPATH/pkg/ken-all/ 41 | 42 | workflows: 43 | build: 44 | jobs: 45 | - build: 46 | context: 47 | - docker-hub-creds 48 | filters: 49 | branches: 50 | only: /.*/ 51 | deploy: 52 | jobs: 53 | - deploy: 54 | context: 55 | - docker-hub-creds 56 | filters: 57 | branches: 58 | ignore: /.*/ 59 | tags: 60 | only: /v[0-9]+(\.[0-9]+)*/ 61 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "11:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | .idea/ 3 | .DS_Store 4 | ken-all 5 | 6 | # Binaries for programs and plugins 7 | *.exe 8 | *.exe~ 9 | *.dll 10 | *.so 11 | *.dylib 12 | 13 | # Test binary, build with `go test -c` 14 | *.test 15 | 16 | # Output of the go coverage tool, specifically when used with LiteIDE 17 | *.out 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Taiji Inoue 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 
all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # parameters 2 | 3 | BINARY_NAME=ken-all 4 | VETPKGS = $(shell go list ./... | grep -v -e vendor) 5 | 6 | export GO111MODULE=on 7 | 8 | .PHONY: build 9 | build: 10 | gox --osarch "darwin/amd64 linux/amd64 windows/amd64" -output="bin/{{.OS}}_{{.Arch}}/$(BINARY_NAME)" 11 | 12 | .PHONY: clean 13 | clean: 14 | go clean 15 | rm -rf bin/ 16 | 17 | .PHONY: test 18 | test: 19 | go test -v ./... 20 | 21 | .PHONY: lint 22 | lint: 23 | golangci-lint run ./... 24 | 25 | .PHONY: deps 26 | deps: 27 | go get -d -v . 
28 | go mod tidy 29 | 30 | .PHONY: vet 31 | vet: 32 | go vet $(VETPKGS) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 概要 2 | 3 | [![][circleci-svg]][circleci] [![][goreportcard-svg]][goreportcard] 4 | 5 | [circleci]: https://circleci.com/gh/inouet/ken-all/tree/master 6 | [circleci-svg]: https://circleci.com/gh/inouet/ken-all.svg?style=shield 7 | [goreportcard]: https://goreportcard.com/report/github.com/inouet/ken-all 8 | [goreportcard-svg]: https://goreportcard.com/badge/github.com/inouet/ken-all 9 | 10 | 日本郵便の提供する郵便番号データ(通称KEN_ALL.CSV)をパースし、使いやすい形に変換するコマンドラインツールです。 11 | 12 | 主に以下のような処理を行います。 13 | 14 | - 複数行に分割された行を結合します 15 | - SHIFT_JIS を UTF-8に変換します 16 | - 半角カナを全角カナに変換します 17 | - 全角数字、記号を半角に変換します 18 | - データとして使いにくいレコードを加工します(加工処理詳細参照) 19 | - CSV形式をJSON形式、TSV形式に変換できます 20 | 21 | ## インストール 22 | 23 | 24 | ### Macユーザーの場合 25 | 26 | homebrew を使って下記のようにインストールできます。 27 | 28 | ``` 29 | $ brew tap inouet/ken-all 30 | $ brew install ken-all 31 | ``` 32 | 33 | ### go ユーザーの場合 34 | 35 | ``` 36 | $ go get github.com/inouet/ken-all 37 | ``` 38 | 39 | ### バイナリのダウンロード 40 | 41 | [Releases](https://github.com/inouet/ken-all/releases) からダウンロード 42 | 43 | 44 | ## コマンドの使い方 45 | 46 | 47 | データを取得して解凍 48 | 49 | ``` 50 | $ wget --quiet https://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip 51 | 52 | $ unzip ken_all.zip 53 | ``` 54 | 55 | 56 | 住所データをJSON形式に変換 57 | 58 | ``` 59 | $ ken-all address KEN_ALL.CSV -t json 60 | 61 | {"region_id":"01101","zip":"0600000","pref_kana":"ホッカイドウ","city_kana":"サッポロシチュウオウク","town_kana":"","pref":"北海道","city":"札幌市中央区","town":"","update_status":"0","update_reason":"0","pref_code":"01"} 62 | {"region_id":"01101","zip":"0640941","pref_kana":"ホッカイドウ","city_kana":"サッポロシチュウオウク","town_kana":"アサヒガオカ","pref":"北海道","city":"札幌市中央区","town":"旭ケ丘","update_status":"0","update_reason":"0","pref_code":"01"} 63 | 
{"region_id":"01101","zip":"0600041","pref_kana":"ホッカイドウ","city_kana":"サッポロシチュウオウク","town_kana":"オオドオリヒガシ","pref":"北海道","city":"札幌市中央区","town":"大通東","update_status":"0","update_reason":"0","pref_code":"01"} 64 | {"region_id":"01101","zip":"0600042","pref_kana":"ホッカイドウ","city_kana":"サッポロシチュウオウク","town_kana":"オオドオリニシ","pref":"北海道","city":"札幌市中央区","town":"大通西","update_status":"0","update_reason":"0","pref_code":"01"} 65 | {"region_id":"01101","zip":"0640820","pref_kana":"ホッカイドウ","city_kana":"サッポロシチュウオウク","town_kana":"オオドオリニシ","pref":"北海道","city":"札幌市中央区","town":"大通西","update_status":"0","update_reason":"0","pref_code":"01"} 66 | {"region_id":"01101","zip":"0600031","pref_kana":"ホッカイドウ","city_kana":"サッポロシチュウオウク","town_kana":"キタ1ジョウヒガシ","pref":"北海道","city":"札幌市中央区","town":"北一条東","update_status":"0","update_reason":"0","pref_code":"01"} 67 | : 68 | ``` 69 | 70 | 事業所データをJSON形式に変換 71 | 72 | ``` 73 | $ ken-all office JIGYOSYO.CSV -t json 74 | 75 | {"jis_code":"01101","kana":"(カブ) ニホンケイザイシンブンシヤ サツポロシシヤ","name":"株式会社 日本経済新聞社 札幌支社","pref":"北海道","city":"札幌市中央区","town":"北一条西","address":"6丁目1-2アーバンネット札幌ビル2F","zip7":"0608621","zip5":"060 ","post_office":"札幌中央","type":"0","is_multi":"0","update_status":"0","pref_code":"01"} 76 | : 77 | ``` 78 | 79 | 詳しくは helpを参照ください 80 | 81 | ``` 82 | Usage: 83 | ken-all [flags] 84 | ken-all [command] 85 | 86 | Available Commands: 87 | address Convert KEN_ALL.CSV into other format. 88 | help Help about any command 89 | office Convert JIGYOSYO.CSV into other format. 
90 | 91 | Flags: 92 | -h, --help help for ken-all 93 | ``` 94 | 95 | 96 | ## 加工処理詳細 97 | 98 | 日本郵便の提供するCSVの特徴として、町域名が38文字を超える場合複数行に分かれています。 99 | まずはこれを1行にした上で下記のような処理を行います。 100 | 101 | ### 町域名に特定の文字列が入る場合は空文字に変換します 102 | 103 | **例** 104 | 105 | - 以下に掲載がない場合 106 | - ○○一円 (一円を除く) 107 | - ○○の次に番地がくる場合 108 | 109 | **加工前** 110 | 111 | | 郵便番号 | 都道府県名 | 市区町村名 | 町域名 | 112 | |----------|------------|---------------|--------------------------| 113 | | 0600000 | 北海道 | 札幌市中央区 | 以下に掲載がない場合 | 114 | | 1000301 | 東京都 | 利島村 | 利島村一円 | 115 | | 5220317 | 滋賀県 | 犬上郡多賀町 | 一円 | 116 | | 3060433 | 茨城県 | 猿島郡境町 | 境町の次に番地がくる場合 | 117 | 118 | **加工後** 119 | 120 | | 郵便番号 | 都道府県名 | 市区町村名 | 町域名 | 121 | |----------|------------|---------------|--------------------------| 122 | | 0600000 | 北海道 | 札幌市中央区 | | 123 | | 1000301 | 東京都 | 利島村 | | 124 | | 5220317 | 滋賀県 | 犬上郡多賀町 | 一円 | 125 | | 3060433 | 茨城県 | 猿島郡境町 | | 126 | 127 | 128 | ### 町域名が X(A、B)の形式の場合、X、XA、XB の3行に展開します 129 | 130 | ただし、括弧内の文字が地名以外と思われるものは削除しています 131 | 132 | **例** 133 | 134 | - 「その他」、「地階・階層不明」、「全域」、「成田国際空港内」など特定のワードを含むもの 135 | - 「132〜156」、「367番地」 など番地と思われるもの 136 | 137 | 138 | **加工前** 139 | 140 | | 郵便番号 | 都道府県名 | 市区町村名 | 町域名 | 141 | |----------|------------|---------------|---------------------------------------------------| 142 | | 0893443 | 北海道 | 中川郡本別町 | 西美里別(113〜791番地、西活込、西上、西中)| 143 | 144 | 145 | **加工後** 146 | 147 | | 郵便番号 | 都道府県名 | 市区町村名 | 町域名 | 148 | |----------|------------|---------------|----------------| 149 | | 0893443 | 北海道 | 中川郡本別町 | 西美里別 | 150 | | 0893443 | 北海道 | 中川郡本別町 | 西美里別西活込 | 151 | | 0893443 | 北海道 | 中川郡本別町 | 西美里別西上 | 152 | | 0893443 | 北海道 | 中川郡本別町 | 西美里別西中 | 153 | 154 | 155 | ### 岩手県の地割表記は削除します 156 | 157 | 例: 「越中畑64地割〜越中畑66地割」は「越中畑」に変換します 158 | 159 | **加工前** 160 | 161 | | 郵便番号 | 都道府県名 | 市区町村名 | 町域名 | 162 | |----------|------------|----------------|--------------------------------| 163 | | 0295523 | 岩手県 | 和賀郡西和賀町 | 越中畑64地割〜越中畑66地割 | 164 | 165 | **加工後** 166 | 167 | | 郵便番号 | 都道府県名 | 市区町村名 | 町域名 | 168 | 
|----------|------------|----------------|--------| 169 | | 0295523 | 岩手県 | 和賀郡西和賀町 | 越中畑 | 170 | 171 | 172 | ## 参考 173 | 174 | * [郵便番号データダウンロード](http://www.post.japanpost.jp/zipcode/download.html) 175 | * [郵便番号データの説明](http://www.post.japanpost.jp/zipcode/dl/readme.html) 176 | 177 | -------------------------------------------------------------------------------- /address/reader.go: -------------------------------------------------------------------------------- 1 | package address 2 | 3 | import ( 4 | "encoding/csv" 5 | "io" 6 | "strings" 7 | 8 | "github.com/inouet/ken-all/util" 9 | ) 10 | 11 | // Reader reads records from a CSV-encoded file. 12 | type Reader struct { 13 | r *csv.Reader 14 | } 15 | 16 | // NewReader returns a new Reader that reads from r. 17 | func NewReader(r io.Reader) *Reader { 18 | return &Reader{ 19 | r: csv.NewReader(r), 20 | } 21 | } 22 | 23 | func (reader *Reader) Read() (record []string, err error) { 24 | 25 | inBrackets := false 26 | 27 | idxTownKana := 5 28 | idxTownName := 8 29 | 30 | townKana := "" 31 | townName := "" 32 | 33 | for { 34 | record, err = reader.r.Read() 35 | 36 | if err == io.EOF { 37 | break 38 | } 39 | 40 | for _, v := range []int{3, 4, 5, 8} { 41 | record[v] = util.NormalizeString(record[v]) 42 | } 43 | 44 | // zip5のスペース除去 45 | record[1] = strings.Trim(record[1], " ") 46 | 47 | if strings.Contains(record[idxTownName], "(") { 48 | inBrackets = true 49 | } 50 | 51 | if inBrackets { // カッコ内の場合は結合 52 | townName = townName + record[idxTownName] 53 | if townKana != record[idxTownKana] { 54 | // 6028064 イッチョウメ のように同じものが続く場合は無視する 55 | townKana = townKana + record[idxTownKana] 56 | } 57 | } 58 | 59 | if strings.Contains(record[idxTownName], ")") { 60 | inBrackets = false 61 | } 62 | 63 | if !inBrackets { // カッコ内でない場合 64 | if townKana != "" { 65 | record[idxTownName] = townName 66 | record[idxTownKana] = townKana 67 | } 68 | return record, err 69 | } 70 | continue 71 | } 72 | return record, err 73 | } 74 | 
-------------------------------------------------------------------------------- /address/row.go: -------------------------------------------------------------------------------- 1 | package address 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | 7 | "github.com/inouet/ken-all/util" 8 | ) 9 | 10 | type Row struct { 11 | RegionID string `json:"region_id"` // 0. 全国地方公共団体コード 12 | Zip5 string `json:"-"` // 1. 郵便番号(5桁) 13 | Zip7 string `json:"zip"` // 2. 郵便番号(7桁) 14 | PrefKana string `json:"pref_kana"` // 3. 都道府県名(カナ) 15 | CityKana string `json:"city_kana"` // 4. 市区町村名(カナ) 16 | TownKana string `json:"town_kana"` // 5. 町域名(カナ) 17 | Pref string `json:"pref"` // 6. 都道府県名 18 | City string `json:"city"` // 7. 市区町村名 19 | Town string `json:"town"` // 8. 町域名 20 | IsMultiZip string `json:"-"` // 9. 一町域が二以上の郵便番号で表される場合の表示(1: 該当、0: 該当せず) 21 | HasKoazaBanchi string `json:"-"` // 10. 小字毎に番地が起番されている町域の表示 (1: 該当、0: 該当せず) 22 | HasChome string `json:"-"` // 11. 丁目を有する町域の場合の表示 (1: 該当、0: 該当せず) 23 | IsMultiTown string `json:"-"` // 12. 一つの郵便番号で二以上の町域を表す場合の表示 (1: 該当、0:該当せず) 24 | UpdateStatus string `json:"update_status"` // 13. 更新の表示 (0: 変更なし、1:変更あり、2: 廃止) 25 | UpdateReason string `json:"update_reason"` 26 | // 14. 変更理由(0: 変更なし、1: 市政・区政・町政・分区・政令指定都市施行、2:住居表示の実施、3:区画整理、4:郵便区調整等、5:訂正、6:廃止) 27 | RawTown string `json:"-"` 28 | PrefCode string `json:"pref_code"` // xx. 
都道府県コード 29 | } 30 | 31 | func NewRow(cols []string) Row { 32 | row := Row{ 33 | RegionID: cols[0], 34 | Zip5: cols[1], 35 | Zip7: cols[2], 36 | PrefKana: cols[3], 37 | CityKana: cols[4], 38 | TownKana: cols[5], 39 | Pref: cols[6], 40 | City: cols[7], 41 | Town: cols[8], 42 | IsMultiZip: cols[9], 43 | HasKoazaBanchi: cols[10], 44 | HasChome: cols[11], 45 | IsMultiTown: cols[12], 46 | UpdateStatus: cols[13], 47 | UpdateReason: cols[14], 48 | } 49 | 50 | row.fixTown() 51 | row.setPrefCode() 52 | 53 | return row 54 | } 55 | 56 | func (row Row) Array() []string { 57 | cols := []string{ 58 | row.RegionID, 59 | row.Zip5, 60 | row.Zip7, 61 | row.PrefKana, 62 | row.CityKana, 63 | row.TownKana, 64 | row.Pref, 65 | row.City, 66 | row.Town, 67 | row.IsMultiZip, 68 | row.HasKoazaBanchi, 69 | row.HasChome, 70 | row.IsMultiTown, 71 | row.UpdateStatus, 72 | row.UpdateReason, 73 | row.PrefCode, 74 | } 75 | return cols 76 | } 77 | 78 | func (row *Row) setPrefCode() { 79 | row.PrefCode = util.GetPrefCode(row.Pref) 80 | } 81 | 82 | func (row *Row) fixTown() { 83 | row.patch() 84 | 85 | // 以下に掲載がない場合 86 | if row.Town == "以下に掲載がない場合" { 87 | row.Town = "" 88 | row.TownKana = "" 89 | } 90 | 91 | // xx一円 、xxの次に番地がくる場合 92 | if strings.HasSuffix(row.Town, "町一円") || strings.HasSuffix(row.Town, "村一円") || strings.HasSuffix(row.Town, "の次に番地がくる場合") { 93 | row.Town = "" 94 | row.TownKana = "" 95 | } 96 | 97 | // 「」内消す 98 | row.Town = deleteKeyBracket(row.Town) 99 | row.TownKana = deleteKeyBracketKana(row.TownKana) 100 | 101 | // 地割 102 | row.fixTownChiwari() 103 | 104 | // trim 105 | row.Town = strings.Trim(row.Town, " ") 106 | row.TownKana = strings.Trim(row.TownKana, " ") 107 | } 108 | 109 | // 元データの不具合を補正する 110 | func (row *Row) patch() { 111 | // 6511102 」の後に、が足りないので修正する 112 | if row.Zip7 == "6511102" && 113 | row.Town == "山田町下谷上(大上谷、修法ケ原、中一里山「9番地の4、12番地を除く」長尾山、再度公園)" { 114 | row.Town = "山田町下谷上(大上谷、修法ケ原、中一里山「9番地の4、12番地を除く」、長尾山、再度公園)" 115 | row.TownKana = "ヤマダチョウシモタニガミ(オオカミダニ、シュウホウガハラ、" + 116 | 
"ナカイチリヤマ<9バンチノ4、12バンチヲノゾク>、ナガオヤマ、フタタビコウエン)" 117 | } 118 | } 119 | 120 | // 岩手県の第n地割 もしくは、 n地割 以降は削除する 121 | var ( 122 | regexpChiwari1 = regexp.MustCompile(`(\()?(第)?[0-9]+地割.*`) 123 | regexpChiwari2 = regexp.MustCompile(`(\()?(ダイ)?[0-9]+チワリ.*`) 124 | ) 125 | 126 | func (row *Row) fixTownChiwari() { 127 | if row.Pref == "岩手県" && strings.Contains(row.Town, "地割") { 128 | row.Town = regexpChiwari1.ReplaceAllString(row.Town, "") 129 | // カナ 130 | row.TownKana = regexpChiwari2.ReplaceAllString(row.TownKana, "") 131 | } 132 | } 133 | 134 | func (row *Row) HasBrackets() bool { 135 | return strings.Contains(row.Town, "(") 136 | } 137 | 138 | // 「」内を消す 139 | var regexpKeyBracket = regexp.MustCompile(`「([^「」]+)」`) 140 | 141 | func deleteKeyBracket(str string) string { 142 | str = regexpKeyBracket.ReplaceAllString(str, "") 143 | return str 144 | } 145 | 146 | // カナの <>内を消す 147 | var regexpKeyBracketKana = regexp.MustCompile(`<([^<>]+)>`) 148 | 149 | func deleteKeyBracketKana(str string) string { 150 | str = regexpKeyBracketKana.ReplaceAllString(str, "") 151 | return str 152 | } 153 | 154 | var regexpIsBuilding = regexp.MustCompile(`\((.+)階(.*)\)`) 155 | 156 | // IsBuilding ビルかどうか 157 | func IsBuilding(s string) bool { 158 | return regexpIsBuilding.Match([]byte(s)) 159 | } 160 | -------------------------------------------------------------------------------- /address/row_test.go: -------------------------------------------------------------------------------- 1 | package address 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | func TestDeleteKeyBracketKana(t *testing.T) { 9 | 10 | type Case struct { 11 | input string 12 | expected string 13 | } 14 | 15 | cases := []Case{ 16 | { 17 | input: "シモクボ<174ヲノゾク>", 18 | expected: "シモクボ", 19 | }, 20 | { 21 | input: "モタイ(1-500<211バンチヲノゾク><フルマチ>、2527-2529<ドトオ>)", 22 | expected: "モタイ(1-500、2527-2529)", 23 | }, 24 | } 25 | 26 | for i, row := range cases { 27 | res := deleteKeyBracketKana(row.input) 28 | if res != row.expected { 29 | 
t.Errorf("#%d: want '%s', got '%s'\n", i, row.expected, res) 30 | } 31 | } 32 | } 33 | 34 | func TestDeleteKeyBracket(t *testing.T) { 35 | type Case struct { 36 | input string 37 | expected string 38 | } 39 | 40 | cases := []Case{ 41 | { 42 | input: "葛巻(第40地割「57番地125、176を除く」~第45地割)", 43 | expected: "葛巻(第40地割~第45地割)", 44 | }, 45 | { 46 | input: "茂田井(1~500「211番地を除く」「古町」、2527~2529「土遠」)", 47 | expected: "茂田井(1~500、2527~2529)", 48 | }, 49 | } 50 | 51 | for i, row := range cases { 52 | res := deleteKeyBracket(row.input) 53 | if res != row.expected { 54 | t.Errorf("#%d: want '%s', got '%s'\n", i, row.expected, res) 55 | } 56 | } 57 | } 58 | 59 | func TestNewRow(t *testing.T) { 60 | type Case struct { 61 | name string 62 | data string 63 | town string 64 | townKana string 65 | } 66 | 67 | cases := []Case{ 68 | // 複数行にまたがる行の連結 ex) 6028062 69 | { 70 | name: "6028062", 71 | data: `26102,"602 ","6028062","キョウトフ","キョウトシカミギョウク","カメヤチョウ","京都府","京都市上京区","亀屋町(油小路通上長者町下る、油小路通下長者町上る、油小路通",0,0,0,0,0,0 72 | 26102,"602 ","6028062","キョウトフ","キョウトシカミギョウク","カメヤチョウ","京都府","京都市上京区","中長者町上る、油小路通中長者町下る、上長者町通油小路西入、上長者町通油小",0,0,0,0,0,0 73 | 26102,"602 ","6028062","キョウトフ","キョウトシカミギョウク","カメヤチョウ","京都府","京都市上京区","路東入)",0,0,0,0,0,0`, 74 | town: "亀屋町(油小路通上長者町下る、油小路通下長者町上る、油小路通中長者町上る、油小路通中長者町下る、上長者町通油小路西入、上長者町通油小路東入)", 75 | townKana: "カメヤチョウ", 76 | }, 77 | 78 | // 以下に掲載がない場合が消えていること ex) 6000000 79 | { 80 | name: "6000000", 81 | data: `26106,"600 ","6000000","キョウトフ","キョウトシシモギョウク","イカニケイサイガナイバアイ","京都府","京都市下京区","以下に掲載がない場合",0,0,0,0,0,0`, 82 | town: "", 83 | townKana: "", 84 | }, 85 | 86 | // xx一円が消えていること ex) 100-0301 87 | { 88 | name: "1000301", 89 | data: `13362,"10003","1000301","トウキョウト","トシマムラ","トシマムライチエン","東京都","利島村","利島村一円",0,0,0,0,0,0`, 90 | town: "", 91 | townKana: "", 92 | }, 93 | 94 | // 「一円」は残す ex) 522-0317 95 | { 96 | name: "5220317", 97 | data: `25443,"52203","5220317","シガケン","イヌカミグンタガチョウ","イチエン","滋賀県","犬上郡多賀町","一円",0,0,0,0,0,0`, 98 | town: "一円", 99 | townKana: "イチエン", 100 | }, 101 | 102 | // 
地割 103 | { 104 | name: "0287915", 105 | data: `03507,"02879","0287915","イワテケン","クノヘグンヒロノチョウ","タネイチダイ15チワリ-ダイ21チワリ(カヌカ、ショウジアイ、ミドリチョウ、オオクボ、タカトリ)","岩手県","九戸郡洋野町","種市第15地割〜第21地割(鹿糠、小路合、緑町、大久保、高取)",0,1,0,0,0,0`, 106 | town: "種市", 107 | townKana: "タネイチ", 108 | }, 109 | { 110 | name: "0285102", 111 | data: `03302,"02851","0285102","イワテケン","イワテグンクズマキマチ","クズマキ(ダイ40チワリ<57バンチ125、176ヲノゾク>-ダイ45","岩手県","岩手郡葛巻町","葛巻(第40地割「57番地125、176を除く」〜第45",1,1,0,0,0,0 112 | 03302,"02851","0285102","イワテケン","イワテグンクズマキマチ","チワリ)","岩手県","岩手郡葛巻町","地割)",1,1,0,0,0,0`, 113 | town: "葛巻", 114 | townKana: "クズマキ", 115 | }, 116 | } 117 | 118 | for i, c := range cases { 119 | 120 | t.Run(c.name, func(t *testing.T) { 121 | 122 | reader := bytes.NewReader([]byte(c.data)) 123 | r := NewReader(reader) 124 | cols, _ := r.Read() 125 | 126 | row := NewRow(cols) 127 | 128 | if row.Town != c.town { 129 | t.Errorf("#%d: want '%s', got '%s'\n", i, c.town, row.Town) 130 | } 131 | if row.TownKana != c.townKana { 132 | t.Errorf("#%d: want '%s', got '%s'\n", i, c.townKana, row.TownKana) 133 | } 134 | }) 135 | } 136 | } 137 | 138 | func TestIsBuilding(t *testing.T) { 139 | cases := []struct { 140 | input string 141 | expected bool 142 | }{ 143 | { 144 | input: "大崎ThinkParkTower(18階)", 145 | expected: true, 146 | }, 147 | { 148 | input: "平和通(南)", 149 | expected: false, 150 | }, 151 | { 152 | input: "中央アエル(地階・階層不明)", 153 | expected: true, 154 | }, 155 | } 156 | 157 | for _, c := range cases { 158 | actual := IsBuilding(c.input) 159 | if actual != c.expected { 160 | t.Errorf("#%s: want '%v', got '%v'\n", c.input, c.expected, actual) 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /address/rows.go: -------------------------------------------------------------------------------- 1 | package address 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | ) 7 | 8 | func NewRows(cols []string) []Row { 9 | row := NewRow(cols) 10 | rows := []Row{} 11 | 12 | if row.HasBrackets() { 
13 | 14 | // 15 | // Town 16 | // 17 | 18 | // 阿三(799の1~867番地) を ( で 分割 19 | arrTown := strings.SplitN(row.Town, "(", 2) 20 | town := arrTown[0] 21 | 22 | // カッコ内 23 | townSub := strings.Replace(arrTown[1], ")", "", -1) 24 | townSub = strings.Replace(townSub, "(", "", -1) 25 | 26 | // 、で配列に分割 27 | townSubs := strings.Split(townSub, "、") 28 | 29 | // 30 | // TownKana 31 | // 32 | 33 | // ( で分割 34 | arrTownKana := strings.SplitN(row.TownKana, "(", 2) 35 | arrTownKana = fixArray(arrTownKana, 2) 36 | townKana := arrTownKana[0] 37 | 38 | // カナカッコ内 39 | townKanaSub := strings.Replace(arrTownKana[1], ")", "", -1) 40 | townKanaSub = strings.Replace(townKanaSub, "(", "", -1) 41 | 42 | townKanaSubs := strings.Split(townKanaSub, "、") 43 | townKanaSubs = fixArray(townKanaSubs, len(townSubs)) 44 | 45 | if !IsBuilding(row.Town) { 46 | // () 展開前のものを追加 47 | rowCopy := row 48 | rowCopy.RawTown = row.Town 49 | rowCopy.Town = town 50 | rowCopy.TownKana = townKana 51 | rows = append(rows, rowCopy) 52 | } 53 | 54 | // () 内を追加 55 | for i, sub := range townSubs { 56 | sub = strings.Trim(sub, " ") 57 | 58 | if isNgString(sub) { 59 | continue 60 | } 61 | 62 | if town == sub { 63 | // 7711231: 富吉(富吉)、 6560514: 賀集(賀集) 64 | continue 65 | } 66 | 67 | rowCopy := row 68 | 69 | // Town 70 | rowCopy.Town = town + sub 71 | rowCopy.RawTown = row.Town 72 | 73 | // TownKana 74 | subKana := townKanaSubs[i] 75 | rowCopy.TownKana = townKana + subKana 76 | 77 | rows = append(rows, rowCopy) 78 | } 79 | 80 | } else { 81 | rows = append(rows, row) 82 | } 83 | return rows 84 | } 85 | 86 | // 指定した長さまで配列を埋める 87 | func fixArray(arr []string, count int) []string { 88 | size := len(arr) 89 | if size == count { 90 | return arr 91 | } 92 | for i := size; i < count; i++ { 93 | arr = append(arr, "") 94 | } 95 | return arr 96 | } 97 | 98 | var ( 99 | regexpNgStringNumber1 = regexp.MustCompile(`^[0-9の・\-]+$`) 100 | regexpNgStringNumber2 = regexp.MustCompile(`^第[0-9]+$`) 101 | regexpNgStringNumber3 = 
// Patterns for bracket items that are lot numbers rather than place names.
var (
	regexpNgStringNumber1 = regexp.MustCompile(`^[0-9の・\-]+$`)          // digits only, e.g. "20", "799の1"
	regexpNgStringNumber2 = regexp.MustCompile(`^第[0-9]+$`)             // e.g. "第3"
	regexpNgStringNumber3 = regexp.MustCompile(`^[0-9 ]+丁目[0-9 ]+番?`)   // e.g. "1丁目1番"
	regexpNgStringNumber4 = regexp.MustCompile(`[0-9]+[\-−〜]+[0-9]+`)    // e.g. 0482402 "13−4"
)

// isNgString reports whether s, one item taken from inside the brackets of a
// town name, should NOT be expanded into a row of its own because it does not
// look like a place name.
func isNgString(s string) bool {
	// Range and list markers. Both the full-width tilde "~" and the wave
	// dash "〜" appear as range signs, so both are rejected, as is "・".
	if strings.ContainsAny(s, "~〜・") {
		return true
	}

	for _, re := range []*regexp.Regexp{
		regexpNgStringNumber1,
		regexpNgStringNumber2,
		regexpNgStringNumber3,
		regexpNgStringNumber4,
	} {
		if re.MatchString(s) {
			return true
		}
	}

	// Fixed words, and items ending in one of the listed suffixes.
	return isNgWord(s) || hasNgSuffix(s)
}

// ngWordList holds bracket items that are rejected when they match exactly.
var (
	ngWordList = []string{
		"その他",
		"地階・階層不明",
		"全域",      // 0895865: 厚内(全域)
		"成田国際空港内", // 2820031: 一鍬田(成田国際空港内)
	}
)

// isNgWord reports whether s is exactly one of the words in ngWordList.
func isNgWord(s string) bool {
	for _, ng := range ngWordList {
		if s == ng {
			return true
		}
	}
	return false
}

// ngSuffixList holds suffixes that mark a bracket item as a condition or a
// lot number rather than a place name.
var (
	ngSuffixList = []string{"以上", "以下", "以降", "以内", "番地", "除く", "丁目", "含む", "その他", "以外", "「その他」"}
)

// hasNgSuffix reports whether s ends with one of the suffixes in ngSuffixList.
func hasNgSuffix(s string) bool {
	for _, ng := range ngSuffixList {
		if strings.HasSuffix(s, ng) {
			return true
		}
	}
	return false
}
expected []Town 21 | } 22 | 23 | cases := []Case{ 24 | { 25 | zip7: "4506246", 26 | data: `23105,"450 ","4506246","アイチケン","ナゴヤシナカムラク","メイエキミッドランドスクエア(コウソウトウ)(46カイ)","愛知県","名古屋市中村区","名駅ミッドランドスクエア(高層棟)(46階)",0,0,0,0,0,0`, 27 | expected: []Town{ 28 | // (xx階) の場合は1行のみ 29 | { 30 | town: "名駅ミッドランドスクエア高層棟46階", 31 | townKana: "メイエキミッドランドスクエアコウソウトウ46カイ", 32 | }, 33 | }, 34 | }, 35 | 36 | // (全域) は消す 37 | { 38 | zip7: "0895865", 39 | data: `01649,"08958","0895865","ホッカイドウ","トカチグンウラホロチョウ","アツナイ(ゼンイキ)","北海道","十勝郡浦幌町","厚内(全域)",0,0,0,0,0,0`, 40 | expected: []Town{ 41 | { 42 | town: "厚内", 43 | townKana: "アツナイ", 44 | }, 45 | }, 46 | }, 47 | 48 | // (成田国際空港内) は消す 49 | { 50 | zip7: "2820031", 51 | data: `12347,"282 ","2820031","チバケン","カトリグンタコマチ","ヒトクワダ(ナリタコクサイクウコウナイ)","千葉県","香取郡多古町","一鍬田(成田国際空港内)",1,0,0,0,0,0`, 52 | expected: []Town{ 53 | { 54 | town: "一鍬田", 55 | townKana: "ヒトクワダ", 56 | }, 57 | }, 58 | }, 59 | 60 | // イッチョウメ が連続 61 | { 62 | zip7: "6028064", 63 | data: `26102,"602 ","6028064","キョウトフ","キョウトシカミギョウク","イッチョウメ","京都府","京都市上京区","一町目(上長者町通堀川東入、東堀川通上長者町上る、東堀川通中",0,0,0,0,0,0 64 | 26102,"602 ","6028064","キョウトフ","キョウトシカミギョウク","イッチョウメ","京都府","京都市上京区","立売通下る)",0,0,0,0,0,0`, 65 | expected: []Town{ 66 | { 67 | town: "一町目", 68 | townKana: "イッチョウメ", 69 | }, 70 | { 71 | town: "一町目上長者町通堀川東入", 72 | townKana: "イッチョウメ", 73 | }, 74 | { 75 | town: "一町目東堀川通上長者町上る", 76 | townKana: "イッチョウメ", 77 | }, 78 | { 79 | town: "一町目東堀川通中立売通下る", 80 | townKana: "イッチョウメ", 81 | }, 82 | }, 83 | }, 84 | 85 | // () 内の数字っぽいやつは除去される 86 | { 87 | zip7: "0482402", 88 | data: `01407,"04824","0482402","ホッカイドウ","ヨイチグンニキチョウ","オオエ(1チョウメ、2チョウメ<651、662、668バンチ>イガイ、3チョウメ5、1","北海道","余市郡仁木町","大江(1丁目、2丁目「651、662、668番地」以外、3丁目5、1",1,0,1,0,0,0 89 | 01407,"04824","0482402","ホッカイドウ","ヨイチグンニキチョウ","3-4、20、678、687バンチ)","北海道","余市郡仁木町","3−4、20、678、687番地)",1,0,1,0,0,0`, 90 | expected: []Town{ 91 | { 92 | town: "大江", 93 | townKana: "オオエ", 94 | }, 95 | }, 96 | }, 97 | { 98 | zip7: "0482331", 99 | data: 
`01407,"04823","0482331","ホッカイドウ","ヨイチグンニキチョウ","オオエ(2チョウメ651、662、668バンチ、3チョウメ103、118、","北海道","余市郡仁木町","大江(2丁目651、662、668番地、3丁目103、118、",1,0,1,0,0,0 100 | 01407,"04823","0482331","ホッカイドウ","ヨイチグンニキチョウ","210、254、267、372、444、469バンチ)","北海道","余市郡仁木町","210、254、267、372、444、469番地)",1,0,1,0,0,0`, 101 | expected: []Town{ 102 | { 103 | town: "大江", 104 | townKana: "オオエ", 105 | }, 106 | }, 107 | }, 108 | { 109 | zip7: "0300924", 110 | data: `02201,"030 ","0300924","アオモリケン","アオモリシ","タキサワ(シモカワラ190-1)","青森県","青森市","滝沢(下川原190−1)",1,1,0,0,0,0`, 111 | expected: []Town{ 112 | { 113 | town: "滝沢", 114 | townKana: "タキサワ", 115 | }, 116 | }, 117 | }, 118 | 119 | // 地割 120 | { 121 | zip7: "0295505", 122 | data: `03366,"02955","0295505","イワテケン","ワガグンニシワガマチ","ユモト29チワリ、ユモト30チワリ","岩手県","和賀郡西和賀町","湯本29地割、湯本30地割",0,0,0,0,0,0`, 123 | expected: []Town{ 124 | { 125 | town: "湯本", 126 | townKana: "ユモト", 127 | }, 128 | }, 129 | }, 130 | { 131 | zip7: "0287913", 132 | data: `03507,"02879","0287913","イワテケン","クノヘグンヒロノチョウ","タネイチダイ24チワリ-ダイ25チワリ(ミドリガオカチョウ、ヨコテ)","岩手県","九戸郡洋野町","種市第24地割〜第25地割(緑ケ丘町、横手)",0,1,0,0,0,0`, 133 | expected: []Town{ 134 | { 135 | town: "種市", 136 | townKana: "タネイチ", 137 | }, 138 | }, 139 | }, 140 | 141 | // 142 | // xx を除く 0330071 0330072 0285102 9800065 9960301 3842304 4280049 4400075 6511102 7201264 7983321 143 | // 144 | 145 | // 犬落瀬(内金矢、内山、岡沼、金沢、金矢、上淋代、木越、権現沢、四木、七百、下久保「174を除く」、下淋代、高森、通目木、坪毛沢「25、637、641、643、647を除く」、中屋敷、沼久保、根古橋、堀切沢、南平、柳沢、大曲) 146 | { 147 | zip7: "0330071", 148 | data: `02405,"033 ","0330071","アオモリケン","カミキタグンロクノヘマチ","イヌオトセ(ウチカナヤ、ウチヤマ、オカヌマ、カナザワ、カナヤ、カミサビシロ、キコシ、ゴンゲンサワ、","青森県","上北郡六戸町","犬落瀬(内金矢、内山、岡沼、金沢、金矢、上淋代、木越、権現沢、",1,1,0,0,0,0 149 | 02405,"033 ","0330071","アオモリケン","カミキタグンロクノヘマチ","シキ、シチヒャク、シモクボ<174ヲノゾク>、シモサビシロ、タカモリ、ヅメキ、ツボケザワ<2","青森県","上北郡六戸町","四木、七百、下久保「174を除く」、下淋代、高森、通目木、坪毛沢「2",1,1,0,0,0,0 150 | 02405,"033 ","0330071","アオモリケン","カミキタグンロクノヘマチ","5、637、641、643、647ヲノゾク>、ナカヤシキ、ヌマクボ、ネコハシ、ホリキリ","青森県","上北郡六戸町","5、637、641、643、647を除く」、中屋敷、沼久保、根古橋、堀切",1,1,0,0,0,0 151 | 02405,"033 
","0330071","アオモリケン","カミキタグンロクノヘマチ","サワ、ミナミタイ、ヤナギサワ、オオマガリ)","青森県","上北郡六戸町","沢、南平、柳沢、大曲)",1,1,0,0,0,0`, 152 | expected: []Town{ 153 | { 154 | town: "犬落瀬", 155 | townKana: "イヌオトセ", 156 | }, 157 | { 158 | town: "犬落瀬内金矢", 159 | townKana: "イヌオトセウチカナヤ", 160 | }, 161 | { 162 | town: "犬落瀬内山", 163 | townKana: "イヌオトセウチヤマ", 164 | }, 165 | { 166 | town: "犬落瀬岡沼", 167 | townKana: "イヌオトセオカヌマ", 168 | }, 169 | { 170 | town: "犬落瀬金沢", 171 | townKana: "イヌオトセカナザワ", 172 | }, 173 | { 174 | town: "犬落瀬金矢", 175 | townKana: "イヌオトセカナヤ", 176 | }, 177 | { 178 | town: "犬落瀬上淋代", 179 | townKana: "イヌオトセカミサビシロ", 180 | }, 181 | { 182 | town: "犬落瀬木越", 183 | townKana: "イヌオトセキコシ", 184 | }, 185 | { 186 | town: "犬落瀬権現沢", 187 | townKana: "イヌオトセゴンゲンサワ", 188 | }, 189 | { 190 | town: "犬落瀬四木", 191 | townKana: "イヌオトセシキ", 192 | }, 193 | { 194 | town: "犬落瀬七百", 195 | townKana: "イヌオトセシチヒャク", 196 | }, 197 | { 198 | town: "犬落瀬下久保", 199 | townKana: "イヌオトセシモクボ", 200 | }, 201 | { 202 | town: "犬落瀬下淋代", 203 | townKana: "イヌオトセシモサビシロ", 204 | }, 205 | { 206 | town: "犬落瀬高森", 207 | townKana: "イヌオトセタカモリ", 208 | }, 209 | { 210 | town: "犬落瀬通目木", 211 | townKana: "イヌオトセヅメキ", 212 | }, 213 | { 214 | town: "犬落瀬坪毛沢", 215 | townKana: "イヌオトセツボケザワ", 216 | }, 217 | { 218 | town: "犬落瀬中屋敷", 219 | townKana: "イヌオトセナカヤシキ", 220 | }, 221 | { 222 | town: "犬落瀬沼久保", 223 | townKana: "イヌオトセヌマクボ", 224 | }, 225 | { 226 | town: "犬落瀬根古橋", 227 | townKana: "イヌオトセネコハシ", 228 | }, 229 | { 230 | town: "犬落瀬堀切沢", 231 | townKana: "イヌオトセホリキリサワ", 232 | }, 233 | { 234 | town: "犬落瀬南平", 235 | townKana: "イヌオトセミナミタイ", 236 | }, 237 | { 238 | town: "犬落瀬柳沢", 239 | townKana: "イヌオトセヤナギサワ", 240 | }, 241 | { 242 | town: "犬落瀬大曲", 243 | townKana: "イヌオトセオオマガリ", 244 | }, 245 | }, 246 | }, 247 | 248 | { 249 | zip7: "9800065", 250 | data: `04101,"980 ","9800065","ミヤギケン","センダイシアオバク","ツチトイ(1チョウメ<11ヲノゾク>)","宮城県","仙台市青葉区","土樋(1丁目「11を除く」)",0,0,1,0,0,0`, 251 | expected: []Town{ 252 | { 253 | town: "土樋", 254 | townKana: "ツチトイ", 255 | }, 256 | }, 257 | }, 258 | // 折茂( 259 | // 
今熊「213〜234、240、247、262、266、275、277、280、295、1199、1206、1504を除く」、 260 | // 大原、 261 | // 沖山、 262 | // 上折茂「1−13、71−192を除く」 263 | // ) 264 | { 265 | zip7: "0330072", 266 | data: `02405,"033 ","0330072","アオモリケン","カミキタグンロクノヘマチ","オリモ(イマクマ<213-234、240、247、262、266、27","青森県","上北郡六戸町","折茂(今熊「213〜234、240、247、262、266、27",1,1,0,0,0,0 267 | 02405,"033 ","0330072","アオモリケン","カミキタグンロクノヘマチ","5、277、280、295、1199、1206、1504ヲノゾク>、","青森県","上北郡六戸町","5、277、280、295、1199、1206、1504を除く」、",1,1,0,0,0,0 268 | 02405,"033 ","0330072","アオモリケン","カミキタグンロクノヘマチ","オオハラ、オキヤマ、カミオリモ<1-13、71-192ヲノゾク>)","青森県","上北郡六戸町","大原、沖山、上折茂「1−13、71−192を除く」)",1,1,0,0,0,0 269 | `, 270 | expected: []Town{ 271 | { 272 | town: "折茂", 273 | townKana: "オリモ", 274 | }, 275 | { 276 | town: "折茂今熊", 277 | townKana: "オリモイマクマ", 278 | }, 279 | { 280 | town: "折茂大原", 281 | townKana: "オリモオオハラ", 282 | }, 283 | { 284 | town: "折茂沖山", 285 | townKana: "オリモオキヤマ", 286 | }, 287 | { 288 | town: "折茂上折茂", 289 | townKana: "オリモカミオリモ", 290 | }, 291 | }, 292 | }, 293 | { 294 | zip7: "0282504", 295 | data: `03202,"02825","0282504","イワテケン","ミヤコシ","ハコイシ(ダイ2チワリ<70-136>-ダイ4チワリ<3-11>)","岩手県","宮古市","箱石(第2地割「70〜136」〜第4地割「3〜11」)",1,1,0,0,0,0`, 296 | expected: []Town{ 297 | { 298 | town: "箱石", 299 | townKana: "ハコイシ", 300 | }, 301 | }, 302 | }, 303 | { 304 | zip7: "0285102", 305 | data: `03302,"02851","0285102","イワテケン","イワテグンクズマキマチ","クズマキ(ダイ40チワリ<57バンチ125、176ヲノゾク>-ダイ45","岩手県","岩手郡葛巻町","葛巻(第40地割「57番地125、176を除く」〜第45",1,1,0,0,0,0 306 | 03302,"02851","0285102","イワテケン","イワテグンクズマキマチ","チワリ)","岩手県","岩手郡葛巻町","地割)",1,1,0,0,0,0`, 307 | expected: []Town{ 308 | { 309 | town: "葛巻", 310 | townKana: "クズマキ", 311 | }, 312 | }, 313 | }, 314 | { 315 | zip7: "9996652", 316 | data: `06203,"99976","9996652","ヤマガタケン","ツルオカシ","ソエガワ(ワタトザワ<タケノコザワオンセン>)","山形県","鶴岡市","添川(渡戸沢「筍沢温泉」)",1,0,0,0,0,0`, 317 | expected: []Town{ 318 | { 319 | town: "添川", 320 | townKana: "ソエガワ", 321 | }, 322 | { 323 | town: "添川渡戸沢", 324 | townKana: "ソエガワワタトザワ", 325 | }, 326 | }, 327 | }, 328 | // 
南山(430番地以上「1770−1〜2、1862−42、1923−5を除く」、大谷地、折渡、鍵金野、金山、滝ノ沢、豊牧、沼の台、肘折、平林) 329 | { 330 | zip7: "9960301", 331 | data: `06365,"99602","9960301","ヤマガタケン","モガミグンオオクラムラ","ミナミヤマ(430バンチイジョウ<1770-1-2、1862-42、","山形県","最上郡大蔵村","南山(430番地以上「1770−1〜2、1862−42、",1,1,0,0,0,0 332 | 06365,"99602","9960301","ヤマガタケン","モガミグンオオクラムラ","1923-5ヲノゾク>、オオヤチ、オリワタリ、カンカネノ、キンザン、タキノサワ、トヨマキ、ヌマノダイ、","山形県","最上郡大蔵村","1923−5を除く」、大谷地、折渡、鍵金野、金山、滝ノ沢、豊牧、沼の台、",1,1,0,0,0,0 333 | 06365,"99602","9960301","ヤマガタケン","モガミグンオオクラムラ","ヒジオリ、ヒラバヤシ)","山形県","最上郡大蔵村","肘折、平林)",1,1,0,0,0,0 334 | `, 335 | expected: []Town{ 336 | { 337 | town: "南山", 338 | townKana: "ミナミヤマ", 339 | }, 340 | { 341 | town: "南山大谷地", 342 | townKana: "ミナミヤマオオヤチ", 343 | }, 344 | { 345 | town: "南山折渡", 346 | townKana: "ミナミヤマオリワタリ", 347 | }, 348 | { 349 | town: "南山鍵金野", 350 | townKana: "ミナミヤマカンカネノ", 351 | }, 352 | { 353 | town: "南山金山", 354 | townKana: "ミナミヤマキンザン", 355 | }, 356 | { 357 | town: "南山滝ノ沢", 358 | townKana: "ミナミヤマタキノサワ", 359 | }, 360 | { 361 | town: "南山豊牧", 362 | townKana: "ミナミヤマトヨマキ", 363 | }, 364 | { 365 | town: "南山沼の台", 366 | townKana: "ミナミヤマヌマノダイ", 367 | }, 368 | { 369 | town: "南山肘折", 370 | townKana: "ミナミヤマヒジオリ", 371 | }, 372 | { 373 | town: "南山平林", 374 | townKana: "ミナミヤマヒラバヤシ", 375 | }, 376 | }, 377 | }, 378 | { 379 | zip7: "3771405", 380 | data: `10425,"37714","3771405","グンマケン","アガツマグンツマゴイムラ","カンバラ(モロシコ<アサマエン>)","群馬県","吾妻郡嬬恋村","鎌原(モロシコ「浅間園」)",1,0,0,0,0,0`, 381 | expected: []Town{ 382 | { 383 | town: "鎌原", 384 | townKana: "カンバラ", 385 | }, 386 | { 387 | town: "鎌原モロシコ", 388 | townKana: "カンバラモロシコ", 389 | }, 390 | }, 391 | }, 392 | 393 | // TODO: カナのネストされた() の中、なんで処理できてるんだ?? 
394 | // これデータおかしい。他のデータだと、 カナの中のカッコは <> になっている。 395 | { 396 | zip7: "3703321", 397 | data: `10429,"37033","3703321","グンマケン","アガツマグンヒガシアガツママチ","イズミサワ(エボシ(ハルナコハン)、エボシコクユウリン77リンハン)","群馬県","吾妻郡東吾妻町","泉沢(烏帽子「榛名湖畔」、烏帽子国有林77林班)",1,0,0,0,0,0`, 398 | expected: []Town{ 399 | { 400 | town: "泉沢", 401 | townKana: "イズミサワ", 402 | }, 403 | { 404 | town: "泉沢烏帽子", 405 | townKana: "イズミサワエボシハルナコハン", 406 | }, 407 | { 408 | town: "泉沢烏帽子国有林77林班", 409 | townKana: "イズミサワエボシコクユウリン77リンハン", 410 | }, 411 | }, 412 | }, 413 | { 414 | zip7: "3703311", 415 | data: `10429,"37033","3703311","グンマケン","アガツマグンヒガシアガツママチ","オカザキ(エボシ<ハルナコハン>)","群馬県","吾妻郡東吾妻町","岡崎(烏帽子「榛名湖畔」)",1,0,0,0,0,0`, 416 | expected: []Town{ 417 | { 418 | town: "岡崎", 419 | townKana: "オカザキ", 420 | }, 421 | { 422 | town: "岡崎烏帽子", 423 | townKana: "オカザキエボシ", 424 | }, 425 | }, 426 | }, 427 | 428 | { 429 | zip7: "3703322", 430 | data: `10429,"37033","3703322","グンマケン","アガツマグンヒガシアガツママチ","カワド(エボシ<ハルナコハン>)","群馬県","吾妻郡東吾妻町","川戸(烏帽子「榛名湖畔」)",1,0,0,0,0,0`, 431 | expected: []Town{ 432 | { 433 | town: "川戸", 434 | townKana: "カワド", 435 | }, 436 | { 437 | town: "川戸烏帽子", 438 | townKana: "カワドエボシ", 439 | }, 440 | }, 441 | }, 442 | 443 | { 444 | zip7: "3862211", 445 | data: `20207,"38622","3862211","ナガノケン","スザカシ","ニレイマチ(3153-1-3153-1100<ミネノハラ>)","長野県","須坂市","仁礼町(3153−1〜3153−1100「峰の原」)",1,0,0,0,0,0`, 446 | expected: []Town{ 447 | { 448 | town: "仁礼町", 449 | townKana: "ニレイマチ", 450 | }, 451 | }, 452 | }, 453 | 454 | // 茂田井(1〜500「211番地を除く」「古町」、2527〜2529「土遠」) 455 | 456 | { 457 | zip7: "3842304", 458 | data: `20324,"38423","3842304","ナガノケン","キタサクグンタテシナマチ","モタイ(1-500<211バンチヲノゾク><フルマチ>、2527-2529","長野県","北佐久郡立科町","茂田井(1〜500「211番地を除く」「古町」、2527〜2529",1,0,0,0,0,0 459 | 20324,"38423","3842304","ナガノケン","キタサクグンタテシナマチ","<ドトオ>)","長野県","北佐久郡立科町","「土遠」)",1,0,0,0,0,0`, 460 | expected: []Town{ 461 | { 462 | town: "茂田井", 463 | townKana: "モタイ", 464 | }, 465 | }, 466 | }, 467 | 468 | // 牧之原(250〜343番地「255、256、258、259、262、276、294〜300、302〜304番地を除く」) 469 | { 470 | zip7: "4280049", 471 | 
data: `22209,"428 ","4280049","シズオカケン","シマダシ","マキノハラ(250-343バンチ<255、256、258、259、262、","静岡県","島田市","牧之原(250〜343番地「255、256、258、259、262、",1,0,0,0,0,0 472 | 22209,"428 ","4280049","シズオカケン","シマダシ","276、294-300、302-304バンチヲノゾク>)","静岡県","島田市","276、294〜300、302〜304番地を除く」)",1,0,0,0,0,0`, 473 | expected: []Town{ 474 | { 475 | town: "牧之原", 476 | townKana: "マキノハラ", 477 | }, 478 | }, 479 | }, 480 | // 元データの補正 (、が不足) 481 | { 482 | zip7: "6511102", 483 | data: `28109,"65111","6511102","ヒョウゴケン","コウベシキタク","ヤマダチョウシモタニガミ(オオカミダニ、シュウホウガハラ、ナカイチリヤマ<9バンチノ4、12バンチヲノゾク>ナガ","兵庫県","神戸市北区","山田町下谷上(大上谷、修法ケ原、中一里山「9番地の4、12番地を除く」長",1,1,0,0,0,0 484 | 28109,"65111","6511102","ヒョウゴケン","コウベシキタク","オヤマ、フタタビコウエン)","兵庫県","神戸市北区","尾山、再度公園)",1,1,0,0,0,0`, 485 | expected: []Town{ 486 | { 487 | town: "山田町下谷上", 488 | townKana: "ヤマダチョウシモタニガミ", 489 | }, 490 | { 491 | town: "山田町下谷上大上谷", 492 | townKana: "ヤマダチョウシモタニガミオオカミダニ", 493 | }, 494 | { 495 | town: "山田町下谷上修法ケ原", 496 | townKana: "ヤマダチョウシモタニガミシュウホウガハラ", 497 | }, 498 | { 499 | town: "山田町下谷上中一里山", 500 | townKana: "ヤマダチョウシモタニガミナカイチリヤマ", 501 | }, 502 | { 503 | town: "山田町下谷上長尾山", 504 | townKana: "ヤマダチョウシモタニガミナガオヤマ", 505 | }, 506 | { 507 | town: "山田町下谷上再度公園", 508 | townKana: "ヤマダチョウシモタニガミフタタビコウエン", 509 | }, 510 | }, 511 | }, 512 | { 513 | zip7: "6650808", 514 | data: `28214,"665 ","6650808","ヒョウゴケン","タカラヅカシ","キリハタ(ナガオサン<ソノタ>)","兵庫県","宝塚市","切畑(長尾山「その他」)",1,0,0,0,0,0`, 515 | expected: []Town{ 516 | { 517 | town: "切畑", 518 | townKana: "キリハタ", 519 | }, 520 | { 521 | town: "切畑長尾山", 522 | townKana: "キリハタナガオサン", 523 | }, 524 | }, 525 | }, 526 | { 527 | zip7: "6302168", 528 | data: `29201,"63021","6302168","ナラケン","ナラシ","ボダイセンチョウ(173-257バンチ<ハチブセトウゲ>)","奈良県","奈良市","菩提山町(173〜257番地「鉢伏峠」)",1,0,0,0,0,0`, 529 | expected: []Town{ 530 | { 531 | town: "菩提山町", 532 | townKana: "ボダイセンチョウ", 533 | }, 534 | }, 535 | }, 536 | { 537 | zip7: "7200845", 538 | data: `34207,"720 ","7200845","ヒロシマケン","フクヤマシ","アシダチョウフクダ(376-10<セイホウジ>)","広島県","福山市","芦田町福田(376−10「聖宝寺」)",1,0,0,0,0,0`, 539 | expected: 
[]Town{ 540 | { 541 | town: "芦田町福田", 542 | townKana: "アシダチョウフクダ", 543 | }, 544 | }, 545 | }, 546 | // 町名と() 内が同じ場合は()内削除 547 | { 548 | zip7: "6560514", 549 | data: `28224,"65605","6560514","ヒョウゴケン","ミナミアワジシ","カシュウ(カシュウ)","兵庫県","南あわじ市","賀集(賀集)",0,0,0,0,0,0`, 550 | expected: []Town{ 551 | { 552 | town: "賀集", 553 | townKana: "カシュウ", 554 | }, 555 | }, 556 | }, 557 | } 558 | 559 | for x, c := range cases { 560 | t.Run(c.zip7, func(t *testing.T) { 561 | reader := bytes.NewReader([]byte(c.data)) 562 | r := NewReader(reader) 563 | cols, _ := r.Read() 564 | 565 | rows := NewRows(cols) 566 | 567 | if len(rows) != len(c.expected) { 568 | fmt.Println(rows) 569 | t.Errorf("#%d: zip:%s want '%d', got '%d'\n", x, c.zip7, len(c.expected), len(rows)) 570 | } else { 571 | for i, row := range rows { 572 | if row.Town != c.expected[i].town { 573 | t.Errorf("#%d: zip:%s want '%s', got '%s'\n", x, c.zip7, c.expected[i].town, row.Town) 574 | } 575 | if row.TownKana != c.expected[i].townKana { 576 | t.Errorf("#%d: zip:%s want '%s', got '%s'\n", x, c.zip7, c.expected[i].townKana, row.TownKana) 577 | } 578 | } 579 | } 580 | }) 581 | } 582 | } 583 | -------------------------------------------------------------------------------- /cmd/address.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "log" 7 | "os" 8 | 9 | "golang.org/x/text/encoding/japanese" 10 | "golang.org/x/text/transform" 11 | 12 | "github.com/spf13/cobra" 13 | 14 | "github.com/inouet/ken-all/address" 15 | "github.com/inouet/ken-all/util" 16 | "github.com/inouet/ken-all/writer" 17 | ) 18 | 19 | // http://text.baldanders.info/golang/using-and-testing-cobra/ 20 | 21 | func init() { 22 | output = os.Stdout 23 | } 24 | 25 | func newAddressCmd() *cobra.Command { 26 | 27 | cmd := &cobra.Command{ 28 | Use: "address [KEN_ALL.CSV]", 29 | Short: "Convert KEN_ALL.CSV into other format.", 30 | Args: cobra.MinimumNArgs(1), 31 | RunE: func(cmd 
*cobra.Command, args []string) error { 32 | 33 | inputFile := args[0] 34 | 35 | outputType, err := cmd.Flags().GetString("type") 36 | if err != nil { 37 | return err 38 | } 39 | 40 | if !isValidOutputType(outputType) { 41 | return errors.New("type must be json or csv or tsv") 42 | } 43 | 44 | err = execAddressCmd(output, inputFile, outputType) 45 | return err 46 | }, 47 | } 48 | 49 | cmd.Flags().StringP("type", "t", "csv", "output type [json,csv,tsv]") 50 | 51 | return cmd 52 | } 53 | 54 | func execAddressCmd(w io.Writer, inputFile, outputType string) error { 55 | 56 | ioReader, err := os.Open(inputFile) 57 | 58 | defer func() { 59 | err := ioReader.Close() 60 | if err != nil { 61 | log.Println("can't close ioReader", err) 62 | } 63 | }() 64 | 65 | if err != nil { 66 | return err 67 | } 68 | 69 | rdr := address.NewReader(transform.NewReader(ioReader, japanese.ShiftJIS.NewDecoder())) 70 | wtr := writer.NewWriter(w, outputType) 71 | 72 | defer wtr.Flush() 73 | 74 | uniq := util.NewUniq() 75 | 76 | for { 77 | cols, err := rdr.Read() 78 | 79 | if err == io.EOF { 80 | break 81 | } 82 | 83 | rows := address.NewRows(cols) 84 | 85 | for _, row := range rows { 86 | // 同じ 郵便番号で同じ住所は出力しない 87 | key := row.Zip7 + row.Pref + row.City + row.Town 88 | if !uniq.IsUnique(key) { 89 | continue 90 | } 91 | 92 | err := wtr.Write(row) 93 | if err != nil { 94 | return err 95 | } 96 | } 97 | } 98 | 99 | return nil 100 | } 101 | -------------------------------------------------------------------------------- /cmd/address_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | // https://deeeet.com/writing/2014/12/18/golang-cli-test/ 4 | // https://qiita.com/kami_zh/items/ff636f15da87dabebe6c 5 | 6 | import ( 7 | "bytes" 8 | "io/ioutil" 9 | "path/filepath" 10 | "testing" 11 | ) 12 | 13 | var testDataDir string 14 | 15 | func init() { 16 | testDataDir = "../testdata" 17 | } 18 | 19 | func TestBuild(t *testing.T) { 20 | 21 | cases := 
[]struct { 22 | input string 23 | output string 24 | outputType string 25 | }{ 26 | { 27 | input: "test_001.csv", 28 | output: "test_001_out.json", 29 | outputType: "json", 30 | }, 31 | } 32 | 33 | for _, c := range cases { 34 | buffer := &bytes.Buffer{} 35 | inputFile := filepath.Join(testDataDir, c.input) 36 | outputFile := filepath.Join(testDataDir, c.output) 37 | 38 | err := execAddressCmd(buffer, inputFile, c.outputType) 39 | if err != nil { 40 | t.Errorf("execAddressCmd failed %s", err.Error()) 41 | } 42 | 43 | b, err := ioutil.ReadFile(outputFile) 44 | if err != nil { 45 | t.Errorf("File not found %s", outputFile) 46 | } 47 | 48 | if buffer.String() != string(b) { 49 | t.Errorf("want '%s', got '%s'\n", string(b), buffer) 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /cmd/office.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "log" 7 | "os" 8 | 9 | "github.com/spf13/cobra" 10 | "golang.org/x/text/encoding/japanese" 11 | "golang.org/x/text/transform" 12 | 13 | "github.com/inouet/ken-all/office" 14 | "github.com/inouet/ken-all/writer" 15 | ) 16 | 17 | func init() { 18 | output = os.Stdout 19 | } 20 | 21 | func newOfficeCmd() *cobra.Command { 22 | 23 | cmd := &cobra.Command{ 24 | Use: "office [JIGYOSYO.CSV]", 25 | Short: "Convert JIGYOSYO.CSV into other format.", 26 | Args: cobra.MinimumNArgs(1), 27 | RunE: func(cmd *cobra.Command, args []string) error { 28 | 29 | inputFile := args[0] 30 | 31 | outputType, err := cmd.Flags().GetString("type") 32 | if err != nil { 33 | return err 34 | } 35 | 36 | if !isValidOutputType(outputType) { 37 | return errors.New("type must be json or csv or tsv") 38 | } 39 | 40 | err = execOfficeCmd(output, inputFile, outputType) 41 | 42 | return err 43 | }, 44 | } 45 | 46 | cmd.Flags().StringP("type", "t", "csv", "output type [json,csv,tsv]") 47 | 48 | return cmd 49 | } 50 | 51 | 
func execOfficeCmd(w io.Writer, inputFile, outputType string) error { 52 | 53 | ioReader, err := os.Open(inputFile) 54 | 55 | defer func() { 56 | err := ioReader.Close() 57 | if err != nil { 58 | log.Println("can't close ioReader", err) 59 | } 60 | }() 61 | 62 | if err != nil { 63 | return err 64 | } 65 | 66 | rdr := office.NewReader(transform.NewReader(ioReader, japanese.ShiftJIS.NewDecoder())) 67 | wtr := writer.NewWriter(w, outputType) 68 | 69 | defer wtr.Flush() 70 | 71 | for { 72 | cols, err := rdr.Read() 73 | 74 | if err == io.EOF { 75 | break 76 | } 77 | row := office.NewRow(cols) 78 | err = wtr.Write(row) 79 | if err != nil { 80 | return err 81 | } 82 | } 83 | 84 | return nil 85 | } 86 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/spf13/cobra" 7 | "github.com/spiegel-im-spiegel/gocli/exitcode" 8 | ) 9 | 10 | var output io.Writer 11 | 12 | func newRootCmd() *cobra.Command { 13 | rootCmd := &cobra.Command{ 14 | Use: "ken-all", 15 | Short: "", 16 | Long: "", 17 | Run: func(cmd *cobra.Command, args []string) { 18 | cmd.Help() //nolint:errcheck 19 | }, 20 | } 21 | 22 | rootCmd.AddCommand(newAddressCmd()) 23 | rootCmd.AddCommand(newOfficeCmd()) 24 | return rootCmd 25 | } 26 | 27 | func Execute() (exit exitcode.ExitCode) { 28 | exit = exitcode.Normal 29 | if err := newRootCmd().Execute(); err != nil { 30 | exit = exitcode.Abnormal 31 | } 32 | return 33 | } 34 | 35 | func isValidOutputType(outputType string) bool { 36 | if outputType == "json" || outputType == "csv" || outputType == "tsv" { 37 | return true 38 | } 39 | return false 40 | } 41 | -------------------------------------------------------------------------------- /example/zip_code.mysql.sql: -------------------------------------------------------------------------------- 1 | 2 | create table zip_code ( 3 | zip_code 
varchar(7) not null, -- 郵便番号7桁 4 | zip_type int not null, -- 区分(1: 住所 2: 事業所) 5 | pref varchar(20) not null, -- 都道府県名 6 | city varchar(255) not null, -- 市区町村名 7 | town varchar(255), -- 町域名 8 | street varchar(255), -- 小字名、丁目、番地等 9 | name varchar(255), -- 大口事業所名 10 | update_code int -- 修正コード 11 | ); 12 | 13 | -- 修正コード 14 | -- 住所 15 | -- 10: 変更なし 16 | -- 11: 市政・区政・町政・分区・政令指定都市施行 17 | -- 12: 住居表示の実施 18 | -- 13: 区画整理 19 | -- 14: 郵便区調整等 20 | -- 15: 訂正 21 | -- 16: 廃止(廃止データのみ使用) 22 | -- 事業所 23 | -- 20: 修正なし 24 | -- 21: 新規追加 25 | -- 25: 廃止 26 | 27 | -------------------------------------------------------------------------------- /example/zip_code.sqlite.sql: -------------------------------------------------------------------------------- 1 | 2 | create table zip_code ( 3 | zip_code text not null, -- 郵便番号7桁 4 | zip_type integer not null, -- 区分 5 | pref text not null, -- 都道府県名 6 | city text not null, -- 市区町村名 7 | town text, -- 町域名 8 | street text, -- 小字名、丁目、番地等 9 | name text, -- 大口事業所名 10 | update_code integer -- 修正コード 11 | ); 12 | 13 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/inouet/ken-all 2 | 3 | require ( 4 | github.com/ktnyt/go-moji v1.0.0 5 | github.com/spf13/cobra v1.7.0 6 | github.com/spiegel-im-spiegel/gocli v0.10.4 7 | golang.org/x/text v0.13.0 8 | ) 9 | 10 | require ( 11 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 12 | github.com/spf13/pflag v1.0.5 // indirect 13 | ) 14 | 15 | go 1.18 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 2 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 3 | github.com/inconshreveable/mousetrap v1.1.0/go.mod 
// Reader reads records from an underlying CSV stream.
type Reader struct {
	r *csv.Reader
}

// NewReader returns a Reader that parses CSV records from r.
func NewReader(r io.Reader) *Reader {
	reader := new(Reader)
	reader.r = csv.NewReader(r)
	return reader
}
19 | 20 | func (reader *Reader) Read() (record []string, err error) { 21 | record, err = reader.r.Read() 22 | if err != nil { 23 | return record, err 24 | } 25 | for _, v := range []int{1, 2, 4, 5, 6} { 26 | record[v] = util.NormalizeString(record[v]) 27 | } 28 | return record, err 29 | } 30 | -------------------------------------------------------------------------------- /office/row.go: -------------------------------------------------------------------------------- 1 | package office 2 | 3 | import "github.com/inouet/ken-all/util" 4 | 5 | type Row struct { 6 | JisCode string `json:"jis_code"` // 0. 大口事業所の所在地のJISコード(5バイト) 7 | OfficeKana string `json:"kana"` // 1. 大口事業所名(カナ)(100バイト) 8 | OfficeName string `json:"name"` // 2. 大口事業所名(漢字)(160バイト) 9 | Pref string `json:"pref"` // 3. 都道府県名(漢字)(8バイト) 10 | City string `json:"city"` // 4. 市区町村名(漢字)(24バイト) 11 | Town string `json:"town"` // 5. 町域名(漢字)(24バイト) 12 | Address string `json:"address"` // 6. 小字名、丁目、番地等(漢字)(124バイト) 13 | Zip7 string `json:"zip7"` // 7. 大口事業所個別番号(7バイト) 14 | Zip5 string `json:"zip5"` // 8. 旧郵便番号(5バイト) 15 | PostOffice string `json:"post_office"` // 9. 取扱局(漢字)(40バイト) 16 | Type string `json:"type"` // 10. 個別番号の種別の表示(1バイト)「0」大口事業所 「1」私書箱 17 | // 11. 複数番号の有無(1バイト) 18 | // 「0」複数番号無し 19 | // 「1」複数番号を設定している場合の個別番号の1 20 | // 「2」複数番号を設定している場合の個別番号の2 21 | // 「3」複数番号を設定している場合の個別番号の3 22 | // 一つの事業所が同一種別の個別番号を複数持つ場合に複数番号を設定しているものとします。 23 | // 従って、一つの事業所で大口事業所、私書箱の個別番号をそれぞれ一つづつ設定している場合は 12)は「0」となります。 24 | IsMulti string `json:"is_multi"` 25 | 26 | UpdateStatus string `json:"update_status"` // 12. 修正コード(1バイト) 「0」修正なし/「1」新規追加/「5」廃止 27 | PrefCode string `json:"pref_code"` // xx. 
都道府県コード 28 | } 29 | 30 | func NewRow(cols []string) Row { 31 | row := Row{ 32 | JisCode: cols[0], 33 | OfficeKana: cols[1], 34 | OfficeName: cols[2], 35 | Pref: cols[3], 36 | City: cols[4], 37 | Town: cols[5], 38 | Address: cols[6], 39 | Zip7: cols[7], 40 | Zip5: cols[8], 41 | PostOffice: cols[9], 42 | Type: cols[10], 43 | IsMulti: cols[11], 44 | UpdateStatus: cols[12], 45 | } 46 | 47 | row.setPrefCode() 48 | 49 | return row 50 | } 51 | 52 | func (row Row) Array() []string { 53 | cols := []string{ 54 | row.JisCode, 55 | row.OfficeKana, 56 | row.OfficeName, 57 | row.Pref, 58 | row.City, 59 | row.Town, 60 | row.Address, 61 | row.Zip7, 62 | row.Zip5, 63 | row.PostOffice, 64 | row.Type, 65 | row.IsMulti, 66 | row.UpdateStatus, 67 | row.PrefCode, 68 | } 69 | return cols 70 | } 71 | 72 | func (row *Row) setPrefCode() { 73 | row.PrefCode = util.GetPrefCode(row.Pref) 74 | } 75 | -------------------------------------------------------------------------------- /script/make-zip: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # 郵便番号データ生成スクリプト 4 | # 日本郵便のサイトからKEN_ALL.CSV、JIGYOSYO.CSVをダウンロードし、 5 | # 郵便番号マスタ(TSV)を生成します 6 | # 7 | # Usage: 8 | # ./script/make-zip -o OUTPUT_FILE 9 | # 10 | # TSV Format: 11 | # 1: 郵便番号 12 | # 2: 区分(1: 住所 2: 事業所) 13 | # 3: 都道府県 14 | # 4: 市区町村名 15 | # 5: 町域名 16 | # 6: 小字名、丁目、番地等 17 | # 7: 大口事業所名 18 | # 8: 修正コード 19 | # 20 | 21 | set -e 22 | 23 | usage_exit() { 24 | echo "Usage: $0 -o file" 1>&2 25 | exit 1 26 | } 27 | 28 | main() { 29 | 30 | if [ -z "$OUTPUT_FILE" ]; then 31 | usage_exit 32 | fi 33 | if [ -z "$TEMP" ]; then 34 | TEMP="/tmp" 35 | fi 36 | 37 | :> $OUTPUT_FILE 38 | 39 | WORK_DIR=$(mktemp -d $TEMP/tmp.XXXXXX) 40 | echo "[INFO] Working directory = $WORK_DIR" 41 | 42 | # 43 | # 取得 44 | # 45 | 46 | # 郵便番号データ 47 | echo "[INFO] Download ken_all.zip" 48 | wget --quiet https://www.post.japanpost.jp/zipcode/dl/kogaki/zip/ken_all.zip -P $WORK_DIR/ 49 | unzip -p $WORK_DIR/ken_all.zip > 
$WORK_DIR/KEN_ALL.CSV 50 | rm -f $WORK_DIR/ken_all.zip 51 | 52 | # 事業所データ 53 | echo "[INFO] Download jigyosyo.zip" 54 | wget --quiet https://www.post.japanpost.jp/zipcode/dl/jigyosyo/zip/jigyosyo.zip -P $WORK_DIR/ 55 | unzip -p $WORK_DIR/jigyosyo.zip > $WORK_DIR/JIGYOSYO.CSV 56 | 57 | # 58 | # TSV生成 59 | # 60 | 61 | # 1: 郵便番号 62 | # 2: 区分(1: 住所 2: 事業所) 63 | # 3: 都道府県 64 | # 4: 市区町村名 65 | # 5: 町域名 66 | # 6: 小字名、丁目、番地等 67 | # 7: 大口事業所名 68 | # 8: 修正コード 69 | 70 | # 郵便番号データ 71 | echo "[INFO] Create zip data from KEN_ALL.CSV" 72 | ken-all address $WORK_DIR/KEN_ALL.CSV -t tsv \ 73 | | awk -F "\t" 'BEGIN { OFS = "\t" }{ print $3, "1", $7, $8, $9, "", "", 1$14 }' \ 74 | > $OUTPUT_FILE 75 | 76 | # 事業所データ 77 | echo "[INFO] Create zip data from JIGYOSYO.CSV" 78 | ken-all office $WORK_DIR/JIGYOSYO.CSV -t tsv \ 79 | | awk -F "\t" 'BEGIN { OFS = "\t" }{ print $8, "2", $4, $5, $6, $7, $3, 2$13 }' \ 80 | >> $OUTPUT_FILE 81 | 82 | rm -rf $WORK_DIR 83 | echo "[INFO] Done file = ${OUTPUT_FILE}" 84 | 85 | } 86 | 87 | 88 | while getopts o:h OPT 89 | do 90 | case $OPT in 91 | o) OUTPUT_FILE=$OPTARG 92 | ;; 93 | h) usage_exit 94 | ;; 95 | \?) 
// Uniq tracks which string keys have already been seen.
//
// The zero value is ready to use. Copies of an initialized Uniq share the
// same underlying set.
type Uniq struct {
	data map[string]struct{}
}

// IsUnique reports whether key is being seen for the first time. The key is
// recorded, so every later call with the same key returns false.
func (u *Uniq) IsUnique(key string) bool {
	if _, seen := u.data[key]; seen {
		return false
	}
	if u.data == nil {
		// Allocate lazily so a zero-value Uniq does not panic on its first insert.
		u.data = make(map[string]struct{})
	}
	u.data[key] = struct{}{}
	return true
}

// NewUniq returns an empty Uniq.
func NewUniq() Uniq {
	return Uniq{data: make(map[string]struct{})}
}
"25", 50 | "京都府": "26", 51 | "大阪府": "27", 52 | "兵庫県": "28", 53 | "奈良県": "29", 54 | "和歌山県": "30", 55 | "鳥取県": "31", 56 | "島根県": "32", 57 | "岡山県": "33", 58 | "広島県": "34", 59 | "山口県": "35", 60 | "徳島県": "36", 61 | "香川県": "37", 62 | "愛媛県": "38", 63 | "高知県": "39", 64 | "福岡県": "40", 65 | "佐賀県": "41", 66 | "長崎県": "42", 67 | "熊本県": "43", 68 | "大分県": "44", 69 | "宮崎県": "45", 70 | "鹿児島県": "46", 71 | "沖縄県": "47", 72 | } 73 | 74 | // GetPrefCode 都道府県名から都道府県コード(JIS X 0401)を取得 75 | func GetPrefCode(prefName string) string { 76 | return PrefCode[prefName] 77 | } 78 | -------------------------------------------------------------------------------- /util/util_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import "testing" 4 | 5 | func TestNormalizeString(t *testing.T) { 6 | cases := []struct { 7 | input string 8 | expected string 9 | }{ 10 | {input: "イワテケン", expected: "イワテケン"}, 11 | {input: "1234567890", expected: "1234567890"}, 12 | {input: "6丁目1-2アーバンネット札幌ビル2F", expected: "6丁目1-2アーバンネット札幌ビル2F"}, 13 | {input: "株式会社 日本経済新聞社 札幌支社", expected: "株式会社 日本経済新聞社 札幌支社"}, 14 | } 15 | 16 | for _, c := range cases { 17 | actual := NormalizeString(c.input) 18 | if actual != c.expected { 19 | t.Errorf("want '%s', got '%s'\n", c.expected, actual) 20 | } 21 | } 22 | } 23 | 24 | func TestGetPrefCode(t *testing.T) { 25 | cases := []struct { 26 | input string 27 | expected string 28 | }{ 29 | {input: "東京都", expected: "13"}, 30 | {input: "テスト", expected: ""}, 31 | } 32 | for _, c := range cases { 33 | actual := GetPrefCode(c.input) 34 | if actual != c.expected { 35 | t.Errorf("want '%s', got '%s'\n", c.expected, actual) 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /writer/csv.go: -------------------------------------------------------------------------------- 1 | package writer 2 | 3 | import ( 4 | "encoding/csv" 5 | ) 6 | 7 | type Writable interface { 8 | Array() []string 9 
| } 10 | 11 | type CsvWriter struct { 12 | w *csv.Writer 13 | } 14 | 15 | func (writer CsvWriter) Write(row Writable) error { 16 | cols := row.Array() 17 | return writer.w.Write(cols) 18 | } 19 | 20 | func (writer CsvWriter) Flush() { 21 | writer.w.Flush() 22 | } 23 | -------------------------------------------------------------------------------- /writer/json.go: -------------------------------------------------------------------------------- 1 | package writer 2 | 3 | import ( 4 | "encoding/json" 5 | "io" 6 | ) 7 | 8 | type JSONWriter struct { 9 | w io.Writer 10 | } 11 | 12 | func (writer JSONWriter) Write(row Writable) error { 13 | b, err := json.Marshal(row) 14 | 15 | if err != nil { 16 | return err 17 | } 18 | 19 | b = append(b, "\n"...) 20 | 21 | _, err = writer.w.Write(b) 22 | 23 | if err != nil { 24 | return err 25 | } 26 | 27 | return err 28 | } 29 | 30 | func (writer JSONWriter) Flush() { 31 | } 32 | -------------------------------------------------------------------------------- /writer/writer.go: -------------------------------------------------------------------------------- 1 | package writer 2 | 3 | import ( 4 | "encoding/csv" 5 | "io" 6 | ) 7 | 8 | type Writer interface { 9 | Write(Writable) error 10 | Flush() 11 | } 12 | 13 | func NewWriter(writer io.Writer, outputType string) Writer { 14 | var w Writer 15 | 16 | if outputType == "json" { 17 | w = JSONWriter{ 18 | w: writer, 19 | } 20 | } else if outputType == "csv" || outputType == "tsv" { 21 | tmp := csv.NewWriter(writer) 22 | if outputType == "tsv" { 23 | tmp.Comma = '\t' 24 | } 25 | 26 | w = CsvWriter{ 27 | w: tmp, 28 | } 29 | } 30 | return w 31 | } 32 | --------------------------------------------------------------------------------