├── tests ├── __init__.py ├── test_addresses.py ├── test_utils.py ├── test_option.py ├── test_cache.py └── test_main.py ├── normalize_japanese_addresses ├── library │ ├── __init__.py │ ├── api.py │ ├── patchAddr.py │ ├── japaneseNumerics.py │ ├── utils.py │ └── regex.py ├── __init__.py └── normalize.py ├── requirements.txt ├── Dockerfile ├── LICENSE.txt ├── setup.py ├── README.md └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /normalize_japanese_addresses/library/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /normalize_japanese_addresses/__init__.py: -------------------------------------------------------------------------------- 1 | from .normalize import normalize 2 | 3 | name = "normalize-japanese-addresses" 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.24.1 2 | kanjize>=1.4.0 3 | pytest>=7.2.1 4 | requests>=2.28.2 5 | pandas>=1.5.3 6 | cachetools>=5.3.0 -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | # mount dir 4 | RUN mkdir -p /opt/mnt 5 | WORKDIR /opt/mnt 6 | 7 | RUN python -m pip install --upgrade pip 8 | 9 | RUN apt-get update && apt-get install -y curl 10 | 11 | ADD requirements.txt ./ 12 | RUN pip install -r requirements.txt 13 | 14 | RUN curl -sL https://github.com/geolonia/japanese-addresses/archive/refs/heads/master.tar.gz | tar xvfz - -C /tmp/ 15 | 16 | # expose port 17 | EXPOSE 8888 -------------------------------------------------------------------------------- /normalize_japanese_addresses/library/api.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | 3 | import requests 4 | 5 | 6 | def api_fetch(endpoint: str = '') -> requests.Response: 7 | if endpoint.startswith('http'): 8 | return requests.get(endpoint) 9 | elif endpoint.startswith('file'): 10 | filepath = urllib.parse.unquote(endpoint.replace("file://", "")) 11 | with open(filepath, 'rb') as fp: 12 | res = requests.Response() 13 | res._content = fp.read() 14 | res.status_code = 200 15 | return res 16 | else: 17 | raise ValueError("Invalid endpoint type") 18 | -------------------------------------------------------------------------------- /normalize_japanese_addresses/library/patchAddr.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | ADDR_PATCHES = [ 4 | { 5 | 'pref': '香川県', 6 | 'city': '仲多度郡まんのう町', 7 | 'town': '勝浦', 8 | 'pattern': re.compile('^字?家6'), 9 | 'result': '家六', 10 | }, 11 | { 12 | 'pref': '愛知県', 13 | 'city': 'あま市', 14 | 'town': '西今宿', 15 | 'pattern': re.compile('^字?梶村1'), 16 | 'result': '梶村一', 17 | }, 18 | { 19 | 'pref': '香川県', 20 | 'city': '丸亀市', 21 | 'town': '原田町', 22 | 'pattern': re.compile('^字?東三分1'), 23 | 'result': '東三分一', 24 | }, 25 | ] 26 | 27 | 28 | def patch_addr(pref: str, city: str, town: str, addr: str) -> str: 29 | for patch in ADDR_PATCHES: 30 | if patch['pref'] == pref and patch['city'] == city and patch['town'] == town: 31 | return re.sub(patch['pattern'], patch['result'], addr) 32 | return addr 33 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2020 Geolonia Inc. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from distutils.core import setup 3 | from os import path 4 | 5 | with open(path.join(path.abspath(path.dirname(__file__)), 'README.md'), encoding='utf-8') as f: 6 | long_description = f.read() 7 | 8 | setup( 9 | name="normalize-japanese-addresses", 10 | version="0.0.9", 11 | author="Takahiro Hama", 12 | author_email="taka710.py@gmail.com", 13 | maintainer="Takahiro Hama", 14 | maintainer_email="taka710.py@gmail.com", 15 | description="Ported version of @geolonia/normalize-japanese-addresses", 16 | long_description=long_description, 17 | long_description_content_type="text/markdown", 18 | license='MIT License', 19 | url="https://github.com/Taka710/normalize-japanese-addresses-py", 20 | project_urls={ 21 | "Bug Tracker": "https://github.com/Taka710/normalize-japanese-addresses-py/issues", 22 | }, 23 | classifiers=[ 24 | "Programming Language :: Python :: 3", 25 | "Programming Language :: Python :: 3.8", 26 | "License :: OSI Approved :: MIT License", 27 | ], 28 | packages=setuptools.find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), 29 | install_requires=[ 30 | "requests", 31 | "kanjize", 32 | "cachetools" 33 | ], 34 | python_requires=">=3.8", 35 | ) 36 | -------------------------------------------------------------------------------- /normalize_japanese_addresses/library/japaneseNumerics.py: -------------------------------------------------------------------------------- 1 | JAPANESE_NUMERICS = { 2 | "〇": "0", 3 | "一": "1", 4 | "二": "2", 5 | "三": "3", 6 | "四": "4", 7 | "五": "5", 8 | "六": "6", 9 | "七": "7", 10 | "八": "8", 11 | "九": "9", 12 | "0": "0", 13 | "1": "1", 14 | "2": "2", 15 | "3": "3", 16 | "4": "4", 17 | "5": "5", 18 | "6": "6", 19 | "7": "7", 20 | "8": "8", 21 | "9": "9", 22 | "0": "0", 23 | "1": "1", 24 | "2": "2", 25 | "3": "3", 26 | "4": "4", 27 | "5": "5", 28 | "6": "6", 29 | "7": "7", 30 | "8": "8", 31 | "9": "9", 32 | } 33 | 34 | OLD_JAPANESE_NUMERICS = { 35 | "零": "〇", 36 | "壱": "一", 37 | "壹": "一", 38 | "弐": "二", 39 | "弍": "二", 40 | "貳": "二", 41 | "貮": "二", 42 | "参": "三", 43 | "參": "三", 44 | "肆": "四", 45 | "伍": "五", 46 | "陸": "六", 47 | "漆": "七", 48 | "捌": "八", 49 | "玖": "九", 50 | "拾": "十", 51 | "廿": "二十", 52 | "陌": "百", 53 | "佰": "百", 54 | "阡": "千", 55 | "仟": "千", 56 | "萬": "万", 57 | } 58 | 59 | KANJI_DIGIT_CHARACTERS = { 60 | "十", 61 | "百", 62 | "千", 63 | "万", 64 | "億", 65 | "兆", 66 | "京" 67 | } 68 | 69 | LARGE_NUMBERS = { 70 | "京": 10000**4, 71 | "兆": 10000**3, 72 | "億": 10000**2, 73 | "万": 10000, 74 | } 75 | -------------------------------------------------------------------------------- /tests/test_addresses.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import pytest 6 | 7 | from normalize_japanese_addresses import normalize 8 | 9 | addresses = pd.read_csv("./csv/addresses.csv") 10 | 11 | testData = [] 12 | for _, address in addresses.iterrows(): 13 | addr = address['住所'] 14 | pref = address['都道府県'] 15 | city = address['市区町村'] 16 | town = address['町丁目'] 17 | other = address['その他'] 18 | 19 | # Nan判定 20 | addr = '' if addr is np.nan else addr 21 | pref = '' if pref is np.nan else pref 22 | city = '' if city is np.nan else city 23 | town = '' if town is np.nan else town 24 | other = '' if other is np.nan else other 25 | 26 | # \u3000を全角スペースに変換 27 | addr = addr.replace('\u3000', ' ') 28 | pref = pref.replace('\u3000', ' ') 29 | city = city.replace('\u3000', ' ') 30 | town = town.replace('\u3000', ' ') 31 | other = other.replace('\u3000', ' ') 32 | 33 | testData.append([addr, pref, city, town, other]) 34 | 35 | def generate_ids(): 36 | return ["csv test " for data in testData] 37 | 38 | @pytest.mark.parametrize("addr, pref, city, town, other", testData, ids=generate_ids()) 39 | def test_address(addr: str, pref: str, city: str, town: str, other: str): 40 | level = 0 41 | 42 | # 戻り値のレベルを設定 43 | level = level + 1 if len(pref) > 0 else level 44 | level = level + 1 if len(city) > 0 else level 45 | level = level + 1 if len(town) > 0 else level 46 | 47 | res = normalize(addr) 48 | assert res["pref"] == pref 49 | assert res["city"] == city 50 | assert res["town"] == town 51 | assert res["addr"] == other 52 | assert res["level"] == level 53 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from normalize_japanese_addresses.library.utils import kan2num, find_kanji_numbers 2 | 3 | 4 | def test_kan2num_0001(): 5 | assert kan2num('千二百三十四') == "1234" 6 | 7 | 8 | def test_kan2num_0002(): 9 | assert kan2num('五百三十七の1') == "537の1" 10 | 11 | 12 | def test_kan2num_0003(): 13 | assert kan2num('五百三十七-1') == "537-1" 14 | 15 | 16 | def test_kan2num_0004(): 17 | assert kan2num('一千百十一兆一千百十一億一千百十一万一千百十一') == "1111111111111111" 18 | 19 | 20 | def test_kan2num_0005(): 21 | assert kan2num('一千百十一兆一千百十一億一千百十一万') == "1111111111110000" 22 | 23 | 24 | def test_kan2num_0006(): 25 | assert kan2num('一千百十一兆一千百十一億一千百十一') == "1111111100001111" 26 | 27 | 28 | def test_kan2num_0007(): 29 | assert kan2num('百十一') == "111" 30 | 31 | 32 | def test_kan2num_0008(): 33 | assert kan2num('三億八') == "300000008" 34 | 35 | 36 | def test_kan2num_0009(): 37 | assert kan2num('三百八') == "308" 38 | 39 | 40 | def test_kan2num_0010(): 41 | assert kan2num('三〇八') == "308" 42 | 43 | 44 | def test_kan2num_0011(): 45 | assert kan2num('二〇二〇') == "2020" 46 | 47 | 48 | def test_kan2num_0012(): 49 | assert kan2num('二千') == "2000" 50 | 51 | 52 | def test_kan2num_0013(): 53 | assert kan2num('壱万') == "10000" 54 | 55 | 56 | def test_kan2num_0014(): 57 | assert kan2num('弍万') == "20000" 58 | 59 | 60 | def test_kan2num_0015(): 61 | assert kan2num('一二三四') == "1234" 62 | 63 | 64 | def test_kan2num_0016(): 65 | assert kan2num('千二三四') == "1234" 66 | 67 | 68 | def test_kan2num_0017(): 69 | assert kan2num('千二百三四') == "1234" 70 | 71 | 72 | def test_kan2num_0018(): 73 | assert kan2num('千二百三十四') == "1234" 74 | 75 | 76 | def test_kan2num_0019(): 77 | assert kan2num('壱阡陌拾壱兆壱阡陌拾壱億壱阡陌拾壱萬壱阡陌拾壱') == "1111111111111111" 78 | 79 | 80 | def test_kan2num_0020(): 81 | assert kan2num('壱仟佰拾壱兆壱仟佰拾壱億壱仟佰拾壱萬壱仟佰拾壱') == "1111111111111111" 82 | 83 | 84 | def test_find_kanji_numbers_0001(): 85 | assert find_kanji_numbers('千二百三十四') == ['千二百三十四'] 86 | 87 | 88 | def test_find_kanji_numbers_0002(): 89 | assert find_kanji_numbers('五百三十七の1') == ['五百三十七'] 90 | -------------------------------------------------------------------------------- /tests/test_option.py: -------------------------------------------------------------------------------- 1 | from normalize_japanese_addresses import normalize 2 | 3 | 4 | # Python版に追加したテスト 5 | def test_normalize_add_0001(): 6 | assert normalize('鹿児島市山下町') == \ 7 | {"pref": "鹿児島県", "city": "鹿児島市", "town": "山下町", "addr": "", 8 | "lat": 31.596716, "lng": 130.55643, "level": 3} 9 | 10 | 11 | def test_normalize_add_0002(): 12 | assert normalize('北海道札幌市西区24-2-2-3-3') == \ 13 | {"pref": "北海道", "city": "札幌市西区", "town": "二十四軒二条二丁目", "addr": "3-3", 14 | "lat": 43.074273, "lng": 141.315099, "level": 3} 15 | 16 | 17 | def test_normalize_add_0003(): 18 | assert normalize('北海道札幌市西区24-2-2-3-3', level=1) == \ 19 | {"pref": "北海道", "city": "", "town": "", "addr": "札幌市西区24-2-2-3-3", 20 | "lat": None, "lng": None, "level": 1} 21 | 22 | 23 | # @geolonia/japanese-addresses にある住所データをローカルから読み込むテスト 24 | # テスト実行用に下記コマンドで /tmp/ 以下に住所データを保存する 25 | # curl -sL https://github.com/geolonia/japanese-addresses/archive/refs/heads/master.tar.gz | tar xvfz - -C /tmp/ 26 | def test_normalize_add_0004(): 27 | assert normalize('北海道札幌市西区24-2-2-3-3', level=3, endpoint='file:///tmp/japanese-addresses-master/api/ja') == \ 28 | {"pref": "北海道", "city": "札幌市西区", "town": "二十四軒二条二丁目", "addr": "3-3", 29 | "lat": 43.074273, "lng": 141.315099, "level": 3} 30 | 31 | # issue #8 32 | # @geolonia/normalize-japanese-addressesで実行した結果と同じになることを確認する 33 | # {pref: '北海道', city: '札幌市中央区', town: '宮の森四条十丁目', addr: '', level: 3} 34 | def test_normalize_add_0005(): 35 | assert normalize('北海道札幌市中央区宮の森4条10丁目') == \ 36 | {"pref": "北海道", "city": "札幌市中央区", "town": "宮の森四条十丁目", "addr": "", 37 | "lat": 43.060356, "lng": 141.298776, "level": 3} 38 | 39 | # issue #9 40 | # 茨城県行方市をnormalizeするとUnboundLocalErrorが発生する件の修正 41 | def test_normalize_add_0006(): 42 | res = normalize('茨城県行方市') 43 | assert res['pref'] == '茨城県' 44 | assert res['city'] == '行方市' 45 | assert res['level'] == 2 46 | 47 | def test_normalize_add_0007(): 48 | res = normalize('千葉県茂原市') 49 | assert res['pref'] == '千葉県' 50 | assert res['city'] == '茂原市' 51 | assert res['level'] == 2 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## @Taka710/normalize-japanese-addresses-py 2 | [![PyPI](https://img.shields.io/pypi/l/normalize_japanese_addresses.svg)](https://pypi.python.org/pypi/normalize_japanese_addresses/) 3 | [![PyPI](https://img.shields.io/pypi/v/normalize_japanese_addresses.svg)](https://pypi.python.org/pypi/normalize_japanese_addresses/) 4 | 5 | オープンソースの住所正規化ライブラリです。 6 | 7 | 経産省の [IMI コンポーネントツール](https://info.gbiz.go.jp/tools/imi_tools/)のジオコーディングの仕組みから 8 | インスピレーションをうけて開発された [@geolonia/normalize-japanese-addresses](https://github.com/geolonia/normalize-japanese-addresses)を 9 | Pythonで利用できるように書き直したものです。 10 | 11 | ## 使い方 12 | pypiで公開していますので、pipコマンドでインストールしてください。 13 | 14 | ``` 15 | pip install normalize-japanese-addresses 16 | ``` 17 | 18 | 住所を正規化します。 19 | 20 | ```python 21 | from normalize_japanese_addresses import normalize 22 | print(normalize("北海道札幌市西区24-2-2-3-3")) 23 | # {'pref': '北海道', 'city': '札幌市西区', 'town': '二十四軒二条二丁目', 'addr': '3-3', 'lat': 43.074273, 'lng': 141.315099, 'level': 3} 24 | ``` 25 | 26 | 住所の正規化結果として戻されるオブジェクトには、`level` プロパティが含まれます。`level` には、住所文字列のどこまでを判別できたかを以下の数値で格納しています。 27 | 28 | * `0` - 都道府県も判別できなかった。 29 | * `1` - 都道府県まで判別できた。 30 | * `2` - 市区町村まで判別できた。 31 | * `3` - 町丁目まで判別できた。 32 | 33 | 例えば都道府県名のみを正規化したい場合、`level` オプションで指定することで処理を早くすることができます。 34 | ```python 35 | from normalize_japanese_addresses import normalize 36 | print(normalize("北海道札幌市西区24-2-2-3-3", level=1)) 37 | # {'pref': '北海道', 'city': '', 'town': '', 'addr': '札幌市西区24-2-2-3-3', 'lat': 43.074273, 'lng': 141.315099, 'level': 1} 38 | ``` 39 | 40 | 名寄せする住所は、[@geolonia/japanese-addresses](https://geolonia.github.io/japanese-addresses/api/ja)から都度取得しています。 41 | 42 | `endpoint` オプションで `file://` 形式のURLを指定することで、ローカルファイルとして保存した住所を参照することができます。 43 | ``` 44 | # Geolonia 住所データのダウンロード 45 | $ curl -sL https://github.com/geolonia/japanese-addresses/archive/refs/heads/master.tar.gz | tar xvfz - 46 | ``` 47 | ※住所データを最新にしたい場合は都度上記コマンドでダウンロードしてください。 48 | 49 | ```python 50 | from normalize_japanese_addresses import normalize 51 | print(normalize("北海道札幌市西区24-2-2-3-3", endpoint="file:///path/to/japanese-addresses-master/api/ja")) 52 | # {'pref': '北海道', 'city': '札幌市西区', 'town': '二十四軒二条二丁目', 'addr': '3-3', 'lat': 43.074273, 'lng': 141.315099, 'level': 3} 53 | ``` 54 | 55 | 56 | ## 注意 57 | 58 | 以下の仕様は、元の [@geolonia/normalize-japanese-addresses](https://github.com/geolonia/normalize-japanese-addresses)を踏襲しています。 59 | 60 | * この正規化エンジンは、住所の「名寄せ」を目的としており、たとえば京都の「通り名」は削除します。 61 | * 郵便や宅急便などに使用される住所としては、問題ないと考えています。 62 | * この正規化エンジンは、町丁目及び小字レベルまでは対応していますが、それ以降については対応しておりません。 63 | * 住居表示が未整備の地域については全体的に苦手です。 64 | * 漢数字と数字の変換については、[@geolonia/japanese-numeral](https://github.com/geolonia/japanese-numeral)をPythonに書き直して取り込んでいます。 65 | 66 | ## ライセンス、利用規約 67 | 68 | ソースコードのライセンスは MIT ライセンスです。 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # pytype static type analyzer 136 | .pytype/ 137 | 138 | # Cython debug symbols 139 | cython_debug/ 140 | -------------------------------------------------------------------------------- /tests/test_cache.py: -------------------------------------------------------------------------------- 1 | from normalize_japanese_addresses import normalize 2 | from normalize_japanese_addresses.normalize import get_prefectures, DEFAULT_ENDPOINT 3 | from normalize_japanese_addresses.library.regex import set_ttl, clear_cache_of_cities, cache_cities, get_towns 4 | from unittest.mock import patch, MagicMock 5 | 6 | import json 7 | from time import sleep 8 | 9 | 10 | def test_normalize_cache_0001(): 11 | """ 12 | 市区町村がキャッシュされていることを確認 13 | """ 14 | 15 | # キャッシュをクリア 16 | clear_cache_of_cities() 17 | assert len(cache_cities) == 0 18 | 19 | # 住所正規化でキャッシュが有効になることを確認 20 | normalize('大阪府堺市北区新金岡町4丁1−8') 21 | assert len(cache_cities) != 0 22 | 23 | # キャッシュ時間を保存 24 | cities_key = cache_cities.keys() 25 | key_city_0 = list(cities_key)[0] 26 | cache_cities_ttl_before = cache_cities[key_city_0][1] 27 | 28 | # キャッシュを利用していることを確認 29 | normalize('大阪府堺市北区新金岡町4丁1−8') 30 | cities_key = cache_cities.keys() 31 | key_city_0 = list(cities_key)[0] 32 | cache_cities_ttl_after = cache_cities[key_city_0][1] 33 | assert cache_cities_ttl_before == cache_cities_ttl_after 34 | 35 | # キャッシュ時間を0にして、キャッシュが利用されないことを確認 36 | set_ttl(0) 37 | normalize('大阪府堺市北区新金岡町4丁1−8') 38 | cities_key = cache_cities.keys() 39 | key_city_0 = list(cities_key)[0] 40 | cache_cities_ttl_after = cache_cities[key_city_0][1] 41 | assert cache_cities_ttl_before != cache_cities_ttl_after 42 | 43 | 44 | @patch('normalize_japanese_addresses.library.regex.api_fetch') 45 | def test_normalize_cache_0002(mock_api_fetch): 46 | """ 47 | 都道府県がキャッシュされていることを確認 48 | また、TTLが有効になっていることを確認 49 | """ 50 | 51 | set_ttl(60) 52 | 53 | mock_text1 = '{"data": "dummy"}' 54 | mock_response = MagicMock() 55 | mock_response.text = mock_text1 56 | mock_api_fetch.return_value = mock_response 57 | 58 | # 初回の呼び出しではapi_fetchが呼ばれる 59 | result_prefecture = get_prefectures(DEFAULT_ENDPOINT) 60 | 61 | # 再度呼び出すと、キャッシュが有効になっている 62 | mock_text2 = '{"data": "dummy2"}' 63 | result_prefecture = get_prefectures(DEFAULT_ENDPOINT) 64 | assert json.loads(mock_text1) == result_prefecture 65 | assert json.loads(mock_text2) != result_prefecture 66 | 67 | # ttlを0にすると、キャッシュが有効にならずapi_fetchが呼ばれる 68 | set_ttl(0) 69 | 70 | mock_response.text = mock_text2 71 | mock_api_fetch.return_value = mock_response 72 | result_prefecture = get_prefectures(DEFAULT_ENDPOINT) 73 | assert json.loads(mock_text1) != result_prefecture 74 | assert json.loads(mock_text2) == result_prefecture 75 | 76 | # 再度呼び出しても、ttl=0のためキャッシュが有効にならずapi_fetchが呼ばれる 77 | result_prefecture = get_prefectures(DEFAULT_ENDPOINT) 78 | assert json.loads(mock_text1) != result_prefecture 79 | assert json.loads(mock_text2) == result_prefecture 80 | 81 | 82 | @patch('normalize_japanese_addresses.library.regex.api_fetch') 83 | def test_normalize_cache_0003(mock_api_fetch): 84 | """ 85 | 町がキャッシュされていることを確認 86 | また、TTLが有効になっていることを確認 87 | """ 88 | def set_mock(mock_api_fetch, mock_text): 89 | mock_response = MagicMock() 90 | mock_response.text = mock_text 91 | mock_api_fetch.return_value = mock_response 92 | 93 | 94 | prefecture = '大阪府' 95 | city = '堺市北区' 96 | mock_text1 = '[{"town":"奥本町一丁","koaza":"","lat":34.581061,"lng":135.510333}]' 97 | mock_text2 = '[{"town":"奥本町二丁","koaza":"","lat":34.581061,"lng":135.510333}]' 98 | 99 | 100 | set_ttl(60) 101 | 102 | set_mock(mock_api_fetch, mock_text1) 103 | 104 | # 初回の呼び出しではapi_fetchが呼ばれる 105 | result_towns = get_towns(prefecture, city, DEFAULT_ENDPOINT) 106 | 107 | # 再度呼び出すと、キャッシュが有効になっている 108 | set_mock(mock_api_fetch, mock_text2) 109 | 110 | result_towns = get_towns(prefecture, city, DEFAULT_ENDPOINT) 111 | assert json.loads(mock_text1) == result_towns 112 | assert json.loads(mock_text2) != result_towns 113 | 114 | # ttlを0にすると、キャッシュが有効にならずapi_fetchが呼ばれる 115 | set_ttl(0) 116 | 117 | # text1の内容をキャッシュ 118 | set_mock(mock_api_fetch, mock_text1) 119 | result_towns = get_towns(prefecture, city, DEFAULT_ENDPOINT) 120 | 121 | # text2の内容を返すようにする 122 | set_mock(mock_api_fetch, mock_text2) 123 | 124 | # キャッシュが有効にならずapi_fetchが呼ばれる 125 | result_towns = get_towns(prefecture, city, DEFAULT_ENDPOINT) 126 | assert json.loads(mock_text1) != result_towns 127 | assert json.loads(mock_text2) == result_towns 128 | -------------------------------------------------------------------------------- /normalize_japanese_addresses/library/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from kanjize import kanji2number 4 | 5 | from .japaneseNumerics import ( 6 | JAPANESE_NUMERICS, 7 | OLD_JAPANESE_NUMERICS, 8 | KANJI_DIGIT_CHARACTERS, 9 | LARGE_NUMBERS 10 | ) 11 | 12 | 13 | def normalize(japanese: str) -> str: 14 | for key, value in OLD_JAPANESE_NUMERICS.items(): 15 | japanese = re.sub(key, value, japanese) 16 | 17 | return japanese 18 | 19 | 20 | def split_large_number(japanese: str) -> dict: 21 | def kanjize_error_kanji_to_int(kanji_num_str: str) -> int: 22 | """ 23 | kanjizeが変換できない漢字を数値に変換する 24 | """ 25 | 26 | # 一番最初の漢字が数字でない場合は、先頭に「一」を追加する 27 | if kanji_num_str[0] not in JAPANESE_NUMERICS: 28 | kanji_num_str = "一" + kanji_num_str 29 | 30 | # 漢字の数字から数詞を取り除く 31 | for kanji in KANJI_DIGIT_CHARACTERS: 32 | kanji_num_str = kanji_num_str.replace(kanji, "") 33 | 34 | # 漢字の位置で桁を判定し、数値に変換する 35 | result = 0 36 | for i, kanji in enumerate(kanji_num_str): 37 | result += int(JAPANESE_NUMERICS[kanji]) * ( 38 | 10 ** (len(kanji_num_str) - i - 1) 39 | ) 40 | 41 | return result 42 | 43 | kanji = japanese 44 | numbers = {} 45 | for key, value in LARGE_NUMBERS.items(): 46 | match = re.match(f"(.+){key}", kanji) 47 | if match is not None: 48 | numbers[key] = kanji2number(match.group()) 49 | kanji = kanji.replace(match.group(), "") 50 | else: 51 | numbers[key] = 0 52 | 53 | if len(kanji) > 0: 54 | try: 55 | numbers["千"] = kanji2number(kanji) 56 | except: 57 | numbers["千"] = kanjize_error_kanji_to_int(kanji) 58 | else: 59 | numbers["千"] = 0 60 | 61 | return numbers 62 | 63 | 64 | def kan2num(value: str) -> str: 65 | def _kanji_to_integer(kanji_number: str) -> int: 66 | kanji_number = normalize(kanji_number) 67 | 68 | if ( 69 | re.match("〇", kanji_number) is not None 70 | or re.match("^[〇一二三四五六七八九]+$", kanji_number) is not None 71 | ): 72 | for key, value in JAPANESE_NUMERICS.items(): 73 | kanji_number = kanji_number.replace(key, value) 74 | 75 | return int(kanji_number) 76 | else: 77 | number = 0 78 | numbers = split_large_number(kanji_number) 79 | 80 | for key, value in LARGE_NUMBERS.items(): 81 | if key in numbers: 82 | n = numbers[key] 83 | number = number + n 84 | 85 | if not str(number).isdigit() or not str(numbers["千"]).isdigit(): 86 | raise TypeError( 87 | "The attribute of _kanji_to_integer() must be a Japanese numeral as integer." 88 | ) 89 | 90 | return number + numbers["千"] 91 | 92 | # エラーが発生した場合、そのままの文字列を返す 93 | try: 94 | for fromValue in find_kanji_numbers(value): 95 | value = value.replace(fromValue, str(_kanji_to_integer(fromValue))) 96 | except Exception as e: 97 | pass 98 | 99 | return value 100 | 101 | 102 | def find_kanji_numbers(text: str) -> list: 103 | def isItemLength(item: str) -> bool: 104 | if item is None: 105 | return False 106 | 107 | if re.match("^[0-90-9]+$", item) is None and ( 108 | len(item) > 0 109 | and "兆" != item 110 | and "億" != item 111 | and "万" != item 112 | and "萬" != item 113 | ): 114 | return True 115 | else: 116 | return False 117 | 118 | num = "([0-90-9]*)|([〇一二三四五六七八九壱壹弐弍貳貮参參肆伍陸漆捌玖]*)" 119 | base_pattern = f"(({num})(千|阡|仟))?(({num})(百|陌|佰))?(({num})(十|拾))?({num})?" 120 | pattern = ( 121 | f"(({base_pattern}兆)?({base_pattern}億)?({base_pattern}(万|萬))?{base_pattern})" 122 | ) 123 | regex = re.compile(pattern) 124 | 125 | match = regex.finditer(text) 126 | 127 | match_kanji = "" 128 | return_kanji = [] 129 | if match is not None: 130 | for m in match: 131 | if isItemLength(m.group()): 132 | match_kanji += m.group() 133 | 134 | if len(match_kanji) > 0: 135 | return_kanji.append(match_kanji) 136 | return return_kanji 137 | 138 | 139 | def zenkaku_to_hankaku(value: str) -> str: 140 | return ( 141 | value.translate( 142 | str.maketrans({chr(0xFF10 + i): chr(0x30 + i) for i in range(10)}) 143 | ) 144 | .translate(str.maketrans({chr(0xFF21 + i): chr(0x41 + i) for i in range(26)})) 145 | .translate(str.maketrans({chr(0xFF41 + i): chr(0x61 + i) for i in range(26)})) 146 | ) 147 | -------------------------------------------------------------------------------- /normalize_japanese_addresses/normalize.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import unicodedata 4 | 5 | from typing import Tuple, Optional 6 | 7 | from .library.regex import ( 8 | get_prefectures, 9 | get_prefecture_regexes, 10 | get_city_regexes, 11 | replace_addr, 12 | normalize_town_name, 13 | set_ttl, 14 | ) 15 | from .library.patchAddr import patch_addr 16 | from .library.utils import zenkaku_to_hankaku 17 | 18 | SPACE: str = " " 19 | HYPHEN: str = "-" 20 | 21 | # japanese-addressesのendpoint 22 | DEFAULT_ENDPOINT = "https://geolonia.github.io/japanese-addresses/api/ja" 23 | 24 | # オプションのレベル設定 25 | DEFAULT_LEVEL = 3 26 | 27 | 28 | def normalize(address: str, **kwargs) -> str: 29 | """ 30 | 住所正規化 31 | :param address: 住所 32 | :param kwargs: オプション(level:正規化レベル) 33 | :return: 正規化後の住所 34 | """ 35 | 36 | # オプションの設定 37 | level, endpoint = set_options(kwargs) 38 | 39 | # 初期設定 40 | addr, pref, city, town, lat, lng, ref_level = get_address_parts(address) 41 | 42 | # 住所の前処理 43 | addr = preprocessing_address(addr) 44 | 45 | # 都道府県情報を取得 46 | prefectures = get_prefectures(endpoint) 47 | prefectures_list: list = list(prefectures.keys()) 48 | 49 | # 都道府県の正規化 50 | addr, pref = normalize_prefecture_names( 51 | addr=addr, 52 | prefectures=prefectures, 53 | prefectures_list=prefectures_list, 54 | endpoint=endpoint, 55 | ) 56 | 57 | # 市区町村の正規化 58 | if pref != "" and level >= 2: 59 | addr, city = normalize_city_names(addr=addr, prefectures=prefectures, pref=pref) 60 | 61 | # 町丁目以降の正規化 62 | if city != "" and level >= 3: 63 | addr, town, lat, lng = normalize_after_town_names( 64 | addr=addr, pref=pref, city=city, endpoint=endpoint 65 | ) 66 | 67 | # 住所の後処理 68 | addr = patch_addr(pref, city, town, addr) 69 | 70 | # 戻り値のレベルを設定 71 | ref_level = set_level(pref, city, town, ref_level) 72 | 73 | return { 74 | "pref": pref, 75 | "city": city, 76 | "town": town, 77 | "addr": addr, 78 | "lat": lat, 79 | "lng": lng, 80 | "level": ref_level, 81 | } 82 | 83 | 84 | def set_options(options: dict) -> tuple: 85 | """ 86 | オプションの設定 87 | """ 88 | level = options.get("level", DEFAULT_LEVEL) 89 | endpoint = options.get("endpoint", DEFAULT_ENDPOINT) 90 | option_ttl = options.get("ttl", None) 91 | if option_ttl is not None and isinstance(option_ttl, int): 92 | set_ttl(option_ttl) 93 | return level, endpoint 94 | 95 | 96 | def get_address_parts( 97 | address: str, 98 | ) -> Tuple[str, str, str, str, Optional[float], Optional[float], int]: 99 | """ 100 | 住所の初期設定 101 | """ 102 | # 初期化 103 | pref: str = "" 104 | city: str = "" 105 | town: str = "" 106 | lat: Optional[float] = None 107 | lng: Optional[float] = None 108 | ref_level: int = 0 109 | 110 | # 初期住所設定 111 | addr: str = unicodedata.normalize("NFC", address) 112 | return addr, pref, city, town, lat, lng, ref_level 113 | 114 | 115 | def replace_spaces(addr: str) -> str: 116 | """ 117 | 全角スペースを半角スペースに置換する 118 | """ 119 | addr = addr.replace(" ", SPACE).replace(" ", SPACE) 120 | addr = re.sub(" +", SPACE, addr) 121 | return addr 122 | 123 | 124 | def replace_hyphen_like_characters_after_digits(addr: str) -> str: 125 | """ 126 | 数字の後にあるハイフンのような文字をハイフンに置換する 127 | """ 128 | hyphen_iter = re.finditer( 129 | "([0-90-9一二三四五六七八九〇十百千][--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━])|([--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━])[0-90-9一二三四五六七八九〇十]", 130 | addr, 131 | ) 132 | for m in hyphen_iter: 133 | from_value = m.group() 134 | replace_value = re.sub("[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━]", HYPHEN, from_value) 135 | addr = addr.replace(from_value, replace_value) 136 | return addr 137 | 138 | 139 | def remove_spaces_before_town_city_district_name(addr: str) -> str: 140 | """ 141 | 町丁目名の前にあるスペースを削除する 142 | """ 143 | hyphen_iter = re.finditer("(.+)(丁目?|番([町地丁])|条|軒|線|([のノ])町|地割)", addr) 144 | for m in hyphen_iter: 145 | from_value = m.group() 146 | replace_value = from_value.replace(SPACE, "") 147 | addr = addr.replace(from_value, replace_value) 148 | return addr 149 | 150 | 151 | def remove_spaces_before_ward_or_gun(addr: str) -> str: 152 | """ 153 | 区、郡以前のスペースは全て削除する 154 | """ 155 | hyphen_iter = re.finditer("(.+)((郡.+(町|村))|((市|巿).+(区|區)))", addr) 156 | for m in hyphen_iter: 157 | from_value = m.group() 158 | replace_value = from_value.replace(SPACE, "") 159 | addr = addr.replace(from_value, replace_value) 160 | return addr 161 | 162 | 163 | def remove_leading_spaces_before_the_first_arabic_numeral(addr: str) -> str: 164 | """ 165 | 最初のアラビア数字の前にあるスペースを削除する 166 | """ 167 | hyphen_iter = re.finditer(".+?[0-9一二三四五六七八九〇十百千]-", addr) 168 | for m in hyphen_iter: 169 | from_value = m.group() 170 | replace_value = from_value.replace(SPACE, "") 171 | addr = addr.replace(from_value, replace_value) 172 | break 173 | return addr 174 | 175 | 176 | def preprocessing_address(addr: str) -> str: 177 | """ 178 | 住所の前処理 179 | """ 180 | 181 | # スペース変換 182 | addr = replace_spaces(addr) 183 | 184 | # 全角の英数字は半角に変換 185 | addr = zenkaku_to_hankaku(addr) 186 | 187 | # 数字の後に紐づくハイフン類似文字をすべて半角ハイフンに変換 188 | addr = replace_hyphen_like_characters_after_digits(addr) 189 | 190 | # 町丁目名以前のスペースはすべて削除 191 | addr = remove_spaces_before_town_city_district_name(addr) 192 | 193 | # // 区、郡以前のスペースはすべて削除 194 | addr = remove_spaces_before_ward_or_gun(addr) 195 | 196 | # 1番はじめに出てくるアラビア数字以前のスペースを削除 197 | addr = remove_leading_spaces_before_the_first_arabic_numeral(addr) 198 | 199 | return addr 200 | 201 | 202 | def normalize_prefecture_names( 203 | addr: str, prefectures: dict, prefectures_list: list, endpoint: str 204 | ) -> str: 205 | """ 206 | 都道府県名を正規化する 207 | """ 208 | pref = "" 209 | for _pref, reg in get_prefecture_regexes(prefectures_list, False): 210 | if reg.match(addr): 211 | pref = _pref 212 | addr = addr[len(reg.match(addr)[0]) :] 213 | break 214 | 215 | if pref == "": 216 | # 都道府県が省略されている 217 | matched = [] 218 | 219 | for _pref, cities in prefectures.items(): 220 | addr = addr.strip() 221 | for _city, reg in get_city_regexes(_pref, cities): 222 | match = reg.match(addr) 223 | if match is not None: 224 | matched.append( 225 | { 226 | "pref": _pref, 227 | "city": _city, 228 | "addr": addr[len(match.group()) :], 229 | } 230 | ) 231 | 232 | # マッチする都道府県が複数ある場合は町名まで正規化して都道府県名を判別する。(例: 東京都府中市と広島県府中市など) 233 | if len(matched) == 1: 234 | pref = matched[0]["pref"] 235 | else: 236 | for match in matched: 237 | normalized = normalize_town_name( 238 | match["addr"], match["pref"], match["city"], endpoint 239 | ) 240 | 241 | if normalized is not None: 242 | pref = match["pref"] 243 | break 244 | 245 | # 都道府県が省略されている場合に都道府県を抽出(誤検知防止のため、省略 246 | if pref == "": 247 | for _pref, reg in get_prefecture_regexes(prefectures_list, True): 248 | if reg.match(addr): 249 | pref = _pref 250 | addr = addr[len(reg.match(addr)[0]) :] 251 | break 252 | 253 | return addr, pref 254 | 255 | 256 | def normalize_city_names(addr: str, prefectures: dict, pref: str) -> Tuple[str, str]: 257 | """ 258 | 市区町村名を正規化する 259 | """ 260 | city = "" 261 | cities = prefectures[pref] 262 | 263 | for _city, reg in get_city_regexes(pref, cities): 264 | match = reg.match(addr) 265 | if match is not None: 266 | city = _city 267 | addr = addr[len(match.group()) :] 268 | break 269 | 270 | return addr, city 271 | 272 | 273 | def normalize_after_town_names( 274 | addr: str, pref: str, city: str, endpoint: str 275 | ) -> Tuple[str, str, float, float]: 276 | """ 277 | 町名以降の住所を正規化する 278 | """ 279 | 280 | town = "" 281 | lat = None 282 | lng = None 283 | 284 | normalized = normalize_town_name(addr, pref, city, endpoint) 285 | if normalized is not None: 286 | _town = normalized["town"] 287 | town = _town["originalTown"] if "originalTown" in _town else _town["town"] 288 | addr = normalized["addr"] 289 | lat = normalized["lat"] 290 | lng = normalized["lng"] 291 | 292 | # townが取得できた場合にのみ、addrに対する各種の変換処理を行う。(#187) 293 | if town != "": 294 | addr = replace_addr(addr) 295 | 296 | return addr, town, lat, lng 297 | 298 | 299 | def set_level(pref, city, town, ref_level) -> int: 300 | """ 301 | 住所のレベルを設定する 302 | """ 303 | ref_level += len(pref) > 0 304 | ref_level += len(city) > 0 305 | ref_level += len(town) > 0 306 | return ref_level 307 | -------------------------------------------------------------------------------- /normalize_japanese_addresses/library/regex.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import urllib.parse 4 | from typing import List, Dict, Any, Optional, Union, Generator, Tuple, Callable, Pattern 5 | 6 | import kanjize 7 | from cachetools import TTLCache 8 | import time 9 | 10 | import functools 11 | from functools import wraps 12 | 13 | from .api import api_fetch 14 | from .utils import kan2num, find_kanji_numbers 15 | 16 | JIS_OLD_KANJI = ( 17 | "亞,圍,壹,榮,驛,應,櫻,假,會,懷,覺,樂,陷,歡,氣,戲,據,挾,區,徑,溪,輕,藝,儉,圈,權,嚴,恆,國,齋,雜,蠶,殘,兒,實,釋,從,縱,敍,燒,條,剩,壤,釀,眞,盡,醉,髓,聲,竊," 18 | "淺,錢,禪,爭,插,騷,屬,對,滯,擇,單,斷,癡,鑄,敕,鐵,傳,黨,鬪,屆,腦,廢,發,蠻,拂,邊,瓣,寶,沒,滿,藥,餘,樣,亂,兩,禮,靈,爐,灣,惡,醫,飮,營,圓,歐,奧,價,繪,擴,學," 19 | "罐,勸,觀,歸,犧,擧,狹,驅,莖,經,繼,缺,劍,檢,顯,廣,鑛,碎,劑,參,慘,絲,辭,舍,壽,澁,肅,將,證,乘,疊,孃,觸,寢,圖,穗,樞,齊,攝,戰,潛,雙,莊,裝,藏,續,體,臺,澤,膽," 20 | "彈,蟲,廳,鎭,點,燈,盜,獨,貳,霸,賣,髮,祕,佛,變,辯,豐,飜,默,與,譽,謠,覽,獵,勵,齡,勞,壓,爲,隱,衞,鹽,毆,穩,畫,壞,殼,嶽,卷,關,顏,僞,舊,峽,曉,勳,惠,螢,鷄,縣," 21 | "險,獻,驗,效,號,濟,册,棧,贊,齒,濕,寫,收,獸,處,稱,奬,淨,繩,讓,囑,愼,粹,隨,數,靜,專,踐,纖,壯,搜,總,臟,墮,帶,瀧,擔,團,遲,晝,聽,遞,轉,當,稻,讀,惱,拜,麥,拔," 22 | "濱,竝,辨,舖,襃,萬,譯,豫,搖,來,龍,壘,隸,戀,樓,鰺,鶯,蠣,攪,竈,灌,諫,頸,礦,蘂,靱,賤,壺,礪,檮,濤,邇,蠅,檜,儘,藪,籠,彌,麩".split( 23 | "," 24 | ) 25 | ) 26 | 27 | JIS_NEW_KANJI = ( 28 | "亜,囲,壱,栄,駅,応,桜,仮,会,懐,覚,楽,陥,歓,気,戯,拠,挟,区,径,渓,軽,芸,倹,圏,権,厳,恒,国,斎,雑,蚕,残,児,実,釈,従,縦,叙,焼,条,剰,壌,醸,真,尽,酔,髄,声,窃," 29 | "浅,銭,禅,争,挿,騒,属,対,滞,択,単,断,痴,鋳,勅,鉄,伝,党,闘,届,脳,廃,発,蛮,払,辺,弁,宝,没,満,薬,余,様,乱,両,礼,霊,炉,湾,悪,医,飲,営,円,欧,奥,価,絵,拡,学," 30 | "缶,勧,観,帰,犠,挙,狭,駆,茎,経,継,欠,剣,検,顕,広,鉱,砕,剤,参,惨,糸,辞,舎,寿,渋,粛,将,証,乗,畳,嬢,触,寝,図,穂,枢,斉,摂,戦,潜,双,荘,装,蔵,続,体,台,沢,胆," 31 | "弾,虫,庁,鎮,点,灯,盗,独,弐,覇,売,髪,秘,仏,変,弁,豊,翻,黙,与,誉,謡,覧,猟,励,齢,労,圧,為,隠,衛,塩,殴,穏,画,壊,殻,岳,巻,関,顔,偽,旧,峡,暁,勲,恵,蛍,鶏,県," 32 | "険,献,験,効,号,済,冊,桟,賛,歯,湿,写,収,獣,処,称,奨,浄,縄,譲,嘱,慎,粋,随,数,静,専,践,繊,壮,捜,総,臓,堕,帯,滝,担,団,遅,昼,聴,逓,転,当,稲,読,悩,拝,麦,抜," 33 | "浜,並,弁,舗,褒,万,訳,予,揺,来,竜,塁,隷,恋,楼,鯵,鴬,蛎,撹,竃,潅,諌,頚,砿,蕊,靭,賎,壷,砺,梼,涛,迩,蝿,桧,侭,薮,篭,弥,麸".split( 34 | "," 35 | ) 36 | ) 37 | 38 | ttl = 60 * 60 * 24 * 7 39 | cache_prefecture = TTLCache(maxsize=300, ttl=ttl) 40 | cache_cities = {} 41 | cache_towns = TTLCache(maxsize=300, ttl=ttl) 42 | 43 | 44 | def set_ttl(ttl_value: int) -> None: 45 | global ttl 46 | global cache_prefecture 47 | global cache_towns 48 | 49 | ttl = ttl_value 50 | cache_prefecture = TTLCache(maxsize=300, ttl=ttl) 51 | cache_prefecture.clear() 52 | cache_towns = TTLCache(maxsize=300, ttl=ttl) 53 | cache_towns.clear() 54 | clear_cache_of_cities() 55 | 56 | def clear_cache_of_cities() -> None: 57 | global cache_cities 58 | 59 | cache_cities.clear() 60 | 61 | def get_prefectures(endpoint: str) -> dict: 62 | global cache_prefecture 63 | endpoint_url = f"{endpoint}.json" 64 | prefectures = cache_prefecture.get(endpoint_url) 65 | if prefectures is None: 66 | prefectures = json.loads(api_fetch(endpoint_url).text) 67 | cache_prefecture[endpoint_url] = prefectures 68 | return prefectures 69 | 70 | def get_prefecture_regexes(prefecture_names: list, omit_mode: bool = False) -> list: 71 | prefecture_regex = "([都道府県])" 72 | for prefecture_name in prefecture_names: 73 | _prefecture_name = re.sub(f"{prefecture_regex}$", "", prefecture_name) 74 | reg = ( 75 | re.compile(f"^{_prefecture_name}{prefecture_regex}") 76 | if not omit_mode 77 | else re.compile(f"^{_prefecture_name}{prefecture_regex}?") 78 | ) 79 | yield prefecture_name, reg 80 | 81 | def cities_list_to_tuple(lst) -> tuple: 82 | # citiesのリストについては、事前に長さでソートする必要があるためTupleに変換する前に実行する 83 | lst.sort(key=len) 84 | return tuple(lst) 85 | 86 | 87 | def cache_cities_with_ttl() -> Callable: 88 | global cache_cities 89 | 90 | def decorator(func): 91 | @functools.wraps(func) 92 | def wrapper(*args, **kwargs): 93 | # リスト引数をタプルに変換してキャッシュする 94 | args_key = tuple(cities_list_to_tuple(arg) if isinstance(arg, list) else arg for arg in args) 95 | if args_key in cache_cities: 96 | result, timestamp = cache_cities[args_key] 97 | if time.time() - timestamp <= ttl: 98 | return result 99 | result = func(*args, **kwargs) 100 | cache_cities[args_key] = (result, time.time()) 101 | return result 102 | 103 | return wrapper 104 | 105 | return decorator 106 | 107 | @cache_cities_with_ttl() 108 | def get_city_regexes(pref: str, cities: list) -> tuple: 109 | results = [] 110 | 111 | for city in cities: 112 | _city = to_regex(city) 113 | if re.match(".*?([町村])$", city) is not None: 114 | _city = re.sub("(.+?)郡", "(\\1郡)?", _city) 115 | results.append((city, re.compile(f"^{_city}"))) 116 | 117 | return results 118 | 119 | def get_towns(pref: str, city: str, endpoint: str) -> list: 120 | global cache_towns 121 | 122 | town_endpoint = "/".join( 123 | [ 124 | endpoint, 125 | urllib.parse.quote(pref), 126 | urllib.parse.quote(city), 127 | ] 128 | ) 129 | 130 | endpoint_url = f"{town_endpoint}.json" 131 | towns = cache_towns.get(endpoint_url) 132 | if towns is None: 133 | towns = list(json.loads((api_fetch(endpoint_url)).text)) 134 | cache_towns[endpoint_url] = towns 135 | 136 | return towns 137 | 138 | def get_town_regexes(pref: str, city: str, endpoint: str) -> list: 139 | def get_normalized_chome_regex(match_value: str) -> str: 140 | regexes = [re.sub("(丁目?|番([町丁])|条|軒|線|([のノ])町|地割)", "", match_value)] 141 | 142 | if re.match("^壱", match_value) is not None: 143 | regexes.append("一") 144 | regexes.append("1") 145 | regexes.append("1") 146 | else: 147 | num = match_value 148 | for match in re.finditer("([一二三四五六七八九十]+)", match_value): 149 | replace_num = str(kan2num(match.group())) 150 | num = num.replace(match.group(), replace_num) 151 | 152 | num = re.sub("(丁目?|番([町丁])|条|軒|線|([のノ])町|地割)", "", num) 153 | 154 | regexes.append(num) 155 | 156 | _regex = "|".join(regexes) 157 | _regex = f"({_regex})(([町丁])目?|番([町丁])|条|軒|線|の町?|地割|[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━])" 158 | 159 | return _regex 160 | 161 | def towns_length(api_town: dict) -> int: 162 | # 大字で始まる場合、優先度を低く設定する。 163 | town_len = len(api_town["town"]) 164 | town_len = town_len - 2 if str(api_town["town"]).startswith("大字") else town_len 165 | return town_len 166 | 167 | def is_kanji_number_follewed_by_cho(target_town_name: str) -> bool: 168 | x_cho = re.match(".町", target_town_name) 169 | if not x_cho: 170 | return False 171 | else: 172 | kanji_numbers = find_kanji_numbers(x_cho.group()) 173 | return len(kanji_numbers) > 0 174 | 175 | api_pre_towns = get_towns(pref, city, endpoint) 176 | api_towns_set = [x["town"] for x in api_pre_towns] 177 | api_towns = [] 178 | townAddr = "" 179 | 180 | # 京都かどうかを判定 181 | is_kyoto = re.match("^京都市", city) is not None 182 | # 町丁目に「町」が含まれるケースへの対応 183 | # 通常は「○○町」のうち「町」の省略を許容し同義語として扱うが、まれに自治体内に「○○町」と「○○」が共存しているケースがある。 184 | # この場合は町の省略は許容せず、入力された住所は書き分けられているものとして正規化を行う。 185 | # 更に、「愛知県名古屋市瑞穂区十六町1丁目」漢数字を含むケースだと丁目や番地・号の正規化が不可能になる。このようなケースも除外。 186 | for town in api_pre_towns: 187 | api_towns.append(town) 188 | 189 | originalTown = town["town"] 190 | if str(originalTown).find("町") == -1: 191 | continue 192 | 193 | # 「愛知県名古屋市瑞穂区十六町1丁目」など漢数字を含むケースは、曖昧処理から除外 194 | if re.match("[壱一二三四五六七八九十百千万]+町", originalTown) is None: 195 | townAddr = re.sub( 196 | "(?!^町)町", "", originalTown 197 | ) # NOTE: 冒頭の「町」は明らかに省略するべきではないので、除外 198 | 199 | if ( 200 | not is_kyoto 201 | and townAddr not in api_towns_set 202 | and f"大字{townAddr}" not in api_towns_set 203 | and not is_kanji_number_follewed_by_cho( # 大字は省略されるため、大字〇〇と〇〇町がコンフリクトする。このケースを除外 204 | originalTown 205 | ) 206 | ): 207 | # エイリアスとして町なしのパターンを登録 208 | dict_town = town.copy() 209 | dict_town["originalTown"] = town["town"] 210 | dict_town["town"] = townAddr 211 | api_towns.append(dict_town) 212 | 213 | # 少ない文字数の地名に対してミスマッチしないように文字の長さ順にソート 214 | towns = sorted(api_towns, key=lambda x: towns_length(x), reverse=True) 215 | 216 | town_regexes = [] 217 | for town in towns: 218 | _town = town["town"] 219 | # 横棒を含む場合(流通センター、など)に対応 220 | _town = re.sub("[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━]", "[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━]", _town) 221 | _town = re.sub("大?字", "(大?字)?", _town) 222 | 223 | for replace_town in re.finditer( 224 | "([壱一二三四五六七八九十]+)(丁目?|番([町丁])|条|軒|線|([のノ])町|地割)", _town 225 | ): 226 | _town = re.sub( 227 | replace_town.group(), 228 | get_normalized_chome_regex(replace_town.group()), 229 | _town, 230 | ) 231 | 232 | _town = to_regex(_town) 233 | 234 | return_town = {} 235 | if "originalTown" in town: 236 | return_town["originalTown"] = town["originalTown"] 237 | return_town["town"] = town["town"] 238 | town_regexes.append([return_town, _town, town["lat"], town["lng"]]) 239 | 240 | # X丁目の丁目なしの数字だけ許容するため、最後に数字だけ追加していく 241 | for town in towns: 242 | chome_match = re.search(r'([^一二三四五六七八九十]+)([一二三四五六七八九十]+)(丁目?)', town["town"]) 243 | if chome_match is None: 244 | continue 245 | 246 | chome_name_part = chome_match.group(1) 247 | chome_number_kanji = chome_match.group(2) 248 | chome_number = kan2num(chome_number_kanji) 249 | chome_pattern = f"^{chome_name_part}({chome_number_kanji}|{chome_number})" 250 | return_town = {} 251 | if "originalTown" in town: 252 | return_town["originalTown"] = town["originalTown"] 253 | return_town["town"] = town["town"] 254 | town_regexes.append([return_town, chome_pattern, town["lat"], town["lng"]]) 255 | 256 | return town_regexes 257 | 258 | 259 | def replace_addr(addr: str) -> str: 260 | def replace_1(match_value: str) -> str: 261 | for num in list(re.finditer("([0-9]+)", match_value)): 262 | match_value = match_value.replace( 263 | num.group(), kanjize.number2kanji(int(num.group())) 264 | ) 265 | return match_value 266 | 267 | addr = re.sub("^-", "", addr) 268 | 269 | patterns = [ 270 | (re.compile("([0-9]+)(丁目)"), lambda m: replace_1(m.group())), 271 | ( 272 | re.compile("(([0-9〇一二三四五六七八九十百千]+)(番地?)([0-9〇一二三四五六七八九十百千]+)号)\\s*(.+)"), 273 | lambda m: "{} {}".format(m.group(1), m.group(5)), 274 | ), 275 | ( 276 | re.compile("([0-9〇一二三四五六七八九十百千]+)\\s*(番地?)\\s*([(0-9〇一二三四五六七八九十百千]+)\\s*号?"), 277 | lambda m: "{}-{}".format(m.group(1), m.group(3)), 278 | ), 279 | (re.compile("([0-9〇一二三四五六七八九十百千]+)番地?"), r"\1"), 280 | (re.compile("([0-9〇一二三四五六七八九十百千]+)の"), r"\1-"), 281 | ( 282 | re.compile("([0-9〇一二三四五六七八九十百千]+)[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━]"), 283 | lambda m: re.sub("[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━]", "-", kan2num(m.group())), 284 | ), 285 | ( 286 | re.compile("[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━]([0-9〇一二三四五六七八九十百千]+)"), 287 | lambda m: re.sub("[--﹣−‐⁃‑‒–—﹘―⎯⏤ーー─━]", "-", kan2num(m.group())), 288 | ), 289 | (re.compile("([0-9〇一二三四五六七八九十百千]+)-"), lambda m: kan2num(m.group())), 290 | (re.compile("-([0-9〇一二三四五六七八九十百千]+)"), lambda m: kan2num(m.group())), 291 | (re.compile("-[^0-9]([0-9〇一二三四五六七八九十百千]+)"), lambda m: kan2num(m.group())), 292 | (re.compile("([0-9〇一二三四五六七八九十百千]+)$"), lambda m: kan2num(m.group())), 293 | ] 294 | 295 | for pattern, repl in patterns: 296 | addr = pattern.sub(repl, addr) 297 | 298 | return addr.strip() 299 | 300 | 301 | def jis_kanji_regexes() -> Generator[Tuple[Pattern, str, str], None, None]: 302 | for old_kanji, new_kanji in zip(JIS_OLD_KANJI, JIS_NEW_KANJI): 303 | regex = re.compile(f"{old_kanji}|{new_kanji}") 304 | yield regex, old_kanji, new_kanji 305 | 306 | 307 | def jis_kanji_to_both_forms(value: str) -> str: 308 | _value = value 309 | for reg, old_kanji, new_kanji in jis_kanji_regexes(): 310 | pattern = re.compile(reg) 311 | _value = pattern.sub(f"({old_kanji}|{new_kanji})", _value) 312 | return _value 313 | 314 | 315 | def to_regex(value: str) -> str: 316 | # 以下なるべく文字数が多いものほど上にすること 317 | patterns = [ 318 | ("三栄町|四谷三栄町", "(三栄町|四谷三栄町)"), 319 | ("鬮野川|くじ野川|くじの川", "(鬮野川|くじ野川|くじの川)"), 320 | ("通り|とおり", "(通り|とおり)"), 321 | ("柿碕町|柿さき町", "(柿碕町|柿さき町)"), 322 | ("埠頭|ふ頭", "(埠頭|ふ頭)"), 323 | ("番町|番丁", "(番町|番丁)"), 324 | ("大冝|大宜", "(大冝|大宜)"), 325 | ("穝|さい", "(穝|さい)"), 326 | ("杁|えぶり", "(杁|えぶり)"), 327 | ("薭|稗|ひえ|ヒエ", "(薭|稗|ひえ|ヒエ)"), 328 | ("[之ノの]", "[之ノの]"), 329 | ("[ヶケが]", "[ヶケが]"), 330 | ("[ヵカか力]", "[ヵカか力]"), 331 | ("[ッツっつ]", "[ッツっつ]"), 332 | ("[ニ二]", "[ニ二]"), 333 | ("[ハ八]", "[ハ八]"), 334 | ("[塚塚]", "[塚塚]"), 335 | ("[釜竈]", "[釜竈]"), 336 | ("[條条]", "[條条]"), 337 | ("[狛拍]", "[狛拍]"), 338 | ("[藪薮]", "[藪薮]"), 339 | ("[渕淵]", "[渕淵]"), 340 | ("[エヱえ]", "[エヱえ]"), 341 | ("[曾曽]", "[曾曽]"), 342 | ("[舟船]", "[舟船]"), 343 | ("[莵菟]", "[莵菟]"), 344 | ("[市巿]", "[市巿]"), 345 | ] 346 | 347 | # コンパイル済み正規表現オブジェクトのリストを順番に適用 348 | for pattern in [(re.compile(p[0]), p[1]) for p in patterns]: 349 | value = pattern[0].sub("({})".format(pattern[0].pattern), value) 350 | 351 | value = jis_kanji_to_both_forms(value) 352 | 353 | return value 354 | 355 | 356 | def normalize_town_name( 357 | addr: str, pref: str, city: str, endpoint: str 358 | ) -> Optional[Dict[str, str]]: 359 | # アドレスの前後の空白を削除する 360 | addr = addr.strip() 361 | 362 | # アドレスの先頭が"大字"で始まっていた場合は削除 363 | addr = re.sub("^大字", "", addr) 364 | 365 | # 町名の正規化 366 | regex_prefixes = ["^"] 367 | if re.match("^京都市", city): 368 | # 京都は通り名削除のために後方一致を使う 369 | regex_prefixes.append(".*") 370 | 371 | for regex_prefix in regex_prefixes: 372 | for town, pattern, lat, lng in get_town_regexes(pref, city, endpoint): 373 | if regex_prefix == "^": 374 | regex = re.compile(f"{regex_prefix}{pattern}") 375 | match = regex.match(addr) 376 | if match: 377 | # 正規表現にマッチした場合、辞書型で町の名前、住所、緯度、経度を返す 378 | return { 379 | "town": town, 380 | "addr": addr[len(match.group()) :], 381 | "lat": lat, 382 | "lng": lng, 383 | } 384 | else: 385 | regex = re.compile(f"{regex_prefix}{pattern}") 386 | match = regex.match(addr) 387 | if match: 388 | # 正規表現にマッチした場合、辞書型で町の名前、住所、緯度、経度を返す 389 | return { 390 | "town": town, 391 | "addr": re.search(pattern, match.group()).group() 392 | if len(addr) == len(match.group()) 393 | else addr[len(match.group()) :], 394 | "lat": lat, 395 | "lng": lng, 396 | } 397 | 398 | # 正規表現にマッチしなかった場合は None を返す 399 | return None 400 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | import unicodedata 2 | 3 | from normalize_japanese_addresses import normalize 4 | 5 | 6 | def test_normalize_0001(): 7 | assert normalize('大阪府堺市北区新金岡町4丁1−8') == \ 8 | {"pref": "大阪府", "city": "堺市北区", "town": "新金岡町四丁", "addr": "1-8", 9 | "lat": 34.568184, "lng": 135.519409, "level": 3} 10 | 11 | 12 | def test_normalize_0002(): 13 | assert normalize('大阪府堺市北区新金岡町4丁1ー8') == \ 14 | {"pref": "大阪府", "city": "堺市北区", "town": "新金岡町四丁", "addr": "1-8", 15 | "lat": 34.568184, "lng": 135.519409, "level": 3} 16 | 17 | 18 | def test_normalize_0003(): 19 | assert normalize('和歌山県串本町串本1234') == \ 20 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 21 | "lat": 33.470358, "lng": 135.779952, "level": 3} 22 | 23 | 24 | def test_normalize_0004(): 25 | assert normalize('和歌山県東牟婁郡串本町串本1234') == \ 26 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 27 | "lat": 33.470358, "lng": 135.779952, "level": 3} 28 | 29 | 30 | def test_normalize_0005(): 31 | assert normalize('和歌山県東牟婁郡串本町串本千二百三十四') == \ 32 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 33 | "lat": 33.470358, "lng": 135.779952, "level": 3} 34 | 35 | 36 | def test_normalize_0006(): 37 | assert normalize('和歌山県東牟婁郡串本町串本一千二百三十四') == \ 38 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 39 | "lat": 33.470358, "lng": 135.779952, "level": 3} 40 | 41 | 42 | def test_normalize_0007(): 43 | assert normalize('和歌山県東牟婁郡串本町串本一二三四') == \ 44 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 45 | "lat": 33.470358, "lng": 135.779952, "level": 3} 46 | 47 | 48 | def test_normalize_0008(): 49 | assert normalize('和歌山県東牟婁郡串本町くじ野川一二三四') == \ 50 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "鬮野川", "addr": "1234", 51 | "lat": 33.493026, "lng": 135.784941, "level": 3} 52 | 53 | 54 | def test_normalize_0009(): 55 | assert normalize('京都府京都市中京区寺町通御池上る上本能寺前町488番地') == \ 56 | {"pref": "京都府", "city": "京都市中京区", "town": "上本能寺前町", "addr": "488", 57 | "lat": 35.011582, "lng": 135.767914, "level": 3} 58 | 59 | 60 | def test_normalize_0010(): 61 | assert normalize('京都府京都市中京区上本能寺前町488') == \ 62 | {"pref": "京都府", "city": "京都市中京区", "town": "上本能寺前町", "addr": "488", 63 | "lat": 35.011582, "lng": 135.767914, "level": 3} 64 | 65 | 66 | def test_normalize_0011(): 67 | assert normalize('大阪府大阪市中央区大手前2-1') == \ 68 | {"pref": "大阪府", "city": "大阪市中央区", "town": "大手前二丁目", "addr": "1", 69 | "lat": 34.687006, "lng": 135.519317, "level": 3} 70 | 71 | 72 | def test_normalize_0012(): 73 | assert normalize('北海道札幌市西区24-2-2-3-3') == \ 74 | {"pref": "北海道", "city": "札幌市西区", "town": "二十四軒二条二丁目", "addr": "3-3", 75 | "lat": 43.074273, "lng": 141.315099, "level": 3} 76 | 77 | 78 | def test_normalize_0013(): 79 | assert normalize('京都府京都市東山区大和大路2-537-1') == \ 80 | {"pref": "京都府", "city": "京都市東山区", "town": "大和大路二丁目", "addr": "537-1", 81 | "lat": 34.989944, "lng": 135.770967, "level": 3} 82 | 83 | 84 | def test_normalize_0014(): 85 | assert normalize('京都府京都市東山区大和大路2丁目五百三十七の1') == \ 86 | {"pref": "京都府", "city": "京都市東山区", "town": "大和大路二丁目", "addr": "537-1", 87 | "lat": 34.989944, "lng": 135.770967, "level": 3} 88 | 89 | 90 | def test_normalize_0015(): 91 | assert normalize('愛知県蒲郡市旭町17番1号') == \ 92 | {"pref": "愛知県", "city": "蒲郡市", "town": "旭町", "addr": "17-1", 93 | "lat": 34.825785, "lng": 137.218621, "level": 3} 94 | 95 | 96 | def test_normalize_0016(): 97 | assert normalize('北海道岩見沢市栗沢町万字寿町1−2') == \ 98 | {"pref": "北海道", "city": "岩見沢市", "town": "栗沢町万字寿町", "addr": "1-2", 99 | "lat": 43.135248, "lng": 141.986658, "level": 3} 100 | 101 | 102 | def test_normalize_0017(): 103 | assert normalize('北海道久遠郡せたな町北檜山区北檜山193') == \ 104 | {"pref": "北海道", "city": "久遠郡せたな町", "town": "北檜山区北檜山", "addr": "193", 105 | "lat": 42.414, "lng": 139.881784, "level": 3} 106 | 107 | 108 | def test_normalize_0018(): 109 | assert normalize('北海道久遠郡せたな町北桧山区北桧山193') == \ 110 | {"pref": "北海道", "city": "久遠郡せたな町", "town": "北檜山区北檜山", "addr": "193", 111 | "lat": 42.414, "lng": 139.881784, "level": 3} 112 | 113 | 114 | def test_normalize_0019(): 115 | assert normalize('京都府京都市中京区錦小路通大宮東入七軒町466') == \ 116 | {"pref": "京都府", "city": "京都市中京区", "town": "七軒町", "addr": "466", 117 | "lat": 35.004829, "lng": 135.749797, "level": 3} 118 | 119 | 120 | def test_normalize_0020(): 121 | assert normalize('栃木県佐野市七軒町2201') == \ 122 | {"pref": "栃木県", "city": "佐野市", "town": "七軒町", "addr": "2201", 123 | "lat": 36.305969, "lng": 139.57389, "level": 3} 124 | 125 | 126 | def test_normalize_0021(): 127 | assert normalize('京都府京都市東山区大和大路通三条下る東入若松町393') == \ 128 | {"pref": "京都府", "city": "京都市東山区", "town": "若松町", "addr": "393", 129 | "lat": 35.007967, "lng": 135.774082, "level": 3} 130 | 131 | 132 | def test_normalize_0022(): 133 | assert normalize('長野県長野市長野東之門町2462') == \ 134 | {"pref": "長野県", "city": "長野市", "town": "大字長野", "addr": "東之門町2462", 135 | "lat": 36.674892, "lng": 138.178449, "level": 3} 136 | 137 | 138 | def test_normalize_0023(): 139 | assert normalize('岩手県下閉伊郡普代村第1地割上村43−25') == \ 140 | {"pref": "岩手県", "city": "下閉伊郡普代村", "town": "第一地割字上村", "addr": "43-25", 141 | "lat": 39.990149, "lng": 141.928282, "level": 3} 142 | 143 | 144 | def test_normalize_0024(): 145 | assert normalize('岩手県花巻市下北万丁目174−1') == \ 146 | {"pref": "岩手県", "city": "花巻市", "town": "下北万丁目", "addr": "174-1", 147 | "lat": 39.394178, "lng": 141.099889, "level": 3} 148 | 149 | 150 | def test_normalize_0025(): 151 | assert normalize('岩手県花巻市十二丁目1192') == \ 152 | {"pref": "岩手県", "city": "花巻市", "town": "十二丁目", "addr": "1192", 153 | "lat": 39.358268, "lng": 141.122331, "level": 3} 154 | 155 | 156 | def test_normalize_0026(): 157 | assert normalize('岩手県滝沢市後268−566') == \ 158 | {"pref": "岩手県", "city": "滝沢市", "town": "後", "addr": "268-566", 159 | "lat": 39.839043, "lng": 141.094179, "level": 3} 160 | 161 | 162 | def test_normalize_0027(): 163 | assert normalize('青森県五所川原市金木町喜良市千苅62−8') == \ 164 | {"pref": "青森県", "city": "五所川原市", "town": "金木町喜良市", "addr": "千苅62-8", 165 | "lat": 40.904317, "lng": 140.486676, "level": 3} 166 | 167 | 168 | def test_normalize_0028(): 169 | assert normalize('岩手県盛岡市盛岡駅西通2丁目9番地1号') == \ 170 | {"pref": "岩手県", "city": "盛岡市", "town": "盛岡駅西通二丁目", "addr": "9-1", 171 | "lat": 39.698721, "lng": 141.135252, "level": 3} 172 | 173 | 174 | def test_normalize_0029(): 175 | assert normalize('岩手県盛岡市盛岡駅西通2丁目9の1') == \ 176 | {"pref": "岩手県", "city": "盛岡市", "town": "盛岡駅西通二丁目", "addr": "9-1", 177 | "lat": 39.698721, "lng": 141.135252, "level": 3} 178 | 179 | 180 | def test_normalize_0030(): 181 | assert normalize('岩手県盛岡市盛岡駅西通2の9の1') == \ 182 | {"pref": "岩手県", "city": "盛岡市", "town": "盛岡駅西通二丁目", "addr": "9-1", 183 | "lat": 39.698721, "lng": 141.135252, "level": 3} 184 | 185 | 186 | def test_normalize_0031(): 187 | assert normalize('岩手県盛岡市盛岡駅西通2丁目9番地1号 マリオス10F') == \ 188 | {"pref": "岩手県", "city": "盛岡市", "town": "盛岡駅西通二丁目", "addr": "9-1 マリオス10F", 189 | "lat": 39.698721, "lng": 141.135252, "level": 3} 190 | 191 | 192 | def test_normalize_0032(): 193 | assert normalize('東京都文京区千石4丁目15-7') == \ 194 | {"pref": "東京都", "city": "文京区", "town": "千石四丁目", "addr": "15-7", 195 | "lat": 35.729052, "lng": 139.740683, "level": 3} 196 | 197 | 198 | def test_normalize_0033(): 199 | assert normalize('東京都文京区千石四丁目15-7') == \ 200 | {"pref": "東京都", "city": "文京区", "town": "千石四丁目", "addr": "15-7", 201 | "lat": 35.729052, "lng": 139.740683, "level": 3} 202 | 203 | 204 | def test_normalize_0034(): 205 | assert normalize('東京都文京区千石4丁目15-7') == \ 206 | {"pref": "東京都", "city": "文京区", "town": "千石四丁目", "addr": "15-7", 207 | "lat": 35.729052, "lng": 139.740683, "level": 3} 208 | 209 | 210 | def test_normalize_0035(): 211 | assert normalize('東京都文京区千石4丁目15-7') == \ 212 | {"pref": "東京都", "city": "文京区", "town": "千石四丁目", "addr": "15-7", 213 | "lat": 35.729052, "lng": 139.740683, "level": 3} 214 | 215 | 216 | def test_normalize_0036(): 217 | assert normalize('東京都文京区 千石4丁目15-7') == \ 218 | {"pref": "東京都", "city": "文京区", "town": "千石四丁目", "addr": "15-7", 219 | "lat": 35.729052, "lng": 139.740683, "level": 3} 220 | 221 | 222 | def test_normalize_0037(): 223 | assert normalize('東京都文京区千石4-15-7 ') == \ 224 | {"pref": "東京都", "city": "文京区", "town": "千石四丁目", "addr": "15-7", 225 | "lat": 35.729052, "lng": 139.740683, "level": 3} 226 | 227 | 228 | def test_normalize_0038(): 229 | assert normalize('和歌山県東牟婁郡串本町串本 833') == \ 230 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "833", 231 | "lat": 33.470358, "lng": 135.779952, "level": 3} 232 | 233 | 234 | def test_normalize_0039(): 235 | assert normalize('和歌山県東牟婁郡串本町串本 833') == \ 236 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "833", 237 | "lat": 33.470358, "lng": 135.779952, "level": 3} 238 | 239 | 240 | def test_normalize_0040(): 241 | assert normalize('東京都世田谷区上北沢4の9の2') == \ 242 | {"pref": "東京都", "city": "世田谷区", "town": "上北沢四丁目", "addr": "9-2", 243 | "lat": 35.669726, "lng": 139.620901, "level": 3} 244 | 245 | 246 | def test_normalize_0041(): 247 | assert normalize('東京都品川区東五反田2丁目5-11') == \ 248 | {"pref": "東京都", "city": "品川区", "town": "東五反田二丁目", "addr": "5-11", 249 | "lat": 35.624169, "lng": 139.72819, "level": 3} 250 | 251 | 252 | def test_normalize_0042(): 253 | assert normalize('東京都世田谷区上北沢四丁目2-1') == \ 254 | {"pref": "東京都", "city": "世田谷区", "town": "上北沢四丁目", "addr": "2-1", 255 | "lat": 35.669726, "lng": 139.620901, "level": 3} 256 | 257 | 258 | def test_normalize_0043(): 259 | assert normalize('東京都世田谷区上北沢4-2-1') == \ 260 | {"pref": "東京都", "city": "世田谷区", "town": "上北沢四丁目", "addr": "2-1", 261 | "lat": 35.669726, "lng": 139.620901, "level": 3} 262 | 263 | 264 | def test_normalize_0044(): 265 | assert normalize('東京都世田谷区上北沢4ー2ー1') == \ 266 | {"pref": "東京都", "city": "世田谷区", "town": "上北沢四丁目", "addr": "2-1", 267 | "lat": 35.669726, "lng": 139.620901, "level": 3} 268 | 269 | 270 | def test_normalize_0045(): 271 | assert normalize('東京都世田谷区上北沢4-2-1') == \ 272 | {"pref": "東京都", "city": "世田谷区", "town": "上北沢四丁目", "addr": "2-1", 273 | "lat": 35.669726, "lng": 139.620901, "level": 3} 274 | 275 | 276 | def test_normalize_0046(): 277 | assert normalize('東京都品川区西五反田2丁目31-6') == \ 278 | {"pref": "東京都", "city": "品川区", "town": "西五反田二丁目", "addr": "31-6", 279 | "lat": 35.626368, "lng": 139.721005, "level": 3} 280 | 281 | 282 | def test_normalize_0047(): 283 | assert normalize('東京都品川区西五反田2-31-6') == \ 284 | {"pref": "東京都", "city": "品川区", "town": "西五反田二丁目", "addr": "31-6", 285 | "lat": 35.626368, "lng": 139.721005, "level": 3} 286 | 287 | 288 | def test_normalize_0048(): 289 | assert normalize('大阪府大阪市此花区西九条三丁目2-16') == \ 290 | {"pref": "大阪府", "city": "大阪市此花区", "town": "西九条三丁目", "addr": "2-16", 291 | "lat": 34.684074, "lng": 135.467031, "level": 3} 292 | 293 | 294 | def test_normalize_0049(): 295 | assert normalize('大阪府大阪市此花区西九条三丁目2番16号') == \ 296 | {"pref": "大阪府", "city": "大阪市此花区", "town": "西九条三丁目", "addr": "2-16", 297 | "lat": 34.684074, "lng": 135.467031, "level": 3} 298 | 299 | 300 | def test_normalize_0050(): 301 | assert normalize('大阪府大阪市此花区西九条3-2-16') == \ 302 | {"pref": "大阪府", "city": "大阪市此花区", "town": "西九条三丁目", "addr": "2-16", 303 | "lat": 34.684074, "lng": 135.467031, "level": 3} 304 | 305 | 306 | def test_normalize_0051(): 307 | assert normalize('大阪府大阪市此花区西九条3丁目2-16') == \ 308 | {"pref": "大阪府", "city": "大阪市此花区", "town": "西九条三丁目", "addr": "2-16", 309 | "lat": 34.684074, "lng": 135.467031, "level": 3} 310 | 311 | 312 | def test_normalize_0052(): 313 | assert normalize('大阪府大阪市此花区西九条3-2-16') == \ 314 | {"pref": "大阪府", "city": "大阪市此花区", "town": "西九条三丁目", "addr": "2-16", 315 | "lat": 34.684074, "lng": 135.467031, "level": 3} 316 | 317 | 318 | def test_normalize_0053(): 319 | assert normalize('千葉県鎌ケ谷市中佐津間2丁目15-14-9') == \ 320 | {"pref": "千葉県", "city": "鎌ヶ谷市", "town": "中佐津間二丁目", "addr": "15-14-9", 321 | "lat": 35.800253, "lng": 140.002133, "level": 3} 322 | 323 | 324 | def test_normalize_0054(): 325 | assert normalize('岐阜県不破郡関ケ原町関ヶ原1701−6') == \ 326 | {"pref": "岐阜県", "city": "不破郡関ケ原町", "town": "大字関ケ原", "addr": "1701-6", 327 | "lat": 35.368524, "lng": 136.464997, "level": 3} 328 | 329 | 330 | def test_normalize_0055(): 331 | assert normalize('岐阜県関ケ原町関ヶ原1701−6') == \ 332 | {"pref": "岐阜県", "city": "不破郡関ケ原町", "town": "大字関ケ原", "addr": "1701-6", 333 | "lat": 35.368524, "lng": 136.464997, "level": 3} 334 | 335 | 336 | def test_normalize_0056(): 337 | assert normalize('東京都町田市木曽東4丁目14-イ22') == \ 338 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22", 339 | "lat": 35.564817, "lng": 139.429661, "level": 3} 340 | 341 | 342 | def test_normalize_0057(): 343 | assert normalize('東京都町田市木曽東4丁目14ーイ22') == \ 344 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22", 345 | "lat": 35.564817, "lng": 139.429661, "level": 3} 346 | 347 | 348 | def test_normalize_0058(): 349 | assert normalize('東京都町田市木曽東四丁目十四ーイ二十二') == \ 350 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22", 351 | "lat": 35.564817, "lng": 139.429661, "level": 3} 352 | 353 | 354 | def test_normalize_0059(): 355 | assert normalize('東京都町田市木曽東四丁目14ーイ22') == \ 356 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22", 357 | "lat": 35.564817, "lng": 139.429661, "level": 3} 358 | 359 | 360 | def test_normalize_0060(): 361 | assert normalize('東京都町田市木曽東四丁目14のイ22') == \ 362 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22", 363 | "lat": 35.564817, "lng": 139.429661, "level": 3} 364 | 365 | 366 | def test_normalize_0061(): 367 | assert normalize('岩手県花巻市南万丁目127') == \ 368 | {"pref": "岩手県", "city": "花巻市", "town": "南万丁目", "addr": "127", 369 | "lat": 39.387522, "lng": 141.088029, "level": 3} 370 | 371 | 372 | def test_normalize_0062(): 373 | assert normalize('和歌山県東牟婁郡串本町田並1512') == \ 374 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "田並", "addr": "1512", 375 | "lat": 33.48681, "lng": 135.717844, "level": 3} 376 | 377 | 378 | def test_normalize_0063(): 379 | assert normalize('神奈川県川崎市多摩区東三田1-2-2') == \ 380 | {"pref": "神奈川県", "city": "川崎市多摩区", "town": "東三田一丁目", "addr": "2-2", 381 | "lat": 35.612653, "lng": 139.549014, "level": 3} 382 | 383 | 384 | def test_normalize_0064(): 385 | assert normalize('東京都町田市木曽東4の14のイ22') == \ 386 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22", 387 | "lat": 35.564817, "lng": 139.429661, "level": 3} 388 | 389 | 390 | def test_normalize_0065(): 391 | assert normalize('東京都町田市木曽東4ー14ーイ22') == \ 392 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22", 393 | "lat": 35.564817, "lng": 139.429661, "level": 3} 394 | 395 | 396 | def test_normalize_0066(): 397 | assert normalize('富山県富山市三番町1番23号') == \ 398 | {"pref": "富山県", "city": "富山市", "town": "三番町", "addr": "1-23", 399 | "lat": 36.688141, "lng": 137.217397, "level": 3} 400 | 401 | 402 | def test_normalize_0067(): 403 | assert normalize('富山県富山市3-1-23') == \ 404 | {"pref": "富山県", "city": "富山市", "town": "三番町", "addr": "1-23", 405 | "lat": 36.688141, "lng": 137.217397, "level": 3} 406 | 407 | 408 | def test_normalize_0068(): 409 | assert normalize('富山県富山市中央通り3-1-23') == \ 410 | {"pref": "富山県", "city": "富山市", "town": "中央通り三丁目", "addr": "1-23", 411 | "lat": 36.689604, "lng": 137.222128, "level": 3} 412 | 413 | 414 | def test_normalize_0069(): 415 | assert normalize('埼玉県南埼玉郡宮代町大字国納309-1') == \ 416 | {"pref": "埼玉県", "city": "南埼玉郡宮代町", "town": "大字国納", "addr": "309-1", 417 | "lat": 36.038996, "lng": 139.697478, "level": 3} 418 | 419 | 420 | def test_normalize_0070(): 421 | assert normalize('埼玉県南埼玉郡宮代町国納309-1') == \ 422 | {"pref": "埼玉県", "city": "南埼玉郡宮代町", "town": "大字国納", "addr": "309-1", 423 | "lat": 36.038996, "lng": 139.697478, "level": 3} 424 | 425 | 426 | def test_normalize_0071(): 427 | assert normalize('大阪府高槻市奈佐原2丁目1-2 メゾンエトワール') == \ 428 | {"pref": "大阪府", "city": "高槻市", "town": "奈佐原二丁目", "addr": "1-2 メゾンエトワール", 429 | "lat": 34.861189, "lng": 135.579573, "level": 3} 430 | 431 | 432 | def test_normalize_0072(): 433 | assert normalize('埼玉県八潮市大字大瀬1丁目1-1') == \ 434 | {"pref": "埼玉県", "city": "八潮市", "town": "大瀬一丁目", "addr": "1-1", 435 | "lat": 35.808825, "lng": 139.84291, "level": 3} 436 | 437 | 438 | def test_normalize_0073(): 439 | assert normalize('岡山県笠岡市大宜1249-1') == \ 440 | {"pref": "岡山県", "city": "笠岡市", "town": "大宜", "addr": "1249-1", 441 | "lat": 34.506729, "lng": 133.473295, "level": 3} 442 | 443 | 444 | def test_normalize_0074(): 445 | assert normalize('岡山県笠岡市大宜1249-1') == \ 446 | {"pref": "岡山県", "city": "笠岡市", "town": "大宜", "addr": "1249-1", 447 | "lat": 34.506729, "lng": 133.473295, "level": 3} 448 | 449 | 450 | def test_normalize_0075(): 451 | assert normalize('岡山県笠岡市大冝1249-1') == \ 452 | {"pref": "岡山県", "city": "笠岡市", "town": "大宜", "addr": "1249-1", 453 | "lat": 34.506729, "lng": 133.473295, "level": 3} 454 | 455 | 456 | def test_normalize_0076(): 457 | assert normalize('岡山県岡山市中区さい33-2') == \ 458 | {"pref": "岡山県", "city": "岡山市中区", "town": "さい", "addr": "33-2", 459 | "lat": 34.680505, "lng": 133.948429, "level": 3} 460 | 461 | 462 | def test_normalize_0077(): 463 | assert normalize('岡山県岡山市中区穝33-2') == \ 464 | {"pref": "岡山県", "city": "岡山市中区", "town": "さい", "addr": "33-2", 465 | "lat": 34.680505, "lng": 133.948429, "level": 3} 466 | 467 | 468 | def test_normalize_0078(): 469 | assert normalize('千葉県松戸市栄町3丁目166-5') == \ 470 | {"pref": "千葉県", "city": "松戸市", "town": "栄町三丁目", "addr": "166-5", 471 | "lat": 35.803015, "lng": 139.905619, "level": 3} 472 | 473 | 474 | def test_normalize_0079(): 475 | assert normalize('東京都新宿区三栄町17-16') == \ 476 | {"pref": "東京都", "city": "新宿区", "town": "四谷三栄町", "addr": "17-16", 477 | "lat": 35.688757, "lng": 139.725668, "level": 3} 478 | 479 | 480 | def test_normalize_0080(): 481 | assert normalize('東京都新宿区三榮町17-16') == \ 482 | {"pref": "東京都", "city": "新宿区", "town": "四谷三栄町", "addr": "17-16", 483 | "lat": 35.688757, "lng": 139.725668, "level": 3} 484 | 485 | 486 | def test_normalize_0081(): 487 | assert normalize('新潟県新潟市中央区礎町通1ノ町1968−1') == \ 488 | {"pref": "新潟県", "city": "新潟市中央区", "town": "礎町通一ノ町", "addr": "1968-1", 489 | "lat": 37.920235, "lng": 139.049572, "level": 3} 490 | 491 | 492 | def test_normalize_0082(): 493 | assert normalize('新潟県新潟市中央区礎町通1の町1968−1') == \ 494 | {"pref": "新潟県", "city": "新潟市中央区", "town": "礎町通一ノ町", "addr": "1968-1", 495 | "lat": 37.920235, "lng": 139.049572, "level": 3} 496 | 497 | 498 | def test_normalize_0083(): 499 | assert normalize('新潟県新潟市中央区礎町通1の町1968の1') == \ 500 | {"pref": "新潟県", "city": "新潟市中央区", "town": "礎町通一ノ町", "addr": "1968-1", 501 | "lat": 37.920235, "lng": 139.049572, "level": 3} 502 | 503 | 504 | def test_normalize_0084(): 505 | assert normalize('新潟県新潟市中央区礎町通1-1968-1') == \ 506 | {"pref": "新潟県", "city": "新潟市中央区", "town": "礎町通一ノ町", "addr": "1968-1", 507 | "lat": 37.920235, "lng": 139.049572, "level": 3} 508 | 509 | 510 | def test_normalize_0085(): 511 | assert normalize('新潟県新潟市中央区上大川前通11番町1881-2') == \ 512 | {"pref": "新潟県", "city": "新潟市中央区", "town": "上大川前通十一番町", "addr": "1881-2", 513 | "lat": 37.927874, "lng": 139.049152, "level": 3} 514 | 515 | 516 | def test_normalize_0086(): 517 | assert normalize('新潟県新潟市中央区上大川前通11-1881-2') == \ 518 | {"pref": "新潟県", "city": "新潟市中央区", "town": "上大川前通十一番町", "addr": "1881-2", 519 | "lat": 37.927874, "lng": 139.049152, "level": 3} 520 | 521 | 522 | def test_normalize_0087(): 523 | assert normalize('新潟県新潟市中央区上大川前通十一番町1881-2') == \ 524 | {"pref": "新潟県", "city": "新潟市中央区", "town": "上大川前通十一番町", "addr": "1881-2", 525 | "lat": 37.927874, "lng": 139.049152, "level": 3} 526 | 527 | 528 | def test_normalize_0088(): 529 | assert normalize('埼玉県上尾市壱丁目111') == \ 530 | {"pref": "埼玉県", "city": "上尾市", "town": "大字壱丁目", "addr": "111", 531 | "lat": 35.957701, "lng": 139.570578, "level": 3} 532 | 533 | 534 | def test_normalize_0089(): 535 | assert normalize('埼玉県上尾市一丁目111') == \ 536 | {"pref": "埼玉県", "city": "上尾市", "town": "大字壱丁目", "addr": "111", 537 | "lat": 35.957701, "lng": 139.570578, "level": 3} 538 | 539 | 540 | def test_normalize_0090(): 541 | assert normalize('埼玉県上尾市一町目111') == \ 542 | {"pref": "埼玉県", "city": "上尾市", "town": "大字壱丁目", "addr": "111", 543 | "lat": 35.957701, "lng": 139.570578, "level": 3} 544 | 545 | 546 | def test_normalize_0091(): 547 | assert normalize('埼玉県上尾市壱町目111') == \ 548 | {"pref": "埼玉県", "city": "上尾市", "town": "大字壱丁目", "addr": "111", 549 | "lat": 35.957701, "lng": 139.570578, "level": 3} 550 | 551 | 552 | def test_normalize_0092(): 553 | assert normalize('埼玉県上尾市1-111') == \ 554 | {"pref": "埼玉県", "city": "上尾市", "town": "大字壱丁目", "addr": "111", 555 | "lat": 35.957701, "lng": 139.570578, "level": 3} 556 | 557 | 558 | def test_normalize_0093(): 559 | assert normalize('神奈川県横浜市港北区大豆戸町17番地11') == \ 560 | {"pref": "神奈川県", "city": "横浜市港北区", "town": "大豆戸町", "addr": "17-11", 561 | "lat": 35.513492, "lng": 139.625651, "level": 3} 562 | 563 | 564 | def test_normalize_0094(): 565 | assert normalize('神奈川県横浜市港北区大豆戸町17番地11', level=1) == \ 566 | {"pref": "神奈川県", "city": "", "town": "", "addr": "横浜市港北区大豆戸町17番地11", 567 | "lat": None, "lng": None, "level": 1} 568 | 569 | 570 | def test_normalize_0095(): 571 | assert normalize('神奈川県横浜市港北区大豆戸町17番地11', level=2) == \ 572 | {"pref": "神奈川県", "city": "横浜市港北区", "town": "", "addr": "大豆戸町17番地11", 573 | "lat": None, "lng": None, "level": 2} 574 | 575 | 576 | def test_normalize_0096(): 577 | assert normalize('神奈川県横浜市港北区大豆戸町17番地11', level=3) == \ 578 | {"pref": "神奈川県", "city": "横浜市港北区", "town": "大豆戸町", "addr": "17-11", 579 | "lat": 35.513492, "lng": 139.625651, "level": 3} 580 | 581 | 582 | def test_normalize_0097(): 583 | assert normalize('神奈川県横浜市港北区', level=3) == \ 584 | {"pref": "神奈川県", "city": "横浜市港北区", "town": "", "addr": "", 585 | "lat": None, "lng": None, "level": 2} 586 | 587 | 588 | def test_normalize_0098(): 589 | assert normalize('神奈川県', level=3) == \ 590 | {"pref": "神奈川県", "city": "", "town": "", "addr": "", 591 | "lat": None, "lng": None, "level": 1} 592 | 593 | 594 | def test_normalize_0099(): 595 | assert normalize('神奈川県あいうえお市') == \ 596 | {"pref": "神奈川県", "city": "", "town": "", "addr": "あいうえお市", 597 | "lat": None, "lng": None, "level": 1} 598 | 599 | 600 | def test_normalize_0100(): 601 | assert normalize('東京都港区あいうえお') == \ 602 | {"pref": "東京都", "city": "港区", "town": "", "addr": "あいうえお", 603 | "lat": None, "lng": None, "level": 2} 604 | 605 | 606 | def test_normalize_0101(): 607 | assert normalize('あいうえお') == \ 608 | {"pref": "", "city": "", "town": "", "addr": "あいうえお", 609 | "lat": None, "lng": None, "level": 0} 610 | 611 | 612 | def test_normalize_0102(): 613 | assert normalize('東京都江東区豊洲1丁目2-27') == \ 614 | {"pref": "東京都", "city": "江東区", "town": "豊洲一丁目", "addr": "2-27", 615 | "lat": 35.661813, "lng": 139.792044, "level": 3} 616 | 617 | 618 | def test_normalize_0103(): 619 | assert normalize('東京都江東区豊洲 1丁目2-27') == \ 620 | {"pref": "東京都", "city": "江東区", "town": "豊洲一丁目", "addr": "2-27", 621 | "lat": 35.661813, "lng": 139.792044, "level": 3} 622 | 623 | 624 | def test_normalize_0104(): 625 | assert normalize('東京都江東区豊洲 1-2-27') == \ 626 | {"pref": "東京都", "city": "江東区", "town": "豊洲一丁目", "addr": "2-27", 627 | "lat": 35.661813, "lng": 139.792044, "level": 3} 628 | 629 | 630 | def test_normalize_0105(): 631 | assert normalize('東京都 江東区 豊洲 1-2-27') == \ 632 | {"pref": "東京都", "city": "江東区", "town": "豊洲一丁目", "addr": "2-27", 633 | "lat": 35.661813, "lng": 139.792044, "level": 3} 634 | 635 | 636 | def test_normalize_0106(): 637 | assert normalize('東京都江東区豊洲 1ー2ー27') == \ 638 | {"pref": "東京都", "city": "江東区", "town": "豊洲一丁目", "addr": "2-27", 639 | "lat": 35.661813, "lng": 139.792044, "level": 3} 640 | 641 | 642 | def test_normalize_0107(): 643 | assert normalize('東京都町田市木曽東四丁目14ーイ22 ジオロニアマンション') == \ 644 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-イ22 ジオロニアマンション", 645 | "lat": 35.564817, "lng": 139.429661, "level": 3} 646 | 647 | 648 | def test_normalize_0108(): 649 | assert normalize('東京都町田市木曽東四丁目14ーA22 ジオロニアマンション') == \ 650 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-A22 ジオロニアマンション", 651 | "lat": 35.564817, "lng": 139.429661, "level": 3} 652 | 653 | 654 | def test_normalize_0109(): 655 | assert normalize('東京都町田市木曽東四丁目一四━A二二 ジオロニアマンション') == \ 656 | {"pref": "東京都", "city": "町田市", "town": "木曽東四丁目", "addr": "14-A22 ジオロニアマンション", 657 | "lat": 35.564817, "lng": 139.429661, "level": 3} 658 | 659 | 660 | def test_normalize_0110(): 661 | assert normalize('東京都江東区豊洲 一丁目2-27') == \ 662 | {"pref": "東京都", "city": "江東区", "town": "豊洲一丁目", "addr": "2-27", 663 | "lat": 35.661813, "lng": 139.792044, "level": 3} 664 | 665 | 666 | def test_normalize_0111(): 667 | assert normalize('東京都江東区豊洲 四-2-27') == \ 668 | {"pref": "東京都", "city": "江東区", "town": "豊洲四丁目", "addr": "2-27", 669 | "lat": 35.653798, "lng": 139.800664, "level": 3} 670 | 671 | 672 | def test_normalize_0112(): 673 | assert normalize('石川県七尾市藤橋町亥45番地1') == \ 674 | {"pref": "石川県", "city": "七尾市", "town": "藤橋町", "addr": "亥45-1", 675 | "lat": 37.041154, "lng": 136.941183, "level": 3} 676 | 677 | 678 | def test_normalize_0113(): 679 | assert normalize('石川県七尾市藤橋町亥四十五番地1') == \ 680 | {"pref": "石川県", "city": "七尾市", "town": "藤橋町", "addr": "亥45-1", 681 | "lat": 37.041154, "lng": 136.941183, "level": 3} 682 | 683 | 684 | def test_normalize_0114(): 685 | assert normalize('石川県七尾市藤橋町 亥 四十五番地1') == \ 686 | {"pref": "石川県", "city": "七尾市", "town": "藤橋町", "addr": "亥45-1", 687 | "lat": 37.041154, "lng": 136.941183, "level": 3} 688 | 689 | 690 | def test_normalize_0115(): 691 | assert normalize('石川県七尾市藤橋町 亥 45-1') == \ 692 | {"pref": "石川県", "city": "七尾市", "town": "藤橋町", "addr": "亥45-1", 693 | "lat": 37.041154, "lng": 136.941183, "level": 3} 694 | 695 | 696 | def test_normalize_0116(): 697 | assert normalize('和歌山県和歌山市 七番丁 19') == \ 698 | {"pref": "和歌山県", "city": "和歌山市", "town": "七番丁", "addr": "19", 699 | "lat": 34.230447, "lng": 135.171994, "level": 3} 700 | 701 | 702 | def test_normalize_0117(): 703 | assert normalize('和歌山県和歌山市7番町19') == \ 704 | {"pref": "和歌山県", "city": "和歌山市", "town": "七番丁", "addr": "19", 705 | "lat": 34.230447, "lng": 135.171994, "level": 3} 706 | 707 | 708 | def test_normalize_0118(): 709 | assert normalize('和歌山県和歌山市十二番丁45') == \ 710 | {"pref": "和歌山県", "city": "和歌山市", "town": "十二番丁", "addr": "45", 711 | "lat": 34.232035, "lng": 135.172088, "level": 3} 712 | 713 | 714 | def test_normalize_0119(): 715 | assert normalize('和歌山県和歌山市12番丁45') == \ 716 | {"pref": "和歌山県", "city": "和歌山市", "town": "十二番丁", "addr": "45", 717 | "lat": 34.232035, "lng": 135.172088, "level": 3} 718 | 719 | 720 | def test_normalize_0120(): 721 | assert normalize('和歌山県和歌山市12-45') == \ 722 | {"pref": "和歌山県", "city": "和歌山市", "town": "十二番丁", "addr": "45", 723 | "lat": 34.232035, "lng": 135.172088, "level": 3} 724 | 725 | 726 | def test_normalize_0121(): 727 | assert normalize('兵庫県宝塚市東洋町1番1号') == \ 728 | {"pref": "兵庫県", "city": "宝塚市", "town": "東洋町", "addr": "1-1", 729 | "lat": 34.797971, "lng": 135.363236, "level": 3} 730 | 731 | 732 | def test_normalize_0122(): 733 | assert normalize('兵庫県宝塚市東洋町1番1号') == \ 734 | {"pref": "兵庫県", "city": "宝塚市", "town": "東洋町", "addr": "1-1", 735 | "lat": 34.797971, "lng": 135.363236, "level": 3} 736 | 737 | 738 | def test_normalize_0123(): 739 | assert normalize('北海道札幌市中央区北三条西3丁目1-56マルゲンビル3F') == \ 740 | {"pref": "北海道", "city": "札幌市中央区", "town": "北三条西三丁目", "addr": "1-56マルゲンビル3F", 741 | "lat": 43.065075, "lng": 141.351683, "level": 3} 742 | 743 | 744 | def test_normalize_0124(): 745 | assert normalize('北海道札幌市北区北24条西6丁目1−1') == \ 746 | {"pref": "北海道", "city": "札幌市北区", "town": "北二十四条西六丁目", "addr": "1-1", 747 | "lat": 43.090538, "lng": 141.340527, "level": 3} 748 | 749 | 750 | def test_normalize_0125(): 751 | assert normalize('堺市北区新金岡町4丁1−8') == \ 752 | {"pref": "大阪府", "city": "堺市北区", "town": "新金岡町四丁", "addr": "1-8", 753 | "lat": 34.568184, "lng": 135.519409, "level": 3} 754 | 755 | 756 | def test_normalize_0126(): 757 | assert normalize('串本町串本1234') == \ 758 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 759 | "lat": 33.470358, "lng": 135.779952, "level": 3} 760 | 761 | 762 | def test_normalize_0127(): 763 | assert normalize('広島県府中市府川町315') == \ 764 | {"pref": "広島県", "city": "府中市", "town": "府川町", "addr": "315", 765 | "lat": 34.567649, "lng": 133.236891, "level": 3} 766 | 767 | 768 | def test_normalize_0128(): 769 | assert normalize('府中市府川町315') == \ 770 | {"pref": "広島県", "city": "府中市", "town": "府川町", "addr": "315", 771 | "lat": 34.567649, "lng": 133.236891, "level": 3} 772 | 773 | 774 | def test_normalize_0129(): 775 | assert normalize('府中市宮西町2丁目24番地') == \ 776 | {"pref": "東京都", "city": "府中市", "town": "宮西町二丁目", "addr": "24", 777 | "lat": 35.669764, "lng": 139.477636, "level": 3} 778 | 779 | 780 | def test_normalize_0130(): 781 | assert normalize('三重県三重郡菰野町大字大強原2796') == \ 782 | {"pref": "三重県", "city": "三重郡菰野町", "town": "大字大強原", "addr": "2796", 783 | "lat": 35.028963, "lng": 136.530668, "level": 3} 784 | 785 | 786 | def test_normalize_0131(): 787 | assert normalize('三重県三重郡菰野町大強原2796') == \ 788 | {"pref": "三重県", "city": "三重郡菰野町", "town": "大字大強原", "addr": "2796", 789 | "lat": 35.028963, "lng": 136.530668, "level": 3} 790 | 791 | 792 | def test_normalize_0132(): 793 | assert normalize('福岡県北九州市小倉南区大字井手浦874') == \ 794 | {"pref": "福岡県", "city": "北九州市小倉南区", "town": "大字井手浦", "addr": "874", 795 | "lat": 33.77509, "lng": 130.893088, "level": 3} 796 | 797 | 798 | def test_normalize_0133(): 799 | assert normalize('福岡県北九州市小倉南区井手浦874') == \ 800 | {"pref": "福岡県", "city": "北九州市小倉南区", "town": "大字井手浦", "addr": "874", 801 | "lat": 33.77509, "lng": 130.893088, "level": 3} 802 | 803 | 804 | def test_normalize_0134(): 805 | assert normalize('沖縄県那覇市小禄1丁目5番23号1丁目マンション301') == \ 806 | {"pref": "沖縄県", "city": "那覇市", "town": "小禄一丁目", "addr": "5-23 一丁目マンション301", 807 | "lat": 26.192719, "lng": 127.679409, "level": 3} 808 | 809 | 810 | def test_normalize_0135(): 811 | assert normalize('香川県仲多度郡まんのう町勝浦字家六2094番地1') == \ 812 | {"pref": "香川県", "city": "仲多度郡まんのう町", "town": "勝浦", "addr": "家六2094-1", 813 | "lat": 34.097457, "lng": 133.97318, "level": 3} 814 | 815 | 816 | def test_normalize_0136(): 817 | assert normalize('香川県仲多度郡まんのう町勝浦家六2094番地1') == \ 818 | {"pref": "香川県", "city": "仲多度郡まんのう町", "town": "勝浦", "addr": "家六2094-1", 819 | "lat": 34.097457, "lng": 133.97318, "level": 3} 820 | 821 | 822 | def test_normalize_0137(): 823 | assert normalize('愛知県あま市西今宿梶村一38番地4') == \ 824 | {"pref": "愛知県", "city": "あま市", "town": "西今宿", "addr": "梶村一38-4", 825 | "lat": 35.2002, "lng": 136.831606, "level": 3} 826 | 827 | 828 | def test_normalize_0138(): 829 | assert normalize('香川県丸亀市原田町字東三分一1926番地1') == \ 830 | {"pref": "香川県", "city": "丸亀市", "town": "原田町", "addr": "東三分一1926-1", 831 | "lat": 34.258954, "lng": 133.78778, "level": 3} 832 | 833 | 834 | def test_normalize_0139(): 835 | # 都道府県無し, 郡無し 836 | assert normalize('串本町串本千二百三十四') == \ 837 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 838 | "lat": 33.470358, "lng": 135.779952, "level": 3} 839 | 840 | 841 | def test_normalize_0140(): 842 | # 都道府県無し, 郡無し 843 | assert normalize('せたな町北檜山区北檜山193') == \ 844 | {"pref": "北海道", "city": "久遠郡せたな町", "town": "北檜山区北檜山", "addr": "193", 845 | "lat": 42.414, "lng": 139.881784, "level": 3} 846 | 847 | 848 | def test_normalize_0141(): 849 | assert normalize('岩手県花巻市十二丁目704') == \ 850 | {"pref": "岩手県", "city": "花巻市", "town": "十二丁目", "addr": "704", 851 | "lat": 39.358268, "lng": 141.122331, "level": 3} 852 | 853 | 854 | def test_normalize_0142(): 855 | assert normalize('岩手県花巻市12丁目704') == \ 856 | {"pref": "岩手県", "city": "花巻市", "town": "十二丁目", "addr": "704", 857 | "lat": 39.358268, "lng": 141.122331, "level": 3} 858 | 859 | 860 | def test_normalize_0143(): 861 | assert normalize('岩手県花巻市12丁目704') == \ 862 | {"pref": "岩手県", "city": "花巻市", "town": "十二丁目", "addr": "704", 863 | "lat": 39.358268, "lng": 141.122331, "level": 3} 864 | 865 | 866 | def test_normalize_0144(): 867 | assert normalize('京都府京都市中京区河原町二条下ル一之船入町537-50') == \ 868 | {"pref": "京都府", "city": "京都市中京区", "town": "一之船入町", "addr": "537-50", 869 | "lat": 35.01217, "lng": 135.769483, "level": 3} 870 | 871 | 872 | def test_normalize_0145(): 873 | assert normalize('京都府宇治市莵道森本8−10') == \ 874 | {"pref": "京都府", "city": "宇治市", "town": "莵道", "addr": "森本8-10", 875 | "lat": 34.904244, "lng": 135.827041, "level": 3} 876 | 877 | 878 | def test_normalize_0146(): 879 | # 船と舟のゆらぎ 880 | assert normalize('京都府京都市中京区河原町二条下ル一之舟入町537-50') == \ 881 | {"pref": "京都府", "city": "京都市中京区", "town": "一之船入町", "addr": "537-50", 882 | "lat": 35.01217, "lng": 135.769483, "level": 3} 883 | 884 | 885 | def test_normalize_0147(): 886 | # 莵と菟のゆらぎ 887 | assert normalize('京都府宇治市菟道森本8−10') == \ 888 | {"pref": "京都府", "city": "宇治市", "town": "莵道", "addr": "森本8-10", 889 | "lat": 34.904244, "lng": 135.827041, "level": 3} 890 | 891 | 892 | def test_normalize_0148(): 893 | # 「都道府県」の文字列を省略した場合 894 | assert normalize('岩手花巻市12丁目704') == \ 895 | {"pref": "岩手県", "city": "花巻市", "town": "十二丁目", "addr": "704", 896 | "lat": 39.358268, "lng": 141.122331, "level": 3} 897 | 898 | 899 | def test_normalize_0149(): 900 | # 市(し、いち)と巿(ふつ)のゆらぎ 901 | assert normalize('千葉県巿川巿巿川1丁目') == \ 902 | {"pref": "千葉県", "city": "市川市", "town": "市川一丁目", "addr": "", 903 | "lat": 35.731849, "lng": 139.909029, "level": 3} 904 | 905 | 906 | def test_normalize_0150(): 907 | assert normalize('京都市北区紫野東御所田町') == \ 908 | {"pref": "京都府", "city": "京都市北区", "town": "紫野東御所田町", "addr": "", 909 | "lat": 35.039861, "lng": 135.753474, "level": 3} 910 | 911 | 912 | def test_normalize_0151(): 913 | assert normalize('鹿児島市山下町') == \ 914 | {"pref": "鹿児島県", "city": "鹿児島市", "town": "山下町", "addr": "", 915 | "lat": 31.596716, "lng": 130.55643, "level": 3} 916 | 917 | 918 | def test_normalize_0152(): 919 | assert normalize('市川市八幡1丁目1番1号') == \ 920 | {"pref": "千葉県", "city": "市川市", "town": "八幡一丁目", "addr": "1-1", 921 | "lat": 35.720285, "lng": 139.932528, "level": 3} 922 | 923 | 924 | def test_normalize_0153(): 925 | assert normalize('千葉市川市八幡1丁目1番1号') == \ 926 | {"pref": "千葉県", "city": "市川市", "town": "八幡一丁目", "addr": "1-1", 927 | "lat": 35.720285, "lng": 139.932528, "level": 3} 928 | 929 | 930 | def test_normalize_0154(): 931 | assert normalize('石川郡石川町字長久保185-4') == \ 932 | {"pref": "福島県", "city": "石川郡石川町", "town": "字長久保", "addr": "185-4", 933 | "lat": 37.155602, "lng": 140.446048, "level": 3} 934 | 935 | 936 | def test_normalize_0155(): 937 | assert normalize('福島石川郡石川町字長久保185-4') == \ 938 | {"pref": "福島県", "city": "石川郡石川町", "town": "字長久保", "addr": "185-4", 939 | "lat": 37.155602, "lng": 140.446048, "level": 3} 940 | 941 | 942 | def test_normalize_0156(): 943 | # 町丁目に長音符(ー)が入る場合で、丁目の数字がその後に続く場合 944 | assert normalize('広島市西区商工センター六丁目9番39号') == \ 945 | {"pref": "広島県", "city": "広島市西区", "town": "商工センター六丁目", "addr": "9-39", 946 | "lat": 34.36812, "lng": 132.388293, "level": 3} 947 | 948 | 949 | def test_normalize_0157(): 950 | # 町丁目に長音符(ー)が入る場合で、丁目の数字が 1 の場合 951 | assert normalize('新潟県新潟市西区流通センター一丁目1-1') == \ 952 | {"pref": "新潟県", "city": "新潟市西区", "town": "流通センター一丁目", "addr": "1-1", 953 | "lat": 37.866158, "lng": 138.998185, "level": 3} 954 | 955 | 956 | def test_normalize_0158(): 957 | # 町丁目に長音符(ー)が入る場合 958 | assert normalize('青森県八戸市北インター工業団地4丁目1-1') == \ 959 | {"pref": "青森県", "city": "八戸市", "town": "北インター工業団地四丁目", "addr": "1-1", 960 | "lat": 40.556931, "lng": 141.426763, "level": 3} 961 | 962 | 963 | def test_normalize_0159(): 964 | assert normalize('富山県高岡市オフィスパーク1-1') == \ 965 | {"pref": "富山県", "city": "高岡市", "town": "オフィスパーク", "addr": "1-1", 966 | "lat": 36.670088, "lng": 136.998867, "level": 3} 967 | 968 | 969 | def test_normalize_0160(): 970 | assert normalize('福井県三方上中郡若狭町若狭テクノバレー1-1') == \ 971 | {"pref": "福井県", "city": "三方上中郡若狭町", "town": "若狭テクノバレー", "addr": "1-1", 972 | "lat": 35.477349, "lng": 135.859423, "level": 3} 973 | 974 | 975 | def test_normalize_0161(): 976 | assert normalize('埼玉県越谷市大字蒲生3795-1') == \ 977 | {"pref": "埼玉県", "city": "越谷市", "town": "大字蒲生", "addr": "3795-1", 978 | "lat": 35.860429, "lng": 139.790945, "level": 3} 979 | 980 | 981 | def test_normalize_0162(): 982 | assert normalize('埼玉県越谷市蒲生茜町9-3') == \ 983 | {"pref": "埼玉県", "city": "越谷市", "town": "蒲生茜町", "addr": "9-3", 984 | "lat": 35.866741, "lng": 139.7888, "level": 3} 985 | 986 | 987 | def test_normalize_0163(): 988 | assert normalize('埼玉県川口市大字芝字宮根3938-5') == \ 989 | {"pref": "埼玉県", "city": "川口市", "town": "大字芝", "addr": "字宮根3938-5", 990 | "lat": 35.843399, "lng": 139.690803, "level": 3} 991 | 992 | 993 | def test_normalize_0164(): 994 | assert normalize('北海道上川郡東神楽町十四号北1番地') == \ 995 | {"pref": "北海道", "city": "上川郡東神楽町", "town": "十四号", "addr": "北1", 996 | "lat": 43.693918, "lng": 142.463511, "level": 3} 997 | 998 | 999 | # 町丁目内の文字列の「町」の省略に関連するケース 1000 | def test_normalize_0165(): 1001 | assert normalize('東京都江戸川区西小松川12-345') == \ 1002 | {"pref": "東京都", "city": "江戸川区", "town": "西小松川町", "addr": "12-345", 1003 | "lat": 35.698405, "lng": 139.862007, "level": 3} 1004 | 1005 | 1006 | def test_normalize_0166(): 1007 | assert normalize('滋賀県長浜市木之本西山123-4') == \ 1008 | {"pref": "滋賀県", "city": "長浜市", "town": "木之本町西山", "addr": "123-4", 1009 | "lat": 35.496171, "lng": 136.204177, "level": 3} 1010 | 1011 | 1012 | def test_normalize_0167(): 1013 | assert normalize('福島県須賀川市西川町123-456') == \ 1014 | {"pref": "福島県", "city": "須賀川市", "town": "西川町", "addr": "123-456", 1015 | "lat": 37.294611, "lng": 140.359974, "level": 3} 1016 | 1017 | 1018 | def test_normalize_0168(): 1019 | assert normalize('福島県須賀川市西川123-456') == \ 1020 | {"pref": "福島県", "city": "須賀川市", "town": "西川", "addr": "123-456", 1021 | "lat": 37.296938, "lng": 140.343569, "level": 3} 1022 | 1023 | 1024 | def test_normalize_0169(): 1025 | assert normalize('広島県三原市幸崎久和喜12-345') == \ 1026 | {"pref": "広島県", "city": "三原市", "town": "幸崎久和喜", "addr": "12-345", 1027 | "lat": 34.348481, "lng": 133.067756, "level": 3} 1028 | 1029 | 1030 | def test_normalize_0170(): 1031 | assert normalize('広島県三原市幸崎町久和喜24-56') == \ 1032 | {"pref": "広島県", "city": "三原市", "town": "幸崎町久和喜", "addr": "24-56", 1033 | "lat": 34.352656, "lng": 133.055612, "level": 3} 1034 | 1035 | 1036 | # 漢数字を含む町丁目については、後続の丁目や番地が壊れるので町の省略を許容しない 1037 | def test_normalize_0171(): 1038 | assert normalize('愛知県名古屋市瑞穂区十六町1丁目123-4') == \ 1039 | {"pref": "愛知県", "city": "名古屋市瑞穂区", "town": "十六町一丁目", "addr": "123-4", 1040 | "lat": 35.128862, "lng": 136.936585, "level": 3} 1041 | 1042 | 1043 | # 大字◯◯と◯◯町が共存するケース 1044 | def test_normalize_0172(): 1045 | assert normalize('埼玉県川口市新堀999-888') == \ 1046 | {"pref": "埼玉県", "city": "川口市", "town": "大字新堀", "addr": "999-888", 1047 | "lat": 35.827425, "lng": 139.783579, "level": 3} 1048 | 1049 | 1050 | def test_normalize_0173(): 1051 | assert normalize('埼玉県川口市大字新堀999-888') == \ 1052 | {"pref": "埼玉県", "city": "川口市", "town": "大字新堀", "addr": "999-888", 1053 | "lat": 35.827425, "lng": 139.783579, "level": 3} 1054 | 1055 | 1056 | def test_normalize_0174(): 1057 | assert normalize('埼玉県川口市新堀町999-888') == \ 1058 | {"pref": "埼玉県", "city": "川口市", "town": "新堀町", "addr": "999-888", 1059 | "lat": 35.825057, "lng": 139.781901, "level": 3} 1060 | 1061 | 1062 | def test_normalize_0175(): 1063 | assert normalize('埼玉県川口市大字新堀町999-888') == \ 1064 | {"pref": "埼玉県", "city": "川口市", "town": "新堀町", "addr": "999-888", 1065 | "lat": 35.825057, "lng": 139.781901, "level": 3} 1066 | 1067 | 1068 | # 町から始まる町丁目について、町を省略した場合は寄せない 1069 | def test_normalize_0176(): 1070 | # 東京都荒川区町屋5丁目 の町を省略した場合 1071 | res = normalize('東京都荒川区屋5丁目') 1072 | assert res['town'] != '町屋5丁目' 1073 | assert res['level'] == 2 1074 | 1075 | 1076 | def test_normalize_0177(): 1077 | # 石川県輪島市町野町桶戸 の前側の町(町の名前の一部で、接尾の町に当たらない)を省略した場合 1078 | res = normalize('石川県輪島市野町桶戸') 1079 | assert res['town'] != '町野町桶戸' 1080 | assert res['level'] == 2 1081 | 1082 | 1083 | def test_normalize_0178(): 1084 | # 石川県輪島市町野町桶戸 の後側の町を省略した場合 1085 | assert normalize('石川県輪島市町野桶戸') == \ 1086 | {"pref": "石川県", "city": "輪島市", "town": "町野町桶戸", "addr": "", 1087 | "lat": 37.414993, "lng": 137.092547, "level": 3} 1088 | 1089 | def test_normalize_0179(): 1090 | # 住所の正規化に先立って、文字をUnicode正規化する 1091 | address = unicodedata.normalize("NFKD", "茨城県つくば市筑穂1丁目10−4") 1092 | res = normalize(address) 1093 | assert res["city"] == "つくば市" 1094 | 1095 | # 番地・号の分離:京都の住所では「一号|1号..」などが「一番町」に正規化されてはいけない 1096 | def test_normalize_0180(): 1097 | res = normalize('京都府京都市上京区主計町一番一号') 1098 | assert res['town'] != '一番町' 1099 | assert res['town'] == '主計町' 1100 | assert res['addr'] == '1-1' 1101 | 1102 | def test_normalize_0181(): 1103 | res = normalize('京都府京都市上京区主計町二番二号') 1104 | assert res['town'] != '二番町' 1105 | assert res['town'] == '主計町' 1106 | assert res['addr'] == '2-2' 1107 | 1108 | def test_normalize_0182(): 1109 | res = normalize('京都府京都市上京区主計町三番三号') 1110 | assert res['town'] != '三番町' 1111 | assert res['town'] == '主計町' 1112 | assert res['addr'] == '3-3' 1113 | 1114 | def test_normalize_0183(): 1115 | res = normalize('京都府京都市上京区中務町543番21号') 1116 | assert res['town'] != '一番町' 1117 | assert res['town'] == '中務町' 1118 | assert res['addr'] == '543-21' 1119 | 1120 | def test_normalize_0184(): 1121 | res = normalize('京都府京都市上京区晴明町1番3号') 1122 | assert res['town'] != '三番町' 1123 | assert res['town'] == '晴明町' 1124 | assert res['addr'] == '1-3' 1125 | 1126 | def test_normalize_0185(): 1127 | res = normalize('京都府京都市上京区主計町1番地3') 1128 | assert res['town'] == '主計町' 1129 | assert res['addr'] == '1-3' 1130 | 1131 | def test_normalize_0186(): 1132 | res = normalize('京都府京都市上京区主計町123番') 1133 | assert res['town'] == '主計町' 1134 | assert res['addr'] == '123' 1135 | 1136 | def test_normalize_0187(): 1137 | res = normalize('京都府京都市上京区主計町123番地') 1138 | assert res['town'] == '主計町' 1139 | assert res['addr'] == '123' 1140 | 1141 | # 京都府京都市上京区主計町1番2-403号 建物名の省略と部屋番号の表記のケース 1142 | def test_normalize_0188(): 1143 | res = normalize('京都府京都市上京区主計町1番2-403号') 1144 | assert res['town'] == '主計町' 1145 | assert res['addr'] == '1-2-403号' 1146 | 1147 | def test_normalize_0189(): 1148 | res = normalize('京都府京都市上京区主計町1番1号おはようビル301号室') 1149 | assert res['town'] != '一番町' 1150 | assert res['town'] == '主計町' 1151 | assert res['addr'] == '1-1 おはようビル301号室' 1152 | 1153 | # latとlngのデータがないときはNoneを返す 1154 | def test_normalize_0190(): 1155 | res = normalize('大分県大分市田中町3丁目1-12') 1156 | assert res['lat'] is None 1157 | assert res['lng'] is None 1158 | 1159 | # 漢数字の正規化の改善 1160 | def test_normalize_0191(): 1161 | assert normalize('和歌山県東牟婁郡串本町串本千二三四') == \ 1162 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 1163 | "lat": 33.470358, "lng": 135.779952, "level": 3} 1164 | 1165 | def test_normalize_0192(): 1166 | assert normalize('和歌山県東牟婁郡串本町串本千二百三四') == \ 1167 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 1168 | "lat": 33.470358, "lng": 135.779952, "level": 3} 1169 | 1170 | def test_normalize_0193(): 1171 | assert normalize('和歌山県東牟婁郡串本町串本千二百三十四') == \ 1172 | {"pref": "和歌山県", "city": "東牟婁郡串本町", "town": "串本", "addr": "1234", 1173 | "lat": 33.470358, "lng": 135.779952, "level": 3} 1174 | 1175 | # 弥/彌の正規化に対応 1176 | def test_normalize_0194(): 1177 | assert normalize('愛知県名古屋市瑞穂区弥富町') == \ 1178 | {"pref": "愛知県", "city": "名古屋市瑞穂区", "town": "彌富町", "addr": "", 1179 | "lat": 35.132011, "lng": 136.955457, "level": 3} 1180 | 1181 | # 京都府京都市下京区西中筋通北小路通上る丸屋町 京都の通り名削除と町の省略がコンフリクトするケース 1182 | def test_normalize_0195(): 1183 | res = normalize('京都府京都市下京区西中筋通北小路通上る丸屋町') 1184 | assert res['city'] == '京都市下京区' 1185 | assert res['town'] != '北小路町' 1186 | assert res['addr'] == '丸屋町' 1187 | 1188 | 1189 | # 京都府京都市下京区油小路通高辻下ル麓町123 1190 | def test_normalize_0196(): 1191 | res = normalize('京都府京都市下京区油小路通高辻下ル麓町123') 1192 | assert res['city'] == '京都市下京区' 1193 | assert res['town'] == '麓町' 1194 | assert res['addr'] == '123' 1195 | 1196 | 1197 | # 番地・号の分離: 京都の住所では「一号|1号..」などが「一番町」に正規化されてはいけない 1198 | # 京都府京都市上京区あああ通り主計町1番2-403号 通り名を含むケース 1199 | def test_normalize_0197(): 1200 | res = normalize('京都府京都市上京区あああ通り主計町1番2-403号') 1201 | assert res['city'] == '京都市上京区' 1202 | assert res['town'] == '主計町' 1203 | assert res['addr'] == '1-2-403号' 1204 | 1205 | # 京都以外の字は正しく分離される 1206 | def test_normalize_0198(): 1207 | res = normalize('愛知県名古屋市緑区鳴海町字アイウエオ100番200号') 1208 | assert res['town'] == '鳴海町' 1209 | assert res['addr'] == '字アイウエオ100-200' 1210 | assert res['level'] == 3 1211 | 1212 | # 途中にスペースを含むケース 1213 | # 京都府京都市 下京区上之町999 1214 | def test_normalize_0199(): 1215 | res = normalize('京都府京都市 下京区上之町999') 1216 | assert res['pref'] == '京都府' 1217 | assert res['city'] == '京都市下京区' 1218 | assert res['town'] == '上之町' 1219 | assert res['addr'] == '999' 1220 | 1221 | # 宮城県仙台市 若林区土樋999 1222 | def test_normalize_0200(): 1223 | res = normalize('宮城県仙台市 若林区土樋999') 1224 | assert res['pref'] == '宮城県' 1225 | assert res['city'] == '仙台市若林区' 1226 | assert res['town'] == '土樋' 1227 | assert res['addr'] == '999' 1228 | 1229 | # 青森県上北郡 横浜町字三保野888 1230 | def test_normalize_0201(): 1231 | res = normalize('青森県上北郡 横浜町字三保野888') 1232 | assert res['pref'] == '青森県' 1233 | assert res['city'] == '上北郡横浜町' 1234 | assert res['town'] == '字三保野' 1235 | assert res['addr'] == '888' 1236 | 1237 | # 町丁目名が判別できなかった場合、残った住所には漢数字->数字などの変換処理を施さない 1238 | def test_normalize_0202(): 1239 | res = normalize('北海道滝川市一の坂町西') 1240 | assert res['town'] == '' 1241 | assert res['addr'] == '一の坂町西' 1242 | 1243 | # 番地号部分にスペースが含まれていても正規化する 1244 | def test_normalize_0203(): 1245 | addresses = [ 1246 | '港区新橋五丁目 24 番 8 号', 1247 | '港区新橋五丁目24 番 8 号', 1248 | '港区新橋5-24-8', 1249 | ] 1250 | 1251 | results = [normalize(address) for address in addresses] 1252 | for i in range(len(results) -1): 1253 | assert results[i] == results[i+1] 1254 | 1255 | # 旧漢字対応 (麩 -> 麸) 1256 | def test_normalize_0204(): 1257 | res = normalize('愛知県津島市池麩町') 1258 | assert res['town'] == '池麸町' 1259 | assert res['level'] == 3 1260 | 1261 | # 柿碕町|柿さき町 1262 | def test_normalize_0205(): 1263 | res = normalize('愛知県安城市柿碕町') 1264 | assert res['town'] == '柿さき町' 1265 | assert res['level'] == 3 1266 | 1267 | # 丁目の数字だけあるときは正しく「一丁目」まで補充できる 1268 | def test_normalize_0206(): 1269 | res = normalize('東京都文京区小石川1') 1270 | assert res['town'] == '小石川一丁目' 1271 | assert res['addr'] == '' 1272 | 1273 | # 丁目の数字だけあるときは正しく「一丁目」まで補充できる(以降も対応) 1274 | def test_normalize_0207(): 1275 | res = normalize('東京都文京区小石川1ビル名') 1276 | assert res['town'] == '小石川一丁目' 1277 | assert res['addr'] == 'ビル名' 1278 | 1279 | # 東京都千代田区永田町1-2-3-レジデンス億万101 (号の後にハイフンで漢数字末尾に含んだマンション名が続き、号室が数値の場合 1280 | def test_normalize_0208(): 1281 | res = normalize('東京都千代田区永田町1-2-3-レジデンス億万101') 1282 | assert res == {"pref": "東京都", "city": "千代田区", "town": "永田町一丁目", "addr": "2-3-レジデンス億万101", 1283 | "lat": 35.675895, "lng": 139.746306, "level": 3} 1284 | 1285 | # 漢数字の小字のケース(kanjize, kanjize_error_kanji_to_intのエラー回避確認) 1286 | def test_normalize_0209(): 1287 | res = normalize('愛知県豊田市西丹波町三五十') 1288 | assert res['town'] == '西丹波町' 1289 | assert res['addr'] == '350' 1290 | assert res['level'] == 3 1291 | 1292 | # 広島県府中市栗柄町名字八五十2459(kanjize, kanjize_error_kanji_to_intのエラー回避確認) 1293 | def test_normalize_0210(): 1294 | res = normalize('広島県府中市栗柄町名字八五十2459') 1295 | assert res['town'] == '栗柄町' 1296 | assert res['addr'] == '名字852459' 1297 | assert res['level'] == 3 1298 | --------------------------------------------------------------------------------