├── .fig
│   ├── demo.png
│   ├── map_matching.png
│   ├── map_matching_futian.png
│   ├── map_matching_futian_with_satellite.png
│   ├── observ_prob_distribution.png
│   └── v大于1的情况.png
├── .gitignore
├── LICENSE
├── README.md
├── bug.py
├── changelog.md
├── data
│   ├── network
│   │   └── LXD_graph.ckpt
│   └── trajs
│       ├── gt.json
│       ├── traj_0.geojson
│       ├── traj_1.geojson
│       ├── traj_10.geojson
│       ├── traj_11.geojson
│       ├── traj_12.geojson
│       ├── traj_13.geojson
│       ├── traj_14.geojson
│       ├── traj_15.geojson
│       ├── traj_2.geojson
│       ├── traj_3.geojson
│       ├── traj_4.geojson
│       ├── traj_5.geojson
│       ├── traj_6.geojson
│       ├── traj_7.geojson
│       └── traj_9.geojson
├── demo.py
├── docs
│   └── API.md
├── eval.py
├── mapmatching
│   ├── __init__.py
│   ├── geo
│   │   ├── __init__.py
│   │   ├── azimuth.py
│   │   ├── coord
│   │   │   ├── __init__.py
│   │   │   ├── coordTransform_py.py
│   │   │   └── coordTransfrom_shp.py
│   │   ├── io.py
│   │   ├── metric
│   │   │   ├── __init__.py
│   │   │   └── trajDist.py
│   │   ├── ops
│   │   │   ├── __init__.py
│   │   │   ├── distance.py
│   │   │   ├── linear_referencing.py
│   │   │   ├── point2line.py
│   │   │   ├── resample.py
│   │   │   ├── simplify.py
│   │   │   ├── substring.py
│   │   │   └── to_array.py
│   │   ├── query.py
│   │   └── vis
│   │       ├── __init__.py
│   │       ├── linestring.py
│   │       └── point.py
│   ├── graph
│   │   ├── __init__.py
│   │   ├── astar.py
│   │   ├── base.py
│   │   ├── bi_astar.py
│   │   ├── geograph.py
│   │   └── geographx.py
│   ├── match
│   │   ├── __int__.py
│   │   ├── candidatesGraph.py
│   │   ├── dir_similarity.py
│   │   ├── geometricAnalysis.py
│   │   ├── io.py
│   │   ├── metric.py
│   │   ├── misc.py
│   │   ├── postprocess.py
│   │   ├── spatialAnalysis.py
│   │   ├── status.py
│   │   ├── temporalAnalysis.py
│   │   ├── topologicalAnalysis.py
│   │   ├── visualization.py
│   │   └── viterbi.py
│   ├── matching.py
│   ├── osmnet
│   │   ├── __init__.py
│   │   ├── build_graph.py
│   │   ├── combine_edges.py
│   │   ├── downloader.py
│   │   ├── misc.py
│   │   ├── osm_io.py
│   │   ├── parse_osm_xml.py
│   │   └── twoway_edge.py
│   ├── setting.py
│   ├── update_network.py
│   └── utils
│       ├── __init__.py
│       ├── db.py
│       ├── img.py
│       ├── interval_helper.py
│       ├── log_helper.py
│       ├── logger_helper.py
│       ├── misc.py
│       ├── parallel_helper.py
│       ├── serialization.py
│       └── timer.py
├── requirement.txt
└── test.py

/.fig/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/.fig/demo.png
--------------------------------------------------------------------------------
/.fig/map_matching.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/.fig/map_matching.png
--------------------------------------------------------------------------------
/.fig/map_matching_futian.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/.fig/map_matching_futian.png
--------------------------------------------------------------------------------
/.fig/map_matching_futian_with_satellite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/.fig/map_matching_futian_with_satellite.png
--------------------------------------------------------------------------------
/.fig/observ_prob_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/.fig/observ_prob_distribution.png
--------------------------------------------------------------------------------
/.fig/v大于1的情况.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/.fig/v大于1的情况.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | network/
2 | result/
3 | data/
4 | check.py
5 | 
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *.pkl
10 | *$py.class
11 | 
12 | # C extensions
13 | *.so
14 | 
15 | # Others
16 | .DS_Store
17 | debug
18 | cache
19 | log
20 | 
21 | # Distribution / packaging
22 | .vscode/
23 | test/
24 | tmp/
25 | api/
26 | rsrc
27 | .Python
28 | build/
29 | develop-eggs/
30 | dist/
31 | downloads/
32 | eggs/
33 | .eggs/
34 | lib/
35 | lib64/
36 | parts/
37 | sdist/
38 | var/
39 | wheels/
40 | pip-wheel-metadata/
41 | share/python-wheels/
42 | *.egg-info/
43 | .installed.cfg
44 | *.egg
45 | MANIFEST
46 | 
47 | # PyInstaller
48 | # Usually these files are written by a python script from a template
49 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
50 | *.manifest
51 | *.spec
52 | 
53 | # Installer logs
54 | pip-log.txt
55 | pip-delete-this-directory.txt
56 | 
57 | # Unit test / coverage reports
58 | htmlcov/
59 | .tox/
60 | .nox/
61 | .coverage
62 | .coverage.*
63 | .cache
64 | nosetests.xml
65 | coverage.xml
66 | *.cover
67 | *.py,cover
68 | .hypothesis/
69 | .pytest_cache/
70 | 
71 | # Translations
72 | *.mo
73 | *.pot
74 | 
75 | # Django stuff:
76 | *.log
77 | local_settings.py
78 | db.sqlite3
79 | db.sqlite3-journal
80 | 
81 | # Flask stuff:
82 | instance/
83 | .webassets-cache
84 | 
85 | # Scrapy stuff:
86 | .scrapy
87 | 
88 | # Sphinx documentation
89 | docs/_build/
90 | 
91 | # PyBuilder
92 | target/
93 | 
94 | # Jupyter Notebook
95 | .ipynb_checkpoints
96 | 
97 | # IPython
98 | profile_default/
99 | ipython_config.py
100 | 
101 | # pyenv
102 | .python-version
103 | 
104 | # pipenv
105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
108 | # install all needed dependencies.
109 | #Pipfile.lock
110 | 
111 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
112 | __pypackages__/
113 | 
114 | # Celery stuff
115 | celerybeat-schedule
116 | celerybeat.pid
117 | 
118 | # SageMath parsed files
119 | *.sage.py
120 | 
121 | # Environments
122 | .env
123 | .venv
124 | env/
125 | venv/
126 | ENV/
127 | env.bak/
128 | venv.bak/
129 | 
130 | # Spyder project settings
131 | .spyderproject
132 | .spyproject
133 | 
134 | # Rope project settings
135 | .ropeproject
136 | 
137 | # mkdocs documentation
138 | /site
139 | 
140 | # mypy
141 | .mypy_cache/
142 | .dmypy.json
143 | dmypy.json
144 | 
145 | # Pyre type checker
146 | .pyre/
147 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 wenke727
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ST-MapMatching

## Version

V2.0.0

## Description

A global map-matching algorithm based on spatial and temporal features (ST-Matching), a novel global algorithm aimed at low-sampling-rate GPS trajectories. The base road network comes from [OSM](https://wiki.openstreetmap.org/wiki/Main_Page) and can be downloaded automatically via `DigraphOSM`. The matching process considers the following two kinds of features:

1. the spatial geometry and topological structure of the road network;

2. 
the speed/temporal constraints of the trajectory. Based on this spatio-temporal analysis, a candidate graph is constructed, from which the best matching path is determined.

The input is a set of `GPS trajectory points` in the WGS coordinate system; the output is the sequence of road segments traversed.

This is a reimplementation of MSRA's "[Map-Matching for Low-Sampling-Rate GPS Trajectories](https://www.microsoft.com/en-us/research/publication/map-matching-for-low-sampling-rate-gps-trajectories/)", with some changes based on our own understanding. A Chinese walkthrough is available in this [CSDN article](https://blog.csdn.net/qq_43281895/article/details/103145327).

## Usage

See `demo.py` for details.

```python
from mapmatching import build_geograph, ST_Matching

"""step 1: fetch / load the road network"""
# Option 1:
# download the network from OSM by bbox and parse it from scratch
# net = build_geograph(bbox=[113.930914, 22.570536, 113.945456, 22.585613],
#                      xml_fn="./data/network/LXD.osm.xml", ll=False)
# save the preprocessed network as a checkpoint
# net.save_checkpoint('./data/network/LXD_graph.ckpt')

# Option 2:
# use the preprocessed network
net = build_geograph(ckpt='./data/network/LXD_graph.ckpt')

"""step 2: create the map-matching matcher"""
matcher = ST_Matching(net=net, ll=False)

"""step 3: load the trajectory points, using Dashiyi Road as an example"""
idx = 4
traj = matcher.load_points(f"./data/trajs/traj_{idx}.geojson").reset_index(drop=True)
res = matcher.matching(traj, top_k=5, dir_trans=True, details=False, plot=True,
                       simplify=True, debug_in_levels=False)

# the following steps are optional
"""step 4: project the trajectory points onto the matched roads"""
path = matcher.transform_res_2_path(res, ori_crs=True)
proj_traj = matcher.project(traj, path)

"""step 5: eval"""
matcher.eval(traj, res, resample=5, eps=10)
```

### Input Example

```json
{
"type": "FeatureCollection",
"name": "traj_debug_dashiyilu_0",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 113.931956598012064, 22.575930582940785 ] } },
{ "type": "Feature", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 113.932515057750763, 22.575632036146079 ] } },
{ "type": "Feature", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 113.932920306714124, 22.575490522559665 ] } },
{ "type": "Feature", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 113.933781789624888, 22.575346314537452 ] } },
{ "type": "Feature", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 113.943190113338488, 22.575121559997108 ] } },
{ "type": "Feature", "properties": {}, "geometry": { "type": "Point", "coordinates": [ 113.943816093693101, 22.575196482404341 ] } }
]
}
```

Notes:

1. The example input corresponds to `./data/trajs/traj_4.geojson`; `geometry` is the only field that has to be provided. In `vscode`, the `Geo Data Viewer` extension can be used to visualize it.
2. The coordinate system of the input trajectory points defaults to `wgs84`; for `gcj02` trajectories, specify the coordinate system with `in_sys='gcj'` when calling `load_points` (see the sketch below).
3. The bundled preprocessed road network only covers the Vanke Cloud City area of Nanshan District, Shenzhen, and does not fully cover all test cases under `./data/trajs`. To test every case, adjust the bbox and fetch the network of the corresponding area yourself.
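A minimal sketch of note 2, assuming nothing beyond the `load_points` call shown above (the GCJ-02 file name here is hypothetical):

```python
from mapmatching import build_geograph, ST_Matching

net = build_geograph(ckpt='./data/network/LXD_graph.ckpt')
matcher = ST_Matching(net=net, ll=False)

# a trajectory recorded in GCJ-02 (hypothetical file); passing in_sys='gcj'
# tells load_points to convert the points to WGS-84 before matching
traj = matcher.load_points("./data/trajs/traj_gcj02.geojson", in_sys='gcj')
res = matcher.matching(traj, plot=True)
```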
### Output Example

#### demo output

```python
{
    # status code; 0 means a normal result
    'status': 0,
    # indexes of the matched road segments
    'epath': [123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135],
    # traversed portion of the first segment (the part from the first trajectory point onward)
    'step_0': 0.7286440473726905,
    # traversed portion of the last segment (the part up to the last trajectory point)
    'step_n': 0.8915310605450645,
    # probabilities
    'probs': {
        'prob': 0.9457396931471692,
        'norm_prob': 0.9861498301181256,
        'dist_prob': 0.9946361835772438,
        'trans_prob': 0.9880031610906268,
        'dir_prob': 0.9933312073337599}
}
```

The visualization looks as follows:

![](.fig/demo.png)

- produced by setting the `plot` parameter of `matcher.matching` to True
- the tile basemap requires installing [Tilemap](https://github.com/wenke727/TileMap)

#### Other matching results

Matching result for `./data/trajs/traj_0.geojson`:

![](.fig/map_matching_futian.png)

## Installation

See requirement.txt; installing `geopandas` via conda is recommended.

```bash
conda create -n stmm python=3.9
conda activate stmm
conda install -c conda-forge geopandas==0.12.2
pip install -r requirement.txt
```

--------------------------------------------------------------------------------
/bug.py:
--------------------------------------------------------------------------------
# %%
import pandas as pd
import geopandas as gpd
from mapmatching import build_geograph, ST_Matching

pd.set_option('display.width', 5000)  # keep printed results from wrapping
pd.set_option('display.max_rows', 500)

# %%
net = build_geograph(ckpt='./data/network/GZ_test.ckpt')
matcher = ST_Matching(net=net)

traj = gpd.read_file('./data/traj_others.geojson').set_crs(epsg=4326)
res = matcher.matching(traj, top_k=8, search_radius=80, plot=True,
                       dir_trans=False, details=True, simplify=False, debug_in_levels=True)
graph = res['details']['graph']
res['details']['steps'].query('trans_prob < .85')

# %%
--------------------------------------------------------------------------------
/data/network/LXD_graph.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/data/network/LXD_graph.ckpt
--------------------------------------------------------------------------------
/data/trajs/gt.json:
--------------------------------------------------------------------------------
1 | {
2 | "traj_0.geojson": [
3 | 1491845271, 1491845278, 499265789, 499281499, 499281680, 499256522, 499374765, 499374692, 499374699, 7232025515, 499374438, 2491091193, 7959990771, 1982884838, 7959990781, 7959990780, 7232047153, 5834799311, 7959603120, 7959602896, 7959603265, 7959603254, 7959603255, 7959603111, 7959603029, 7959602931, 7959602934, 7963887479, 7959602935, 7959603012, 7959603013, 9867711844, 7959602961, 499482004, 2750667738, 7959590815, 499478781, 2525944467, 2525990691, 2525990692, 499543123, 2750667601, 7961465922, 2750667627, 5834799167, 499543165, 7965680795, 5271707993, 4848290283, 2750593369, 277673161, 277052181, 2750593322, 8444060575, 8076307463, 499542953, 5834799144, 7959603245, 7959932863, 7959932864, 5834799157, 7959932862, 7959932869, 6496420812, 7959990662, 7959990663, 6496420788, 7959932876, 6033482523, 7959990664, 7959990665, 7959932880, 499681331, 499681324, 5435020958, 499681326, 500020999, 7707208812, 7640452829],
4 | 
"traj_1.geojson": [ 5 | 7959602916, 7959990653, 7959990662, 7959603239, 7959602899, 7959602919, 7959603232, 7959603209, 7263135412, 6496420768, 7959603216, 7959603210, 7959590851, 2044316564, 7959603170, 7959603096, 7959590857, 7959603272, 7959603093], 6 | "traj_2.geojson": [ 7 | 8526860927, 8526860929, 8526860961], 8 | "traj_3.geojson": [ 9 | 8526860977, 8526860891], 10 | "traj_4.geojson": [ 11 | 7834079836, 8526860922, 5345110208, 8526860926, 8526860927, 8526860977, 8526860891, 9908986643, 8526861026, 8526860998, 5345110822, 8526861014, 8526861012, 5140241022], 12 | "traj_5.geojson": [ 13 | 8526860966, 8526860961], 14 | "traj_6.geojson": [ 15 | 10121421919, 8526861038], 16 | "traj_7.geojson": [ 17 | 2508061907, 4044798340], 18 | "traj_8.geojson": [ 19 | 10121421924, 8526861072, 5179129482, 8526861079], 20 | "traj_9.geojson": [ 21 | 500016494, 7959990625, 7959990623, 7241618417, 7959990538, 7959990621, 7959990622, 7959990546, 7959990558, 7959990556, 7249081512, 6033481332, 7959990534, 8109971622, 8109971643, 8109971632, 4397491519, 4397491540, 499374672, 1114538640, 1114538642, 499256255, 1116467143, 6410193855, 1116467144, 6410193851, 499237147, 499237159, 9730051941, 1491845135, 499237230, 9671934765], 22 | "traj_10.geojson": [ 23 | 7959990732, 6496420992], 24 | "traj_11.geojson": [ 25 | 499543283, 6302207410, 6467166907, 2525990702, 6467166929, 499478538, 1116501297, 7973099538, 1116492767, 7973099584, 267602472, 7973099537, 7973114863, 7899265523, 8298779513, 8298792534, 7973099533], 26 | "traj_12.geojson": [ 27 | 8169270272, 2376751183, 2376751145, 8168061649, 8168061760, 8168061648, 8169270272, 2376751183], 28 | "traj_13.geojson": [ 29 | 2750593369, 277673161, 277052181, 2750593322, 8444060575], 30 | "traj_14.geojson": [ 31 | 2508061907, 4044798340, 2508061873], 32 | "traj_15.geojson": [ 33 | 10121421923, 8526861038, 2366083151, 8526860966, 5345110197, 8526860929, 8526860928, 8526860926, 8526860916, 8526860846, 9144224473, 8526860847, 8526860884, 5345110215, 2508090891, 5569752402, 5569752372, 1981097845, 2366083157, 2366083173, 2366083171, 6072476402, 277486223, 277486228, 277939196, 277486222, 277663457, 2701105203, 2702591034, 2701105231, 2701105309, 277049979, 1116420116, 5445976849, 5445976847, 793893699, 1116420144, 277664219, 2427779668, 279077760, 277673550, 277664239, 1932007679, 2407737640, 277664224, 277664226, 6366992734, 2132634054, 2132634188, 6366992731, 2291907903, 1169606344, 2433356711, 6465619119, 2403175276, 2403189538, 9527226880, 9527226879, 8442017240, 9527231181, 2467373573, 2467373546, 277323315] 34 | } -------------------------------------------------------------------------------- /data/trajs/traj_0.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "name": "trips", 4 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 5 | "features": [ 6 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.042192099217814, 22.530825799254831 ] } }, 7 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.048087551857591, 22.53141414915628 ] } }, 8 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.050457097022772, 22.530254493344991 ] } }, 9 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.051374300525396, 22.534269663922935 ] } }, 10 | { "type": 
"Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.050237176637481, 22.537490331019249 ] } }, 11 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.044716748650771, 22.537863550640491 ] } }, 12 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.046725298091147, 22.542379323865038 ] } }, 13 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.056957680637467, 22.542526131019244 ] } }, 14 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.058074914718418, 22.537513356219687 ] } }, 15 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.058331080637473, 22.531227627019256 ] } }, 16 | { "type": "Feature", "properties": { "id": null }, "geometry": { "type": "Point", "coordinates": [ 114.062977233476687, 22.529223325030639 ] } } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /data/trajs/traj_1.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 4 | "features": [ 5 | { "type": "Feature", "properties": { "x": 114.063323, "y": 22.534902, "time": "2019\/08\/04 09:54:56", "speed": 23, "direction": 133, "event": 5, "alarmCode": null, "altitude": 0, "mileage": 0, "errorCode": 0, "plate": "粤B*****Y", "point_in_p": 1, "t": 9.9155555555555566, "time_inter": 0.0041666666666650004, "x1": 114.061877, "y1": 22.534855, "dis": 0.148603264545345, "v": 35.664783490900007 }, "geometry": { "type": "Point", "coordinates": [ 114.058204, 22.537611 ] } }, 6 | { "type": "Feature", "properties": { "x": 114.061877, "y": 22.534855, "time": "2019\/08\/04 09:55:11", "speed": 41, "direction": 134, "event": 5, "alarmCode": null, "altitude": 0, "mileage": 0, "errorCode": 0, "plate": "粤B*****Y", "point_in_p": 1, "t": 9.9197222222222212, "time_inter": 0.0041666666666659996, "x1": 114.061367, "y1": 22.534857, "dis": 0.052379998046211997, "v": 12.57119953109156 }, "geometry": { "type": "Point", "coordinates": [ 114.056759, 22.537566 ] } }, 7 | { "type": "Feature", "properties": { "x": 114.061367, "y": 22.534857, "time": "2019\/08\/04 09:55:41", "speed": 0, "direction": 133, "event": 5, "alarmCode": null, "altitude": 0, "mileage": 0, "errorCode": 0, "plate": "粤B*****Y", "point_in_p": 1, "t": 9.9280555555555541, "time_inter": 0.21666666666666701, "x1": 114.055378, "y1": 22.537428, "dis": 0.67828441441240395, "v": 3.1305434511341681 }, "geometry": { "type": "Point", "coordinates": [ 114.05625, 22.537569 ] } }, 8 | { "type": "Feature", "properties": { "x": 114.058723, "y": 22.536897, "time": "2019\/08\/04 19:28:41", "speed": 18, "direction": 137, "event": 5, "alarmCode": null, "altitude": 0, "mileage": 3623, "errorCode": 0, "plate": "粤B*****Y", "point_in_p": 1, "t": 19.478055555555553, "time_inter": 0.016666666666669001, "x1": 114.05634, "y1": 22.536877, "dis": 0.24475240643540599, "v": 14.685144386122037 }, "geometry": { "type": "Point", "coordinates": [ 114.053608, 22.539613 ] } }, 9 | { "type": "Feature", "properties": { "x": 114.05634, "y": 22.536877, "time": "2019\/08\/04 19:29:41", "speed": 14, "direction": 174, "event": 5, "alarmCode": null, "altitude": 0, "mileage": 3625, "errorCode": 0, "plate": "粤B*****Y", 
"point_in_p": 1, "t": 19.494722222222222, "time_inter": 0.022222222222222001, "x1": 114.057492, "y1": 22.538428, "dis": 0.209145443690229, "v": 9.4115449660603616 }, "geometry": { "type": "Point", "coordinates": [ 114.051228, 22.539597 ] } } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /data/trajs/traj_10.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 4 | "features": [ 5 | { "type": "Feature", "properties": { "PID": "09005700121902181123005192D", "DIR": 88, "Order": 0, "Type": "street", "X": 1269779986, "Y": 255987452, "RID": "d957e2-8486-e315-b325-e0a0a0", "MoveDir": 179, "dir_sim": 0.97600402622262106, "revert": false, "lane_num": 3.0 }, "geometry": { "type": "Point", "coordinates": [ 114.053374, 22.536374 ] } }, 6 | { "type": "Feature", "properties": { "PID": "09005700121902181122585292D", "DIR": 0, "Order": 1, "Type": "street", "X": 1269779987, "Y": 255989066, "RID": "d957e2-8486-e315-b325-e0a0a0", "MoveDir": 179, "dir_sim": null, "revert": false, "lane_num": 3.0 }, "geometry": { "type": "Point", "coordinates": [ 114.053374, 22.536509 ] } } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /data/trajs/traj_11.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 4 | "features": [ 5 | { "type": "Feature", "properties": { "index": 0, "PID": "09005700122003271208461303O", "DIR": 268, "Order": 0, "Type": "street", "X": 1269695445, "Y": 256061732, "RID": "24fd43-b288-813c-b717-c8f6f8", "pid_order": 0 }, "geometry": { "type": "Point", "coordinates": [ 114.045793, 22.542549 ] } }, 6 | { "type": "Feature", "properties": { "index": 31, "PID": "09005700122003271403302783O", "DIR": 268, "Order": 12, "Type": "street", "X": 1269649100, "Y": 256061810, "RID": "852936-8486-e315-b324-e0a043", "pid_order": 31 }, "geometry": { "type": "Point", "coordinates": [ 114.041645, 22.542523 ] } }, 7 | { "type": "Feature", "properties": { "index": 32, "PID": "09005700122003271403312563O", "DIR": 268, "Order": 0, "Type": "street", "X": 1269647736, "Y": 256061778, "RID": "d9faab-09d2-a493-b92a-06ce71", "pid_order": 32 }, "geometry": { "type": "Point", "coordinates": [ 114.041523, 22.54252 ] } }, 8 | { "type": "Feature", "properties": { "index": 33, "PID": "09005700122003271209165313O", "DIR": 268, "Order": 0, "Type": "street", "X": 1269647619, "Y": 256061094, "RID": "4cf167-c86e-f803-6b33-f71d66", "pid_order": 33 }, "geometry": { "type": "Point", "coordinates": [ 114.041513, 22.542463 ] } }, 9 | { "type": "Feature", "properties": { "index": 48, "PID": "09005700122003271209342533O", "DIR": 262, "Order": 4, "Type": "street", "X": 1269628967, "Y": 256059848, "RID": "47e199-f68d-1124-71cb-1b9515", "pid_order": 48 }, "geometry": { "type": "Point", "coordinates": [ 114.039844, 22.542342 ] } }, 10 | { "type": "Feature", "properties": { "index": 49, "PID": "09005700122003271209360523O", "DIR": 261, "Order": 5, "Type": "street", "X": 1269627178, "Y": 256059601, "RID": "47e199-f68d-1124-71cb-1b9515", "pid_order": 49 }, "geometry": { "type": "Point", "coordinates": [ 114.039684, 22.54232 ] } }, 11 | { "type": "Feature", "properties": { "index": 50, "PID": "09005700122003271209371303O", "DIR": 
260, "Order": 0, "Type": "street", "X": 1269625955, "Y": 256059427, "RID": "543ddf-f5a6-16e9-6440-54d70f", "pid_order": 50 }, "geometry": { "type": "Point", "coordinates": [ 114.039574, 22.542305 ] } }, 12 | { "type": "Feature", "properties": { "index": 85, "PID": "09005700122003271211104943O", "DIR": 259, "Order": 10, "Type": "street", "X": 1269583400, "Y": 256052227, "RID": "6e0ac7-4562-19d2-b192-114c14", "pid_order": 85 }, "geometry": { "type": "Point", "coordinates": [ 114.035769, 22.541659 ] } }, 13 | { "type": "Feature", "properties": { "index": 86, "PID": "09005700122003271211116973O", "DIR": 259, "Order": 0, "Type": "street", "X": 1269582291, "Y": 256052044, "RID": "dc6a1f-5d1b-dd32-f0f7-e73be1", "pid_order": 86 }, "geometry": { "type": "Point", "coordinates": [ 114.035671, 22.541642 ] } }, 14 | { "type": "Feature", "properties": { "index": 87, "PID": "09005700122003271211130883O", "DIR": 259, "Order": 0, "Type": "street", "X": 1269580961, "Y": 256051828, "RID": "c8b1bb-e6c4-b2b3-b83f-9a5efd", "pid_order": 87 }, "geometry": { "type": "Point", "coordinates": [ 114.035551, 22.541622 ] } } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /data/trajs/traj_12.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 4 | "features": [ 5 | { "type": "Feature", "properties": { "PID": "09005700121709121245083628V", "DIR": 246, "Order": 0, "Type": "street", "X": 1269866362, "Y": 255849901, "RID": "778ee8-ce04-d8b0-f08c-1a89bc" }, "geometry": { "type": "Point", "coordinates": [ 114.061128, 22.524865 ] } }, 6 | { "type": "Feature", "properties": { "PID": "09005700121709121245113878V", "DIR": 257, "Order": 0, "Type": "street", "X": 1269865552, "Y": 255849631, "RID": "a524cd-7cfc-5679-4e04-82c2c0" }, "geometry": { "type": "Point", "coordinates": [ 114.061055, 22.524843 ] } }, 7 | { "type": "Feature", "properties": { "PID": "09005700121709121245134728V", "DIR": 257, "Order": 1, "Type": "street", "X": 1269864750, "Y": 255849442, "RID": "a524cd-7cfc-5679-4e04-82c2c0" }, "geometry": { "type": "Point", "coordinates": [ 114.060983, 22.524828 ] } }, 8 | { "type": "Feature", "properties": { "PID": "09005700121709121245155908V", "DIR": 254, "Order": 2, "Type": "street", "X": 1269863866, "Y": 255849239, "RID": "a524cd-7cfc-5679-4e04-82c2c0" }, "geometry": { "type": "Point", "coordinates": [ 114.060904, 22.524812 ] } }, 9 | { "type": "Feature", "properties": { "PID": "09005700121709121245176388V", "DIR": 255, "Order": 3, "Type": "street", "X": 1269862966, "Y": 255848996, "RID": "a524cd-7cfc-5679-4e04-82c2c0" }, "geometry": { "type": "Point", "coordinates": [ 114.060823, 22.524792 ] } }, 10 | { "type": "Feature", "properties": { "PID": "09005700121709121245208858V", "DIR": 254, "Order": 4, "Type": "street", "X": 1269861430, "Y": 255848593, "RID": "a524cd-7cfc-5679-4e04-82c2c0" }, "geometry": { "type": "Point", "coordinates": [ 114.060685, 22.524759 ] } }, 11 | { "type": "Feature", "properties": { "PID": "09005700121709121245237738V", "DIR": 257, "Order": 5, "Type": "street", "X": 1269860030, "Y": 255848200, "RID": "a524cd-7cfc-5679-4e04-82c2c0" }, "geometry": { "type": "Point", "coordinates": [ 114.060559, 22.524727 ] } }, 12 | { "type": "Feature", "properties": { "PID": "09005700121709121245252308V", "DIR": 253, "Order": 6, "Type": "street", "X": 1269859327, "Y": 255847988, "RID": 
"a524cd-7cfc-5679-4e04-82c2c0" }, "geometry": { "type": "Point", "coordinates": [ 114.060496, 22.52471 ] } }, 13 | { "type": "Feature", "properties": { "PID": "09005700121709121245271368V", "DIR": 257, "Order": 0, "Type": "street", "X": 1269858467, "Y": 255847720, "RID": "b14bfe-8493-fd1d-a9c7-2ba1c3" }, "geometry": { "type": "Point", "coordinates": [ 114.060419, 22.524688 ] } }, 14 | { "type": "Feature", "properties": { "PID": "09005700121709121245295998V", "DIR": 248, "Order": 1, "Type": "street", "X": 1269857382, "Y": 255847366, "RID": "b14bfe-8493-fd1d-a9c7-2ba1c3" }, "geometry": { "type": "Point", "coordinates": [ 114.060321, 22.524659 ] } }, 15 | { "type": "Feature", "properties": { "PID": "09005700121709121245310818V", "DIR": 259, "Order": 0, "Type": "street", "X": 1269856744, "Y": 255847096, "RID": "550a27-40c5-f0d3-5717-a1907d" }, "geometry": { "type": "Point", "coordinates": [ 114.060264, 22.524637 ] } }, 16 | { "type": "Feature", "properties": { "PID": "09005700121709121245338928V", "DIR": 268, "Order": 1, "Type": "street", "X": 1269855959, "Y": 255846855, "RID": "550a27-40c5-f0d3-5717-a1907d" }, "geometry": { "type": "Point", "coordinates": [ 114.060193, 22.524618 ] } } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /data/trajs/traj_13.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 4 | "features": [ 5 | { "type": "Feature", "properties": { "index": 0, "PID": "09005700122003271656163343O", "DIR": 104, "Order": 0, "Type": "street", "X": 1269776083, "Y": 256056993, "RID": "c127c5-0a78-e80e-cf05-fef57d", "pid_order": 0 }, "geometry": { "type": "Point", "coordinates": [ 114.053021, 22.542176 ] } }, 6 | { "type": "Feature", "properties": { "index": 14, "PID": "09005700122003271656337113O", "DIR": 85, "Order": 6, "Type": "street", "X": 1269794417, "Y": 256054175, "RID": "cb7422-27d2-c73b-b682-a12ebd", "pid_order": 14 }, "geometry": { "type": "Point", "coordinates": [ 114.054666, 22.54194 ] } }, 7 | { "type": "Feature", "properties": { "index": 15, "PID": "09005700122003271656348363O", "DIR": 83, "Order": 7, "Type": "street", "X": 1269795653, "Y": 256054268, "RID": "cb7422-27d2-c73b-b682-a12ebd", "pid_order": 15 }, "geometry": { "type": "Point", "coordinates": [ 114.054777, 22.541948 ] } }, 8 | { "type": "Feature", "properties": { "index": 36, "PID": "09005700122003271237374253O", "DIR": 88, "Order": 1, "Type": "street", "X": 1269820336, "Y": 256060903, "RID": "706762-0847-0b0e-0332-10af49", "pid_order": 36 }, "geometry": { "type": "Point", "coordinates": [ 114.056992, 22.542496 ] } }, 9 | { "type": "Feature", "properties": { "index": 37, "PID": "09005700122003271237384253O", "DIR": 91, "Order": 0, "Type": "street", "X": 1269821783, "Y": 256060908, "RID": "fbca6b-289f-05a5-aabf-a822ec", "pid_order": 37 }, "geometry": { "type": "Point", "coordinates": [ 114.057122, 22.542495 ] } } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /data/trajs/traj_14.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "index": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.933129, 22.57567 ] } }, 5 | { "type": "Feature", "properties": { "index": 6 }, "geometry": { "type": "Point", 
"coordinates": [ 113.93361, 22.575661 ] } }, 6 | { "type": "Feature", "properties": { "index": 7 }, "geometry": { "type": "Point", "coordinates": [ 113.933647, 22.575727 ] } }, 7 | { "type": "Feature", "properties": { "index": 20 }, "geometry": { "type": "Point", "coordinates": [ 113.933641, 22.576783 ] } }, 8 | { "type": "Feature", "properties": { "index": 21 }, "geometry": { "type": "Point", "coordinates": [ 113.93364, 22.576839 ] } } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /data/trajs/traj_2.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "index": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.934189, 22.575404 ] } }, 5 | { "type": "Feature", "properties": { "index": 1 }, "geometry": { "type": "Point", "coordinates": [ 113.934189, 22.575481 ] } } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /data/trajs/traj_3.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "index": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.936943, 22.575324 ] } }, 5 | { "type": "Feature", "properties": { "index": 1 }, "geometry": { "type": "Point", "coordinates": [ 113.936943, 22.575324 ] } } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /data/trajs/traj_4.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "name": "traj_debug_dashiyilu_0", 4 | "crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, 5 | "features": [ 6 | { "type": "Feature", "properties": { "index": 0, "PID": "09005700121709091547447799Y", "DIR": 126, "Order": 0, "Type": "street", "X": 1268428106, "Y": 256456350, "RID": "81ce8c-d832-1db9-61dc-ee8b61", "MoveDir": 124, "dir_sim": 0.99964534332822241, "revert": false, "pid_order": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.931956598012064, 22.576130582940785 ] } }, 7 | { "type": "Feature", "properties": { "index": 1, "PID": "1", "DIR": 106, "Order": 2, "Type": "street", "X": 1268436351, "Y": 256452423, "RID": "e03ca1-a0bd-f8ba-e8b6-f42f8f", "MoveDir": 106, "dir_sim": null, "revert": false, "pid_order": 6 }, "geometry": { "type": "Point", "coordinates": [ 113.932515057750763, 22.575632036146079 ] } }, 8 | { "type": "Feature", "properties": { "index": 7, "PID": "09005700121709091547560449Y", "DIR": 106, "Order": 2, "Type": "street", "X": 1268436351, "Y": 256452423, "RID": "e03ca1-a0bd-f8ba-e8b6-f42f8f", "MoveDir": 106, "dir_sim": null, "revert": false, "pid_order": 7 }, "geometry": { "type": "Point", "coordinates": [ 113.932920306714124, 22.575490522559665 ] } }, 9 | { "type": "Feature", "properties": { "index": 15, "PID": "09005700121709091548069209Y", "DIR": 95, "Order": 2, "Type": "street", "X": 1268445444, "Y": 256450532, "RID": "ce6e7e-7263-e9af-a5fb-dc8582", "MoveDir": 95, "dir_sim": null, "revert": false, "pid_order": 15 }, "geometry": { "type": "Point", "coordinates": [ 113.933781789624888, 22.575346314537452 ] } }, 10 | { "type": "Feature", "properties": { "index": 101, "PID": "09005700121709091551224079Y", "DIR": 92, "Order": 4, "Type": "street", "X": 1268551143, "Y": 256448826, "RID": "025131-7415-f096-fb9f-ec2bf0", 
"MoveDir": 92, "dir_sim": null, "revert": false, "pid_order": 101 }, "geometry": { "type": "Point", "coordinates": [ 113.943190113338488, 22.575121559997108 ] } }, 11 | { "type": "Feature", "properties": { "index": 108, "PID": "09005700121709091551348359Y", "DIR": 0, "Order": 3, "Type": "street", "X": 1268558107, "Y": 256449791, "RID": "690002-535e-1613-f0b2-077d39", "MoveDir": 70, "dir_sim": null, "revert": false, "pid_order": 108 }, "geometry": { "type": "Point", "coordinates": [ 113.943816093693101, 22.575196482404341 ] } } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /data/trajs/traj_5.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "index": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.934365, 22.575465 ] } }, 5 | { "type": "Feature", "properties": { "index": 2 }, "geometry": { "type": "Point", "coordinates": [ 113.93425, 22.575567 ] } } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /data/trajs/traj_6.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "index": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.937851, 22.575306 ] } }, 5 | { "type": "Feature", "properties": { "index": 8 }, "geometry": { "type": "Point", "coordinates": [ 113.937059, 22.57532 ] } } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /data/trajs/traj_7.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "index": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.932763, 22.575714 ] } }, 5 | { "type": "Feature", "properties": { "index": 4 }, "geometry": { "type": "Point", "coordinates": [ 113.933053, 22.575698 ] } } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /data/trajs/traj_8.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | "features": [ 4 | { "type": "Feature", "properties": { "index": 0 }, "geometry": { "type": "Point", "coordinates": [ 113.934151, 22.577512 ] } }, 5 | { "type": "Feature", "properties": { "index": 6 }, "geometry": { "type": "Point", "coordinates": [ 113.934144, 22.577979 ] } } 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | from mapmatching import build_geograph, ST_Matching 2 | 3 | """step 1: 获取/加载路网""" 4 | # 方法1: 5 | # 根据 bbox 从 OSM 下载路网,从头解析获得路网数据 6 | # net = build_geograph(bbox=[113.930914, 22.570536, 113.945456, 22.585613], 7 | # xml_fn="./data/network/LXD.osm.xml", ll=False, n_jobs=16) 8 | # 将预处理路网保存为 ckpt 9 | # net.save_checkpoint('./data/network/LXD_graph.ckpt') 10 | 11 | # 方法2: 12 | # 使用预处理路网 13 | net = build_geograph(ckpt='./data/network/LXD_graph.ckpt') 14 | # net = build_geograph(ckpt='./data/network/Shenzhen_graph_pygeos.ckpt') 15 | 16 | """step 2: 创建地图匹配 matcher""" 17 | matcher = ST_Matching(net=net, ll=False) 18 | 19 | """step 3: 加载轨迹点集合,以打石一路为例""" 20 | idx = 4 21 | traj = 
res = matcher.matching(traj, top_k=5, dir_trans=True, details=False, plot=True,
                       simplify=True, debug_in_levels=False)

# the following steps are optional
"""step 4: project the trajectory points onto the matched roads"""
path = matcher.transform_res_2_path(res)
proj_traj = matcher.project(traj, path)

"""step 5: eval"""
matcher.eval(traj, res, resample=5, eps=10)

--------------------------------------------------------------------------------
/docs/API.md:
--------------------------------------------------------------------------------
# API Design Document

## Map Matching Module

| Module | Function | Input | Output | Notes |
| :---: | :---: | :--- | --- | --- |
| *candidate<br>Graph* | construct_graph | cands<br>common_attrs<br>left_attrs<br>right_attrs<br>rename_dict | gt | Construct the candidate graph (level, src, dst) for spatial and temporal analysis.<br>When o and d fall on the same edge, swap them. |
| *geometric Analysis* | _filter_candidate | df_candidates<br>top_k<br>pid='eid'<br>edge_keys | df_cands | Filter cands:<br>1. sort by distance and keep only the nearest segment for each road;<br>2. keep the top_k records for each point. |
| | get_k_neigbor_edges | points<br>edges<br>top_k<br>radius | df_cands | [sindex.query_bulk](https://geopandas.org/en/stable/docs/reference/api/geopandas.sindex.SpatialIndex.query_bulk.html#geopandas.sindex.SpatialIndex.query_bulk); returns the integer indexes of the tree geometries |
| | cal_observ_prob | dist<br>bias<br>deviation<br>normal=True | observe_prob | Normal distribution (see the sketch below the table) |
| | project_point_to_line_segment | points<br>edges<br>keeps_cols | | |
| | analyse_geometric_info | | | |
| *spatial<br>Analysis* | cal_traj_distance | | | |
| | _move_dir_similarity | | | |
| | _trans_prob | | | |
| | analyse_spatial_info | | | |
| *topological<br>Analysis* | -- | | | |
| *temporal<br>Analysis* | cos_similarity | | | |
| *viterbi* | process_viterbi_pipeline | | | |
| *postprocess* | get_path | | | |
| | get_one_step | | | |
| | get_connectors | | | |
| *visualization* | plot_matching | | | |
| | matching_debug_level | | | |
| | matching_debug_subplot | | | |
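The `cal_observ_prob` row above only says "normal distribution". As a reading aid, here is a minimal sketch of such an observation probability, assuming `dist` is the point-to-candidate distance in meters, `bias` shifts it, `deviation` is the standard deviation, and `normal=True` rescales the scores; the default deviation and the exact constants used by the library are not documented here and are hypothetical:

```python
import numpy as np

def observ_prob_sketch(dist, bias=0.0, deviation=20.0, normal=True):
    """Gaussian observation probability for candidates at distance `dist` (m).

    Sketch only: parameter semantics are inferred from the table above, and
    the default deviation of 20 m is a hypothetical choice.
    """
    dist = np.asarray(dist, dtype=float)
    prob = np.exp(-((dist - bias) ** 2) / (2 * deviation ** 2))
    if normal:
        # rescale so that the best candidate gets probability 1
        prob = prob / prob.max()
    return prob

# candidates at 5 m, 20 m and 60 m from a GPS point
print(observ_prob_sketch([5.0, 20.0, 60.0]))  # -> [1.0, ~0.63, ~0.011]
```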
--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
import json
import numpy as np
from tqdm import tqdm
from pathlib import Path

from mapmatching import ST_Matching, build_geograph
from mapmatching.setting import DATA_FOLDER
from mapmatching.utils.timer import Timer

from loguru import logger

def save_labels(res, fn):
    with open(fn, 'w') as f:
        json.dump(res, f)


def load_labels(fn):
    with open(fn, 'r') as f:
        _dict = json.load(f)

    _dict = {k: np.array(v) for k, v in _dict.items()}

    return _dict


def evaluation(matcher, trajs_folder, debug_folder=None):
    trajs = trajs_folder.glob("*.geojson")
    gt_fn = trajs_folder / 'gt.json'
    labels = load_labels(gt_fn)

    if debug_folder is None:
        debug_folder = DATA_FOLDER / "result"
    debug_folder.mkdir(exist_ok=True)

    preds = {}
    hit = 0
    errors = {}
    timer = Timer()
    timer.start()

    for fn in tqdm(sorted(trajs)):
        name = fn.name
        traj = matcher.load_points(fn, simplify=False)
        save_fn = debug_folder / str(name).replace('geojson', 'jpg') if debug_folder else None
        res = matcher.matching(traj, simplify=True, plot=False, dir_trans=True, debug_in_levels=False, save_fn=None)
        # matcher.plot_result(traj, res)
        # NOTE: `net` is the module-level graph built in __main__
        vpath = net.transform_epath_to_vpath(res['epath'])
        preds[fn.name] = [int(i) for i in res['epath']]

        if np.array(vpath == labels[name]).all():
            hit += 1
        else:
            errors[name] = fn

    print(f"Precision: {hit / (hit + len(errors)) * 100:.1f} %, time cost: {timer.stop():.2f} s")
    if len(errors):
        print(f"Errors: {errors.keys()}")

    return preds


if __name__ == "__main__":
    trajs_folder = DATA_FOLDER / "trajs"

    net = build_geograph(ckpt=DATA_FOLDER / 'network/Shenzhen_graph_pygeos.ckpt')
    matcher = ST_Matching(net=net)

    preds = evaluation(matcher, trajs_folder, debug_folder=Path("./debug"))

    save_labels(preds, DATA_FOLDER / "trajs/gt_epath.json")
--------------------------------------------------------------------------------
/mapmatching/__init__.py:
--------------------------------------------------------------------------------
1 | from .graph import GeoDigraph
2 | from .utils.timer import Timer, timeit
3 | from .matching import build_geograph, ST_Matching, STATUS
4 | 
--------------------------------------------------------------------------------
/mapmatching/geo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/mapmatching/geo/__init__.py
--------------------------------------------------------------------------------
/mapmatching/geo/azimuth.py:
--------------------------------------------------------------------------------
import math
import numpy as np
from shapely import wkt
from haversine import haversine, haversine_vector, Unit
from shapely.geometry import Point, LineString


def azimuth_diff(a, b, unit='radian'):
    """Calculate the angle difference between two azimuths; the input unit is `degree`.
    Args:
        a (float): Unit: degree
        b (float): Unit: degree
        unit(string): `radian` or `degree`
    Returns:
        float or np.ndarray: the difference, in the requested unit.
    """
    assert unit in ['degree', 'radian']
    diff = np.abs(a - b)

    if isinstance(diff, np.ndarray):
        diff[diff > 180] = 360 - diff[diff > 180]
    else:
        if diff > 180:
            diff = 360 - diff

    return diff if unit == 'degree' else diff * math.pi / 180


def azimuthAngle(x1, y1, x2, y2):
    """Calculate the azimuth angle from (x1, y1) to (x2, y2).

    Args:
        x1 (float): [description]
        y1 (float): [description]
        x2 (float): [description]
        y2 (float): [description]

    Returns:
        float: The angle in degrees.
    """
    angle = 0.0
    dx, dy = x2 - x1, y2 - y1

    if dx == 0:
        angle = math.pi * 0
        if y2 == y1:
            angle = 0.0
        elif y2 < y1:
            angle = math.pi
    elif dy == 0:
        angle = 0
        if dx > 0:
            angle = math.pi / 2.0
        else:
            angle = math.pi / 2.0 * 3.0
    elif x2 > x1 and y2 > y1:
        angle = math.atan(dx / dy)
    elif x2 > x1 and y2 < y1:
        angle = math.pi / 2 + math.atan(-dy / dx)
    elif x2 < x1 and y2 < y1:
        angle = math.pi + math.atan(dx / dy)
    elif x2 < x1 and y2 > y1:
        angle = 3.0 * math.pi / 2.0 + math.atan(dy / -dx)

    return angle * 180 / math.pi


def azimuthAngle_vector(x1, y1, x2, y2):
    angle = 0
    dx = x2 - x1
    dy = y2 - y1

    ans = np.zeros_like(dx)

    x_equal = dx == 0
    x_smaller = dx < 0
    x_bigger = dx > 0

    y_equal = dy == 0
    y_smaller = dy < 0
    y_bigger = dy > 0

    ans[x_equal] = 0.0
    # ans[dx == 0 and dy == 0] = 0.0
    ans[x_equal & y_smaller] = np.pi

    ans[y_equal & x_bigger] = np.pi / 2.0
    ans[y_equal & x_smaller] = np.pi / 2.0 * 3.0

    ans[x_bigger & y_bigger] = np.arctan(dx[x_bigger & y_bigger] / dy[x_bigger & y_bigger])
    ans[x_bigger & y_smaller] = np.pi / 2.0 \
        + np.arctan(-dy[x_bigger & y_smaller] / dx[x_bigger & y_smaller])

    ans[x_smaller & y_smaller] = np.pi \
        + np.arctan(dx[x_smaller & y_smaller] / dy[x_smaller & y_smaller])
    ans[x_smaller & y_bigger] = np.pi / 2.0 * 3.0 \
        + np.arctan(dy[x_smaller & y_bigger] / -dx[x_smaller & y_bigger])

    return ans * 180 / np.pi


def azimuth_cos_similarity(angel_0: float, angel_1: float, normal=False):
    """Calculate the `cosine similarity` between `angel_0` and `angel_1`.

    Args:
        angel_0 (float): Angle 0, unit degree.
        angel_1 (float): Angle 1, unit degree.
        normal (bool): Normalize the cosine similarity from [-1, 1] to [0, 1].

    Returns:
        cos similarity(float): [-1, 1]
    """

    res = np.cos(azimuth_diff(angel_0, angel_1, unit='radian'))
    if normal:
        res = (res + 1) / 2

    return res


def azimuth_cos_distance(angel_0: float, angel_1: float):
    """Calculate the `cosine distance` between `angel_0` and `angel_1`.

    Args:
        angel_0 (float): Angle 0, unit degree.
        angel_1 (float): Angle 1, unit degree.

    Returns:
        cos distance(float): [0, 2]
    """

    return 1 - azimuth_cos_similarity(angel_0, angel_1)


def cal_linestring_azimuth(geom):
    """Calculate the azimuth of each line segment in a polyline.

    Args:
        geom (LineString): The polyline geometry.

    Returns:
        [list]: The list of azimuths (unit: degree).
    """
    if isinstance(geom, LineString):
        coords = np.array(geom.coords)
    elif isinstance(geom, (list, np.ndarray)):
        coords = np.asarray(geom)
    else:
        raise TypeError(f"Unsupported geometry type: {type(geom)}")

    seg_angels = azimuthAngle_vector(coords[:-1, 0], coords[:-1, 1],
                                     coords[1:, 0], coords[1:, 1])

    return seg_angels


def cal_points_azimuth(geoms: list):
    """Calculate the azimuths along a trajectory given as a list of Points.

    Args:
        geoms (list): The trajectory points.

    Returns:
        [list]: The list of azimuths (unit: degree).
    """
    if not geoms or not geoms[0]:
        return None
    if not isinstance(geoms[0], Point):
        return None

    coords = [g.coords[0] for g in geoms]
    seg_angels = [azimuthAngle(*coords[i], *coords[i + 1]) for i in range(len(coords) - 1)]

    return seg_angels


def cal_linestring_azimuth_cos_dist(geom, head_azimuth, weight=True, offset=1):
    if geom is None:
        return None

    if isinstance(geom, LineString):
        coords = np.array(geom.coords)
    elif isinstance(geom, list):
        coords = np.array(geom)
    elif isinstance(geom, np.ndarray):
        coords = geom
    else:
        assert False, print(type(geom), geom)

    road_angels = cal_linestring_azimuth(coords)

    lst = azimuth_cos_similarity(road_angels, head_azimuth)
    if offset:
        lst = (lst + 1) / 2

    if not weight:
        val = np.mean(lst)
    else:
        # FIXME: coords
        try:
            coords = coords[:, ::-1]
            weights = haversine_vector(coords[:-1], coords[1:], unit=Unit.METERS)
        except:
            weights = np.linalg.norm(coords[:-1] - coords[1:], axis=1)
        if np.sum(weights) == 0:
            val = np.mean(lst)
        else:
            val = np.average(lst, weights=weights)

    return val


def cal_coords_seq_azimuth(coords):
    return azimuthAngle_vector(coords[:-1, 0], coords[:-1, 1],
                               coords[1:, 0], coords[1:, 1])
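

# A small usage sketch of the helpers above (values checked by hand):
#   azimuth_diff(350, 10, unit='degree')        -> 20.0
#   azimuth_cos_similarity(0, 90)               -> ~0.0
#   azimuth_cos_similarity(0, 90, normal=True)  -> ~0.5
#   azimuth_cos_distance(0, 180)                -> ~2.0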


if __name__ == '__main__':
    p0 = wkt.loads('POINT (113.934151 22.577512)')
    p1 = wkt.loads('POINT (113.934144 22.577979)')
    polyline = wkt.loads('LINESTRING (113.9340705 22.577737, 113.9340788 22.5777828, 113.934093 22.5778236, 113.9341161 22.5778661, 113.934144 22.5779051, 113.934186 22.57795, 113.9342268 22.5779823, 113.9342743 22.5780131, 113.9343212 22.5780352, 113.9343734 22.5780515, 113.9344212 22.5780605, 113.9344796 22.5780669)')

    import matplotlib.pyplot as plt
    import geopandas as gpd
    gpd.GeoDataFrame({'geometry': [p0, p1, polyline]}).plot()
    plt.show()

    angels = azimuthAngle(*p0.coords[0], *p1.coords[0])

    road_angels = cal_linestring_azimuth(polyline)
    head_azimuth = cal_linestring_azimuth(LineString([p0.coords[0], p1.coords[0]]))

    cal_linestring_azimuth_cos_dist(LineString([p0.coords[0], p1.coords[0]]), head_azimuth, True)
    # head_azimuth = cal_points_azimuth([p0, p1])
    # head_azimuth = cal_points_azimuth([p1, p0])

    # azimuth_cos_distance(road_angels, head_azimuth[0])

    cal_linestring_azimuth_cos_dist(polyline, head_azimuth[0], True)

    cal_linestring_azimuth_cos_dist(polyline, head_azimuth[0], False)
--------------------------------------------------------------------------------
/mapmatching/geo/coord/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/mapmatching/geo/coord/__init__.py
--------------------------------------------------------------------------------
/mapmatching/geo/coord/coordTransform_py.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import json
import math
import urllib.parse
import urllib.request

x_pi = 3.14159265358979324 * 3000.0 / 180.0
pi = 3.1415926535897932384626  # π
a = 6378245.0  # semi-major axis
ee = 0.00669342162296594323  # squared eccentricity


class Geocoding:
    def __init__(self, api_key):
        self.api_key = api_key

    def geocode(self, address):
        """
        Resolve an address to coordinates with the AMap geocoding service.
        :param address: the address to resolve
        :return:
        """
        geocoding = {'s': 'rsv3',
                     'key': self.api_key,
                     'city': '全国',
                     'address': address}
        geocoding = urllib.parse.urlencode(geocoding)
        ret = urllib.request.urlopen("%s?%s" % ("http://restapi.amap.com/v3/geocode/geo", geocoding))

        if ret.getcode() == 200:
            res = ret.read()
            json_obj = json.loads(res)
            if json_obj['status'] == '1' and int(json_obj['count']) >= 1:
                geocodes = json_obj['geocodes'][0]
                lng = float(geocodes.get('location').split(',')[0])
                lat = float(geocodes.get('location').split(',')[1])
                return [lng, lat]
            else:
                return None
        else:
            return None


def gcj02_to_bd09(lng, lat):
    """
    Convert Mars coordinates (GCJ-02) to Baidu coordinates (BD-09).
    Google/AMap --> Baidu
    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return:
    """
    z = math.sqrt(lng * lng + lat * lat) + 0.00002 * math.sin(lat * x_pi)
    theta = math.atan2(lat, lng) + 0.000003 * math.cos(lng * x_pi)
    bd_lng = z * math.cos(theta) + 0.0065
    bd_lat = z * math.sin(theta) + 0.006
    return [bd_lng, bd_lat]


def bd09_to_gcj02(bd_lon, bd_lat):
    """
    Convert Baidu coordinates (BD-09) to Mars coordinates (GCJ-02).
    Baidu --> Google/AMap
    :param bd_lat: BD-09 latitude
    :param bd_lon: BD-09 longitude
    :return: the converted coordinates as a list
    """
    x = bd_lon - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
    gg_lng = z * math.cos(theta)
    gg_lat = z * math.sin(theta)
    return [gg_lng, gg_lat]


def wgs84_to_gcj02(lng, lat):
    """
    Convert WGS84 to GCJ-02 (Mars coordinate system).
    :param lng: WGS84 longitude
    :param lat: WGS84 latitude
    :return:
    """
    if out_of_china(lng, lat):  # no offset is applied outside China
        return [lng, lat]
    dlat = _transformlat(lng - 105.0, lat - 35.0)
    dlng = _transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [mglng, mglat]


def gcj02_to_wgs84(lng, lat):
    """
    Convert GCJ-02 (Mars coordinate system) to WGS84.
    :param lng: GCJ-02 longitude
    :param lat: GCJ-02 latitude
    :return:
    """
    if out_of_china(lng, lat):
        return [lng, lat]
    dlat = _transformlat(lng - 105.0, lat - 35.0)
    dlng = _transformlng(lng - 105.0, lat - 35.0)
    radlat = lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = lat + dlat
    mglng = lng + dlng
    return [lng * 2 - mglng, lat * 2 - mglat]
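

# Usage sketch for the converters above (the round trip is approximate, since
# gcj02_to_wgs84 inverts the offset by re-applying the forward transform):
#   lng, lat = 114.05, 22.54            # a WGS-84 point in Shenzhen
#   gcj = wgs84_to_gcj02(lng, lat)      # WGS-84 -> GCJ-02
#   back = gcj02_to_wgs84(*gcj)         # ~[114.05, 22.54] up to a small residual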


def bd09_to_wgs84(bd_lon, bd_lat):
    lon, lat = bd09_to_gcj02(bd_lon, bd_lat)
    return gcj02_to_wgs84(lon, lat)


def wgs84_to_bd09(lon, lat):
    lon, lat = wgs84_to_gcj02(lon, lat)
    return gcj02_to_bd09(lon, lat)


def _transformlat(lng, lat):
    ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \
        0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret


def _transformlng(lng, lat):
    ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \
        0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret


def out_of_china(lng, lat):
    """
    Check whether a point lies outside China; no offset is applied outside China.
    :param lng:
    :param lat:
    :return:
    """
    return not (lng > 73.66 and lng < 135.05 and lat > 3.86 and lat < 53.55)


if __name__ == '__main__':
    lng = 128.543
    lat = 37.065
    result1 = gcj02_to_bd09(lng, lat)
    result2 = bd09_to_gcj02(lng, lat)
    result3 = wgs84_to_gcj02(lng, lat)
    result4 = gcj02_to_wgs84(lng, lat)
    result5 = bd09_to_wgs84(lng, lat)
    result6 = wgs84_to_bd09(lng, lat)

    g = Geocoding('API_KEY')  # fill in your AMap API key here
    result7 = g.geocode('北京市朝阳区朝阳公园')
    print(result1, result2, result3, result4, result5, result6, result7)
--------------------------------------------------------------------------------
/mapmatching/geo/coord/coordTransfrom_shp.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import geopandas as gpd

from shapely.geometry import Point, LineString, Polygon, MultiPolygon
from . import coordTransform_py as ct
7 | 
8 | 
9 | def polyline_wgs_to_gcj(gdf):
10 |     '''
11 |     Transform the coordinate system of the shapefile (WGS84 -> GCJ-02)
12 |     '''
13 |     gdf['geometry'] = gdf.apply(lambda i: LineString(pd.DataFrame(i.geometry.coords.xy).T.rename(
14 |         columns={0: 'x', 1: 'y'}).apply(lambda x: ct.wgs84_to_gcj02(x.x, x.y), axis=1)), axis=1)
15 |     return gdf
16 | 
17 | 
18 | def polyline_gcj_to_wgs(gdf):
19 |     '''
20 |     Transform the coordinate system of the shapefile (GCJ-02 -> WGS84)
21 |     '''
22 |     gdf['geometry'] = gdf.apply(lambda i: LineString(pd.DataFrame(i.geometry.coords.xy).T.rename(
23 |         columns={0: 'x', 1: 'y'}).apply(lambda x: ct.gcj02_to_wgs84(x.x, x.y), axis=1)), axis=1)
24 |     return gdf
25 | 
26 | 
27 | # new function
28 | def gdf_wgs_to_gcj(gdf):
29 |     '''
30 |     Transform the coordinate system of the GeoDataFrame (WGS84 -> GCJ-02)
31 |     '''
32 |     if isinstance(gdf.iloc[0].geometry, Polygon):
33 |         gdf['geometry'] = gdf.apply(lambda i: Polygon(pd.DataFrame(i.geometry.exterior.coords.xy).T.rename(
34 |             columns={0: 'x', 1: 'y'}).apply(lambda x: ct.wgs84_to_gcj02(x.x, x.y), axis=1)), axis=1)
35 |     elif isinstance(gdf.iloc[0].geometry, LineString):
36 |         gdf['geometry'] = gdf.apply(lambda i: LineString(pd.DataFrame(i.geometry.coords.xy).T.rename(
37 |             columns={0: 'x', 1: 'y'}).apply(lambda x: ct.wgs84_to_gcj02(x.x, x.y), axis=1)), axis=1)
38 |     elif isinstance(gdf.iloc[0].geometry, MultiPolygon):
39 |         gdf['geometry'] = gdf.geometry.apply(lambda item: MultiPolygon([Polygon(pd.DataFrame(geom.exterior.coords.xy).T.rename(
40 |             columns={0: 'x', 1: 'y'}).apply(lambda x: ct.wgs84_to_gcj02(x.x, x.y), axis=1)) for geom in item.geoms]))
41 |     elif isinstance(gdf.iloc[0].geometry, Point):
42 |         gdf['geometry'] = gdf.apply(lambda i: Point(
43 |             ct.wgs84_to_gcj02(i.geometry.x, i.geometry.y)), axis=1)
44 |     return gdf
45 | 
46 | 
47 | def gdf_gcj_to_wgs(gdf):
48 |     '''
49 |     Transform the coordinate system of the GeoDataFrame (GCJ-02 -> WGS84)
50 |     '''
51 |     if isinstance(gdf.iloc[0].geometry, Polygon):
52 |         gdf['geometry'] = gdf.apply(lambda i: Polygon(pd.DataFrame(i.geometry.exterior.coords.xy).T.rename(
53 |             columns={0: 'x', 1: 'y'}).apply(lambda x: ct.gcj02_to_wgs84(x.x, x.y), axis=1)), axis=1)
54 |     elif isinstance(gdf.iloc[0].geometry, LineString):
55 |         gdf['geometry'] = gdf.apply(lambda i: LineString(pd.DataFrame(i.geometry.coords.xy).T.rename(
56 |             columns={0: 'x', 1: 'y'}).apply(lambda x: ct.gcj02_to_wgs84(x.x, x.y), axis=1)), axis=1)
57 |     elif isinstance(gdf.iloc[0].geometry, MultiPolygon):
58 |         gdf['geometry'] = gdf.geometry.apply(lambda item: MultiPolygon([Polygon(pd.DataFrame(geom.exterior.coords.xy).T.rename(
59 |             columns={0: 'x', 1: 'y'}).apply(lambda x: ct.gcj02_to_wgs84(x.x, x.y), axis=1)) for geom in item.geoms]))
60 |     elif isinstance(gdf.iloc[0].geometry, Point):
61 |         gdf['geometry'] = gdf.apply(lambda i: Point(
62 |             ct.gcj02_to_wgs84(i.geometry.x, i.geometry.y)), axis=1)
63 |     return gdf
64 | 
65 | def coord_transfer(res, in_sys='gcj', out_sys='wgs'):
66 |     assert in_sys in ['gcj', 'wgs'] and out_sys in ['gcj', 'wgs'], "check coordinate system"
67 |     if in_sys != out_sys:
68 |         if in_sys == 'gcj':
69 |             res = gdf_gcj_to_wgs(res)
70 |         else:
71 |             res = gdf_wgs_to_gcj(res)
72 |     return res
73 | 
74 | def df_to_gdf_points(trip, in_sys='gcj', out_sys='wgs', keep_datetime=True):
75 |     if not keep_datetime and len(trip.dtypes[trip.dtypes == 'datetime64[ns]'].index) > 0:
76 |         trip = trip.drop(columns=trip.dtypes[trip.dtypes == 'datetime64[ns]'].index)
77 |     # gpd.GeoDataFrame(trip, geometry=trip.apply(lambda x: Point(x.x, x.y), axis=1)).to_file(f'{plate}.geojson', driver='GeoJSON')
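    # A minimal, hedged usage sketch (the values below are hypothetical): the input
    # is assumed to carry `x` / `y` columns in the `in_sys` coordinate system, e.g.
    #     trip = pd.DataFrame({'x': [114.06], 'y': [22.54]})
    #     gdf = df_to_gdf_points(trip, in_sys='gcj', out_sys='wgs')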
78 |     trip = gpd.GeoDataFrame(trip, geometry=trip.apply(lambda x: Point(x.x, x.y), axis=1), crs="EPSG:4326")
79 |     trip = coord_transfer(trip, in_sys, out_sys)
80 |     return trip
81 | 
82 | 
83 | def traj_points_to_line(df_tra, df_trip, plate, save=False):
84 |     gdf = gpd.GeoDataFrame()
85 |     for i in df_trip.trip_id.unique():
86 |         tra = LineString(df_tra[df_tra.trip_id == i][['x', 'y', 't']].values)
87 |         gdf = pd.concat([gdf, gpd.GeoDataFrame([{'trip_id': i, 'geometry': tra}])], ignore_index=True)
88 |     gdf = gdf.merge(df_trip, on='trip_id')
89 |     gdf = gdf_gcj_to_wgs(gdf)
90 |     gdf.crs = "EPSG:4326"
91 |     # gdf.to_crs(epsg=4547)
92 |     if save: gdf.to_file('%s.shp' % (plate), encoding='utf-8')
93 |     return gdf
94 | 
95 | 
96 | if __name__ == '__main__':
97 |     # a = gpd.read_file('../trajectory_related/input/Futian_boundary_wgs.shp')
98 |     # df_to_gdf_points(trip)
99 |     pass
100 | 
--------------------------------------------------------------------------------
/mapmatching/geo/io.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import shapely
4 | import warnings
5 | import geopandas as gpd
6 | from loguru import logger
7 | import sqlalchemy
8 | 
9 | from ..setting import postgre_url
10 | 
11 | 
12 | ENGINE = sqlalchemy.create_engine(postgre_url)
13 | 
14 | def has_table(name, con=None, engine=None):
15 |     flag = False
16 |     if con is None:
17 |         con = engine.connect()
18 |         flag = True
19 | 
20 |     status = con.dialect.has_table(con, name)
21 |     if flag:
22 |         con.close()
23 | 
24 |     return status
25 | 
26 | def read_postgis(name, atts="*", condition=None, engine=ENGINE, bbox=None, mask=None, geom_col='geometry', *args, **kwargs):
27 |     """
28 |     Refs: https://geopandas.org/en/stable/docs/reference/api/geopandas.read_postgis.html#geopandas.read_postgis
29 |     """
30 |     with engine.connect() as conn:
31 |         if not has_table(name, con=conn):
32 |             warnings.warn(f"Table {name} does not exist")
33 |             return None
34 | 
35 |         if bbox is not None:
36 |             wkt = shapely.box(*bbox).to_wkt()
37 |         elif shapely.is_geometry(mask):
38 |             wkt = mask.wkt
39 |         else:
40 |             wkt = None
41 | 
42 |         if wkt is None:
43 |             sql = f"SELECT {atts} FROM {name}"
44 |         else:
45 |             sql = f"""SELECT {atts} FROM {name} WHERE ST_Intersects( geometry, ST_GeomFromText('{wkt}', 4326) )"""
46 | 
47 |         if condition:
48 |             sql += f" WHERE {condition}" if wkt is None else f" AND {condition}"
49 | 
50 |         gdf = gpd.read_postgis(sqlalchemy.text(sql), con=conn, geom_col=geom_col, *args, **kwargs)
51 | 
52 |     return gdf
53 | 
54 | def to_postgis(gdf: gpd.GeoDataFrame, name, duplicates_idx=None, engine=ENGINE, if_exists='fail', *args, **kwargs):
55 |     """
56 |     Upload GeoDataFrame into PostGIS database.
57 | 
58 |     This method requires SQLAlchemy and GeoAlchemy2, and a PostgreSQL
59 |     Python driver (e.g. psycopg2) to be installed.
60 | 
61 |     Parameters
62 |     ----------
63 |     name : str
64 |         Name of the target table.
65 |     con : sqlalchemy.engine.Connection or sqlalchemy.engine.Engine
66 |         Active connection to the PostGIS database.
67 |     if_exists : {'fail', 'replace', 'append'}, default 'fail'
68 |         How to behave if the table already exists:
69 | 
70 |         - fail: Raise a ValueError.
71 |         - replace: Drop the table before inserting new values.
72 |         - append: Insert new values to the existing table.
73 |     schema : string, optional
74 |         Specify the schema. If None, use default schema: 'public'.
75 |     index : bool, default False
76 |         Write DataFrame index as a column.
77 |         Uses *index_label* as the column name in the table.
78 |     index_label : string or sequence, default None
79 |         Column label for index column(s).
80 |         If None is given (default) and index is True,
81 |         then the index names are used.
82 |     chunksize : int, optional
83 |         Rows will be written in batches of this size at a time.
84 |         By default, all rows will be written at once.
85 |     dtype : dict of column name to SQL type, default None
86 |         Specifying the datatype for columns.
87 |         The keys should be the column names and the values
88 |         should be the SQLAlchemy types.
89 |     """
90 | 
91 |     ori_gdf = None
92 |     flag = False
93 |     if if_exists == 'append' and duplicates_idx is not None:
94 |         if has_table(name, engine=engine):
95 |             ori_gdf = read_postgis(name, engine=engine)
96 |             # FIXME currently broken because of library version issues
97 |             if_exists = 'replace'
98 |             flag = True
99 | 
100 |     with engine.connect() as conn:
101 |         if flag:
102 |             tmp = pd.concat([ori_gdf, gdf]).drop_duplicates(duplicates_idx)
103 |             if tmp.shape[0] == ori_gdf.shape[0]:
104 |                 print(f"There is no new record in {name}")
105 |                 return True
106 | 
107 |             # Check newly added attributes; if any exist, delete them
108 |             drop_cols = []
109 |             remain_cols = []
110 |             for i in tmp.columns:
111 |                 if i not in ori_gdf.columns:
112 |                     drop_cols.append(i)
113 |                     continue
114 |                 remain_cols.append(i)
115 | 
116 |             if drop_cols:
117 |                 logger.warning(f"Drop columns `{drop_cols}`, for they do not exist in the db")
118 | 
119 |             gdf = tmp[remain_cols]
120 | 
121 |         status = gdf.to_postgis(name=name, con=conn, if_exists=if_exists, *args, **kwargs)
122 | 
123 |     return status
124 | 
125 | def to_geojson(gdf, fn):
126 |     if not isinstance(gdf, gpd.GeoDataFrame):
127 |         print('Check the format of the gdf.')
128 |         return False
129 | 
130 |     if 'geojson' not in str(fn):
131 |         fn = f'{fn}.geojson'
132 | 
133 |     gdf.to_file(fn, driver="GeoJSON")
134 | 
135 |     return
136 | 
137 | def set_engine(url):
138 |     global ENGINE
139 |     ENGINE = sqlalchemy.create_engine(url)
140 | 
141 |     return ENGINE
142 | 
--------------------------------------------------------------------------------
/mapmatching/geo/metric/__init__.py:
--------------------------------------------------------------------------------
1 | from .trajDist import lcss, edr, erp
2 | 
--------------------------------------------------------------------------------
/mapmatching/geo/metric/trajDist.py:
--------------------------------------------------------------------------------
1 | # refs: https://github.com/bguillouet/traj-dist
2 | 
3 | import numpy as np
4 | from haversine import haversine_vector, Unit
5 | from ..ops.distance import haversine_matrix
6 | import numba
7 | 
8 | @numba.njit
9 | def lcss_dp(n0, n1, M):
10 |     # An (n0+1) times (n1+1) matrix
11 |     C = np.zeros((n0 + 1, n1 + 1))
12 |     for i in range(1, n0 + 1):
13 |         for j in range(1, n1 + 1):
14 |             if M[i - 1, j - 1]:
15 |                 C[i, j] = C[i - 1, j - 1] + 1
16 |             else:
17 |                 C[i, j] = max(C[i, j - 1], C[i - 1, j])
18 | 
19 |     val = float(C[n0, n1]) / min(n0, n1)
20 | 
21 |     return val
22 | 
23 | def cal_dist_matrix(array1: np.ndarray, array2: np.ndarray, ll=True):
24 |     if ll:
25 |         M = haversine_matrix(array1, array2, xy=True)
26 |     else:
27 |         M = np.linalg.norm((array1[:, np.newaxis, :] - array2[np.newaxis, :, :]), axis=-1)
28 | 
29 |     return M
30 | 
31 | def lcss(array1: np.ndarray, array2: np.ndarray, eps: float = 10.0, ll=True):
32 |     """
33 |     Usage
34 |     -----
35 |     The `Longest-Common-Subsequence distance` (Spherical Geometry) between trajectory t0 and t1.
36 |     Parameters
37 |     ----------
38 |     param t0 : len(t0) x 2 numpy_array
39 |     param t1 : len(t1) x 2 numpy_array
40 |     eps : float
41 |     Returns
42 |     -------
43 |     lcss : float
44 |         The Longest-Common-Subsequence distance between trajectory t0 and t1
45 |     """
46 |     M = cal_dist_matrix(array1, array2, ll)
47 |     mask = M < eps
48 |     M[mask] = True
49 |     M[~mask] = False
50 | 
51 |     val = lcss_dp(len(array1), len(array2), M)
52 | 
53 |     return val
54 | 
55 | def edr(array1, array2, eps, ll=False):
56 |     """
57 |     Usage
58 |     -----
59 |     The `Edit Distance on Real sequence` between trajectory t0 and t1.
60 |     Parameters
61 |     ----------
62 |     param t0 : len(t0)x2 numpy_array
63 |     param t1 : len(t1)x2 numpy_array
64 |     eps : float
65 |     Returns
66 |     -------
67 |     edr : float
68 |         The Edit-Distance-on-Real-sequence between trajectory t0 and t1
69 |     """
70 |     n0 = len(array1)
71 |     n1 = len(array2)
72 | 
73 |     dist_matrix = cal_dist_matrix(array1, array2, ll)
74 |     M = dist_matrix.copy()
75 |     mask = M < eps
76 |     M[mask] = 0
77 |     M[~mask] = 1
78 |     M = M.astype(int)
79 | 
80 |     # An (m+1) times (n+1) matrix
81 |     C = [[0] * (n1 + 1) for _ in range(n0 + 1)]
82 |     for i in range(1, n0 + 1):
83 |         for j in range(1, n1 + 1):
84 |             subcost = M[i - 1, j - 1]
85 |             C[i][j] = min(C[i][j - 1] + 1, C[i - 1][j] + 1, C[i - 1][j - 1] + subcost)
86 |     edr = float(C[n0][n1]) / max([n0, n1])
87 | 
88 |     return edr
89 | 
90 | def erp(array1, array2, g, ll=False):
91 |     """
92 |     Usage
93 |     -----
94 |     The `Edit distance with Real Penalty` between trajectory t0 and t1.
95 |     Parameters
96 |     ----------
97 |     param t0 : len(t0)x2 numpy_array
98 |     param t1 : len(t1)x2 numpy_array
99 |     Returns
100 |     -------
101 |     erp : float
102 |         The Edit-distance-with-Real-Penalty between trajectory t0 and t1
103 |     """
104 |     n0 = len(array1)
105 |     n1 = len(array2)
106 |     C = np.zeros((n0 + 1, n1 + 1))
107 | 
108 |     dist_matrix = cal_dist_matrix(array1, array2, ll)
109 | 
110 |     ref_1 = haversine_vector(array1[:, ::-1], g[::-1], unit=Unit.METERS)
111 |     ref_2 = haversine_vector(array2[:, ::-1], g[::-1], unit=Unit.METERS)
112 | 
113 |     C[1:, 0] = np.sum(ref_1)
114 |     C[0, 1:] = np.sum(ref_2)
115 |     for i in np.arange(n0) + 1:
116 |         for j in np.arange(n1) + 1:
117 |             derp0 = C[i - 1, j] + ref_1[i - 1]
118 |             derp1 = C[i, j - 1] + ref_2[j - 1]
119 |             derp01 = C[i - 1, j - 1] + dist_matrix[i - 1, j - 1]
120 |             C[i, j] = min(derp0, derp1, derp01)
121 | 
122 |     erp = C[n0, n1]
123 | 
124 |     return erp
125 | 
126 | """ Euclidean Geometry """
127 | def e_lcss(t0, t1, eps, ll=False):
128 |     """
129 |     Usage
130 |     -----
131 |     The Longest-Common-Subsequence distance between trajectory t0 and t1.
132 |     Parameters
133 |     ----------
134 |     param t0 : len(t0)x2 numpy_array
135 |     param t1 : len(t1)x2 numpy_array
136 |     eps : float
137 |     Returns
138 |     -------
139 |     lcss : float
140 |         The Longest-Common-Subsequence distance between trajectory t0 and t1
141 |     """
142 |     n0 = len(t0)
143 |     n1 = len(t1)
144 |     # An (m+1) times (n+1) matrix
145 |     C = [[0] * (n1 + 1) for _ in range(n0 + 1)]
146 |     for i in range(1, n0 + 1):
147 |         for j in range(1, n1 + 1):
148 |             if np.linalg.norm(np.asarray(t0[i - 1]) - np.asarray(t1[j - 1])) < eps:
149 |                 C[i][j] = C[i - 1][j - 1] + 1
150 |             else:
151 |                 C[i][j] = max(C[i][j - 1], C[i - 1][j])
152 |     lcss = 1 - float(C[n0][n1]) / min([n0, n1])
153 |     return lcss
154 | 
155 | 
--------------------------------------------------------------------------------
/mapmatching/geo/ops/__init__.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import geopandas as gpd
3 | 
4 | def check_duplicate_points(points: gpd.GeoDataFrame):
5 |     """Check for duplicate nodes in a sequence of coordinates
6 | 
7 |     Args:
8 |         points (gpd.GeoDataFrame): The point sequence to check.
9 | 
10 |     Returns:
11 |         gpd.GeoDataFrame: The points with consecutive duplicates dropped.
12 |     """
13 |     coords = np.concatenate(points.geometry.apply(lambda x: x.coords))
14 |     mask = np.sum(coords[:-1] == coords[1:], axis=1) == 2
15 |     mask = np.concatenate([mask, [False]])
16 | 
17 |     if mask.sum():
18 |         idxs = np.where(mask == True)[0]
19 |         print(f"Exist duplicate points, idx: {idxs}.")
20 | 
21 |         return points[~mask]
22 | 
23 |     return points
24 | 
25 | from .point2line import project_point_2_linestring, project_points_2_linestrings
--------------------------------------------------------------------------------
/mapmatching/geo/ops/distance.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import geopandas as gpd
4 | from shapely.geometry import Point
5 | from haversine import haversine, haversine_vector, Unit
6 | 
7 | from .to_array import points_geoseries_2_ndarray
8 | 
9 | 
10 | def get_length(geoms):
11 |     crs = geoms.estimate_utm_crs()
12 |     return geoms.to_crs(crs).length
13 | 
14 | def cal_pointwise_distance_geoseries(arr1, arr2, align=True):
15 |     """calculate two geoseries distance
16 | 
17 |     Args:
18 |         arr1 (gpd.GeoSeries): Geom array 1.
19 |         arr2 (gpd.GeoSeries): Geom array 2.
20 |         align (bool, optional): Align the two Geom arrays. Defaults to True.
21 | 
22 |     Returns:
23 |         pd.Series: Distance array
24 |     """
25 |     if isinstance(arr1, pd.Series):
26 |         arr1 = gpd.GeoSeries(arr1)
27 |     if isinstance(arr2, pd.Series):
28 |         arr2 = gpd.GeoSeries(arr2)
29 |     arr1 = arr1.reset_index(drop=True)
30 |     arr2 = arr2.reset_index(drop=True)
31 | 
32 |     crs_1 = arr1.crs
33 |     crs_2 = arr2.crs
34 |     assert crs_1 is not None or crs_2 is not None, "at least one of arr1 / arr2 must have a crs"
35 | 
36 |     if align:
37 |         if crs_1 is None:
38 |             arr1.set_crs(crs_2, inplace=True)
39 |         if crs_2 is None:
40 |             arr2.set_crs(crs_1, inplace=True)
41 |     else:
42 |         assert crs_1 is not None and crs_2 is not None, "Turn `align` on to align geom1 and geom2"
43 | 
44 |     if arr1.crs.to_epsg() == 4326:
45 |         crs = arr1.estimate_utm_crs()
46 |         dist = arr1.to_crs(crs).distance(arr2.to_crs(crs))
47 |     else:
48 |         dist = arr1.distance(arr2)
49 | 
50 |     return dist
51 | 
52 | def cal_distance_matrix_geoseries(points1, points2, align=True):
53 |     """Generate a pairwise distance matrix between two GeoSeries.
54 | 
55 |     Args:
56 |         points1 (gpd.GeoSeries): Geom array 1.
57 |         points2 (gpd.GeoSeries): Geom array 2.
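        align (bool, optional): Align the CRS of the two GeoSeries before measuring. Defaults to True.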
58 | 
59 |     Returns:
60 |         pd.DataFrame: A distance matrix of size n x m
61 |     """
62 |     n, m = len(points1), len(points2)
63 | 
64 |     # Replicate arr1 and arr2
65 |     repeated_arr1 = points1.repeat(m)  # .reset_index(drop=True)
66 |     repeated_arr2 = gpd.GeoSeries(pd.concat([points2] * n), crs=points2.crs)  # .reset_index(drop=True)
67 | 
68 |     # Calculate distances
69 |     distances = cal_pointwise_distance_geoseries(repeated_arr1, repeated_arr2, align=align)
70 | 
71 |     # Reshape into matrix
72 |     distance_matrix = distances.values.reshape(n, m)
73 | 
74 |     return pd.DataFrame(distance_matrix, index=points1.index, columns=points2.index)
75 | 
76 | def coords_seq_distance(coords):
77 |     # L2 norms between consecutive coordinates
78 |     dist_np = np.linalg.norm(coords[:-1] - coords[1:], axis=1)
79 | 
80 |     return dist_np, np.sum(dist_np)
81 | 
82 | def get_vertical_dist(pointX, pointA, pointB, ll=False):
83 |     if ll:
84 |         a, b, c = haversine_vector(
85 |             np.array([pointA, pointA, pointB])[:, ::-1],
86 |             np.array([pointB, pointX, pointX])[:, ::-1],
87 |             unit=Unit.METERS
88 |         )
89 |     else:
90 |         a, b, c = np.linalg.norm(
91 |             np.array([pointA, pointA, pointB]) - np.array([pointB, pointX, pointX]), axis=1)
92 | 
93 |     # when the two chord endpoints coincide, the point-to-chord distance degenerates to the point-to-point distance
94 |     if a == 0:
95 |         return b
96 | 
97 |     p = (a + b + c) / 2
98 |     S = np.sqrt(np.abs(p * (p - a) * (p - b) * (p - c)))
99 | 
100 |     vertical_dist = S * 2 / a
101 | 
102 |     return vertical_dist
103 | 
104 | """ haversine """
105 | def geom_series_distance(col1, col2, in_crs=4326, out_crs=900913):
106 |     assert isinstance(col1, gpd.GeoSeries) and isinstance(col2, gpd.GeoSeries)
107 | 
108 |     if in_crs == out_crs:
109 |         return col1.distance(col2)
110 | 
111 |     if isinstance(col1, pd.Series):
112 |         a = gpd.GeoSeries(col1).set_crs(in_crs, allow_override=True).to_crs(out_crs)
113 |     if isinstance(col2, pd.Series):
114 |         b = gpd.GeoSeries(col2).set_crs(in_crs, allow_override=True).to_crs(out_crs)
115 | 
116 |     return a.distance(b)
117 | 
118 | def haversine_matrix(array1, array2, xy=True, unit=Unit.METERS):
119 |     '''
120 |     The exact same function as "haversine", except that this
121 |     version replaces math functions with numpy functions.
122 |     This may make it slightly slower for computing the haversine
123 |     distance between two points, but is much faster for computing
124 |     the distance matrix between two vectors of points due to vectorization.
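    Example (a minimal, hedged sketch; both inputs are assumed to be (n, 2)
    arrays of lon/lat pairs when xy=True):
        >>> import numpy as np
        >>> a = np.array([[113.93, 22.57], [113.94, 22.58]])
        >>> b = np.array([[113.95, 22.59]])
        >>> haversine_matrix(a, b).shape
        (2, 1)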
125 |     '''
126 |     if xy:
127 |         array1 = array1[:, ::-1]
128 |         array2 = array2[:, ::-1]
129 | 
130 |     dist = haversine_vector(np.repeat(array1, len(array2), axis=0),
131 |                             np.concatenate([array2] * len(array1)),
132 |                             unit=unit)
133 | 
134 |     matrix = dist.reshape((len(array1), len(array2)))
135 | 
136 |     return matrix
137 | 
138 | def haversine_vector_xy(array1, array2, unit=Unit.METERS, comb=False, normalize=False):
139 |     # ensure arrays are numpy ndarrays
140 |     if not isinstance(array1, np.ndarray):
141 |         array1 = np.array(array1)
142 |     if not isinstance(array2, np.ndarray):
143 |         array2 = np.array(array2)
144 | 
145 |     array1 = array1[:, ::-1]
146 |     array2 = array2[:, ::-1]
147 |     ans = haversine_vector(array1, array2, unit, comb, normalize)
148 | 
149 |     return ans
150 | 
151 | def coords_pair_dist(o, d, xy=True):
152 |     if isinstance(o, Point) and isinstance(d, Point):
153 |         return haversine((o.y, o.x), (d.y, d.x), unit=Unit.METERS)
154 | 
155 |     if (isinstance(o, tuple) and isinstance(d, tuple)) or \
156 |        (isinstance(o, list) and isinstance(d, list)):
157 |         if xy:
158 |             return haversine(o[:2][::-1], d[:2][::-1], unit=Unit.METERS)
159 |         else:
160 |             return haversine(o[:2], d[:2], unit=Unit.METERS)
161 | 
162 |     return np.inf
163 | 
164 | def cal_coords_seq_distance(points: np.ndarray, xy=True):
165 |     if xy:
166 |         points = points.copy()
167 |         points = points[:, ::-1]
168 | 
169 |     # FIXME fall back to the Euclidean norm when the input is not in lon/lat
170 |     try:
171 |         dist_np = haversine_vector(points[:-1], points[1:], unit=Unit.METERS)
172 |     except:
173 |         dist_np = np.linalg.norm(points[:-1] - points[1:], axis=1)
174 | 
175 |     return dist_np, dist_np.sum()
176 | 
177 | def cal_points_geom_seq_distacne(geoms: gpd.GeoSeries):
178 |     coords = points_geoseries_2_ndarray(geoms)
179 |     dist, total = cal_coords_seq_distance(coords, xy=True)
180 | 
181 |     return dist, coords
182 | 
183 | def haversine_geoseries(points1, points2, unit=Unit.METERS, comb=False, normalize=False):
184 |     coords_0 = points_geoseries_2_ndarray(points1)
185 |     coords_1 = points_geoseries_2_ndarray(points2)
186 |     dist = haversine_vector_xy(coords_0, coords_1, unit, comb, normalize)
187 | 
188 |     return dist
189 | 
190 | 
191 | if __name__ == "__main__":
192 |     # matrix = haversine_matrix(traj_points, points_, xy=True)
193 | 
194 |     # create two test GeoSeries
195 |     points1 = gpd.GeoSeries([Point(0, 0), Point(1, 1)])
196 |     points2 = gpd.GeoSeries([Point(1, 1), Point(0, 0), Point(1, 1)])
197 | 
198 |     # make sure both GeoSeries use the same CRS
199 |     points1.set_crs(epsg=4326, inplace=True)
200 |     points2.set_crs(epsg=4326, inplace=True)
201 | 
202 |     # compute the distance matrix between the two GeoSeries
203 |     distance_matrix = cal_distance_matrix_geoseries(points1, points2)
204 |     distance_matrix
205 | 
206 | 
--------------------------------------------------------------------------------
/mapmatching/geo/ops/linear_referencing.py:
--------------------------------------------------------------------------------
1 | import numba
2 | import numpy as np
3 | from shapely import LineString, Point
4 | from .distance import coords_seq_distance
5 | from .to_array import geoseries_to_coords, points_geoseries_2_ndarray
6 | 
7 | def _check(point, line):
8 |     res = linear_referencing(point, line, cut=False)
9 |     dist = res['offset']
10 |     _dist = line.project(point)
11 | 
12 |     assert abs(dist - _dist) / (dist + 1e-8) < 1e-8, "check"
13 | 
14 | def plot(point, polyline, res):
15 |     proj = res['proj_point']
16 |     if 'seg_0' in res:
17 |         seg_0 = LineString(res['seg_0'])
18 |     if 'seg_1' in res:
19 |         seg_1 = LineString(res['seg_1'])
20 | 
21 |     import geopandas as gpd
22 |     ax = gpd.GeoDataFrame({
23 |         'geometry': [point, polyline],
24 |         'name': ['point', 'polyline']
25 |     }).plot(color='red', linewidth=5, alpha=.5)
26 | 
27 |     gpd.GeoDataFrame({"geometry": [proj]}).plot(ax=ax, color='blue', label='Project')
28 | 
29 |     segs = gpd.GeoDataFrame({"name": ['seg_0', "seg_1"],
30 |                              "geometry": [seg_0, seg_1]})
31 |     segs.plot(ax=ax, column='name', legend=True, linestyle="--")
32 | 
33 |     return ax
34 | 
35 | def closest_point_on_segments(point: np.ndarray, lines: np.ndarray, eps=1e-9):
36 |     """Calculate the closest point p' and its params on each segment of a polyline.
37 | 
38 |     Args:
39 |         point (np.array): Point (shape: [2,]).
40 |         lines (np.array): Polyline in the form of a coords sequence (shape: [n, 2]).
41 |         eps (float, optional): Defaults to 1e-9.
42 | 
43 |     Returns:
44 |         (array, array, array): proj, dist, ratio
45 |     """
46 |     segs = np.hstack([lines[:-1][:, np.newaxis],
47 |                       lines[1:][:, np.newaxis]])
48 |     pq = segs[:, 1] - segs[:, 0]
49 |     d = np.power(pq, 2).sum(axis=1)
50 |     d[d == 0] = eps
51 | 
52 |     x, y = point
53 |     dx = x - segs[:, 0, 0]
54 |     dy = y - segs[:, 0, 1]
55 |     t = pq[:, 0] * dx + pq[:, 1] * dy
56 | 
57 |     ratio = t / d
58 |     ratio[ratio < 0] = 0
59 |     ratio[ratio > 1] = 1
60 | 
61 |     offset = pq * ratio[:, np.newaxis]
62 |     proj = offset + segs[:, 0]
63 |     dist = np.linalg.norm(point - proj, axis=1)
64 | 
65 |     return proj, dist, ratio
66 | 
67 | # @numba.jit
68 | def cut_lines(idx, proj, ratio, coords):
69 |     NONE_COORD = None
70 |     if idx == 0 and ratio == 0:
71 |         return NONE_COORD, coords
72 |     if idx == coords.shape[0] - 2 and ratio == 1:
73 |         return coords, NONE_COORD
74 | 
75 |     if ratio == 0:
76 |         seg_0 = coords[:idx + 1]
77 |         seg_1 = coords[idx:]
78 |     elif ratio < 1:
79 |         seg_0 = np.concatenate([coords[:idx+1], [proj]])
80 |         seg_1 = np.concatenate([[proj], coords[idx+1:]])
81 |     else:
82 |         seg_0 = coords[:idx+2]
83 |         seg_1 = coords[idx+1:]
84 | 
85 |     return seg_0, seg_1
86 | 
87 | def linear_referencing(point: Point, polyline: LineString, cut=True, to_geom=False):
88 |     # TODO vectorize
89 |     # iterate through each segment in the polyline and return the one with minimum distance
90 | 
91 |     p_coords = np.array(point.coords[0])
92 |     l_coords = np.array(polyline.coords)
93 | 
94 |     projs, dists, ratios = closest_point_on_segments(p_coords, l_coords)
95 |     idx = np.argmin(dists)
96 |     proj = projs[idx]
97 |     ratio = ratios[idx]
98 |     len_np, total_len = coords_seq_distance(l_coords)
99 |     offset = len_np[:idx].sum() + len_np[idx] * ratio
100 | 
101 |     res = {}
102 |     res['proj_point'] = Point(proj) if to_geom else proj
103 |     res['dist_p2c'] = dists[idx]
104 |     if not cut:
105 |         res['offset'] = offset
106 |     else:
107 |         seg_0, seg_1 = cut_lines(idx, proj, ratio, l_coords)
108 |         if to_geom:
109 |             seg_0 = LineString(seg_0)
110 |             seg_1 = LineString(seg_1)
111 |         res['seg_0'] = seg_0
112 |         res['seg_1'] = seg_1
113 |         res['len_0'] = offset
114 |         res['len_1'] = total_len - offset
115 | 
116 |     return res
117 | 
118 | # @numba.jit
119 | def lines_to_matrix(lines, n_rows, n_cols):
120 |     _lines = np.zeros((n_rows, n_cols, 2))
121 |     mask = np.ones((n_rows, n_cols), dtype=np.bool_)
122 | 
123 |     for i, line in enumerate(lines):
124 |         n = len(line)
125 |         _lines[i, :n] = line
126 |         _lines[i, n:] = line[-1]
127 |         mask[i, n:] = 0
128 | 
129 |     return _lines, mask
130 | 
131 | # @numba.jit
132 | def cut_line(idx, proj, ratio, coords):
133 |     NONE_COORD = None
134 |     if idx == 0 and ratio == 0:
135 |         return NONE_COORD, coords
136 |     if idx == coords.shape[0] - 2 and ratio == 1:
137 |         return coords, NONE_COORD
138 | 
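    # Branch notes (describing the code below): ratio == 0 means the projection
    # falls exactly on vertex `idx`, so the line is split at that vertex;
    # 0 < ratio < 1 means it falls inside segment `idx`, so the projected point
    # `proj` is inserted into both halves; ratio == 1 means it falls on vertex `idx + 1`.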
if ratio == 0: 140 | seg_0 = coords[:idx + 1] 141 | seg_1 = coords[idx:] 142 | elif ratio < 1: 143 | seg_0 = np.concatenate([coords[:idx+1], [proj]]) 144 | seg_1 = np.concatenate([[proj], coords[idx+1:]]) 145 | else: 146 | seg_0 = coords[:idx+2] 147 | seg_1 = coords[idx+1:] 148 | 149 | return seg_0, seg_1 150 | 151 | # @numba.jit 152 | def numba_cut_lines(col_idxs, closest, ratio, lines): 153 | res = [cut_line(i, c, r, s) 154 | for i, c, r, s in zip(col_idxs, closest, ratio, lines)] 155 | 156 | return res 157 | 158 | def linear_referencing_vector(points:np.array, lines:np.array, cut=True, eps=1e-9): 159 | n_len = [len(i) for i in lines] 160 | n_cols = max(n_len) 161 | n_rows = len(lines) 162 | _lines, mask = lines_to_matrix(lines, n_rows, n_cols) 163 | 164 | segs = np.dstack([_lines[:, :-1][:,:,np.newaxis], 165 | _lines[:, 1:][:,:,np.newaxis]]) 166 | pq = segs[:, :, 1] - segs[:, :, 0] 167 | d = np.power(pq, 2).sum(axis=-1) 168 | len_np = np.sqrt(d) 169 | d[d == 0] = eps 170 | 171 | x, y = points[:, 0], points[:, 1] 172 | dx = x[:, np.newaxis] - segs[:, :, 0, 0] 173 | dy = y[:, np.newaxis] - segs[:, :, 0, 1] 174 | t = pq[:, :, 0] * dx + pq[:, :, 1] * dy 175 | 176 | ratios = t / d 177 | ratios[ratios < 0] = 0 178 | ratios[ratios > 1] = 1 179 | 180 | offset = pq * ratios[:, :, np.newaxis] # (n, l, 2) 181 | closests = offset + segs[:, :, 0] 182 | dists = np.linalg.norm(points[:, np.newaxis] - closests, axis=-1) 183 | 184 | col_idxs = np.argmin(dists, axis=1) 185 | row_idxs = np.arange(n_rows) 186 | 187 | cp = closests[row_idxs, col_idxs] 188 | r = ratios[row_idxs, col_idxs] 189 | dist_p2c = dists[row_idxs, col_idxs] 190 | 191 | sum_mask = np.zeros((n_rows, n_cols-1), dtype=np.bool_) 192 | for i, col in enumerate(col_idxs): 193 | sum_mask[i, :col] = True 194 | 195 | offset = np.sum(len_np, axis=1, where=sum_mask) + len_np[row_idxs, col_idxs] * r 196 | 197 | res = {} 198 | res['proj_point'] = [i for i in cp] 199 | res['dist_p2c'] = dist_p2c 200 | 201 | if not cut: 202 | res['offset'] = offset 203 | else: 204 | # TODO normalized = True 205 | tmp = numba_cut_lines(col_idxs, cp, r, lines) 206 | seg_0, seg_1 = list(zip(*tmp)) 207 | res['seg_0'] = seg_0 208 | res['seg_1'] = seg_1 209 | res['len_0'] = offset 210 | res['len_1'] = len_np.sum(axis=1) - offset 211 | 212 | return res 213 | 214 | def linear_referencing_geom(point_geoms, line_geoms, cut=True, eps=1e-9): 215 | _points = points_geoseries_2_ndarray(point_geoms) 216 | _lines = geoseries_to_coords(line_geoms) 217 | 218 | res = linear_referencing_vector(_points, _lines, cut, eps) 219 | 220 | return res 221 | 222 | 223 | if __name__ == "__main__": 224 | polyline = LineString([[-1,0], [0, 0], [1,1], [1, 1], [2,3]]) 225 | point = Point([-0.5, 1]) 226 | res = linear_referencing(point, polyline) 227 | 228 | # case 0 229 | point = Point([-0.5, 1]) 230 | _check(point, polyline) 231 | res = linear_referencing(point, polyline) 232 | plot(point, polyline, res) 233 | 234 | # case 1 235 | point = Point([-1.5, .5]) 236 | _check(point, polyline) 237 | res = linear_referencing(point, polyline) 238 | plot(point, polyline, res) 239 | 240 | # case 2 241 | point = Point([2.2, 3.5]) 242 | _check(point, polyline) 243 | res = linear_referencing(point, polyline) 244 | plot(point, polyline, res) 245 | 246 | # case 3 247 | point = Point([0.5, 1]) 248 | # _check(point, polyline) 249 | res = linear_referencing(point, polyline) 250 | plot(point, polyline, res); 251 | 252 | # case 4 253 | point = Point([-.1, 1.2]) 254 | polyline = LineString([[0, 0], [0, 1], [1,1]]) 255 | res 
= linear_referencing(point, polyline)
256 |     plot(point, polyline, res)
257 | 
258 | 
259 |     from shapely import wkt
260 |     # case 0
261 |     point = wkt.loads('POINT (113.934194 22.577979)')
262 |     # case 1
263 |     point = wkt.loads('POINT (113.934144 22.577979)')
264 | 
265 |     # case 0, the Chuangke Rd / Dashi 2nd Rd intersection
266 |     polyline = wkt.loads("LINESTRING (113.934186 22.57795, 113.934227 22.577982, 113.934274 22.578013, 113.934321 22.578035, 113.934373 22.578052, 113.934421 22.57806, 113.93448 22.578067)")
267 | 
268 |     res = linear_referencing(point, polyline)
269 |     plot(point, polyline, res)
270 | 
--------------------------------------------------------------------------------
/mapmatching/geo/ops/point2line.py:
--------------------------------------------------------------------------------
1 | import numba
2 | import numpy as np
3 | import shapely
4 | from shapely import Point, LineString
5 | import geopandas as gpd
6 | from geopandas import GeoDataFrame
7 | 
8 | from .distance import cal_coords_seq_distance, cal_pointwise_distance_geoseries
9 | 
10 | 
11 | @numba.jit
12 | def get_first_index(arr, val):
13 |     """Efficiently return the index of the first segment whose cumulative length covers `val`
14 |     Refs: https://blog.csdn.net/weixin_39707612/article/details/111457329;
15 |     Runtime: 0.279 us, vs. 1.91 us for np.argmax(arr > val)[0]
16 | 
17 |     Args:
18 |         arr (np.array): Numpy arr
19 |         val (float): value
20 | 
21 |     Returns:
22 |         int: The first index that covers `val`
23 |     """
24 |     for i in range(len(arr)):
25 |         if arr[i] >= val:
26 |             return i + 1
27 |         val -= arr[i]
28 | 
29 |     return -1
30 | 
31 | def project_point_2_linestring(point: Point, line: LineString, normalized: bool = True):
32 |     dist = line.project(point, normalized)
33 |     proj_point = line.interpolate(dist, normalized)
34 | 
35 |     return proj_point, dist
36 | 
37 | def cut_linestring(line: LineString, offset: float, point: Point = None, normalized=False):
38 |     _len = 1 if normalized else line.length
39 |     coords = np.array(line.coords)
40 | 
41 |     if offset <= 0:
42 |         res = {"seg_0": None, "seg_1": coords}
43 |     elif offset >= _len:
44 |         res = {"seg_0": coords, "seg_1": None}
45 |     else:
46 |         # points = np.array([Point(*i) for i in coords])
47 |         # dist_intervals = line.project(points, normalized)
48 |         dist_arr, _ = cal_coords_seq_distance(coords)
49 | 
50 |         idx = get_first_index(dist_arr, offset)
51 |         pd = np.sum(dist_arr[:idx])
52 |         if pd == offset:
53 |             coords_0 = coords[:idx + 1]
54 |             coords_1 = coords[idx:]
55 |         else:
56 |             if point is None:
57 |                 point = line.interpolate(offset, normalized)
58 |             cp = np.array(point.coords)
59 |             coords_0 = np.concatenate([coords[:idx], cp])
60 |             coords_1 = np.concatenate([cp, coords[idx:]])
61 | 
62 |         res = {'seg_0': coords_0, 'seg_1': coords_1}
63 | 
64 |     res['seg_0'] = LineString(res['seg_0'])
65 |     res['seg_1'] = LineString(res['seg_1'])
66 | 
67 |     return res
68 | 
69 | def test_cut_linestring(line, point):
70 |     # test: project_point_2_linestring
71 |     cp, dist = project_point_2_linestring(point, line, normalized=False)
72 |     data = {'name': ['point', 'line', 'cp'],
73 |             'geometry': [point, line, cp]
74 |             }
75 |     ax = gpd.GeoDataFrame(data).plot(column='name', alpha=.5)
76 | 
77 |     # test: cut_linestring
78 |     res = cut_linestring(line, dist); seg_0, seg_1 = res['seg_0'], res['seg_1']
79 |     data = {'name': ['ori', 'seg_0', 'seg_1'],
80 |             'geometry': [line, seg_0, seg_1]
81 |             }
82 |     gpd.GeoDataFrame(data).plot(column='name', legend=True, linestyle="--", ax=ax)
83 | 
84 | def project_points_2_linestrings(points: GeoDataFrame, lines: GeoDataFrame,
85 |                                  normalized: bool = True, drop_ori_geom=True,
86 |                                  keep_attrs: list = ['eid', 'geometry'], precision=1e-7,
87 |                                  ll=True, cal_dist=True):
"""projects points to the nearest linestring 89 | 90 | Args: 91 | panos (GeoDataFrame | GeoSeries): Points 92 | paths (GeoDataFrame | GeoSeries): Edges 93 | keep_attrs (list, optional): _description_. Defaults to ['eid', 'geometry']. 94 | drop_ori_geom (bool, optional): Drop the origin point and line geometry. Defaults to True. 95 | 96 | Returns: 97 | GeoDataFrame: The GeoDataFrame of projected points with `proj_point`, `offset` 98 | 99 | Example: 100 | ``` 101 | import geopandas as gpd 102 | from shapely import Point, LineString 103 | 104 | points = gpd.GeoDataFrame( 105 | geometry=[ 106 | Point(113.93195659801206, 22.575930582940785), 107 | Point(113.93251505775076, 22.57563203614608), 108 | Point(113.93292030671412, 22.575490522559665), 109 | Point(113.93378178962489, 22.57534631453745) 110 | ] 111 | ) 112 | 113 | lines = gpd.GeoDataFrame({ 114 | "eid": [63048, 63935], 115 | "geometry": [ 116 | LineString([(113.9319709, 22.5759509), (113.9320297, 22.5759095), (113.9321652, 22.5758192), (113.9323286, 22.575721), (113.9324839, 22.5756433), (113.9326791, 22.5755563), (113.9328524, 22.5754945), (113.9330122, 22.5754474), (113.933172, 22.5754073), (113.9333692, 22.5753782), (113.9334468, 22.5753503), (113.9335752, 22.5753413), (113.9336504, 22.5753383)]), 117 | LineString([(113.9336504, 22.5753383), (113.9336933, 22.5753314), (113.9337329, 22.5753215), (113.9337624, 22.5753098), (113.933763, 22.5753095)])] 118 | }) 119 | 120 | prod_ps = project_points_2_linestrings(points.geometry, lines) 121 | _, ax = plot_geodata(prod_ps, color='red', label='proj', marker='*') 122 | lines.plot(ax=ax, label='lines') 123 | points.plot(ax=ax, label='points', alpha=.5) 124 | ax.legend() 125 | ``` 126 | """ 127 | proj_df = points.geometry.apply(lambda x: lines.loc[lines.distance(x).idxmin(), keep_attrs])\ 128 | .rename(columns={"geometry": 'edge_geom'}) 129 | 130 | att_lst = ['proj_point', 'offset'] 131 | proj_df.loc[:, 'point_geom'] = points.geometry 132 | proj_df.loc[:, att_lst] = proj_df.apply( 133 | lambda x: project_point_2_linestring( 134 | x.point_geom, x.edge_geom, normalized), 135 | axis=1, result_type='expand' 136 | ).values 137 | 138 | proj_df.loc[:, 'dist_p2c'] = cal_pointwise_distance_geoseries(proj_df['point_geom'], proj_df['proj_point']) 139 | 140 | if drop_ori_geom: 141 | proj_df.drop(columns=['point_geom', 'edge_geom'], inplace=True) 142 | 143 | return gpd.GeoDataFrame(proj_df).set_geometry('proj_point') 144 | 145 | 146 | """ decrapted """ 147 | def get_foot_point(point, line_p1, line_p2): 148 | """ 149 | @point, line_p1, line_p2 : [x, y, z] 150 | """ 151 | x0 = point[0] 152 | y0 = point[1] 153 | # z0 = point[2] 154 | 155 | x1 = line_p1[0] 156 | y1 = line_p1[1] 157 | # z1 = line_p1[2] 158 | 159 | x2 = line_p2[0] 160 | y2 = line_p2[1] 161 | # z2 = line_p2[2] 162 | assert not (x1 == x2 and y1 == y2), f"check line {line_p1}, {line_p2}" 163 | # k = -((x1 - x0) * (x2 - x1) + (y1 - y0) * (y2 - y1) + (z1 - z0) * (z2 - z1)) / \ 164 | # ((x2 - x1) ** 2 + (y2 - y1) ** 2 + (z2 - z1) ** 2)*1.0 165 | k = -((x1 - x0) * (x2 - x1) + (y1 - y0) * (y2 - y1)) / ((x2 - x1) ** 2 + (y2 - y1) ** 2 )*1.0 166 | xn = k * (x2 - x1) + x1 167 | yn = k * (y2 - y1) + y1 168 | # zn = k * (z2 - z1) + z1 169 | 170 | return (round(xn, 6), round(yn, 6)) 171 | 172 | def relation_bet_point_and_line( point, line ): 173 | """Judge the realtion between point and the line, there are three situation: 174 | 1) the foot point is on the line, the value is in [0,1]; 175 | 2) the foot point is on the extension line of segment AB, near the 
176 |     3) the foot point is on the extension line of segment AB, near the ending point, the value > 1;
177 | 
178 |     Args:
179 |         point ([double, double]): point coordinates
180 |         line ([x0, y0, x1, y1]): line coordinates
181 | 
182 |     Returns:
183 |         [float]: the relation between the point and the line (start < 0 <= on the segment <= 1 < end)
184 |     """
185 |     pqx = line[2] - line[0]
186 |     pqy = line[3] - line[1]
187 |     dx = point[0] - line[0]
188 |     dy = point[1] - line[1]
189 | 
190 |     d = pow(pqx, 2) + pow(pqy, 2)
191 |     t = pqx * dx + pqy * dy
192 | 
193 |     flag = 1
194 |     if (d > 0):
195 |         t = t / d
196 |         flag = t
197 | 
198 |     return flag
199 | 
200 | def cal_foot_point_on_polyline(point: Point, line: LineString, foot=True, ratio_thres=.0):
201 |     """Determine whether the foot point lies on the line
202 | 
203 |     Args:
204 |         point (list): coordinates (x, y)
205 |         line (pd.Series): [description]
206 |         ratio_thres (float, optional): [ratio threshold]. Defaults to 0.0.
207 | 
208 |     Returns:
209 |         [bool]: whether it is located on the lane or not
210 |     """
211 |     line_ = line.coords[0] + line.coords[-1]
212 |     factor = relation_bet_point_and_line((point.x, point.y), line_)
213 |     flag = 0 - ratio_thres <= factor <= 1 + ratio_thres
214 | 
215 |     if foot:
216 |         _foot = get_foot_point((point.x, point.y), line.coords[0], line.coords[-1])
217 |         return {'flag': factor, 'foot': _foot}
218 | 
219 |     return flag
220 | 
221 | 
222 | if __name__ == "__main__":
223 |     line = LineString([(0, 0), (0, 1), (1, 1)])
224 | 
225 |     test_cut_linestring(line, Point((0.5, 0)))
226 |     test_cut_linestring(line, Point((0, 1)))
227 |     test_cut_linestring(line, Point((1.1, 1.5)))
228 | 
229 | 
--------------------------------------------------------------------------------
/mapmatching/geo/ops/resample.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import geopandas as gpd
4 | from shapely.geometry import Point
5 | 
6 | from .distance import cal_points_geom_seq_distacne
7 | 
8 | 
9 | def resample_point_seq(points, step=2, last=True):
10 |     # TODO linear referencing + speedup
11 |     points = points[~(points == points.shift(1))]
12 |     if points.shape[0] == 1:
13 |         return gpd.GeoDataFrame(points), np.array([points.iloc[0].coords[0]])
14 | 
15 |     dist, coords = cal_points_geom_seq_distacne(points)
16 |     dxdy = coords[1:] - coords[:-1]
17 | 
18 |     cum_dist = np.cumsum(dist)
19 |     cum_dist = np.concatenate([[0], cum_dist])
20 |     samples = np.arange(0, cum_dist[-1], step)
21 |     seg_ids = pd.cut(samples, bins=cum_dist, labels=range(len(dist)), right=False)
22 | 
23 |     samples_lst = []
24 |     samples_coords = []
25 |     for s, idx in zip(samples, seg_ids):
26 |         ratio = (s - cum_dist[idx]) / dist[idx]
27 |         xy = coords[idx] + dxdy[idx] * ratio
28 |         samples_coords.append(xy)
29 |         samples_lst.append({"seg_idx": idx, "offset": s, "geometry": Point(xy)})
30 |     if last:
31 |         samples_lst.append({"seg_idx": len(dist) - 1, "offset": dist[-1], "geometry": Point(coords[-1])})
32 |         samples_coords.append(coords[-1])
33 | 
34 |     df_samples = gpd.GeoDataFrame(samples_lst)
35 | 
36 |     return df_samples, np.array(samples_coords)
37 | 
38 | def resample_polyline_seq_to_point_seq(polyline, step=2, last=True):
39 |     coords = np.concatenate(polyline.apply(lambda x: x.coords).values)
40 | 
41 |     mask = np.sum(coords[:-1] == coords[1:], axis=1) == 2
42 |     mask = np.concatenate([mask, [False]])
43 |     geoms = gpd.GeoSeries([Point(i) for i in coords[~mask]])
44 | 
45 |     return resample_point_seq(geoms, step, last)
46 | 
47 | 
48 | if __name__ == "__main__":
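    # Smoke test: resample the head of a sample trajectory at the default step=2
    # (meters here, since the distances come from cal_points_geom_seq_distacne).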
49 |     df = gpd.read_file('./data/trajs/traj_9.geojson').head(20)
50 | 
51 |     df_samples, coords = resample_point_seq(df.geometry)
52 | 
53 |     df.plot(color='r', alpha=.1)
54 | 
55 | 
--------------------------------------------------------------------------------
/mapmatching/geo/ops/simplify.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import geopandas as gpd
3 | from .distance import get_vertical_dist
4 | 
5 | 
6 | def dp_compress(point_list, dist_thres=8, verbose=False):
7 |     """The Douglas-Peucker compression algorithm.
8 | 
9 |     Args:
10 |         point_list (lst): The ordered coordinates [(x1, y1, id1), (x2, y2, id2), ..., (xn, yn, idn)]
11 |         dist_thres (int, optional): The max distance (Unit: meters). Defaults to 8.
12 |         verbose (bool, optional): Print compression statistics. Defaults to False.
13 |     """
14 |     def _dfs(point_list, start, end, res, dist_max):
15 |         # start, end = 0, len(point_list)-1
16 |         if start >= end:
17 |             return
18 | 
19 |         res.append(point_list[start])
20 |         res.append(point_list[end])
21 | 
22 |         if start < end:
23 |             index = start + 1
24 |             max_vertical_dist = 0
25 |             key_point_index = 0
26 | 
27 |             while(index < end):
28 |                 cur_vertical_dist = get_vertical_dist(
29 |                     point_list[index][:2],
30 |                     point_list[start][:2],
31 |                     point_list[end][:2],
32 |                     ll=False
33 |                 )
34 |                 if cur_vertical_dist > max_vertical_dist:
35 |                     max_vertical_dist = cur_vertical_dist
36 |                     key_point_index = index
37 |                 index += 1
38 | 
39 |             if max_vertical_dist >= dist_max:
40 |                 _dfs(point_list, start, key_point_index, res, dist_max)
41 |                 _dfs(point_list, key_point_index, end, res, dist_max)
42 | 
43 |     res = []
44 |     _dfs(point_list, 0, len(point_list) - 1, res, dist_thres)
45 | 
46 |     res = list(set(res))
47 |     res = sorted(res, key=lambda x: x[2])
48 | 
49 |     if verbose:
50 |         print(f"Compression rate {len(res)/len(point_list)*100:.2f}% (={len(res)}/{len(point_list)}), "
51 |               f"mean error: {get_MeanErr(point_list, res):.2f}")
52 | 
53 |     return res
54 | 
55 | 
56 | def get_MeanErr(point_list, output_point_list):
57 |     Err = 0
58 |     start, end = 0, len(output_point_list) - 1
59 | 
60 |     while(start < end):
61 |         pointA_id = int(output_point_list[start][2])
62 |         pointB_id = int(output_point_list[start + 1][2])
63 | 
64 |         id = pointA_id + 1
65 |         while(id < pointB_id):
66 |             Err += get_vertical_dist(point_list[id][:2], output_point_list[start][:2], output_point_list[start + 1][:2])
67 |             id += 1
68 |         start += 1
69 | 
70 |     return Err / len(point_list)
71 | 
72 | 
73 | def dp_compress_for_points(df, dist_thres=10, verbose=False, reset_index=True):
74 |     traj = df.copy()
75 |     traj.loc[:, 'pid_order'] = traj.index
76 |     point_lst = traj.apply(lambda x: (x.geometry.x, x.geometry.y, x.pid_order), axis=1).values.tolist()
77 |     point_lst = dp_compress(point_lst, dist_thres, verbose)
78 | 
79 |     if reset_index:
80 |         return traj.loc[[i[2] for i in point_lst]].reset_index()
81 | 
82 |     return traj.loc[[i[2] for i in point_lst]]
83 | 
84 | 
85 | def simplify_trajetory_points(points: gpd.GeoDataFrame, tolerance: int = None, inplace=False, logger=None):
86 |     """The algorithm (Douglas-Peucker) recursively splits the original line into smaller parts
87 |     and connects these parts' endpoints by a straight line. Then, it removes all points whose
88 |     distance to the straight line is smaller than tolerance. It does not move any points and
89 |     it always preserves endpoints of the original line or polygon.
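    (Internally this delegates to `dp_compress_for_points`, with `tolerance` passed
    through as its `dist_thres` argument.)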
90 | 
91 |     Args:
92 |         points (gpd.GeoDataFrame): The trajectory points to simplify.
93 |         tolerance (int, optional): The compression threshold (Unit: meter). Defaults to None.
94 |         inplace (bool, optional): Modify `points` in place. Defaults to False.
95 | 
96 |     Returns:
97 |         gpd.GeoDataFrame: The simplified points.
98 |     """
99 |     ori_size = points.shape[0]
100 |     if ori_size == 1:
101 |         return points
102 | 
103 |     points = points if inplace else points.copy()
104 |     points = dp_compress_for_points(points, dist_thres=tolerance)
105 | 
106 |     if ori_size == 2:
107 |         if points.iloc[0].geometry.distance(points.iloc[1].geometry) < 1e-6:
108 |             points = points.head(1)
109 |             if logger:
110 |                 logger.info(
111 |                     f"Trajectory only has one point or all the same points.")
112 |         return points
113 | 
114 |     if logger:
115 |         logger.debug(
116 |             f"Trajectory compression rate: {points.shape[0]/ori_size*100:.1f}% ({ori_size} -> {points.shape[0]})")
117 | 
118 |     return points
119 | 
120 | 
121 | if __name__ == '__main__':
122 |     point_list = []
123 |     output_point_list = []
124 | 
125 |     fd = open(r"./Dguiji.txt", 'r')
126 |     for line in fd:
127 |         line = line.strip()
128 |         id = int(line.split(",")[0])
129 |         longitude = float(line.split(",")[1])
130 |         latitude = float(line.split(",")[2])
131 |         point_list.append((longitude, latitude, id))
132 |     fd.close()
133 | 
134 |     output_point_list = dp_compress(point_list, dist_thres=8, verbose=True)
135 | 
136 |     import geopandas as gpd
137 |     traj = gpd.read_file("../traj_for_compress.geojson")
138 |     dp_compress_for_points(traj, 8, True)
139 | 
--------------------------------------------------------------------------------
/mapmatching/geo/ops/substring.py:
--------------------------------------------------------------------------------
1 | import shapely
2 | import numpy as np
3 | 
4 | 
5 | def substrings(linestring: np.ndarray, start_dist: float, end_dist: float, normalized=False) -> np.ndarray:
6 |     """Cut a linestring at two offset values
7 | 
8 |     Args:
9 |         linestring (np.ndarray): input line
10 |         start_dist (float): starting offset, distance to the start point of linestring
11 |         end_dist (float): ending offset, distance to the start point of linestring
12 |         normalized (bool, optional): If the normalized arg is True, the distance will be
13 |             interpreted as a fraction of the geometry's length. Defaults to False.
14 | 
15 |     Returns:
16 |         np.ndarray: a linestring containing only the part covering starting offset to ending offset
17 | 
18 |     Ref:
19 |         https://github.com/cyang-kth/fmm/blob/master/src/algorithm/geom_algorithm.cpp#L351-L417
20 |         https://shapely.readthedocs.io/en/stable/manual.html?highlight=substring#shapely.ops.substring
21 |     """
22 | 
23 |     raise NotImplementedError
24 | 
--------------------------------------------------------------------------------
/mapmatching/geo/ops/to_array.py:
--------------------------------------------------------------------------------
1 | import numba
2 | import numpy as np
3 | import geopandas as gpd
4 | 
5 | @numba.jit
6 | def points_geoseries_2_ndarray(geoms: gpd.GeoSeries):
7 |     return np.concatenate([np.array(i.coords) for i in geoms])
8 | 
9 | @numba.jit
10 | def geoseries_to_coords(geoms):
11 |     return [np.array(i.coords) for i in geoms]
12 | 
--------------------------------------------------------------------------------
/mapmatching/geo/query.py:
--------------------------------------------------------------------------------
1 | import shapely
2 | import warnings
3 | import numpy as np
4 | import geopandas as gpd
5 | from geopandas import GeoDataFrame
6 | from shapely import geometry as shapely_geom
7 | 
8 | from .ops.linear_referencing import linear_referencing_geom
9 | from ..utils import timeit
10 | 
11 | @timeit
12 | def get_k_neigh_geoms(query: GeoDataFrame, gdf: GeoDataFrame, query_id='qid',
13 |                       radius: float = 50, top_k=None, predicate: str = 'intersects',
14 |                       check_diff=True, project=True, keep_geom=True):
15 |     """
16 |     Get the k nearest geometries of the query within a search radius using the built-in spatial index.
17 | 
18 |     Args:
19 |         query (GeoDataFrame, GeoSeries, geometry): The query object.
20 |         gdf (GeoDataFrame): The base geometry.
21 |         query_id (str, optional): The index of the query object. Defaults to 'qid'.
22 |         radius (float, optional): The search radius. Defaults to 50 (in meters for the WGS system).
23 |         top_k (int, optional): The number of top-k elements to retrieve. Defaults to None (retrieve all).
24 |         predicate (str, optional): The predicate operation in geopandas. Defaults to 'intersects'.
25 |         check_diff (bool, optional): Check if there are no matching queries. Defaults to True.
26 |         project (bool, optional): Project the query object to gdf. Only supports Point geometries. Defaults to True.
27 |         keep_geom (bool, optional): Whether to keep the geometry columns in the result. Defaults to True.
28 | 
29 | 
30 |     Returns:
31 |         GeoDataFrame: The query result.
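            Each candidate row also carries a `dist_p2c` column; when `project=True`,
            projection attributes such as `proj_point` are added as well.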
32 | 
33 |     Example:
34 |         # Example usage 1
35 |         import geopandas as gpd
36 |         from mapmatching.geo.query import get_k_neigh_geoms
37 | 
38 |         traj = matcher.load_points("./data/trajs/traj_4.geojson").head(4)
39 |         query = traj[['PID', 'geometry']].head(1).copy()
40 |         gdf = net.df_edges[['eid', 'geometry']].copy()
41 | 
42 |         df_cands, no_cands_query = get_k_neigh_geoms(query, gdf, top_k=8)
43 |         plot_candidates(df_cands)
44 | 
45 |         # Example usage 2
46 |         import geopandas as gpd
47 |         from shapely import LineString, Point
48 |         from mapmatching.geo.query import plot_candidates, get_k_neigh_geoms
49 | 
50 |         lines = [LineString([[0, i], [10, i]]) for i in range(0, 10)]
51 |         lines += [LineString(([5.2, 5.2], [5.8, 5.8]))]
52 |         edges = gpd.GeoDataFrame({'geometry': lines,
53 |                                   'way_id': [i for i in range(10)] + [5]})
54 | 
55 |         a, b = Point(1, 1.1), Point(5, 5.1)
56 |         points = gpd.GeoDataFrame({'geometry': [a, b]}, index=[1, 3])
57 |         points.loc[:, 'PID'] = points.index
58 | 
59 |         res, _ = get_k_neigh_geoms(points, edges, radius=2, top_k=2)
60 |         ax = plot_candidates(res)
61 |     """
62 | 
63 |     # TODO: Determine appropriate index for gdf
64 | 
65 |     # Check spatial index
66 |     if not gdf.has_sindex:
67 |         try:
68 |             print("rebuild sindex")
69 |             gdf.sindex
70 |         except:
71 |             raise ValueError()
72 | 
73 |     # Prepare query
74 |     if isinstance(query, shapely_geom.base.BaseGeometry):
75 |         _query = gpd.GeoSeries([query])
76 |     elif isinstance(query, GeoDataFrame):
77 |         if query_id in list(query):
78 |             _query = query.set_index(query_id)['geometry']
79 |         else:
80 |             _query = query['geometry'].copy()
81 |             _query.index.set_names(query_id, inplace=True)
82 |     elif isinstance(query, gpd.GeoSeries):
83 |         _query = query.copy()
84 |         _query.index.set_names(query_id, inplace=True)
85 |     else:
86 |         raise TypeError(query)
87 | 
88 |     if _query.crs != gdf.crs:
89 |         _query = _query.to_crs(gdf.crs)
90 |         _query.index.set_names(query_id, inplace=True)
91 | 
92 |     # Query bulk
93 |     get_box = lambda i: shapely_geom.box(i.x - radius, i.y - radius, i.x + radius, i.y + radius)
94 |     query_geoms = _query.apply(get_box)
95 |     cands = gdf.sindex.query_bulk(query_geoms, predicate)
96 |     if len(cands[0]) == 0:
97 |         return None, None
98 | 
99 |     df_cands = _get_cands(_query, gdf, cands, query_id)
100 |     _project(df_cands, project)
101 | 
102 |     if radius:
103 |         df_cands.query(f"dist_p2c <= {radius}", inplace=True)
104 |     if top_k:
105 |         df_cands = _filter_candidate(df_cands, query_id, top_k)
106 | 
107 |     if not keep_geom:
108 |         df_cands.drop(columns=["query_geom", "edge_geom"], inplace=True)
109 | 
110 |     # Check difference
111 |     no_cands_query = None
112 |     if check_diff:
113 |         cands_pid = set(_query.index[cands[0]])
114 |         all_pid = set(_query.index.unique())
115 |         no_cands_query = all_pid.difference(cands_pid)
116 |         if no_cands_query: warnings.warn(f"{no_cands_query} has no neighbors within the {radius} search zone.")
117 | 
118 |     return df_cands.set_geometry('edge_geom').set_crs(gdf.crs), no_cands_query
119 | 
120 | 
121 | @timeit
122 | def _get_cands(_query, gdf, cands, query_id):
123 |     _points = _query.iloc[cands[0]]
124 | 
125 |     df_cands = gdf.iloc[cands[1]]
126 |     df_cands.rename(columns={'geometry': 'edge_geom'}, inplace=True)
127 |     df_cands.loc[:, query_id] = _points.index
128 |     df_cands.loc[:, "query_geom"] = _points.values
129 | 
130 |     return df_cands
131 | 
132 | @timeit
133 | def _project(df_cands, project=True):
134 |     # dist_p2c
135 |     if not project:
136 |         cal_proj_dist = lambda x: x['query_geom'].distance(x['edge_geom'])
137 |         df_cands.loc[:, 'dist_p2c'] = df_cands.apply(cal_proj_dist, axis=1)
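        # in the non-project branch only the plain point-to-edge distance is recorded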
138 | 
139 |         return df_cands
140 | 
141 |     df_projs = linear_referencing_geom(df_cands['query_geom'], df_cands['edge_geom'])
142 |     df_cands.loc[:, df_projs.keys()] = df_projs.values()
143 |     # df_cands = gpd.GeoDataFrame(df_cands, crs=gdf.crs, geometry='proj_point')
144 | 
145 |     return df_cands
146 | 
147 | 
148 | def plot_candidates(cands):
149 |     # TODO draw buffer
150 |     from ..geo.vis import plot_geodata
151 |     _, ax = plot_geodata(cands, color='r', tile_alpha=.6, alpha=0)
152 | 
153 |     cands.set_geometry('edge_geom').plot(ax=ax, column='dist_p2c', cmap='Reds_r', legend='candidates')
154 |     if 'proj_point' in list(cands):
155 |         cands.loc[:, 'proj_point'] = cands['proj_point'].apply(shapely.Point)
156 |         cands.set_geometry('proj_point').plot(ax=ax, cmap='Reds_r')
157 |     cands.set_geometry('query_geom').plot(ax=ax, marker='*', label='Point', zorder=9)
158 | 
159 |     return ax
160 | 
161 | @timeit
162 | def _filter_candidate(df: gpd.GeoDataFrame,
163 |                       pid: str = 'pid',
164 |                       top_k: int = 5,
165 |                       ):
166 |     """Filter candidates that belong to the same way, and pick up the nearest ones.
167 | 
168 |     Args:
169 |         df (gpd.GeoDataFrame): df candidates.
170 |         top_k (int, optional): The number of candidates to keep per group. Defaults to 5.
171 |         pid (str, optional): The column used to group candidates. Defaults to 'pid'.
172 | 
173 |     Returns:
174 |         gpd.GeoDataFrame: The filtered candidates.
175 |     """
176 |     # origin_size = df.shape[0]
177 |     df = df.sort_values([pid, 'dist_p2c'])\
178 |            .groupby(pid)\
179 |            .head(top_k)\
180 |            .reset_index(drop=True)
181 | 
182 |     return df
183 | 
--------------------------------------------------------------------------------
/mapmatching/geo/vis/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 |     TILEMAP_FLAG = True
3 |     from tilemap import plot_geodata, add_basemap
4 | except ImportError:
5 |     TILEMAP_FLAG = False
6 | 
7 |     def plot_geodata(data, *args, **kwargs):
8 |         return None, data.plot()
9 | 
10 |     def add_basemap(ax, *args, **kwargs):
11 |         return ax
12 | 
--------------------------------------------------------------------------------
/mapmatching/geo/vis/linestring.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def plot_linestring_with_arrows(gdf_line, ax, color='red'):
5 |     coord_arrs = gdf_line.geometry.apply(lambda x: np.array(x.coords))
6 |     gdf_line.plot(ax=ax, color=color)
7 | 
8 |     for coords in coord_arrs:
9 |         # refs: https://wizardforcel.gitbooks.io/matplotlib-user-guide/content/4.5.html
10 |         mid = coords.shape[0] // 2
11 |         ax.annotate('', xy=(coords[mid+1] + coords[mid]) / 2, xytext=coords[mid],
12 |                     arrowprops=dict(arrowstyle="-|>", color=color),
13 |                     zorder=9
14 |                     )
15 | 
16 |     return
17 | 
--------------------------------------------------------------------------------
/mapmatching/geo/vis/point.py:
--------------------------------------------------------------------------------
1 | import math
2 | from . import plot_geodata
3 | 
4 | def plot_points_with_dir(points, heading=None, arrowprops=dict(facecolor='blue', shrink=0.05, alpha=0.6)):
5 |     """plot points with dirs
6 | 
7 |     Args:
8 |         points (gpd.GeoDataFrame): The point geometries to plot.
9 |         heading (float | list, optional): Heading angle(s) in degrees, clockwise from north. Defaults to None.
10 |         arrowprops (dict, optional): Matplotlib annotate arrow properties. Defaults to dict(facecolor='blue', shrink=0.05, alpha=0.6).
11 | 
12 |     Returns:
13 |         matplotlib.axes.Axes: The axes the points were drawn on.
14 | 
15 |     Example:
16 |         ```
17 |         import shapely
18 |         import geopandas as gpd
19 |         from mapmatching.geo.vis.point import plot_points_with_dir
20 | 
21 |         gdf = gpd.GeoDataFrame({'geometry': [shapely.Point((113.912154, 22.784351))]})
22 |         plot_points_with_dir(gdf, 347)
23 |         ```
24 |     """
25 |     types = points.geom_type.unique()
26 |     assert len(types) == 1 and types[0] == "Point", "check points geom_type"
27 |     fig, ax = plot_geodata(points, zorder=2)
28 | 
29 |     if heading is None:
30 |         return ax
31 | 
32 |     if isinstance(heading, (int, float)):
33 |         heading = [heading] * points.shape[0]
34 | 
35 |     for i, geom in enumerate(points.geometry):
36 |         x, y = geom.coords[0]
37 |         x0, x1 = ax.get_xlim()
38 |         aux_line_len = (x1 - x0) / 12
39 |         dy, dx = math.cos(heading[i]/180*math.pi) * aux_line_len, math.sin(heading[i]/180*math.pi) * aux_line_len
40 |         ax.annotate('', xy=(x+dx, y+dy), xytext=(x, y), arrowprops=arrowprops, zorder=1)
41 | 
42 |     return ax
43 | 
44 | 
--------------------------------------------------------------------------------
/mapmatching/graph/__init__.py:
--------------------------------------------------------------------------------
1 | from .geograph import GeoDigraph
--------------------------------------------------------------------------------
/mapmatching/graph/astar.py:
--------------------------------------------------------------------------------
1 | import heapq
2 | import numpy as np
3 | from loguru import logger
4 | from collections import deque
5 | from haversine import haversine, Unit
6 | 
7 | 
8 | def calculate_nodes_dist(nodes: dict, src: int, dst: int, memo: dict = {}, ll=True):
9 |     assert src in nodes and dst in nodes, "Check the input o and d."
10 |     if (src, dst) in memo:
11 |         return memo[(src, dst)]
12 | 
13 |     if ll:
14 |         _src = nodes[src]
15 |         _dst = nodes[dst]
16 |         _len = haversine(
17 |             (_src['y'], _src['x']),
18 |             (_dst['y'], _dst['x']),
19 |             unit=Unit.METERS
20 |         )
21 |     else:
22 |         _len = nodes[src]['geometry'].distance(nodes[dst]['geometry'])
23 |     memo[(src, dst)] = _len
24 |     return _len
25 | 
26 | 
27 | class PathPlanning:
28 |     def __init__(self, graph: dict, nodes: dict,
29 |                  search_memo: dict = {}, nodes_dist_memo: dict = {},
30 |                  max_steps: int = 2000, max_dist: int = 10000, level='debug', ll=True):
31 | 
32 |         self.graph = graph
33 |         self.nodes = nodes
34 |         self.search_memo = search_memo
35 |         self.nodes_dist_memo = nodes_dist_memo
36 |         self.max_steps = max_steps
37 |         self.max_dist = max_dist
38 |         self.level = level
39 |         self.ll = ll
40 | 
41 |     def has_edge(self, src, dst):
42 |         if src in self.graph and dst in self.graph:
43 |             return True
44 | 
45 |         info = f"Trip ({src}, {dst})" + \
46 |                f"{', `src` not in graph' if src not in self.graph else ', '}" + \
47 |                f"{', `dst` not in graph' if dst not in self.graph else ''}"
48 | 
49 |         getattr(logger, self.level)(info)
50 | 
51 |         return False
52 | 
53 |     def search(self, src, dst):
54 |         raise NotImplementedError
55 | 
56 |     def reconstruct_path(self):
57 |         raise NotImplementedError
58 | 
59 | 
60 | class Astar(PathPlanning):
61 |     def __init__(self, graph: dict, nodes: dict,
62 |                  search_memo: dict = {}, nodes_dist_memo: dict = {},
63 |                  max_steps: int = 2000, max_dist: int = 10000, level='debug', ll=True):
64 |         super().__init__(graph, nodes, search_memo, nodes_dist_memo, max_steps, max_dist, level, ll)
65 | 
66 |     def search(self, src, dst, max_steps=None, max_dist=None, weight='cost'):
67 |         if src == dst:
68 |             return {'status': 0, 'vpath': [src], 'cost': 0}
69 | 
70 |         if (src, dst) in self.search_memo:
71 |             res = self.search_memo[(src, dst)]
self.search_memo[(src, dst)] 72 | return res 73 | 74 | if not self.has_edge(src, dst): 75 | return {"status": 1, 'vpath': [], 'cost': np.inf} 76 | 77 | # init 78 | queue = [(0, src)] 79 | came_from = {src: None} 80 | distance = {src: 0} 81 | step_counter = 0 82 | 83 | max_steps = self.max_steps if max_steps is None else max_steps 84 | max_dist = self.max_dist if max_dist is None else max_dist 85 | 86 | # searching 87 | while queue: 88 | _, cur = heapq.heappop(queue) 89 | if cur == dst or step_counter > max_steps: 90 | break 91 | 92 | for nxt, attrs in self.graph[cur].items(): 93 | if nxt not in self.graph: 94 | continue 95 | 96 | new_cost = distance[cur] + attrs[weight] 97 | if nxt in distance and new_cost >= distance[nxt]: 98 | continue 99 | 100 | distance[nxt] = new_cost 101 | if distance[nxt] > max_dist: 102 | continue 103 | 104 | _h = calculate_nodes_dist(self.nodes, dst, nxt, self.nodes_dist_memo, self.ll) 105 | heapq.heappush(queue, (new_cost + _h, nxt) ) 106 | came_from[nxt] = cur 107 | 108 | step_counter += 1 109 | 110 | # abnormal situation 111 | if cur != dst: 112 | res = {"status": 2, 'vpath': [], 'cost': np.inf} 113 | self.search_memo[(src, dst)] = res 114 | return res 115 | 116 | # reconstruct path 117 | path = self.reconstruct_path(dst, came_from) 118 | res = {'status': 0, 'vpath': path, 'cost': distance[dst]} 119 | self.search_memo[(src, dst)] = res 120 | 121 | return res 122 | 123 | def reconstruct_path(self, dst, came_from): 124 | route, queue = [dst], deque([dst]) 125 | while queue: 126 | node = queue.popleft() 127 | if came_from[node] is None: 128 | continue 129 | route.append(came_from[node]) 130 | queue.append(came_from[node]) 131 | 132 | return route[::-1] 133 | 134 | 135 | if __name__ == "__main__": 136 | from stmm.graph import GeoDigraph 137 | network = GeoDigraph() 138 | network.load_checkpoint(ckpt='../../data/network/Shenzhen_graph_pygeos.ckpt') 139 | # network.to_postgis('shenzhen') 140 | 141 | from tqdm import tqdm 142 | from stmm.utils.serialization import load_checkpoint 143 | astar_search_memo = load_checkpoint('../../data/debug/astar_search_memo.pkl') 144 | -------------------------------------------------------------------------------- /mapmatching/graph/base.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from collections import defaultdict 4 | 5 | class Node: 6 | """ 7 | Define the node in the road network 8 | """ 9 | 10 | def __init__(self, id): 11 | self.val = id 12 | self.x, self.y = [float(i) for i in id.split(',')] 13 | self.prev = set() 14 | self.nxt = set() 15 | self.indegree = 0 16 | self.outdegree = 0 17 | 18 | def add(self, point): 19 | self.nxt.add(point) 20 | self.outdegree += 1 21 | 22 | point.prev.add(self) 23 | point.indegree += 1 24 | 25 | def check_0_out_more_2_in(self): 26 | return self.outdegree == 0 and self.indegree >= 2 27 | 28 | def move_nxt_to_prev(self, node): 29 | if node not in self.nxt: 30 | return False 31 | 32 | self.nxt.remove(node) 33 | self.prev.add(node) 34 | self.indegree += 1 35 | self.outdegree -= 1 36 | return True 37 | 38 | def move_prev_to_nxt(self, node): 39 | if node not in self.prev: 40 | return False 41 | 42 | self.prev.remove(node) 43 | self.nxt.add(node) 44 | self.indegree -= 1 45 | self.outdegree += 1 46 | return True 47 | 48 | 49 | class Digraph: 50 | def __init__(self, edges:list=None, nodes:dict=None, *args, **kwargs): 51 | """[summary] 52 | 53 | Args: 54 | edges (list, optional): Shape: (N, 2/3). Defaults to None. 
55 |         nodes (dict, optional): The node attributes, keyed by node id. Defaults to None.
56 |         """
57 |         self.graph = {}
58 |         self.graph_r = {}
59 |         self.edges = {}
60 |         self.nodes = {}
61 | 
62 |         self.eid_2_od = {}
63 |         self.max_eid = 0
64 | 
65 |         if edges is not None:
66 |             self.build_graph(edges)
67 | 
68 |         if nodes is not None:
69 |             assert isinstance(nodes, dict), "Check the Node format"
70 |             self.nodes = nodes
71 | 
72 |         self.calculate_degree()
73 | 
74 |     def __str__(self):
75 |         return ""
76 | 
77 |     def add_edge(self, start, end, length=None):
78 |         for p in [start, end]:
79 |             for g in [self.graph, self.graph_r]:
80 |                 if p in g:
81 |                     continue
82 |                 g[p] = {}
83 | 
84 |         self.graph[start][end] = {"eid": self.max_eid, "cost": length}
85 |         self.graph_r[end][start] = {"eid": self.max_eid, "cost": length}
86 |         self.eid_2_od[self.max_eid] = (start, end)
87 |         self.max_eid += 1
88 | 
89 |         if length is not None:
90 |             self.edges[(start, end)] = length
91 | 
92 | 
93 | 
94 |     def remove_edge(self, start, end):
95 |         eid = self.get_eid(start, end)
96 |         if eid is not None:
97 |             del self.eid_2_od[eid]
98 | 
99 |         del self.graph[start][end]
100 |         if len(self.graph[start]) == 0:
101 |             del self.graph[start]
102 | 
103 |         del self.graph_r[end][start]
104 |         if len(self.graph_r[end]) == 0:
105 |             del self.graph_r[end]
106 | 
107 |         return True
108 | 
109 |     def get_eid(self, src, dst):
110 |         item = self.graph.get(src, None)
111 |         if item is None:
112 |             return None
113 | 
114 |         r = item.get(dst, None)
115 |         if r is None:
116 |             return None
117 | 
118 |         return r.get('eid', None)
119 | 
120 |     def build_graph(self, edges):
121 |         for edge in edges:
122 |             start, end, length = edge
123 |             assert not(np.isnan(start) or np.isnan(end)), f"Check the input ({start}, {end})"
124 | 
125 |             if isinstance(start, float):
126 |                 start = int(start)
127 |             if isinstance(end, float):
128 |                 end = int(end)
129 | 
130 |             self.add_edge(start, end, length)
131 | 
132 |         return self.graph
133 | 
134 |     def clean_empty_set(self):
135 |         for item in [self.graph_r, self.graph]:
136 |             for i in list(item.keys()):
137 |                 if len(item[i]) == 0:
138 |                     del item[i]
139 | 
140 | 
141 |     def calculate_degree(self,):
142 |         self.clean_empty_set()
143 |         self.degree = pd.merge(
144 |             pd.DataFrame([[key, len(self.graph_r[key])]
145 |                           for key in self.graph_r], columns=['pid', 'indegree']),
146 |             pd.DataFrame([[key, len(self.graph[key])]
147 |                           for key in self.graph], columns=['pid', 'outdegree']),
148 |             how='outer',
149 |             on='pid'
150 |         ).fillna(0).astype(int).set_index('pid')
151 | 
152 |         return self.degree
153 | 
154 |     def get_origin_point(self,):
155 | 
156 |         return self.calculate_degree().reset_index().query( "indegree == 0 and outdegree != 0" ).pid.values
157 | 
158 |     def cal_nodes_dist(self, src, dst):
159 |         raise NotImplementedError
160 | 
161 |     def _simpify(self):
162 |         """
163 |         Simplify the graph, i.e., combine the edges whose shared node has exactly 1 indegree and 1 outdegree.
164 |         """
165 |         raise NotImplementedError
166 | 
167 |     def search(self, src, dst, *args, **kwargs):
168 |         raise NotImplementedError
169 | 
170 |     def _get_aux_nodes(self, exclude_list=None):
171 |         if getattr(self, 'degree', None) is None:
172 |             self.calculate_degree()
173 | 
174 |         aux_nids = self.degree.query( "indegree == 1 and outdegree == 1" ).index.unique()
175 |         if exclude_list is not None:
176 |             aux_nids = [id for id in aux_nids if id not in exclude_list]
177 | 
178 |         return aux_nids
179 | 
180 |     """ transform """
181 |     def transform_vpath_to_epath(self, seq:np.array):
182 |         if seq is None or len(seq) <= 1:
183 |             return None
184 | 
185 |         eids = [self.get_eid(seq[i], seq[i+1])
186 |                 for i in range(len(seq)-1)]
187 | 
188 |         return eids
189 | 
190 |     def transform_epath_to_vpath(self, path):
191 |         ods = [self.eid_2_od[e][0] for e in path[:-1]]
192 |         ods.extend(self.eid_2_od[path[-1]])
193 | 
194 |         return ods
--------------------------------------------------------------------------------
/mapmatching/graph/bi_astar.py:
--------------------------------------------------------------------------------
1 | import heapq
2 | import numpy as np
3 | from loguru import logger
4 | from haversine import haversine, Unit
5 | from .astar import PathPlanning
6 | 
7 | 
8 | class Bi_Astar(PathPlanning):
9 |     def __init__(self, graph: dict, graph_r: dict, nodes: dict,
10 |                  search_memo: dict = {}, nodes_dist_memo: dict = {},
11 |                  max_steps: int = 2000, max_dist: int = 10000, level='debug'):
12 |         super().__init__(graph, nodes, search_memo, nodes_dist_memo, max_steps, max_dist, level)
13 |         self.graph_r = graph_r
14 | 
15 |     def search(self, src, dst, max_steps=None, max_dist=None, level='debug'):
16 |         status, info = self._check_od(src, dst, level)
17 |         if not status:
18 |             return info
19 | 
20 |         _memo = self._check_memo(src, dst)
21 |         if _memo is not None:
22 |             return _memo
23 | 
24 |         meet = self._searching(src, dst)
25 |         if meet is None:
26 |             return {"status": 2, 'vpath': [], 'cost': np.inf}
27 | 
28 |         path = self.extract_path(src, dst)
29 |         cost = self.visited_backward[self.meet] + self.visited_forward[self.meet]
30 |         res = {'status': 0, 'vpath': path, 'cost': cost}
31 |         self.search_memo[(src, dst)] = res
32 |         return res
33 | 
34 |     def _searching(self, src, dst):
35 |         self.search_init(src, dst)
36 | 
37 |         def _helper(q1, q2):
38 |             self.extend_queue(**q1)
39 |             if self.meet is not None:
40 |                 return True
41 | 
42 |             self.extend_queue(**q2)
43 |             if self.meet is not None:
44 |                 return True
45 | 
46 |             return False
47 | 
48 |         while self.queue_forward and self.queue_backward:
49 |             if len(self.queue_forward) < len(self.queue_backward):
50 |                 if _helper(self.params_forward, self.params_backward):
51 |                     break
52 |             else:
53 |                 if _helper(self.params_backward, self.params_forward):
54 |                     break
55 | 
56 |         if self.meet == -1:
57 |             return -1
58 | 
59 |         return self.meet
60 | 
61 |     def search_init(self, src, dst):
62 |         l0 = self.calculate_nodes_dist(src, dst)
63 | 
64 |         self.queue_forward = []
65 |         self.parent_forward = {src: None}
66 |         self.visited_forward = {src: 0}
67 | 
68 |         self.queue_backward = []
69 |         self.parent_backward = {dst: None}
70 |         self.visited_backward = {dst: 0}
71 | 
72 |         heapq.heappush(self.queue_forward, (l0, 0, src))
73 |         heapq.heappush(self.queue_backward, (l0, 0, dst))
74 | 
75 |         self.params_forward = {
76 |             'dst': dst,
77 |             'queue': self.queue_forward,
78 |             'visited': self.visited_forward,
79 |             'opposite_visited': self.visited_backward,
80 |             'parent': self.parent_forward,
81 |             'graph': self.graph
82 |         }
83 | 
84 |         self.params_backward = {
85 |             'dst': src,
86 |             'queue': self.queue_backward,
87 |             'visited': self.visited_backward,
88 |             'opposite_visited': self.visited_forward,
89 |             'parent': self.parent_backward,
90 |             'graph': self.graph_r
91 |         }
92 | 
93 |         self.meet = None
94 | 
95 |     def extend_queue(self, dst, queue, visited, opposite_visited, parent, graph):
96 |         _, dis, cur = heapq.heappop(queue)
97 |         if cur not in graph:
98 |             return None
99 | 
100 |         for nxt, cost in graph[cur].items():
101 |             nxt_cost = dis + cost
102 |             if not self.is_valid(nxt, nxt_cost, graph, visited):
103 |                 continue
104 | 
105 |             visited[nxt] = nxt_cost
106 |             parent[nxt] = cur
107 |             if nxt in opposite_visited:
108 |                 self.meet = nxt
109 |                 return nxt
110 | 
111 |             _h = self.calculate_nodes_dist(nxt, dst)
112 |             heapq.heappush(queue, (nxt_cost + _h, nxt_cost, nxt))
113 | 
114 |         return None
115 | 
116 |     def is_valid(self, nxt, nxt_cost, graph, visited):
117 |         if nxt not in graph:
118 |             return False
119 | 
120 |         if nxt in visited and nxt_cost >= visited[nxt]:
121 |             return False
122 | 
123 |         return True
124 | 
125 |     def calculate_nodes_dist(self, src: int, dst: int, type='coord'):
126 |         assert src in self.nodes and dst in self.nodes, "Check the input o and d."
127 |         if (src, dst) in self.nodes_dist_memo:
128 |             return self.nodes_dist_memo[(src, dst)]
129 | 
130 |         if type == 'coord':
131 |             _src = self.nodes[src]
132 |             _dst = self.nodes[dst]
133 |             _len = haversine(
134 |                 (_src['y'], _src['x']),
135 |                 (_dst['y'], _dst['x']),
136 |                 unit=Unit.METERS
137 |             )
138 |         else:
139 |             raise NotImplementedError
140 |         self.nodes_dist_memo[(src, dst)] = _len
141 |         return _len
142 | 
143 |     def extract_path(self, src, dst):
144 |         # extract the path of the forward part
145 |         path_fore = [self.meet]
146 |         s = self.meet
147 | 
148 |         while True:
149 |             s = self.parent_forward[s]
150 |             if s is None:
151 |                 break
152 |             path_fore.append(s)
153 | 
154 |         # extract the path of the backward part
155 |         path_back = []
156 |         s = self.meet
157 | 
158 |         while True:
159 |             s = self.parent_backward[s]
160 |             if s is None:
161 |                 break
162 |             path_back.append(s)
163 | 
164 |         return list(reversed(path_fore)) + list(path_back)
165 | 
166 |     def _check_od(self, src, dst, level='debug'):
167 |         if src in self.graph and dst in self.graph:
168 |             return True, None
169 | 
170 |         info = f"Trip ({src}, {dst})" + \
171 |                f"{', `src` not in graph' if src not in self.graph else ''}" + \
172 |                f"{', `dst` not in graph' if dst not in self.graph else ''}"
173 | 
174 |         getattr(logger, level)(info)
175 | 
176 |         return False, {"status": 1, 'vpath': [], 'cost': np.inf}
177 | 
178 |     def _check_memo(self, src, dst):
179 |         if (src, dst) not in self.search_memo:
180 |             return None
181 | 
182 |         return self.search_memo[(src, dst)]
183 | 
184 |     def plot_searching_boundary(self, path, network):
185 |         points = set.union(set(self.visited_backward.keys()),
186 |                            set(self.visited_forward.keys()))
187 |         ax = network.df_nodes.loc[points].plot()
188 | 
189 |         eids = network.transform_node_seq_to_edge_seq(path)
190 |         network.df_edges.loc[eids].plot(ax=ax, label='Path')
191 |         network.df_nodes.loc[self.visited_backward].plot(
192 |             ax=ax, label='Backward', color='r', alpha=.8)
193 |         network.df_nodes.query(f"nid == {self.meet}").plot(
194 |             ax=ax, label='Meet', color='blue', alpha=.8, marker="*", zorder=8)
195 |         network.df_nodes.loc[self.visited_forward].plot(
196 |             ax=ax, label='Forward', color='y', alpha=.8)
197 | 
198 |         ax.legend()
199 | 
200 | if __name__ == "__main__":
201 |     from stmm.graph import GeoDigraph
202 |     network = GeoDigraph()
203 |     network.load_checkpoint(ckpt='../../data/network/Shenzhen_graph_pygeos.ckpt')
204 |     # network.to_postgis('shenzhen')
205 | 
206 |     from stmm.utils.serialization import load_checkpoint
207 |     astar_search_memo = load_checkpoint('../../data/debug/astar_search_memo.pkl')
208 | 
209 |     searcher = Bi_Astar(network.graph, network.graph_r, network.nodes)
210 | 
211 |     error_lst = []
212 |     for (src, dst), ans in astar_search_memo.items():
213 |         res = searcher.search(src, dst)
214 |         cond = np.array(res['vpath']) == np.array(ans['vpath'])
215 |         if isinstance(cond, np.ndarray):
216 |             cond = cond.all()
217 |         if not cond:
218 |             # print(res['cost'] == ans['cost'], cond)
219 |             print(f"\n\n({src}, {dst})\n\tans: {ans['vpath']}, {ans['cost']}\n\tres: {res['vpath']}, {res['cost']}")
220 | 
--------------------------------------------------------------------------------
/mapmatching/graph/geographx.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | 
3 | class GeoGraph(nx.DiGraph):
4 |     def __init__(self, incoming_graph_data=None, **attr):
5 |         super().__init__(incoming_graph_data, **attr)
6 | 
7 | 
8 |     """ vis """
9 |     def add_edge_map(self, ax, *arg, **kwargs):
10 |         return
11 | 
12 |     """ property """
13 |     @property
14 |     def crs(self):
15 |         return self.df_edges.crs
16 | 
17 |     @property
18 |     def epsg(self):
19 |         return self.df_edges.crs.to_epsg()
20 | 
--------------------------------------------------------------------------------
/mapmatching/match/__int__.py:
--------------------------------------------------------------------------------
1 | from .status import STATUS
--------------------------------------------------------------------------------
/mapmatching/match/candidatesGraph.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from shapely import LineString
4 | from geopandas import GeoDataFrame
5 | 
6 | from ..utils import timeit
7 | from .status import CANDS_EDGE_TYPE
8 | from ..geo.azimuth import cal_coords_seq_azimuth
9 | from ..geo.ops.distance import coords_seq_distance
10 | from ..geo.ops.to_array import points_geoseries_2_ndarray
11 | from .misc import get_shared_line, merge_step_arrs
12 | 
13 | 
14 | def cal_traj_params(points, move_dir=True, check=False):
15 |     """
16 |     Calculate trajectory parameters (e.g., euc dist, move dir) based on a series of points.
17 | 
18 |     Args:
19 |         points (GeoSeries): A GeoSeries containing the trajectory points.
20 |         move_dir (bool, optional): Whether to calculate the movement direction. Defaults to True.
21 |         check (bool, optional): Whether to check for duplicate points. Defaults to False.
22 | 
23 |     Returns:
24 |         DataFrame: A DataFrame containing the calculated trajectory parameters.
25 | 
26 |     Example:
27 |         >>> points = gpd.GeoSeries([...])  # GeoSeries containing trajectory points
28 |         >>> traj_params = cal_traj_params(points, move_dir=True, check=False)
29 |         >>> print(traj_params)
30 | 
31 |     Notes:
32 |         - The input points should be in a GeoSeries with a valid geometry column.
33 |         - The DataFrame returned will contain columns such as 'pid_0', 'pid_1', 'd_euc' (Euclidean distance),
34 |           and 'move_dir' (movement direction) if move_dir=True.
35 | 
36 |     """
37 |     coords = points_geoseries_2_ndarray(points.geometry)
38 |     dist_arr, _ = coords_seq_distance(coords)
39 |     idxs = points.index
40 | 
41 |     if check:
42 |         zero_idxs = np.where(dist_arr==0)[0]
43 |         if len(zero_idxs):
44 |             print(f"Duplicate points exist: {[(i, i+1) for i in zero_idxs]}")
45 | 
46 |     _dict = {'pid_0': idxs[:-1],
47 |              'pid_1': idxs[1:],
48 |              'd_euc': dist_arr}
49 | 
50 |     if move_dir:
51 |         dirs = cal_coords_seq_azimuth(coords)
52 |         _dict['move_dir'] = dirs
53 | 
54 |     res = pd.DataFrame(_dict)
55 | 
56 |     return res
57 | 
58 | def identify_edge_flag(gt: pd.DataFrame, cands: GeoDataFrame, ratio_eps: float = 0.05, dist_eps: float = 5):
59 |     """
60 |     Identify the type of querying the shortest path from the candidate `src` to `dst` on the graph.
61 | 
62 |     Args:
63 |         gt (pd.DataFrame): The graph DataFrame.
64 |         cands (GeoDataFrame): The DataFrame containing candidate edges.
65 |         ratio_eps (float, optional): The ratio epsilon parameter. Defaults to 0.05.
66 |         dist_eps (float, optional): The distance epsilon parameter. Defaults to 5.
67 | 
68 |     Returns:
69 |         pd.DataFrame: The graph DataFrame with the 'flag' column appended.
70 | 
71 |     Example:
72 |         >>> graph = pd.DataFrame([...])  # Graph DataFrame
73 |         >>> candidates = gpd.GeoDataFrame([...])  # Candidate edges DataFrame
74 |         >>> flagged_graph = identify_edge_flag(graph, candidates, ratio_eps=0.05, dist_eps=5)
75 |         >>> print(flagged_graph)
76 | 
77 |     Notes:
78 |         - The 'gt' DataFrame represents the graph and should contain necessary columns such as 'eid_0', 'eid_1',
79 |           'dist_0', 'step_0_len', 'step_n_len', etc.
80 |         - The 'cands' DataFrame should contain candidate edges information, including columns such as 'pid', 'eid',
81 |           'seg_0', 'len_0', etc.
82 |         - The 'ratio_eps' and 'dist_eps' parameters control the thresholds for identifying different edge types.
83 |         - The resulting graph DataFrame will have an additional 'flag' column indicating the edge type.
84 | 
85 |     Refs:
86 |         - Fast map matching, an algorithm integrating hidden Markov model with precomputation, Fig 4.
87 |     """
88 |     # (src, dst) on the same edge
89 |     gt.loc[:, 'flag'] = CANDS_EDGE_TYPE.NORMAL
90 | 
91 |     same_edge = gt.eid_0 == gt.eid_1
92 |     tmp = gt['dist_0'] - gt['step_0_len']
93 |     cond_1 = tmp <= gt['step_n_len']
94 | 
95 |     tmp = tmp.apply(lambda x: min(max(0, x - dist_eps), x * (1 - ratio_eps)))
96 |     cond = tmp <= gt['step_n_len']
97 | 
98 |     # Merge adjacent nodes within a certain range (5 meters)
99 |     cond_approx_points = cond & (~cond_1)
100 |     _cands = cands[['pid', 'eid', 'seg_0', 'len_0']]\
101 |         .set_index(['pid', 'eid']).to_dict('index')
102 |     # reset the related params
103 |     gt.loc[cond_approx_points, ['step_n', 'step_n_len']] = gt.loc[cond_approx_points].apply(
104 |         lambda x: _cands[(x.pid_0, x.eid_0)].values(), axis=1, result_type='expand'
105 |     ).rename(columns={0: 'step_n', 1: 'step_n_len'})
106 | 
107 |     same_edge_normal = same_edge & cond
108 |     gt.loc[same_edge_normal, 'flag'] = CANDS_EDGE_TYPE.SAME_SRC_FIRST
109 |     gt.loc[same_edge_normal, ['src', 'dst']] = gt.loc[same_edge_normal, ['dst', 'src']].values
110 | 
111 |     same_edge_revert = same_edge & (~cond)
112 |     gt.loc[same_edge_revert, 'flag'] = CANDS_EDGE_TYPE.SAME_SRC_LAST
113 | 
114 |     return gt
115 | 
116 | @timeit
117 | def construct_graph( points,
118 |                      cands,
119 |                      common_attrs = ['pid', 'eid', 'dist', 'speed'],
120 |                      left_attrs = ['dst', 'len_1', 'seg_1'],
121 |                      right_attrs = ['src', 'len_0', 'seg_0', 'observ_prob'],
122 |                      rename_dict = {
123 |                          'seg_0': 'step_n',
124 |                          'len_0': 'step_n_len',
125 |                          'seg_1': 'step_0',
126 |                          'len_1': 'step_0_len',
127 |                          'cost': 'd_sht'},
128 |                      dir_trans = True,
129 |                      gt_keys = ['pid_0', 'eid_0', 'eid_1']
130 |                      ):
131 |     """
132 |     Construct the candidate graph (level, src, dst) for spatial and temporal analysis.
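    Each trajectory point forms one layer of the graph: the layer's nodes are the
    point's candidate edges, and transitions connect every candidate of one layer
    with every candidate of the next layer (a Cartesian product, built below via
    the auxiliary `mgd` merge key).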
133 | 134 | Parameters: 135 | path = step_0 + step_1 + step_n 136 | """ 137 | layer_ids = np.sort(cands.pid.unique()) 138 | prev_layer_dict = {cur: layer_ids[i] 139 | for i, cur in enumerate(layer_ids[1:])} 140 | prev_layer_dict[layer_ids[0]] = -1 141 | 142 | # left 143 | left = cands[common_attrs + left_attrs] 144 | left.loc[:, 'mgd'] = left.pid 145 | 146 | # right 147 | right = cands[common_attrs + right_attrs] 148 | right.loc[:, 'mgd'] = right.pid.apply(lambda x: prev_layer_dict[x]) 149 | right.query("mgd >= 0", inplace=True) 150 | 151 | # Cartesian product 152 | gt = left.merge(right, on='mgd', suffixes=["_0", '_1'])\ 153 | .drop(columns='mgd')\ 154 | .reset_index(drop=True)\ 155 | .rename(columns=rename_dict) 156 | 157 | identify_edge_flag(gt, cands) 158 | traj_info = cal_traj_params(points.loc[cands.pid.unique()], move_dir=dir_trans) 159 | 160 | gt = gt.merge(traj_info, on=['pid_0', 'pid_1']) 161 | gt.loc[:, ['src', 'dst']] = gt.loc[:, ['src', 'dst']].astype(np.int64) 162 | 163 | if gt_keys: 164 | gt.set_index(gt_keys, inplace=True) 165 | 166 | return gt 167 | 168 | def get_shortest_geometry(gt:GeoDataFrame, geom='geometry', format='LineString'): 169 | """ 170 | Generate the shortest path geometry based on the given conditions. 171 | 172 | Parameters: 173 | gt (GeoDataFrame): A geospatial dataframe containing geometry objects and other attributes. 174 | geom (str, optional): The column name for the geometry objects. Default is 'geometry'. 175 | format (str, optional): The format of the returned geometry objects. Available options are 'LineString' or 'array'. 176 | Default is 'LineString'. 177 | 178 | Returns: 179 | GeoDataFrame: An updated geospatial dataframe with the shortest path geometry objects. 180 | 181 | Notes: 182 | - Only 'LineString' and 'array' formats are supported. 183 | - The input GeoDataFrame must have a 'flag' column indicating whether it represents the shortest path. 184 | 185 | Example: 186 | >>> shortest_geo = get_shortest_geometry(geo_data, format='array') 187 | >>> print(shortest_geo) 188 | 189 | Raises: 190 | - AssertionError: If the provided format is not supported. 
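    Flag semantics (see `CANDS_EDGE_TYPE` in status.py): rows with `flag == 1` have
    their source and destination on the same edge with the source ahead, so their
    geometry is the shared part of `step_0` and `step_n` (`get_shared_line`); all
    other rows get the concatenation of their step arrays (`merge_step_arrs`).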
191 |     """
192 |     assert format in ['LineString', 'array']
193 | 
194 |     # FIXME: 1) step_1 is None; 2) same edge: w2h, level=27, 555->555
195 |     mask = gt.flag == 1
196 |     gt.loc[mask, geom] = gt.loc[mask].apply(lambda x:
197 |         get_shared_line(x.step_0, x.step_n), axis=1)
198 |     gt.loc[~mask, geom] = gt.loc[~mask].apply(
199 |         merge_step_arrs, axis=1)
200 | 
201 |     if format == 'LineString':
202 |         gt.loc[:, geom] = gt[geom].apply(LineString)
203 | 
204 |     return gt
205 | 
--------------------------------------------------------------------------------
/mapmatching/match/dir_similarity.py:
--------------------------------------------------------------------------------
1 | from geopandas import GeoDataFrame
2 | 
3 | from ..geo.azimuth import cal_linestring_azimuth_cos_dist
4 | from .candidatesGraph import get_shortest_geometry
5 | 
6 | def cal_dir_prob(gt:GeoDataFrame, geom='geometry'):
7 |     # TODO: use `substring` instead of the current approach
8 |     # Add: dir_prob
9 |     def _cal_dir_similarity(x):
10 |         return cal_linestring_azimuth_cos_dist(x[geom], x['move_dir'], weight=True)
11 | 
12 |     gt = get_shortest_geometry(gt, geom, format='array')
13 |     gt.loc[:, 'dir_prob'] = gt.apply(_cal_dir_similarity, axis=1)
14 | 
15 |     return gt
16 | 
17 | 
--------------------------------------------------------------------------------
/mapmatching/match/geometricAnalysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import geopandas as gpd
3 | from ..geo.query import get_k_neigh_geoms
4 | 
5 | 
6 | def cal_observ_prob(dist, bias=0, deviation=20, normal=True):
7 |     """The observation prob is defined as the likelihood that a GPS sampling point `p_i`
8 |     matches a candidate point `C_ij`, computed based on the distance between the two points.
9 | 
10 |     Args:
11 |         dist (np.ndarray | pd.Series): The distances between the points and their candidates.
12 |         bias (float, optional): GPS measurement error bias. Defaults to 0.
13 |         deviation (float, optional): GPS measurement error deviation. Defaults to 20.
14 |         normal (bool, optional): Min-Max Scaling. Defaults to True.
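    Note:
        The raw score is the Gaussian density of the distance,
        f(x) = exp(-(x - bias)^2 / (2 * deviation^2)) / (sqrt(2 * pi) * deviation),
        which is max-normalized when `normal=True`; the square root of that value
        is what gets returned (see the implementation below).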
15 | 
16 |     Returns:
17 |         np.ndarray: The (normalized) observation probabilities.
18 |     """
19 |     observ_prob_factor = 1 / (np.sqrt(2 * np.pi) * deviation)
20 | 
21 |     def f(x): return observ_prob_factor * \
22 |         np.exp(-np.power(x - bias, 2)/(2 * np.power(deviation, 2)))
23 | 
24 |     _dist = f(dist)
25 |     if normal:
26 |         _dist /= _dist.max()
27 | 
28 |     return np.sqrt(_dist)
29 | 
30 | def analyse_geometric_info(points: gpd.GeoDataFrame,
31 |                            edges: gpd.GeoDataFrame,
32 |                            top_k: int = 5,
33 |                            radius: float = 50,
34 |                            pid: str = 'pid',
35 |                            eid: str = 'eid',
36 |                            ):
37 |     # TODO improve efficiency: get_k_neigbor_edges 50 %, project_point_to_line_segment 50 %
38 |     cands, _ = get_k_neigh_geoms(points.geometry, edges,
39 |                                  query_id=pid, project=True, top_k=top_k,
40 |                                  keep_geom=True, radius=radius)
41 |     if cands is not None:
42 |         cands.loc[:, 'observ_prob'] = cal_observ_prob(cands.dist_p2c)
43 | 
44 |     return cands
45 | 
46 | 
--------------------------------------------------------------------------------
/mapmatching/match/io.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import geopandas as gpd
3 | from ..geo.coord.coordTransfrom_shp import coord_transfer
4 | from ..geo.ops.simplify import simplify_trajetory_points
5 | 
6 | 
7 | def load_points(fn, simplify: bool = False, dp_thres: int = None, crs: int = None, in_sys: str = 'wgs', out_sys: str = 'wgs'):
8 |     # BUG: duplicate points need to be removed
9 |     traj = gpd.read_file(fn, encoding='utf-8')
10 |     if crs is not None:
11 |         traj.set_crs(crs, allow_override=True, inplace=True)
12 | 
13 |     if 'time' in traj.columns:
14 |         traj.time = pd.to_datetime(
15 |             traj['time'], format='%Y-%m-%d %H:%M:%S')
16 | 
17 |     traj = coord_transfer(traj, in_sys, out_sys)
18 | 
19 |     if simplify:
20 |         traj_bak = traj.copy()
21 |         traj = simplify_trajetory_points(traj, dp_thres, inplace=True)
22 |     else:
23 |         traj_bak = None
24 | 
25 | 
26 |     return traj, traj_bak
27 | 
--------------------------------------------------------------------------------
/mapmatching/match/metric.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from ..geo.metric import lcss, edr, erp
4 | from ..geo.ops.resample import resample_polyline_seq_to_point_seq, resample_point_seq
5 | 
6 | 
7 | def eval(traj, res=None, path=None, resample=5, eps=10, metric='lcss', g=None):
8 |     """
9 |     The DP-array loop of LCSS is accelerated with numba, which cuts about 10% of the time cost (20 ms).
10 |     """
11 |     # BUG: `self` is referenced below, but this is a module-level function
12 |     assert res is not None or path is not None
13 |     assert metric in ['lcss', 'edr', 'erp']
14 | 
15 |     if path is None:
16 |         path = self.transform_res_2_path(res)
17 | 
18 |     if traj.crs.to_epsg() != path.crs.to_epsg():
19 |         traj = traj.to_crs(path.crs.to_epsg())
20 | 
21 |     if resample:
22 |         _, path_coords_np = resample_polyline_seq_to_point_seq(path.geometry, step=resample,)
23 |         _, traj_coords_np = resample_point_seq(traj.geometry, step=resample)
24 |     else:
25 |         path_coords_np = np.concatenate(path.geometry.apply(lambda x: x.coords[:]).values)
26 |         traj_coords_np = np.concatenate(traj.geometry.apply(lambda x: x.coords[:]).values)
27 | 
28 |     eval_funs = {
29 |         'lcss': [lcss, (traj_coords_np, path_coords_np, eps, self.ll)],
30 |         'edr': [edr, (traj_coords_np, path_coords_np, eps)],
31 |         'erp': [erp, (traj_coords_np, path_coords_np, g)]
32 |     }
33 |     _eval = eval_funs[metric]
34 | 
35 |     return _eval[0](*_eval[1])
--------------------------------------------------------------------------------
/mapmatching/match/misc.py:
--------------------------------------------------------------------------------
1 | import numba
2 | import shapely
3 | import warnings
4 | import numpy as np
5 | from shapely import LineString
6 | from shapely.ops import linemerge
7 | 
8 | def merge_step_arrs(x, check=True):
9 |     lst = [i for i in [x.step_0, x.step_1, x.step_n] if isinstance(i, (np.ndarray, list))]
10 |     if len(lst) == 0:
11 |         warnings.warn("All geoms are None")
12 |         return None
13 | 
14 |     if len(lst) == 1:
15 |         return lst[0]
16 | 
17 |     # TODO Nodes may be duplicated at the join
18 |     coords = np.concatenate(lst)
19 | 
20 |     return coords
21 | 
22 | @numba.jit
23 | def get_shared_arr(arr1:np.ndarray, arr2:np.ndarray):
24 |     lst = [arr1[0]]
25 |     right = 0
26 |     left = 1
27 |     n, m = len(arr1), len(arr2)
28 | 
29 |     while left < n:
30 |         while right < m and np.all(arr1[left] != arr2[right]):
31 |             right += 1
32 |         if right >= m:
33 |             break
34 |         lst.append(arr1[left])
35 |         left += 1
36 | 
37 |     if np.all(arr2[-1] != lst[-1]):
38 |         lst.append(arr2[-1])
39 | 
40 |     return lst
41 | 
42 | def get_shared_line(line_1:np.ndarray, line_2:np.ndarray):
43 |     if line_1 is None:
44 |         warnings.warn('line_1 is empty')
45 |         coords = line_2
46 |     elif line_2 is None:
47 |         warnings.warn('line_2 is empty')
48 |         coords = line_1
49 |     else:
50 |         coords = get_shared_arr(line_1, line_2)
51 | 
52 |     return coords
53 | 
54 | 
55 | if __name__ == "__main__":
56 |     # get_shared_line expects coordinate arrays, not LineStrings
57 |     line_1 = np.array(LineString([[.9, .9], [1, 1], [2, 2]]).coords)
58 |     line_2 = np.array(LineString([[0, 0], [1, 1], [1.5, 1.5]]).coords)
59 |     print(get_shared_line(line_1, line_2))
60 | 
61 | 
--------------------------------------------------------------------------------
/mapmatching/match/postprocess.py:
--------------------------------------------------------------------------------
1 | import shapely
2 | import numpy as np
3 | import pandas as pd
4 | import geopandas as gpd
5 | from geopandas import GeoDataFrame
6 | 
7 | from .status import STATUS
8 | from ..utils.timer import timeit
9 | from ..graph import GeoDigraph
10 | from ..geo.ops.point2line import project_points_2_linestrings
11 | 
12 | 
13 | @timeit
14 | def get_path(rList:gpd.GeoDataFrame,
15 |              graph:gpd.GeoDataFrame,
16 |              cands:gpd.GeoDataFrame,
17 |              metric = {},
18 |              prob_thres = .8
19 |              ):
20 |     """Get the path from the matched node sequence.
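    The edge path (`epath`) is assembled by concatenating each step's `eid_0` with
    its shortest-path `epath` and dropping consecutive duplicates; `step_0` and
    `step_n` are the normalized offsets of the matched points on the first and
    last edges.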
21 | 
22 |     Args:
23 |         rList (gpd.GeoDataFrame): The matched sequence, with columns `pid`, `eid`, `src` and `dst`.
24 |         graph (gpd.GeoDataFrame): The candidate graph with the transition attributes.
25 |         cands (gpd.GeoDataFrame): The candidates of the trajectory points.
26 | 
27 |     Returns:
28 |         [tuple]: (res, steps)
29 | 
30 |     Example:
31 |         rList
32 |         |    |   pid |   eid |        src |        dst |
33 |         |---:|------:|------:|-----------:|-----------:|
34 |         |  0 |     0 | 17916 | 8169270272 | 2376751183 |
35 |         |  1 |     1 | 17916 | 8169270272 | 2376751183 |
36 |     """
37 |     steps = rList.copy()
38 |     steps.loc[:, 'eid_1'] = steps.eid.shift(-1).fillna(0).astype(int)
39 |     idxs = steps[['pid', 'eid', 'eid_1']].values[:-1].tolist()
40 |     steps = graph.loc[idxs, ['epath', 'd_sht', 'avg_speed', 'dist_prob', 'trans_prob']].reset_index()
41 | 
42 |     # FIXME: try accelerating the loop with numba
43 |     extract_eids = lambda x: np.concatenate([[x.eid_0], x.epath]) if x.epath else [x.eid_0]
44 |     eids = np.concatenate(steps.apply(extract_eids, axis=1))
45 |     eids = np.append(eids, [steps.iloc[-1].eid_1])
46 |     keep_cond = np.append([True], eids[:-1] != eids[1:])
47 |     eids_lst = eids[keep_cond].tolist()
48 | 
49 |     res = {'epath': eids_lst}
50 |     step_0, step_n = _get_first_and_step_n(cands, rList)
51 | 
52 |     # Case: one step
53 |     if len(eids_lst) == 1:
54 |         # tmp = get_shared_arr(step_0, step_n)
55 |         res['step_0'] = step_0
56 |         res['step_n'] = step_n
57 |         if metric.get('prob', 1) < prob_thres:
58 |             metric['status'] = STATUS.FAILED
59 |         else:
60 |             metric['status'] = STATUS.SAME_LINK
61 | 
62 |         return res, None
63 | 
64 |     # update first/last step
65 |     n = len(eids_lst) - 1
66 |     assert n > 0, "Check od list"
67 |     res['step_0'] = step_0
68 |     res['step_n'] = step_n
69 |     res['dist'] = steps.d_sht.sum()
70 |     res['avg_speed'] = np.average(steps['avg_speed'].values, weights = steps['d_sht'].values)
71 | 
72 |     # update metric
73 |     coef = 1 / len(steps.dist_prob)
74 |     dist_prob = np.prod(steps.dist_prob)
75 |     trans_prob = np.prod(steps.trans_prob)
76 |     metric["norm_prob"], metric["dist_prob"], metric["trans_prob"] = \
77 |         np.power([metric['prob'], dist_prob, trans_prob], coef)
78 |     if "dir_prob" in list(graph):
79 |         metric["dir_prob"] = metric["trans_prob"] / metric["dist_prob"]
80 | 
81 |     # status
82 |     if metric["trans_prob"] < prob_thres:
83 |         metric['status'] = STATUS.FAILED
84 |     else:
85 |         metric['status'] = STATUS.SUCCESS
86 | 
87 |     return res, steps
88 | 
89 | def _get_first_and_step_n(cands, rList):
90 |     step_0 = cands.query(
91 |         f'pid == {rList.iloc[0].pid} and eid == {rList.iloc[0].eid}').iloc[0]
92 |     step_n = cands.query(
93 |         f'pid == {rList.iloc[-1].pid} and eid == {rList.iloc[-1].eid}').iloc[0]
94 | 
95 |     cal_offset = lambda x: x['len_0'] / (x['len_0'] + x['len_1'])
96 | 
97 |     return cal_offset(step_0), cal_offset(step_n)
98 | 
99 | def transform_mathching_res_2_path(res: dict, net: GeoDigraph, ori_crs: bool=True, attrs: list=None):
100 |     if attrs is None:
101 |         attrs = ['eid', 'way_id', 'src', 'dst', 'name', 'road_type', 'link', 'speed', 'dist', 'geometry']
102 | 
103 |     path = net.get_edge(res['epath'], attrs, reset_index=True)
104 | 
105 |     _len = len(res['epath'])
106 |     if _len == 1:
107 |         path.loc[0, 'dist'] *= res['step_n'] - res['step_0']
108 |         path.loc[0, 'geometry'] = shapely.ops.substring(
109 |             path.iloc[0].geometry, res['step_0'], res['step_n'], normalized=True)
110 |     else:
111 |         path.loc[0, 'dist'] *= 1 - res['step_0']
112 |         path.loc[0, 'geometry'] = shapely.ops.substring(
113 |             path.iloc[0].geometry, res['step_0'], 1, normalized=True)
114 | 
115 |         path.loc[_len - 1, 'dist'] *= res['step_n']
116 |         path.loc[_len - 1, 'geometry'] = shapely.ops.substring(
117 |             path.iloc[-1].geometry, 0, res['step_n'], normalized=True)
118 | 
119 |     path = path[~path.geometry.is_empty]
120 |     if ori_crs:
121 |         path = path.to_crs(res['ori_crs'])
122 | 
123 |     return path
124 | 
125 | def project(points: GeoDataFrame, path: GeoDataFrame, keep_attrs=['eid', 'proj_point'], normalized=True, reset_geom=True):
126 |     """
127 |     Project points onto a path represented by a GeoDataFrame.
128 | 
129 |     Args:
130 |         points (GeoDataFrame): Points to be projected.
131 |         path (GeoDataFrame): Path to project the points onto.
132 |         keep_attrs (list, optional): Attributes to keep in the projected points. Defaults to ['eid', 'proj_point'].
133 |         normalized (bool, optional): Whether to normalize the projection. Defaults to True.
134 |         reset_geom (bool, optional): Whether to reset the geometry column in the projected points. Defaults to True.
135 | 
136 |     Returns:
137 |         GeoDataFrame: Projected points.
138 | 
139 |     Example:
140 |         projected_points = project(points, path)
141 |     """
142 |     _points = points[[points.geometry.name]]
143 |     ps = project_points_2_linestrings(_points, path.to_crs(points.crs), normalized=normalized)
144 | 
145 |     if keep_attrs:
146 |         ps = ps[keep_attrs]
147 | 
148 |     ps = gpd.GeoDataFrame(pd.concat([_points, ps], axis=1), crs=points.crs)
149 |     if reset_geom:
150 |         ps.loc[:, 'ori_geom'] = points.geometry.apply(lambda x: x.wkt)
151 |         ps.set_geometry('proj_point', inplace=True)
152 |         ps.drop(columns=['geometry'], inplace=True)
153 | 
154 |     return ps
155 | 
--------------------------------------------------------------------------------
/mapmatching/match/spatialAnalysis.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import numpy as np
3 | from geopandas import GeoDataFrame
4 | 
5 | from ..graph import GeoDigraph
6 | from .dir_similarity import cal_dir_prob
7 | from .candidatesGraph import construct_graph
8 | 
9 | 
10 | def cal_dist_prob(gt: GeoDataFrame, net: GeoDigraph, max_steps: int = 2000, max_dist: int = 10000, eps: float = 1e-6):
11 |     """
12 |     Calculate the distance probability for each edge in the graph.
13 | 
14 |     Args:
15 |         gt (GeoDataFrame): The graph GeoDataFrame.
16 |         net (GeoDigraph): The network GeoDigraph.
17 |         max_steps (int, optional): The maximum number of steps for route planning. Defaults to 2000.
18 |         max_dist (int, optional): The maximum distance for route planning. Defaults to 10000.
19 |         eps (float, optional): The epsilon value for comparing distances. Defaults to 1e-6.
20 | 
21 |     Returns:
22 |         GeoDataFrame: The graph GeoDataFrame with additional columns 'd_sht' and 'dist_prob'.
23 | 
24 |     Example:
25 |         >>> graph = GeoDataFrame([...])  # Graph GeoDataFrame
26 |         >>> network = GeoDigraph([...])  # Network GeoDigraph
27 |         >>> graph = cal_dist_prob(graph, network, max_steps=3000, max_dist=15000, eps=1e-5)
28 |         >>> print(graph)
29 | 
30 |     Notes:
31 |         - The 'gt' GeoDataFrame should contain the graph data with required columns including 'flag', 'cost', 'avg_speed', 'epath', 'coords', 'step_0_len', 'step_n_len', 'dist_0', 'd_euc'.
32 |         - The 'net' GeoDigraph should be a network representation used for route planning.
33 |         - The 'max_steps' parameter specifies the maximum number of steps for route planning.
34 |         - The 'max_dist' parameter specifies the maximum distance for route planning.
35 |         - The 'eps' parameter is used for comparing distances and should be a small positive value.
36 |         - The function calculates the shortest paths and temporal probabilities for each edge in the graph.
37 |         - It adds the following columns to the 'gt' GeoDataFrame:
38 |             - 'cost': The cost of the shortest path.
39 |             - 'avg_speed': The average speed on the shortest path.
40 |             - 'epath': The edge path of the shortest path.
41 |             - 'step_1': The first step of the shortest path.
42 |             - 'd_sht': The total distance of the shortest path.
43 |             - 'dist_prob': The distance probability for the edge.
44 |         - The function modifies the 'gt' GeoDataFrame in place and returns the modified GeoDataFrame.
45 |     """
46 | 
47 |     assert 'flag' in gt, "Check whether the attribute `flag` is in gt"
48 |     if gt.empty:
49 |         warnings.warn("Empty graph layer")
50 |         return gt
51 | 
52 |     sp_attrs = ['cost', "avg_speed", 'epath', 'coords']
53 |     gt_sp_attrs = ['cost', "avg_speed", 'epath', 'step_1']
54 |     route_planning = lambda x: net.search(x.dst, x.src, max_steps, max_dist)
55 |     paths = gt.apply(route_planning, axis=1, result_type='expand')[sp_attrs]
56 |     gt.loc[:, gt_sp_attrs] = paths.values
57 | 
58 |     cal_temporal_prob(gt)
59 | 
60 |     gt.loc[:, 'd_sht'] = gt.cost + gt.step_0_len + gt.step_n_len
61 | 
62 |     # OD is on the same edge, but the starting point lies ahead of the endpoint
63 |     flag_1_idxs = gt.query("flag == 1").index
64 |     if len(flag_1_idxs):
65 |         gt.loc[flag_1_idxs, ['epath', 'step_1']] = None, None
66 |         gt.loc[flag_1_idxs, 'd_sht'] = gt.step_0_len + gt.step_n_len - gt.dist_0
67 | 
68 |     idx = gt.query(f"flag == 1 and d_sht < {eps}").index
69 |     gt.loc[idx, 'd_sht'] = gt.d_euc
70 | 
71 |     # distance trans prob
72 |     dist = gt.d_euc / gt.d_sht
73 |     mask = dist > 1
74 |     dist[mask] = 1 / dist[mask]
75 |     gt.loc[:, 'dist_prob'] = dist
76 | 
77 |     return gt
78 | 
79 | def cal_temporal_prob(gt: GeoDataFrame, eps=1e-6):
80 |     """
81 |     Calculate the temporal probability for each edge in the graph.
82 | 
83 |     Args:
84 |         gt (GeoDataFrame): The graph GeoDataFrame.
85 |         eps (float, optional): The epsilon value for handling infinite or zero weights. Defaults to 1e-6.
86 | 
87 |     Returns:
88 |         GeoDataFrame: The graph GeoDataFrame with additional column 'avg_speed'.
89 | 
90 |     Example:
91 |         >>> graph = GeoDataFrame([...])  # Graph GeoDataFrame
92 |         >>> graph = cal_temporal_prob(graph, eps=1e-5)
93 |         >>> print(graph)
94 | 
95 |     Notes:
96 |         - The 'gt' GeoDataFrame should contain the graph data with required columns including 'speed_0', 'speed_1', 'avg_speed', 'step_0_len', 'step_n_len', 'cost'.
97 |         - The 'eps' parameter is used for handling infinite or zero weights and should be a small positive value.
98 |         - The function calculates the average speed for each edge based on the given weights.
99 |         - It adds the 'avg_speed' column to the 'gt' GeoDataFrame.
100 |         - The function modifies the 'gt' GeoDataFrame in place and returns the modified GeoDataFrame.
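        - Concretely, each row's average speed is the length-weighted mean
          avg_speed = (speed_0 * step_0_len + speed_1 * step_n_len + avg_speed * cost)
          / (step_0_len + step_n_len + cost),
          with zero or infinite weights replaced by `eps` (see the implementation below).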
101 |     """
102 |     speeds = gt[['speed_0', 'speed_1', 'avg_speed']].values
103 |     weights = gt[['step_0_len', 'step_n_len', 'cost']].values
104 |     weights[weights == np.inf] = eps
105 |     weights[weights == 0] = eps
106 |     avg_speeds = np.average(speeds, weights=weights, axis=1)
107 | 
108 |     gt.loc[:, 'avg_speed'] = avg_speeds
109 |     # gt.loc[:, 'eta'] = gt.d_sht.values / avg_speeds
110 | 
111 |     return gt
112 | 
113 | def cal_trans_prob(gt, geometry, dir_trans):
114 |     if not dir_trans:
115 |         gt.loc[:, 'trans_prob'] = gt.dist_prob
116 |         return gt
117 | 
118 |     cal_dir_prob(gt, geometry)
119 |     gt.loc[:, 'trans_prob'] = gt.dist_prob * gt.dir_prob
120 | 
121 |     return gt
122 | 
123 | def analyse_spatial_info(net: GeoDigraph,
124 |                          points: GeoDataFrame,
125 |                          cands: GeoDataFrame,
126 |                          dir_trans=False,
127 |                          max_steps: int = 2e3,
128 |                          max_dist: int = 1e5,
129 |                          gt_keys: list = ['pid_0', 'eid_0', 'eid_1'],
130 |                          geometry='whole_path'):
131 |     """
132 |     Compute the geometric and topological info: the product of the `observation prob` and the `transmission prob`.
133 |     """
134 |     gt = construct_graph(points, cands, dir_trans=dir_trans, gt_keys=gt_keys)
135 | 
136 |     gt = cal_dist_prob(gt, net, max_steps, max_dist)
137 |     cal_trans_prob(gt, geometry, dir_trans)
138 | 
139 |     return gt
140 | 
141 | def get_trans_prob_bet_layers(gt, net, dir_trans=True, geometry='path'):
142 |     """
143 |     For beam-search
144 |     """
145 |     if gt.empty:
146 |         return gt
147 | 
148 |     gt = cal_dist_prob(gt, net)
149 |     cal_trans_prob(gt, geometry, dir_trans)
150 | 
151 |     return gt
152 | 
--------------------------------------------------------------------------------
/mapmatching/match/status.py:
--------------------------------------------------------------------------------
1 | from enum import IntEnum
2 | 
3 | class STATUS:
4 |     SUCCESS = 0        # matched successfully
5 |     SAME_LINK = 1      # all trajectory points lie on the same link
6 |     ONE_POINT = 2      # all trajectory points collapse to a single point
7 |     NO_CANDIDATES = 3  # the trajectory points cannot be mapped to any candidate edge
8 |     FAILED = 4         # the matching prob is below the threshold
9 |     UNKNOWN = 99
10 | 
11 | class CANDS_EDGE_TYPE:
12 |     NORMAL = 0          # src and dst are on different edges
13 |     SAME_SRC_FIRST = 1  # src and dst are on the same edge, with the start ahead of the end
14 |     SAME_SRC_LAST = 2   # src and dst are on the same edge, with the start behind the end
15 | 
--------------------------------------------------------------------------------
/mapmatching/match/temporalAnalysis.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | 
4 | 
5 | def cos_similarity(self, path_, v_cal=30):
6 |     # TODO cos similarity for speed
7 |     # path_ = [5434742616, 7346193109, 7346193114, 5434742611, 7346193115, 5434742612, 7346193183, 7346193182]
8 |     seg = [[path_[i-1], path_[i]] for i in range(1, len(path_))]
9 |     v_roads = pd.DataFrame(seg, columns=['src', 'dst']).merge(self.edges, on=['src', 'dst']).v.values
10 | 
11 |     num = np.sum(v_roads.T * v_cal)
12 |     denom = np.linalg.norm(v_roads) * np.linalg.norm([v_cal for x in v_roads])
13 |     cos = num / denom
14 | 
15 |     return cos
16 | 
17 | 
--------------------------------------------------------------------------------
/mapmatching/match/topologicalAnalysis.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenke727/ST-MapMatching/2b88c219142cfc1d1460669027798538ee0b2ad0/mapmatching/match/topologicalAnalysis.py
--------------------------------------------------------------------------------
/mapmatching/match/viterbi.py:
--------------------------------------------------------------------------------
1 | import heapq
2 | import numpy as np
3 | import pandas as pd
4 | from loguru import logger
5 | from collections import
defaultdict 6 | 7 | from .spatialAnalysis import get_trans_prob_bet_layers 8 | from ..utils import Timer, timeit 9 | 10 | 11 | def cal_prob_func(x, y, mode): 12 | if mode == '+': 13 | return x + y 14 | elif mode == '*': 15 | return x * y 16 | 17 | def merge_k_heapq(arrays, count=100): 18 | queue, res = [], [] 19 | eid_set = set() 20 | 21 | for eid, arr in arrays.items(): 22 | if len(arr) == 0: 23 | continue 24 | heapq.heappush(queue, (arr[0][0], eid)) 25 | 26 | while queue and count: 27 | _, eid = heapq.heappop(queue) 28 | prob, keys = heapq.heappop(arrays[eid]) 29 | count -= 1 30 | 31 | if arrays[eid]: 32 | heapq.heappush(queue, (arrays[eid][0][0], eid)) 33 | if eid not in eid_set: 34 | res.append(list(keys) + [-prob]) 35 | eid_set.add(eid) 36 | 37 | return res 38 | 39 | def prune_layer(df_layer, level, scores, start_level=3, prune=True, trim_factor=.75, use_pandas=False): 40 | if start_level > level: 41 | df = df_layer[['prob']].sort_values('prob', ascending=False)\ 42 | .groupby('eid_1')\ 43 | .head(1).reset_index() 44 | 45 | return df.set_index('eid_1') 46 | 47 | # prune -> pick the most likely one 48 | 49 | if use_pandas: 50 | _max_prob = df_layer['prob'].max() 51 | df = df_layer[['prob']].sort_values('prob', ascending=False)\ 52 | .head(100 if prune else 5)\ 53 | .query(f"prob > {_max_prob * trim_factor}")\ 54 | .groupby('eid_1')\ 55 | .head(1).reset_index() 56 | else: 57 | ps = df_layer.apply(lambda x: scores[x.name[1]] * x.prob, axis=1) 58 | prob_thred = ps.max() * trim_factor 59 | arrs = defaultdict(list) 60 | for row in df_layer[['prob']].itertuples(): 61 | idx, prob = getattr(row, "Index"), getattr(row, "prob") 62 | if prob < prob_thred: 63 | continue 64 | heapq.heappush(arrs[idx[2]], (-prob, idx)) 65 | 66 | records = merge_k_heapq(arrs, 100 if prune else 5) 67 | df = pd.DataFrame(records, columns=['pid_0', 'eid_0', 'eid_1', 'prob']) 68 | 69 | return df.set_index('eid_1') 70 | 71 | def reconstruct_path(f_score, prev_path): 72 | epath = [] 73 | state = None 74 | end_probs = [] 75 | 76 | for idx in range(len(f_score) - 1, 0, -1): 77 | if state is None: 78 | state = get_max_state(f_score, idx) 79 | if state is None: 80 | continue 81 | end_probs.append(f_score[idx][state]) 82 | 83 | cur = (idx, state) 84 | if idx not in prev_path or state not in prev_path[idx]: 85 | state = None 86 | continue 87 | prev = prev_path[idx].get(state) 88 | if not epath or cur != epath[-1]: 89 | epath.append(cur) 90 | epath.append(prev) 91 | state = prev[1] 92 | 93 | epath = epath[::-1] 94 | 95 | return epath, sum(end_probs) / len(end_probs) 96 | 97 | def get_max_state(f_score, idx): 98 | f = f_score[idx] 99 | if len(f) == 0: 100 | return None 101 | return max(f, key=f.get) 102 | 103 | def print_level(df_layer): 104 | f = lambda x: sorted(df_layer.index.get_level_values(x).unique()) 105 | return f"{f(1)} -> {f(2)}" 106 | 107 | def find_matched_sequence(cands, gt, net, dir_trans=True, mode='*', prune_factor=0.75, prune_start_layer=3, level='trace'): 108 | # Initialize 109 | times = [] 110 | timer = Timer() 111 | 112 | gt_beam = [] 113 | layer_ids = np.sort(cands.pid.unique()) 114 | start_prob = cands.query("pid == 0").set_index('eid')['observ_prob'].to_dict() 115 | f_score = [start_prob] 116 | prev_path = defaultdict(dict) 117 | prev_path[0] = {st: None for st in start_prob} 118 | prev_states = list(start_prob.keys()) 119 | 120 | for idx, lvl in enumerate(layer_ids[:-1]): 121 | df_layer = gt.query(f"pid_0 == @lvl and eid_0 in @prev_states") 122 | if df_layer.empty: 123 | print(f"Matching traj break at idx: 
{idx}, level: {lvl}")
124 |             df_layer = gt.query(f"pid_0 == @lvl")
125 |             prev_probs = 0 if mode == '+' else 1
126 |         else:
127 |             prev_probs = np.array(
128 |                 [f_score[-1][i] for i in df_layer.index.get_level_values(1)])
129 | 
130 |         # timer.start()
131 |         df_layer = get_trans_prob_bet_layers(df_layer, net, dir_trans)
132 |         # times.append(timer.stop())
133 |         df_layer.loc[:, 'prob'] = cal_prob_func(prev_probs, df_layer.trans_prob * df_layer.observ_prob, mode)
134 |         _df = prune_layer(df_layer, idx, f_score[-1], prune_start_layer, prune_factor)
135 | 
136 |         # post-process
137 |         for name, item in _df.iterrows():
138 |             prev_path[idx + 1][name] = (idx, int(item.eid_0))
139 |         prev_states = list(_df.index.unique())
140 |         f_score.append(_df['prob'].to_dict())
141 |         gt_beam.append(df_layer)
142 | 
143 |     # epath
144 |     epath, end_prob = reconstruct_path(f_score, prev_path)
145 |     epath = [(layer_ids[idx], eid) for idx, eid in epath]
146 |     rList = cands.set_index(['pid', 'eid'])\
147 |                  .loc[epath, ['src', 'dst']].reset_index()
148 | 
149 |     gt_beam = pd.concat(gt_beam)
150 |     ratio = gt_beam.shape[0] / gt.shape[0]
151 |     _log = f"Route planning time cost: {np.sum(times):.3f} s, trim ratio: {(1 - ratio) * 100:.1f} %"
152 |     getattr(logger, level)(_log)
153 | 
154 |     return end_prob, rList, gt_beam
155 | 
156 | 
157 | """ normal """
158 | def viterbi_decode(nodes, trans):
159 |     """
160 |     Find the optimal path with the Viterbi algorithm,
161 |     where nodes.shape=[seq_len, num_labels],
162 |     and trans.shape=[num_labels, num_labels].
163 |     """
164 |     # the length of the input sequence and the number of labels
165 |     seq_len, num_labels = len(nodes), len(trans)
166 |     # for simplicity, ignore the emission prob for now and use the scores at time 0 directly
167 |     scores = nodes[0].reshape((-1, 1))  # (num_labels, 1)
168 | 
169 |     paths = []
170 |     # recursively solve for the optimum from time t-1 to time t
171 |     for t in range(1, seq_len):
172 |         # scores holds the best score of each label from time 0 up to t-1
173 |         scores_repeat = np.repeat(scores, num_labels, axis=1)  # (num_labels, num_labels)
174 | 
175 |         # the observation score of each label at time t
176 |         observe = nodes[t].reshape((1, -1))  # (1, num_labels)
177 |         observe_repeat = np.repeat(observe, num_labels, axis=0)  # (num_labels, num_labels)
178 | 
179 |         # compute the best scores from time t-1 to t; the transition scores `trans` are taken into account here
180 |         M = scores_repeat + trans + observe_repeat
181 | 
182 |         # find the best paths up to time t
183 |         scores = np.max(M, axis=0).reshape((-1, 1))
184 |         idxs = np.argmax(M, axis=0)
185 | 
186 |         # save the paths
187 |         paths.append(idxs.tolist())
188 | 
189 |     best_path = [0] * seq_len
190 |     best_path[-1] = np.argmax(scores)
191 | 
192 |     # backtrack the optimal path
193 |     for i in range(seq_len-2, -1, -1):
194 |         idx = best_path[i+1]
195 |         best_path[i] = paths[i][idx]
196 |     return best_path
197 | def get_trans_prob(trans_prob, layer_id):
198 |     return trans_prob[layer_id]
199 | 
200 | def decode(observations, states, start_prob, trans_prob, emit_prob, mode='+'):
201 |     def _formula(x, y):
202 |         if mode == '+':
203 |             return x + y
204 |         elif mode == '*':
205 |             return x * y
206 | 
207 |     V = [{}]
208 |     path = {}
209 | 
210 |     # Initialize
211 |     for st in states:
212 |         if st not in start_prob:
213 |             continue
214 |         V[0][st] = start_prob[st]
215 |         path[st] = [(observations[0], st)]
216 | 
217 |     # Run Viterbi when t > 0
218 |     for t in range(1, len(observations)):
219 |         V.append({})
220 |         newpath = {}
221 | 
222 |         for curr_st in states:
223 |             paths_to_curr_st = []
224 |             for prev_st in V[t-1]:
225 |                 _trans_prob = get_trans_prob(trans_prob, t-1)
226 |                 if (prev_st, curr_st) not in _trans_prob:
227 |                     continue
228 | 
229 |                 v = V[t-1][prev_st]
230 |                 _v = _trans_prob[(prev_st, curr_st)]
231 |                 _e = emit_prob[curr_st][observations[t]]
232 |                 paths_to_curr_st.append(( _formula(v, _v * _e), prev_st))
233 | 
234 |             if not paths_to_curr_st:
235 |                 continue
236 | 
237 |             cur_prob, prev_state = max(paths_to_curr_st)
238 |             V[t][curr_st] = cur_prob
239 |             newpath[curr_st] = path[prev_state] + [(observations[t], curr_st)]
240 | 
241 |         # No need to keep the old paths
242 |         path = newpath
243 | 
244 |     prob, end_state = max([(V[-1][st], st) for st in states if st in V[-1]])
245 | 
246 |     return prob, path[end_state]
247 | 
248 | def prepare_viterbi_input(cands, gt):
249 |     states = cands.eid.unique()
250 |     observations = cands.pid.unique()
251 |     start_prob = cands.query("pid == 0").set_index('eid')['observ_prob'].to_dict()
252 |     # start_prob = {key:1 for key in start_prob}
253 | 
254 |     observ_dict = cands[['pid', 'eid', 'observ_prob']].set_index(['eid'])
255 |     emit_prob = {i: observ_dict.loc[[i]].set_index('pid')['observ_prob'].to_dict() for i in states}
256 | 
257 |     # BUG: the candidate pids are not continuous; the trajectory may break in the middle
258 |     trans_prob = [gt.loc[i]['f'].to_dict() for i in observations[:-1] ]
259 | 
260 |     return states, observations, start_prob, trans_prob, emit_prob
261 | 
262 | def process_viterbi_pipeline(cands, gt):
263 |     states, observations, start_prob, trans_prob, emit_prob = prepare_viterbi_input(cands, gt)
264 |     prob, rList = decode(observations, states, start_prob, trans_prob, emit_prob)
265 | 
266 |     rList = cands.set_index(['pid', 'eid']).loc[rList][[ 'src', 'dst']].reset_index()
267 | 
268 |     return prob, rList
269 | 
270 | 
271 | if __name__ == "__main__":
272 |     import sys
273 |     sys.path.append('../')
274 |     from utils.serialization import load_checkpoint
275 | 
276 |     fn = "../debug/traj_0_data_for_viterbi.pkl"
277 |     fn = "../../debug/traj_1_data_for_viterbi.pkl"
278 |     # fn = Path(__file__).parent / fn
279 |     data = load_checkpoint(fn)
280 | 
281 |     cands = data['cands']
282 |     gt = data['graph']
283 |     rList = data['rList']
284 | 
285 |     res = process_viterbi_pipeline(cands, gt)
286 | 
--------------------------------------------------------------------------------
/mapmatching/osmnet/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/mapmatching/osmnet/build_graph.py:
--------------------------------------------------------------------------------
1 | import os
2 | from ..graph import GeoDigraph
3 | from ..osmnet.downloader import download_osm_xml
4 | from ..osmnet.parse_osm_xml import parse_xml_to_graph
5 | from ..setting import DATA_FOLDER
6 | 
7 | 
8 | def load_geograph(ckpt, ll):
9 |     graph = GeoDigraph()
10 |     graph.load_checkpoint(ckpt)
11 |     graph.init_searcher()
12 | 
13 |     if not ll:
14 |         graph.to_proj()
15 | 
16 |     return graph
17 | 
18 | 
19 | def build_geograph(xml_fn:str=None, bbox:list=None, ckpt:str=None,
20 |                    ll=False, *args, **kwargs):
21 |     """Build a geograph from one of three sources: 1) xml_fn, 2) bbox, 3) ckpt.
22 |     The priority is: ckpt > xml_fn > bbox.
23 | 
24 |     Args:
25 |         xml_fn (str, optional): Local OSM network file path. When `xml_fn` does not exist
26 |             and `bbox` is configured, the OSM file is downloaded and saved at that location. Defaults to None.
27 |         bbox (list, optional): Download the OSM network within the bounding box. Defaults to None.
28 |         ckpt (str, optional): Checkpoint. Defaults to None.
29 |         ll (bool, optional): Use the lon/lat coordinate system. Defaults to False.
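    Note:
        `ckpt` short-circuits the other inputs; with `xml_fn` alone, the file
        must already exist locally, otherwise `bbox` is required so the OSM
        XML can be downloaded first.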
30 | 
31 |     Returns:
32 |         GeoDigraph: graph
33 |     """
34 |     assert xml_fn is not None or bbox is not None or ckpt is not None
35 |     if ckpt:
36 |         return load_geograph(ckpt, ll)
37 | 
38 |     if not os.path.exists(xml_fn):
39 |         assert bbox is not None, \
40 |             "The local OSM file does not exist; please configure `bbox` to download it"
41 |         download_osm_xml(xml_fn, bbox, False)
42 | 
43 |     df_nodes, df_edges, df_ways = parse_xml_to_graph(xml_fn, *args, **kwargs)
44 | 
45 |     graph = GeoDigraph(df_edges, df_nodes, ll=ll)
46 |     if not ll:
47 |         graph.to_proj()
48 | 
49 |     return graph
50 | 
51 | if __name__ == "__main__":
52 |     # new graph
53 |     name = 'GBA'
54 |     graph = build_geograph(xml_fn = f"../../data/network/{name}.osm.xml")
55 |     graph.save_checkpoint(f'../../data/network/{name}_graph_pygeos.ckpt')
56 | 
57 |     # load ckpt
58 |     graph = build_geograph(ckpt=f'../../data/network/{name}_graph_pygeos.ckpt')
59 | 
60 |     # check
61 |     path = graph.search(src=7959990710, dst=499265789)
62 |     graph.get_edge(path['epath']).plot()
63 | 
64 |     # save to DB
65 |     # graph.to_postgis(name)
66 | 
67 | 
--------------------------------------------------------------------------------
/mapmatching/osmnet/combine_edges.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | 
4 | from ..utils.interval_helper import merge_intervals
5 | from ..utils.parallel_helper import parallel_process
6 | 
7 | 
8 | def calculate_degree(df_edges):
9 |     indegree = df_edges.groupby('dst').agg({'order': 'count'}).rename(columns={'order': 'indegree'})
10 |     outdegree = df_edges.groupby('src').agg({'order': 'count'}).rename(columns={'order': 'outdegree'})
11 | 
12 |     return pd.concat([indegree, outdegree], axis=1).fillna(0).astype(int)
13 | 
14 | def get_aux_points(df_edges, exclude_list=None):
15 |     degree = calculate_degree(df_edges)
16 | 
17 |     aux_node_lst = degree.query( "indegree == 1 and outdegree == 1" ).index.unique()
18 |     if exclude_list is not None:
19 |         aux_node_lst = [id for id in aux_node_lst if id not in exclude_list]
20 | 
21 |     return aux_node_lst
22 | 
23 | def combine_links(edges, combine_intervals):
24 |     """Combine the consecutive OSM links of one way.
25 | 
26 |     Args:
27 |         edges (gpd.GeoDataFrame): All the links of one `way_id`, sorted by `order`.
28 |         combine_intervals (list): The [start, end, ...] order-intervals to merge; each interval covers
29 |             nodes that have exactly 1 indegree and 1 outdegree and are not traffic_signals points.
30 | 
31 | 
32 |     Returns:
33 |         gpd.GeoDataFrame: The links after combination.
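    Example (an illustrative sketch; the third slot of each interval is ignored by this function):
        >>> # merge the consecutive segments with order 1..3 into a single edge
        >>> edges = combine_links(edges, combine_intervals=[[1, 3, None]])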
34 |     """
35 |     if len(combine_intervals) == 0:
36 |         return edges
37 | 
38 |     if 'order' in edges.columns:
39 |         edges.set_index('order', inplace=True)
40 | 
41 |     drop_index = []
42 | 
43 |     # FIXME: interval boundaries
44 |     for start, end, _ in combine_intervals:
45 |         segs = edges.query(f"{start} <= order <= {end}")
46 |         _dst = segs.iloc[-1]['dst']
47 |         nids = np.append(segs.src.values, _dst).tolist()
48 | 
49 |         edges.loc[start, 'dst'] = _dst
50 |         edges.loc[start, 'dist'] = segs.dist.sum()
51 |         edges.loc[start, "waypoints"] = str(nids)
52 | 
53 |         drop_index += [i for i in range(start+1, end+1)]
54 | 
55 |     edges.drop(index=drop_index, inplace=True)
56 |     edges.reset_index(inplace=True)
57 |     edges.loc[:, "waypoints"] = edges.loc[:, "waypoints"].apply(lambda x: eval(x) if isinstance(x, str) else x)
58 | 
59 |     return edges
60 | 
61 | def pipeline_combine_links(df_edges:pd.DataFrame, exclude_list, n_jobs=8):
62 |     # BUG multi_edges
63 |     aux_nids = get_aux_points(df_edges, exclude_list=exclude_list)
64 | 
65 |     cands_edges = df_edges.query("src in @aux_nids").sort_values(by=['way_id', 'order'])
66 |     cands_way_ids = cands_edges.way_id.unique().tolist()
67 |     aux_edge_intervals = cands_edges.groupby('way_id')\
68 |         .order.apply(list)\
69 |         .apply(lambda lst: merge_intervals([[i-1, i] for i in lst if i > 0]))
70 | 
71 |     # parallel processing; cands_edges cannot be used here, because the merge involves the upstream and downstream edges
72 |     _df_edges = df_edges.query(f"way_id in @cands_way_ids")
73 |     params = ((df, aux_edge_intervals[i])
74 |               for i, df in _df_edges.groupby('way_id'))
75 |     combined_edges = parallel_process(combine_links, params, pbar_switch=True,
76 |                                       n_jobs=n_jobs, total=len(cands_way_ids), desc='Combine edges')
77 | 
78 |     # keep edges
79 |     keep_edges = df_edges.query(f"way_id not in @cands_way_ids")
80 | 
81 |     # combine
82 |     df_edges_ = pd.concat(combined_edges + [keep_edges]).sort_values(['way_id', 'order']).reset_index(drop=True)
83 | 
84 |     return df_edges_
85 | 
--------------------------------------------------------------------------------
/mapmatching/osmnet/downloader.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pathlib import Path
3 | 
4 | 
5 | def download_osm_xml(fn, bbox, verbose=False):
6 |     """Download the OSM map within `bbox` from the Internet.
7 | 
8 |     Args:
9 |         fn (str | Path): The output file path.
10 |         bbox (list): The bounding box, (min_lon, min_lat, max_lon, max_lat).
11 |         verbose (bool, optional): Print download progress. Defaults to False.
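    Example:
        >>> # bbox follows the Overpass convention described above
        >>> download_osm_xml('./cache/Futian.osm.xml',
        ...                  [114.03814, 22.51675, 114.06963, 22.56533])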
12 | """ 13 | if type(fn) == str: 14 | fn = Path(fn) 15 | 16 | if fn.exists(): 17 | return True 18 | 19 | fn.parent.mkdir(parents=True, exist_ok=True) 20 | 21 | if verbose: 22 | print("Downloading {}".format(fn)) 23 | 24 | if isinstance(bbox, list) or isinstance(bbox, np.array): 25 | bbox = ",".join(map(str, bbox)) 26 | 27 | try: 28 | import requests 29 | # https://dev.overpass-api.de/overpass-doc/en/index.html 30 | # 通过参数控制的 API 可参考 https://github.com/categulario/map_matching/blob/master/mapmatching/overpass/streets.overpassql 31 | url = f'http://overpass-api.de/api/map?bbox={bbox}' 32 | 33 | print(f"url: {url}") 34 | r = requests.get(url, stream=True) 35 | with open(fn, 'wb') as ofile: 36 | for chunk in r.iter_content(chunk_size=1024): 37 | if chunk: 38 | ofile.write(chunk) 39 | 40 | if verbose: 41 | print("Downloaded success.\n") 42 | 43 | return True 44 | except: 45 | return False 46 | 47 | 48 | if __name__ == "__main__": 49 | import sys 50 | sys.path.append('..') 51 | from setting import GBA_BBOX, SZ_BBOX 52 | 53 | download_osm_xml('/home/pcl/minio/geo_data/Shenzhen.osm.xml', SZ_BBOX) 54 | download_osm_xml('../../cache/Futian.osm.xml', [114.03814, 22.51675, 114.06963, 22.56533]) 55 | download_osm_xml('../../cache/GBA.osm.xml', GBA_BBOX) 56 | 57 | proj_name = "GaoxinParkMiddle" 58 | GaoxinParkMiddle_BBOX = [113.92517, 22.54057, 113.95619, 22.55917] 59 | download_osm_xml(f'../../cache/{proj_name}.osm.xml', GaoxinParkMiddle_BBOX) 60 | 61 | -------------------------------------------------------------------------------- /mapmatching/osmnet/misc.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import geopandas as gpd 3 | from shapely.geometry import LineString 4 | from haversine import haversine, Unit 5 | 6 | 7 | class Bunch(dict): 8 | """A dict with attribute-access""" 9 | 10 | def __getattr__(self, key): 11 | try: 12 | return self.__getitem__(key) 13 | except KeyError: 14 | raise AttributeError(key) 15 | 16 | def __dir__(self): 17 | return self.keys() 18 | 19 | 20 | 21 | def cal_od_straight_distance(df_edges, df_nodes, od=['src', 'dst']): 22 | dist = df_edges.merge(df_nodes[['x', 'y']], left_on=od[0], right_index=True, suffixes=('_0', '_1'))\ 23 | .merge(df_nodes[['x', 'y']], left_on=od[1], right_index=True, suffixes=('_0', '_1'))\ 24 | .apply(lambda x: haversine((x.y_0, x.x_0), (x.y_1, x.x_1), unit=Unit.METERS), axis=1) 25 | 26 | return dist 27 | 28 | def points_2_polyline(df_nodes:gpd.GeoDataFrame, points:list): 29 | coords = [] 30 | for p in points: 31 | item = df_nodes.loc[p] 32 | coords.append(item.geometry.coords[0]) 33 | 34 | return LineString(coords) 35 | 36 | 37 | def get_geom_length(geoms, from_crs='epsg:4326', to_crs='epsg:900913'): 38 | assert isinstance(geoms, (pd.Series, gpd.GeoSeries)) 39 | 40 | if geoms.name != 'geometry': 41 | geoms.name = 'geometry' 42 | lines = gpd.GeoDataFrame(geoms, crs=from_crs) 43 | 44 | return lines.to_crs(to_crs).length 45 | -------------------------------------------------------------------------------- /mapmatching/osmnet/osm_io.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | 4 | def load_graph(fn): 5 | with open(fn, 'rb') as f: 6 | graph = pickle.load(f) 7 | 8 | return graph['df_nodes'], graph['df_edges'], graph['df_ways'] 9 | 10 | 11 | def save_graph(df_nodes, df_edges, df_ways, fn): 12 | graph = { 13 | 'df_nodes': df_nodes, 14 | 'df_edges': df_edges, 15 | 'df_ways': df_ways 16 | } 17 | 18 | with open(fn, 'wb') as f: 
--------------------------------------------------------------------------------
/mapmatching/osmnet/twoway_edge.py:
--------------------------------------------------------------------------------
1 | import shapely
2 | import numpy as np
3 | import pandas as pd
4 | from loguru import logger
5 | from shapely.geometry import LineString
6 | 
7 | 
8 | def swap_od(df_edge_rev: pd.DataFrame, od_attrs=['src', 'dst']):
9 |     if df_edge_rev.empty:
10 |         return df_edge_rev
11 | 
12 |     df_edge_rev.loc[:, 'dir'] = -1
13 |     df_edge_rev.loc[:, 'order'] = -df_edge_rev.order - 1
14 |     df_edge_rev.loc[:, 'waypoints'] = df_edge_rev.waypoints.apply(lambda x: x[::-1])
15 |     df_edge_rev.rename(columns={od_attrs[0]: od_attrs[1], od_attrs[1]: od_attrs[0]}, inplace=True)
16 |     if 'geometry' in list(df_edge_rev):
17 |         df_edge_rev.loc[:, 'geometry'] = df_edge_rev.geometry.apply(lambda x: LineString(x.coords[::-1]))
18 | 
19 |     return df_edge_rev
20 | 
21 | 
22 | def add_reverse_edge(df_edges, df_ways, od_attrs=['src', 'dst'], offset=True):
23 |     """Add a reverse edge for every edge of a two-way road.
24 | 
25 |     Args:
26 |         df_edges (gpd.GeoDataFrame): The edge file parsed from OSM.
27 |     Check:
28 |         rid = 34900355
29 |         net.df_edges.query( f"rid == {rid} or rid == -{rid}" ).sort_values(['order','rid'])
30 |     """
31 |     assert 'oneway' in df_ways.columns, "Check `oneway` tag"
32 |     df_edges.loc[:, 'dir'] = 1
33 | 
34 |     idxs = df_ways.query('oneway == False').index
35 |     df_edge_rev = df_edges.query("way_id in @idxs")
36 | 
37 |     has_geom = 'geometry' in list(df_edges)
38 |     if has_geom:
39 |         ring_mask = df_edge_rev.geometry.apply(lambda x: x.is_ring)
40 |         df_edge_rev = df_edge_rev[~ring_mask]
41 | 
42 |     df_edge_rev = swap_od(df_edge_rev, od_attrs)
43 | 
44 |     df_edges = pd.concat([df_edges, df_edge_rev]).reset_index(drop=True)
45 | 
46 |     if offset:
47 |         df_edges = edge_offset(df_edges)
48 | 
49 |     return df_edges
50 | 
51 | 
52 | def edge_offset(df_edges):
53 |     way_ids = df_edges.query("dir == -1").way_id.unique()
54 |     _df_edges = df_edges.query("way_id in @way_ids")
55 | 
56 |     _df_edges.loc[:, 'geom_origin'] = _df_edges.geometry.copy()
57 |     # df_edge.loc[:, 'geom_origin'] = df_edge.geometry.apply(lambda x: x.to_wkt())
58 |     geom_offset = _df_edges.apply(lambda x: parallel_offset_edge(x), axis=1)
59 |     _df_edges.loc[geom_offset.index, 'geometry'] = geom_offset
60 | 
61 |     df_edges = pd.concat([df_edges.query("way_id not in @way_ids"), _df_edges])
62 | 
63 |     return df_edges
64 | 
65 | 
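# What `swap_od` (above) produces on a single reverse candidate (a minimal
# sketch on a toy row): endpoints and waypoints are flipped, `order` is negated
# with an offset of -1, and `dir` is set to -1.
#
# >>> import pandas as pd
# >>> rev = pd.DataFrame({'src': [10], 'dst': [11], 'order': [0], 'waypoints': [[10, 11]]})
# >>> swap_od(rev)[['src', 'dst', 'order', 'waypoints', 'dir']]
#    src  dst  order waypoints  dir
# 0   11   10     -1  [11, 10]   -1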
77 | """ 78 | if 'geometry' not in record: 79 | return None 80 | geom = record.geometry 81 | 82 | if len(geom.coords) <= 1: 83 | if logger is not None: 84 | logger.warning(f"{geom}: the length of it is less than 1.") 85 | return geom 86 | 87 | if geom.is_ring: 88 | return geom 89 | 90 | def _cal_dxdy(p0, p1, scale = 15): 91 | return ((p1[0]-p0[0])/scale, (p1[1]-p0[1])/scale) 92 | 93 | def _point_offset(p, dxdy, add=True): 94 | if add: 95 | return (p[0]+dxdy[0], p[1]+dxdy[1]) 96 | 97 | return (p[0]-dxdy[0], p[1]-dxdy[1]) 98 | 99 | try: 100 | # shapely 2.0 以上,`[::-1]` 需删除 101 | offset_coords = geom.parallel_offset(distance, side='right').coords 102 | if int(shapely.__version__.split('.')[0]) < 2: 103 | offset_coords = offset_coords[::-1] 104 | 105 | ori_s, ori_e = geom.coords[0], geom.coords[-1] 106 | dxdy_s = _cal_dxdy(*geom.coords[:2]) 107 | dxdy_e = _cal_dxdy(*geom.coords[-2:]) 108 | turing_s = _point_offset(offset_coords[0], dxdy_s, add=True ) 109 | turing_e = _point_offset(offset_coords[-1], dxdy_e, add=False ) 110 | 111 | coords = [ori_s] + [turing_s] + offset_coords[1:-1] + [turing_e] + [ori_e] 112 | coords = np.round(coords, 7) 113 | geom_new = LineString(coords) 114 | 115 | if logger is not None: 116 | logger.info(f"{len(geom.coords)},{len(geom_new.coords)}\n{geom}\n{geom_new}") 117 | 118 | return geom_new 119 | except: 120 | if logger is not None: 121 | logger.error(f"{record.name}, geom: {geom}, offset error") 122 | 123 | return geom 124 | -------------------------------------------------------------------------------- /mapmatching/setting.py: -------------------------------------------------------------------------------- 1 | """ Global config """ 2 | from pathlib import Path 3 | 4 | IP = "192.168.135.16" 5 | postgre_url= f"postgresql://postgres:pcl_A5A@{IP}:5432/gis" 6 | 7 | root = Path(__file__).parent 8 | DEBUG_FOLDER = root / "../debug" 9 | LOG_FOLDER = root / "../log" 10 | DATA_FOLDER = root / "../data" 11 | 12 | DIS_FACTOR = 1/110/1000 13 | 14 | GBA_BBOX = [111.35669933, 21.56670092, 115.41989933, 24.39190092] 15 | SZ_BBOX = [113.746280, 22.441466, 114.623972, 22.864722] 16 | PCL_BBOX = [113.930914, 22.570536, 113.945456, 22.585613] 17 | FT_BBOX = [114.05097, 22.53447, 114.05863, 22.54605] 18 | 19 | 20 | """ road_type_filter """ 21 | # Note: we adopt the filter logic from osmnx (https://github.com/gboeing/osmnx) 22 | # exclude links with tag attributes in the filters 23 | filters = {} 24 | 25 | 26 | # 道路含义:'service':通往设施的道路 27 | filters['auto'] = {'area':['yes'], 28 | 'highway':['cycleway','footway','path','pedestrian','steps','track','corridor','elevator','escalator', 29 | 'proposed','construction','bridleway','abandoned','platform','raceway'], 30 | 'motor_vehicle':['no'], 31 | 'motorcar':['no'], 32 | 'access':['private'], 33 | 'service':['parking','parking_aisle','driveway','private','emergency_access'] 34 | } 35 | 36 | filters['bike'] = {'area':['yes'], 37 | 'highway':['footway','steps','corridor','elevator','escalator','motor','proposed','construction','abandoned','platform','raceway'], 38 | 'bicycle':['no'], 39 | 'service':['private'], 40 | 'access':['private'] 41 | } 42 | 43 | filters['walk'] = {'area':['yes'], 44 | 'highway':['cycleway','motor','proposed','construction','abandoned','platform','raceway'], 45 | 'foot':['no'], 46 | 'service':['private'], 47 | 'access':['private'] 48 | } 49 | 50 | highway_filters = filters['auto']['highway'] 51 | 52 | osm_highway_type_dict = {'motorway': ('motorway', False), 53 | 'motorway_link': ('motorway', True), 54 | 'trunk': 
52 | osm_highway_type_dict = {'motorway': ('motorway', False),
53 |                          'motorway_link': ('motorway', True),
54 |                          'trunk': ('trunk', False),
55 |                          'trunk_link': ('trunk', True),
56 |                          'primary': ('primary', False),
57 |                          'primary_link': ('primary', True),
58 |                          'secondary': ('secondary', False),
59 |                          'secondary_link': ('secondary', True),
60 |                          'tertiary': ('tertiary', False),
61 |                          'tertiary_link': ('tertiary', True),
62 |                          'residential': ('residential', False),
63 |                          'residential_link': ('residential', True),
64 |                          'service': ('service', False),
65 |                          'services': ('service', False),
66 |                          'cycleway': ('cycleway', False),
67 |                          'footway': ('footway', False),
68 |                          'pedestrian': ('footway', False),
69 |                          'steps': ('footway', False),
70 |                          'track': ('track', False),
71 |                          'unclassified': ('unclassified', False)}
72 | 
73 | link_type_level_dict = {'motorway': 1, 'trunk': 2, 'primary': 3, 'secondary': 4, 'tertiary': 5, 'residential': 6, 'service': 7,
74 |                         'cycleway': 8, 'footway': 9, 'track': 10, 'unclassified': 11, 'connector': 20, 'railway': 30, 'aeroway': 31}
75 | 
76 | default_lanes_dict = {'motorway': 4, 'trunk': 3, 'primary': 3, 'secondary': 2, 'tertiary': 2, 'residential': 1, 'service': 1,
77 |                       'cycleway': 1, 'footway': 1, 'track': 1, 'unclassified': 1, 'connector': 2}
78 | 
79 | 
80 | link_speed_reduction_rate = .6
81 | 
82 | congestion_index = 1
83 | # default speeds are given in km/h and converted to m/s below
84 | default_speed = 30 / congestion_index / 3.6
85 | default_speed_dict = {'motorway': 120, 'trunk': 100, 'primary': 80, 'secondary': 60, 'tertiary': 40, 'residential': 30, 'service': 30,
86 |                       'cycleway': 5, 'footway': 5, 'track': 30, 'unclassified': 30, 'connector': 120}
87 | default_speed_dict = {k: v / congestion_index / 3.6 for k, v in default_speed_dict.items()}
88 | 
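# A quick check of the conversion above (a sketch): with congestion_index == 1,
# 'motorway' maps from 120 km/h to 120 / 3.6 ≈ 33.33 m/s.
#
# >>> round(default_speed_dict['motorway'], 2)
# 33.33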
logger.info(f"add {idx}") 62 | else: 63 | logger.info(f"{idx} exist") 64 | 65 | return flag 66 | 67 | cand_steps = steps.query(f'trans_prob < {prob_thred}') 68 | for i, item in cand_steps.iterrows(): 69 | flag |= check_each_step(matcher, traj, i, factor) 70 | 71 | return flag 72 | -------------------------------------------------------------------------------- /mapmatching/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .timer import Timer, timeit 2 | -------------------------------------------------------------------------------- /mapmatching/utils/db.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | from sqlalchemy import create_engine 3 | from ..setting import postgre_url 4 | 5 | ENGINE = create_engine(postgre_url) 6 | 7 | def gdf_to_postgis(gdf, name, engine=ENGINE, if_exists='replace', *args, **kwargs): 8 | """Save the GeoDataFrame to the db 9 | 10 | Args: 11 | gdf ([type]): [description] 12 | name ([type]): [description] 13 | engine ([type], optional): [description]. Defaults to ENGINE. 14 | if_exists (str, optional): [description]. Defaults to 'replace'. if_exists{‘fail’, ‘replace’, ‘append’} 15 | 16 | Returns: 17 | [type]: [description] 18 | """ 19 | gdf.to_postgis(name=name, con=engine, if_exists=if_exists) 20 | 21 | 22 | def gdf_to_geojson(gdf, fn): 23 | if not isinstance(gdf, gpd.GeoDataFrame): 24 | print('Check the format of the gdf.') 25 | return False 26 | 27 | if 'geojson' not in fn: 28 | fn = f'{fn}.geojson' 29 | 30 | gdf.to_file(fn, driver="GeoJSON") 31 | 32 | return 33 | 34 | -------------------------------------------------------------------------------- /mapmatching/utils/img.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def merge_np_imgs(arrays, n_row, n_col): 4 | """ 5 | Merge a set of tiles into a single array. 6 | 7 | Parameters 8 | --------- 9 | tiles : list of mercantile.Tile objects 10 | The tiles to merge. 11 | arrays : list of numpy arrays 12 | The corresponding arrays (image pixels) of the tiles. This list 13 | has the same length and order as the `tiles` argument. 14 | 15 | Returns 16 | ------- 17 | img : np.ndarray 18 | Merged arrays. 19 | extent : tuple 20 | Bounding box [west, south, east, north] of the returned image 21 | in long/lat. 
22 | """ 23 | # get indices starting at zero 24 | indices = [] 25 | for r in range(n_row): 26 | for c in range(n_col): 27 | indices.append((r, c)) 28 | 29 | # the shape of individual tile images 30 | h, w, d = arrays[0].shape 31 | h = max([i.shape[0] for i in arrays]) 32 | w = max([i.shape[1] for i in arrays]) 33 | 34 | # empty merged tiles array to be filled in 35 | img = np.ones((h * n_row, w * n_col, d), dtype=np.uint8) * 255 36 | 37 | for ind, arr in zip(indices, arrays): 38 | y, x = ind 39 | _h, _w, _ = arr.shape 40 | ori_x = x * w + (w - _w) // 2 41 | ori_y = y * h + (h - _h) // 2 42 | img[ori_y : ori_y + _h, ori_x : ori_x + _w, :] = arr 43 | 44 | return img 45 | 46 | -------------------------------------------------------------------------------- /mapmatching/utils/interval_helper.py: -------------------------------------------------------------------------------- 1 | 2 | def merge_intervals(intervals): 3 | res = [] 4 | for i in intervals: 5 | merge_intervals_helper(res, i[0], i[1]) 6 | 7 | return res 8 | 9 | 10 | def merge_intervals_helper(intervals, start, end, height=None): 11 | """merge intervals 12 | 13 | Args: 14 | intervals ([type]): [description] 15 | start ([type]): [description] 16 | end ([type]): [description] 17 | height ([type], optional): [description]. Defaults to None. 18 | """ 19 | if start is None or height ==0 or start == end: 20 | return 21 | 22 | if not intervals: 23 | intervals.append( [start, end, height] ) 24 | return 25 | 26 | _, prev_end, prev_height = intervals[-1] 27 | if prev_height == height and prev_end == start: 28 | intervals[-1][1] = end 29 | 30 | return 31 | intervals.append([start, end, height]) 32 | 33 | 34 | def insert_intervals(intervals, newInterval): 35 | res = [] 36 | insertPos = 0 37 | newInterval = newInterval.copy() 38 | for interval in intervals: 39 | if interval[1] < newInterval[0]: 40 | res.append(interval) 41 | insertPos += 1 42 | elif interval[0] > newInterval[1]: 43 | res.append(interval) 44 | else: 45 | newInterval[0] = min(interval[0], newInterval[0]) 46 | newInterval[1] = max(interval[1], newInterval[1]) 47 | newInterval[2] = interval[2] 48 | 49 | res.insert(insertPos, newInterval) 50 | 51 | return res 52 | 53 | -------------------------------------------------------------------------------- /mapmatching/utils/log_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import logbook 5 | from logbook import Logger, TimedRotatingFileHandler 6 | from logbook.more import ColorizedStderrHandler 7 | 8 | FILE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | BASE_DIR = os.path.join(FILE_DIR, '../../log/') 10 | logbook.set_datetime_format('local') 11 | 12 | 13 | def log_type(record, handler): 14 | log_info = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format( 15 | date=record.time, # 日志时间 16 | level=record.level_name, # 日志等级 17 | filename=os.path.split(record.filename)[-1], # 文件名 18 | func_name=record.func_name, # 函数名 19 | lineno=record.lineno, # 行号 20 | msg=record.message # 日志内容 21 | ) 22 | 23 | return log_info 24 | 25 | 26 | class LogHelper(object): 27 | def __init__(self, log_dir=BASE_DIR, log_name='log.log', backup_count=10, log_type=log_type, stdOutFlag=False): 28 | if not os.path.exists(log_dir): 29 | os.mkdir(log_dir) 30 | 31 | self.log_dir = log_dir 32 | self.backup_count = backup_count 33 | 34 | handler = TimedRotatingFileHandler(filename= os.path.join(self.log_dir, log_name), 35 | date_format='%Y-%m-%d', 36 | 
--------------------------------------------------------------------------------
/mapmatching/utils/log_helper.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import logbook
5 | from logbook import Logger, TimedRotatingFileHandler
6 | from logbook.more import ColorizedStderrHandler
7 | 
8 | FILE_DIR = os.path.dirname(os.path.abspath(__file__))
9 | BASE_DIR = os.path.join(FILE_DIR, '../../log/')
10 | logbook.set_datetime_format('local')
11 | 
12 | 
13 | def log_type(record, handler):
14 |     log_info = "[{date}] [{level}] [{filename}] [{func_name}] [{lineno}] {msg}".format(
15 |         date=record.time,                             # log time
16 |         level=record.level_name,                      # log level
17 |         filename=os.path.split(record.filename)[-1],  # file name
18 |         func_name=record.func_name,                   # function name
19 |         lineno=record.lineno,                         # line number
20 |         msg=record.message                            # log message
21 |     )
22 | 
23 |     return log_info
24 | 
25 | 
26 | class LogHelper(object):
27 |     def __init__(self, log_dir=BASE_DIR, log_name='log.log', backup_count=10, log_type=log_type, stdOutFlag=False):
28 |         if not os.path.exists(log_dir):
29 |             os.mkdir(log_dir)
30 | 
31 |         self.log_dir = log_dir
32 |         self.backup_count = backup_count
33 | 
34 |         handler = TimedRotatingFileHandler(filename=os.path.join(self.log_dir, log_name),
35 |                                            date_format='%Y-%m-%d',
36 |                                            backup_count=self.backup_count)
37 |         self.handler = handler
38 |         if log_type is not None:
39 |             handler.formatter = log_type
40 |         handler.push_application()
41 | 
42 |         if not stdOutFlag:
43 |             return
44 | 
45 |         handler_std = ColorizedStderrHandler(bubble=True)
46 |         if log_type is not None:
47 |             handler_std.formatter = log_type
48 |         handler_std.push_application()
49 | 
50 |     def get_current_handler(self):
51 |         return self.handler
52 | 
53 |     @staticmethod
54 |     def make_logger(level, name=str(os.getpid())):
55 |         return Logger(name=name, level=level)
56 | 
57 | 
58 | def log_helper(log_file, content):
59 |     log_file.write(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}, {content}\n")
60 |     return
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     g_log_helper = LogHelper(log_name='log.log', stdOutFlag=True)
65 |     log = g_log_helper.make_logger(level=logbook.INFO)
66 |     log.critical("critical")  # fatal errors that make the program exit
67 |     log.error("error")        # errors that are still recoverable
68 |     log.warning("warning")    # warnings
69 |     log.notice("notice")      # records you usually want to see
70 |     log.info("info")          # records usually not needed
71 |     log.debug("debug")        # verbose records for debugging
72 |     pass
73 | 
--------------------------------------------------------------------------------
/mapmatching/utils/logger_helper.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | 
4 | def make_logger(folder, level='DEBUG', mode='w', console=False):
5 |     from loguru import logger
6 | 
7 |     if not console:
8 |         logger.remove()
9 | 
10 |     logger.add(
11 |         os.path.join(folder, f"pano_base_{time.strftime('%Y-%m-%d', time.localtime())}.log"),
12 |         enqueue=True,
13 |         backtrace=True,
14 |         diagnose=True,
15 |         level=level,
16 |         mode=mode
17 |     )
18 | 
19 |     return logger
20 | 
--------------------------------------------------------------------------------
/mapmatching/utils/misc.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from datetime import date
3 | 
4 | def get_date(fmt="%Y-%m-%d"):
5 |     return date.today().strftime(fmt)
6 | 
7 | 
8 | def add_datetime_attr(nodes):
9 |     extract_date_from_pid = lambda pid: {'area': pid[: 10], "date": pid[10: 16], "time": pid[16: 22]}
10 |     nodes.loc[:, ['area', 'date', 'time']] = nodes.apply(lambda x: extract_date_from_pid(x.pid), axis=1, result_type='expand')
11 | 
12 |     return nodes
13 | 
14 | def SET_PANDAS_LOG_FORMET():
15 |     pd.set_option('display.max_rows', 50)
16 |     pd.set_option('display.max_columns', 500)
17 |     pd.set_option('display.width', 5000)
18 | 
19 |     return
20 | 
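# The pid layout assumed by `add_datetime_attr` (a sketch with a made-up id):
# characters [0:10] are the area code, [10:16] the date, [16:22] the time.
#
# >>> pid = "0900570012" + "170909" + "154130"
# >>> {'area': pid[:10], 'date': pid[10:16], 'time': pid[16:22]}
# {'area': '0900570012', 'date': '170909', 'time': '154130'}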
--------------------------------------------------------------------------------
/mapmatching/utils/parallel_helper.py:
--------------------------------------------------------------------------------
1 | from tqdm import tqdm
2 | from multiprocessing import cpu_count, Pool
3 | 
4 | 
5 | def parallel_process(func, queue, pbar_switch=False, desc='Parallel processing', total=None, n_jobs=-1):
6 |     """Parallel processing helper.
7 | 
8 |     Args:
9 |         func (Function): The function to be accelerated.
10 |         queue (iterable of tuples): Each tuple must match the parameters of `func`.
11 |         desc (str, optional): Progress-bar label. Defaults to 'Parallel processing'.
12 |         n_jobs (int, optional): Number of worker processes; -1 uses all cores. Defaults to -1.
13 | 
14 |     Returns:
15 |         list: The results of `func`, in submission order.
16 |     """
17 |     size = total
18 |     if hasattr(queue, "__len__"):
19 |         size = len(queue)
20 |     if size == 0:
21 |         return []
22 | 
23 |     n_jobs = cpu_count() if n_jobs == -1 or n_jobs > cpu_count() else n_jobs
24 |     pool = Pool(n_jobs)
25 | 
26 |     if pbar_switch:
27 |         pbar = tqdm(total=size, desc=desc)
28 |         update = lambda *args: pbar.update()
29 | 
30 |     res = []
31 |     for id, params in enumerate(queue):
32 |         tmp = pool.apply_async(func, params, callback=update if pbar_switch else None)
33 |         res.append(tmp)
34 |     pool.close()
35 |     pool.join()
36 |     res = [r.get() for r in res]
37 | 
38 |     return res
39 | 
40 | 
41 | def _add(x, y):
42 |     res = x + y
43 |     # print(f"{x} + {y} = {res}")
44 | 
45 |     return res
46 | 
47 | 
48 | def parallel_process_for_df(df, pipeline, n_jobs=8):
49 |     """Split `df` into `n_jobs` parts and run `pipeline` on each part in parallel.
50 | 
51 |     Args:
52 |         df (pd.DataFrame): The frame to process.
53 |         pipeline (Function): A function taking a `(name, df)` tuple and returning `(name, result)`.
54 |         n_jobs (int, optional): Number of worker processes. Defaults to 8.
55 | 
56 |     Returns:
57 |         pd.DataFrame: The concatenated results, in part order.
58 |     """
59 |     # FIXME: with more than one argument the current code degenerates to serial
60 |     # execution, so extra parameters have to be fixed inside `pipeline`.
61 |     import pandas as pd
62 | 
63 |     _size = df.shape[0] // n_jobs + 1
64 |     df.loc[:, 'part'] = df.index // _size
65 |     params = zip(df.groupby('part'))
66 |     df.drop(columns=['part'], inplace=True)
67 | 
68 |     res = parallel_process(pipeline, params, n_jobs=n_jobs)
69 |     res = sorted(res, key=lambda x: x[0])
70 | 
71 |     return pd.concat([i for _, i in res])
72 | 
73 | 
74 | def pipeline_for_df_test(df_tuple, bias=-2, verbose=True):
75 |     import time
76 |     name, df = df_tuple
77 |     if verbose:
78 |         print(f"Part {name} start, size: {df.shape[0]}\n")
79 | 
80 |     time.sleep(10)
81 |     res = df.x + df.y + bias
82 |     if verbose:
83 |         print(f"Part {name} Done\n")
84 | 
85 |     return name, res
86 | 
87 | 
88 | if __name__ == "__main__":
89 |     res = parallel_process(_add, ((i, i) for i in range(10000)), True)
90 | 
91 |     # example of the DataFrame-based multiprocessing version
92 |     import pandas as pd
93 |     df = pd.DataFrame({'x': range(0, 10000), 'y': range(0, 10000)})
94 |     ans = parallel_process_for_df(df, pipeline_for_df_test, n_jobs=8)
95 | 
96 |     ans
--------------------------------------------------------------------------------
/mapmatching/utils/serialization.py:
--------------------------------------------------------------------------------
1 | import time
2 | import hashlib
3 | import pickle
4 | import os
5 | 
6 | 
7 | def load_checkpoint(ckpt_file_name, obj=None):
8 |     _dict = {}
9 |     if obj is not None and hasattr(obj, "__dict__"):
10 |         _dict = obj.__dict__
11 | 
12 |     with open(ckpt_file_name, 'rb') as f:
13 |         dict_ = pickle.load(f)
14 |         _dict.update(dict_)
15 | 
16 |     return _dict
17 | 
18 | 
19 | def save_checkpoint(obj, ckpt_file_name, ignore_att=[]):
20 |     def _save(tmp):
21 |         with open(ckpt_file_name, 'wb') as f:
22 |             pickle.dump({k: v for k, v in tmp.items() if k not in ignore_att}, f)
23 | 
24 |     if isinstance(obj, dict):
25 |         _save(obj)
26 |         return True
27 | 
28 |     try:
29 |         _save(obj.__dict__)
30 |         return True
31 |     except Exception:
32 |         return False
33 | 
34 | 
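# The checkpoint round trip used throughout the project (a minimal sketch):
# a dict is pickled as-is after filtering, and loading merges it back into a
# plain dict (or into `obj.__dict__` when an object is passed).
#
# >>> save_checkpoint({'a': 1, 'b': 2}, '/tmp/demo.ckpt', ignore_att=['b'])
# True
# >>> load_checkpoint('/tmp/demo.ckpt')
# {'a': 1}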
35 | class PickleSaver():
36 |     def __init__(self, folder='../cache'):
37 |         self.create_time = time.time()
38 |         self.folder = folder
39 | 
40 |     def md5(self):
41 |         m = hashlib.md5()
42 |         m.update(str(self.create_time).encode('utf-8'))
43 | 
44 |         return m.hexdigest()
45 | 
46 |     def save(self, obj, fn):
47 |         if not os.path.exists(self.folder):
48 |             os.mkdir(self.folder)
49 | 
50 |         if '.pkl' not in fn:
51 |             fn = f"{fn}.pkl"
52 | 
53 |         with open(os.path.join(self.folder, fn), 'wb') as f:
54 |             pickle.dump(obj, f)
55 | 
56 |         return True
57 | 
58 |     def read(self, fn):
59 |         if '/' not in fn:
60 |             fn = os.path.join(self.folder, fn)
61 | 
62 |         if '.pkl' not in fn:
63 |             fn = f"{fn}.pkl"
64 | 
65 |         with open(fn, 'rb') as f:
66 |             try:
67 |                 obj = pickle.load(f)
68 |                 return obj
69 |             except Exception:
70 |                 pass
71 | 
72 |         return None
73 | 
74 | 
75 | class Saver:
76 |     def __init__(self, snapshot_file, desc=None):
77 |         self.desc = desc
78 |         self.snapshot_file = snapshot_file
79 |         self.create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
80 | 
81 |     def _save(self, ignore_att=['logger']):
82 |         try:
83 |             with open(self.snapshot_file, 'wb') as f:
84 |                 pickle.dump({k: v for k, v in self.__dict__.items() if k not in ignore_att}, f)
85 |             return True
86 |         except Exception:
87 |             return False
88 | 
89 |     def _load(self, fn):
90 |         with open(fn, 'rb') as f:
91 |             dict_ = pickle.load(f)
92 |             self.__dict__.update(dict_)
93 | 
94 |         return True
95 | 
96 | 
97 | if __name__ == "__main__":
98 |     # fn = "../../cache/tmp.pkl"
99 |     # tmp = Saver(fn)
100 |     # print(tmp.create_time)
101 |     # tmp._save()
102 | 
103 |     # tmp._load(fn)
104 | 
105 |     ckpt = '../../cache/Shenzhen.graph.ckpt'
106 |     info = load_checkpoint(ckpt)
--------------------------------------------------------------------------------
/mapmatching/utils/timer.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | from loguru import logger
4 | 
5 | class Timer:
6 |     """Record multiple running times."""
7 |     def __init__(self):
8 |         self.times = []
9 |         self.start()
10 | 
11 |     def start(self):
12 |         """Start the timer."""
13 |         self.tik = time.time()
14 | 
15 |     def stop(self):
16 |         """Stop the timer and record the time in a list."""
17 |         self.times.append(time.time() - self.tik)
18 |         return self.times[-1]
19 | 
20 |     def avg(self):
21 |         """Return the average time."""
22 |         return sum(self.times) / len(self.times)
23 | 
24 |     def sum(self):
25 |         """Return the sum of time."""
26 |         return sum(self.times)
27 | 
28 |     def cumsum(self):
29 |         """Return the accumulated time."""
30 |         return np.array(self.times).cumsum().tolist()
31 | 
32 | 
33 | def timeit(func):
34 |     def inner(*args, **kwargs):
35 |         start = time.time()
36 |         res = func(*args, **kwargs)
37 |         end = time.time()
38 |         _log = f"{func.__name__}, cost: {(end - start) * 1000: .2f} ms"
39 |         # print(_log)
40 |         logger.info(_log)
41 | 
42 |         return res
43 | 
44 |     return inner
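# Decorator usage (a sketch): wrap any function to log its wall-clock cost.
#
# >>> @timeit
# ... def slow():
# ...     time.sleep(0.1)
# >>> slow()   # logs something like "slow, cost:  100.12 ms"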
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | geopandas==0.12.2
2 | pandas==1.5.3
3 | shapely==2.0.1
4 | sqlalchemy==1.4.46
5 | psycopg2==2.9.5
6 | geoalchemy2==0.13.1
7 | matplotlib==3.6.3
8 | loguru==0.6.0
9 | haversine==2.8.0
10 | numba==0.56.4
11 | osmium==3.6.0
12 | tqdm
13 | 
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | #%%
2 | from tqdm import tqdm
3 | import networkx as nx
4 | import itertools
5 | import geopandas as gpd
6 | from networkx import shortest_simple_paths
7 | from pathlib import Path
8 | 
9 | from stmm import build_geograph, ST_Matching
10 | from tilemap import plot_geodata
11 | 
12 | """step 1: fetch / load the road network"""
13 | folder = Path("./data/network")
14 | # Method 1:
15 | # download the network within `bbox` from OSM and parse it from scratch
16 | # net = build_geograph(bbox = [113.928518, 22.551085, 114.100451, 22.731744],
17 | #                      xml_fn = folder / "SZN.osm.xml", ll=False, n_jobs=32)
18 | # save the preprocessed network as a ckpt
19 | # net.save_checkpoint(folder / 'SZN_graph.ckpt')
20 | 
21 | # net = build_geograph(ckpt='../dataset/cache/SZN_graph.ckpt')
22 | net = build_geograph(ckpt = folder / 'SZN_graph.ckpt')
23 | matcher = ST_Matching(net=net, ll=False)
24 | 
25 | #%%
26 | plot_geodata(net.df_edges.to_crs(4326))
27 | net.df_edges.head(5)
28 | 
29 | # %%
30 | 
31 | def get_k_shortest_paths(G, u, v, k):
32 |     paths_gen = shortest_simple_paths(G, u, v, "length")
33 |     for path in itertools.islice(paths_gen, 0, k):
34 |         yield path
35 | 
36 | def plot_top_k_shortest_path():
37 |     geoms = []
38 | 
39 |     for path in get_k_shortest_paths(G, 9168697035, 9167366553, 3):
40 |         epath = net.transform_vpath_to_epath(path)
41 |         path_geom = net.transform_epath_to_linestring(epath)
42 |         geoms.append(path_geom)
43 | 
44 |     geoms = gpd.GeoDataFrame(geometry=geoms, crs=net.df_edges.crs)
45 | 
46 |     plot_geodata(geoms.to_crs(4326).reset_index(), column='index', legend=True, alpha=.5)
47 | 
48 |     return
49 | 
50 | G = nx.DiGraph()
51 | 
52 | # # shortest-path test
53 | # nx.shortest_path(G, 9168697035, 9167366553, weight='dist')
54 | 
55 | 
56 | #%%
57 | import networkx as nx
58 | import numpy as np
59 | import pandas as pd
60 | import geopandas as gpd
61 | 
62 | class GeoGraph(nx.DiGraph):
63 |     def __init__(self, incoming_graph_data=None, reindex_node=True, **attr):
64 |         super().__init__(incoming_graph_data, **attr)
65 |         self.nodeid_long2short = {}
66 |         self.nodeid_short2long = {}
67 |         self.nxt_nid = 0
68 |         self.reindex_node = reindex_node
69 | 
70 |     def search(self, o, d):
71 |         return nx.shortest_path(self, o, d, weight='weight')
72 | 
73 |     def load_graph(self, edges: gpd.GeoDataFrame, src='src', dst='dst', weight='dist'):
74 |         # add edges
75 |         for name, item in tqdm(edges.iterrows()):
76 |             o = item[src]
77 |             d = item[dst]
78 | 
79 |             if self.reindex_node:
80 |                 o = self._get_short_node_id(o)
81 |                 d = self._get_short_node_id(d)
82 | 
83 |             _w = item[weight]
84 |             self.add_edge(o, d, weight=_w)
85 | 
86 |     def _get_short_node_id(self, nid):
87 |         if not self.reindex_node:
88 |             return nid
89 | 
90 |         if nid in self.nodeid_long2short:
91 |             return self.nodeid_long2short[nid]
92 | 
93 |         self.nodeid_long2short[nid] = self.nxt_nid
94 |         self.nodeid_short2long[self.nxt_nid] = nid
95 |         tmp = self.nxt_nid
96 |         self.nxt_nid += 1
97 | 
98 |         return tmp
99 | 
100 |     """ coordination """
101 |     def align_crs(self, gdf):
102 |         return
103 | 
104 |     """ vis """
105 |     def add_edge_map(self, ax, *arg, **kwargs):
106 |         return
107 | 
108 |     """ property """
109 |     @property
110 |     def crs(self):
111 |         return self.df_edges.crs
112 | 
113 |     @property
114 |     def epsg(self):
115 |         return self.df_edges.crs.to_epsg()
116 | 
117 | 
118 | digraph = GeoGraph(reindex_node=False)
119 | digraph.load_graph(net.df_edges)
120 | o, d = 9168697035, 9167366553
121 | 
122 | o = digraph._get_short_node_id(o)
123 | d = digraph._get_short_node_id(d)
124 | digraph.search(o, d)
125 | 
126 | 
127 | # %%
128 | 
--------------------------------------------------------------------------------