├── .gitignore ├── README.md ├── LICENSE ├── src └── python │ ├── frontend │ ├── __init__.py │ ├── util.py │ ├── capabilities.py │ ├── __main__.py │ ├── fe.py │ ├── maphandler.py │ └── osmelement.py │ ├── apiserver │ ├── __init__.py │ ├── const.py │ └── osmelement.py │ ├── datastore │ ├── __init__.py │ ├── ds_couchdb.py │ ├── ds_geohash.py │ ├── ds_membase.py │ ├── slabutil.py │ ├── ds.py │ └── lrucache.py │ ├── dbmgr │ ├── __init__.py │ ├── dbm_stats.py │ ├── dbm_ops.py │ ├── dbm_input.py │ ├── __main__.py │ └── dbm_geotables.py │ ├── README.md │ ├── tests │ ├── __init__.py │ ├── test_geohash.py │ ├── test_slabutil.py │ ├── test_dsmembase.py │ ├── test_osmelement.py │ └── test_lrucache.py │ ├── db-mgr │ ├── front-end │ └── config │ └── osm-api-server.cfg └── doc ├── Home.md ├── ProvisioningInformation.md ├── Overview.md ├── SupportedRequests.md ├── Improvements.md ├── DesignMistakes.md ├── Roadmap.md ├── Dbmgr.md ├── DeploymentInstructions.md └── Slabs.org /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.pyo 4 | .sconsign.dblite 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | This is an experimental implementation of an API service that supports 4 | a (read-only) subset of the [OSM v0.6 API][osmapi]. 5 | 6 | The goal for this project is to explore an implementation of the 7 | [OSM API][osmapi] built over a distributed key/value store (i.e., a 8 | "NoSQL" backend). The service has been designed to be easy to scale 9 | horizontally. 10 | 11 | The implementation currently uses [Membase][membase] for the data 12 | store; however its design should work with other key/value systems. 13 | 14 | ## Current Status 15 | 16 | This repository contains a working snapshot of the service. 17 | The server only supports read queries on map data. 
18 | 19 | ## Further Information 20 | 21 | Information on how to use this software package may be found in the 22 | project's [documentation][]. 23 | 24 | 25 | 26 | [membase]: http://www.membase.org/ "Membase" 27 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM v0.6 API" 28 | [documentation]: https://github.com/MapQuest/mapquest-osm-server/blob/master/doc/Home.md 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is distributed under the following MIT License. 2 | 3 | Copyright (c) 2011 AOL Inc. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/python/frontend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | '''Front end for the OSM API Server.''' 24 | -------------------------------------------------------------------------------- /src/python/apiserver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | # 23 | 24 | '''An API Server for the OpenStreetMap project.''' 25 | -------------------------------------------------------------------------------- /src/python/datastore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """A package containing datastore implementations. 24 | 25 | Each datastore module exports a class 'DB' whose methods implement 26 | the interface to the datastore. 27 | """ 28 | -------------------------------------------------------------------------------- /src/python/datastore/ds_couchdb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """An interface to a CouchDB based backend store.""" 24 | 25 | class Datastore: 26 | def __init__(self, config): 27 | "Initialize the datastore." 28 | pass 29 | 30 | -------------------------------------------------------------------------------- /doc/Home.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This is an experimental API server for [Open Street Map][osm] map 4 | data. 5 | 6 | - The server supports most of the read operations on map data defined by 7 | [version 0.6][osmapi] of the OSM API (see [SupportedRequests][] for the 8 | precise list). 9 | - For its data store, the server currently uses [Membase][membase], a 10 | scalable distributed key/value store. Support for other scalable 11 | key/value stores should be easy to add. 
12 | - The server has been designed to be easy to scale out horizontally. 13 | 14 | ## Further Reading 15 | 16 | * [Overview][] -- An overview of the implementation. 17 | * [DeploymentInstructions][] -- How to deploy the server. 18 | * [ProvisioningInformation][] -- Sizing information for running a server. 19 | * [Roadmap][] -- The steps going forward. 20 | * [SupportedRequests][] -- The list of supported HTTP requests. 21 | 22 | 23 | 24 | [DeploymentInstructions]: DeploymentInstructions.md 25 | [membase]: http://www.membase.org/ "Membase" 26 | [osm]: http://www.openstreetmap.org/ "Open Street Map" 27 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM API v0.6" 28 | [Overview]: Overview.md 29 | [python]: http://www.python.org/ "The Python Language" 30 | [ProvisioningInformation]: ProvisioningInformation.md 31 | [Roadmap]: Roadmap.md 32 | [SupportedRequests]: SupportedRequests.md 33 | -------------------------------------------------------------------------------- /src/python/dbmgr/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """Database Manager. 24 | 25 | This tool allows OSM data to be loaded into a variety of backends. 26 | Currently supported backends include: 27 | 28 | * CouchDB 29 | * Membase 30 | * Riak 31 | """ 32 | -------------------------------------------------------------------------------- /src/python/README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | This directory contains a [Python][python] implementation of a 4 | scalable API server for OSM map data. 5 | 6 | ## Directory Contents 7 | 8 | * `apiserver/` 9 | 10 | Common definitions and utilities. 11 | 12 | * `datastore/` 13 | 14 | Interfaces to various key/value stores. 15 | 16 | * `dbmgr/` 17 | 18 | Code for the data store management utility. 19 | 20 | * `frontend/` 21 | 22 | Code for the 'frontend' of the API server. 23 | 24 | * `tests/` 25 | 26 | Test code. 27 | 28 | ## Running the code 'in-place' 29 | 30 | * Configuration information for these tools is specified in the file 31 | `config/osm-api-server.cfg`. 32 | * The script `front-end` starts the server. With the default 33 | configuration, this server would need to be run as root since it 34 | listens for API requests on port 80. The listening port may be 35 | changed using the configuration file (see section `front-end`, 36 | configuration item `port`). 37 | 38 | % sudo ./front-end 39 | 40 | * The script `db-mgr` invokes the ingestion tool. 
For example: 41 | * To initialize the data store, use: 42 | 43 | % ./db-mgr -I 44 | 45 | * To load a "planet.osm" planet dump into the data store, use: 46 | 47 | % ./db-mgr PLANET.OSM 48 | 49 | Both scripts support a `--help` option. 50 | 51 | 52 | 53 | [python]: http://www.python.org/ "The Python Programming Language" 54 | -------------------------------------------------------------------------------- /src/python/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """Functional tests for the OSM API server. 24 | 25 | This Python package contains tests for the OSM API server and 26 | associated utilities. 
27 | 28 | Note: The presence of this file causes the `py.test` test discovery 29 | tool to use the parent directory as its `basedir` (see py.test 30 | documentation for more information). 31 | """ 32 | -------------------------------------------------------------------------------- /src/python/db-mgr: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person 6 | # obtaining a copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be 14 | # included in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | """Manage data in the backend of the API server.""" 26 | 27 | import runpy 28 | import sys 29 | 30 | package = 'dbmgr' 31 | 32 | version = sys.version_info[0:2] 33 | 34 | # Check for the specific Python version that we know works. 
35 | if version == (2, 6): 36 | runpy.run_module('%s.__main__' % package, run_name='__main__', 37 | alter_sys=True) 38 | else: 39 | raise NotImplementedError("Needs Python version 2.6") 40 | -------------------------------------------------------------------------------- /src/python/front-end: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 4 | # 5 | # Permission is hereby granted, free of charge, to any person 6 | # obtaining a copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be 14 | # included in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | """Execute the frontend of the API server.""" 26 | 27 | import runpy 28 | import sys 29 | 30 | package = 'frontend' 31 | 32 | version = sys.version_info[0:2] 33 | 34 | # Check for the specific Python version that we know works. 
35 | if version <= (2, 6): 36 | runpy.run_module('%s.__main__' % package, run_name='__main__', 37 | alter_sys=True) 38 | else: 39 | raise NotImplementedError("Needs Python version 2.6") 40 | -------------------------------------------------------------------------------- /src/python/datastore/ds_geohash.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """Convenience routines for managing geo-hashes.""" 24 | 25 | import geohash 26 | 27 | import apiserver.const as C 28 | 29 | __GHKEYLENGTH = None 30 | __SCALEFACTOR = None 31 | 32 | def init_geohash(ghkeylength, scalefactor): 33 | "Initialize the module." 
34 | global __GHKEYLENGTH, __SCALEFACTOR 35 | 36 | __GHKEYLENGTH = ghkeylength 37 | __SCALEFACTOR = scalefactor 38 | 39 | def geohash_key_for_element(elem): 40 | "Return a geohash key for element 'elem'." 41 | 42 | lat = min(C.MAXGHLAT, float(elem.get(C.LAT)) / __SCALEFACTOR) 43 | lon = float(elem.get(C.LON)) / __SCALEFACTOR 44 | 45 | return geohash.encode(lat, lon, precision=__GHKEYLENGTH) 46 | -------------------------------------------------------------------------------- /src/python/frontend/util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ## Utility functions. 24 | 25 | from lxml import etree as ET 26 | 27 | import apiserver.const as C 28 | 29 | def response_to_xml(elem): 30 | 'Create a pretty-printed XML response.' 
31 | return ET.tostring(elem, encoding=C.UTF8, pretty_print=True, 32 | xml_declaration=True) 33 | 34 | def filter_references(namespace, items): 35 | "Look for references for items in the specified namespace." 36 | prefix = namespace[0].upper() 37 | results = set() 38 | for i in items: 39 | results.update(map(lambda x: x[1:], 40 | filter(lambda x: x[0] == prefix, 41 | i.get(C.REFERENCES, [])))) 42 | return results 43 | -------------------------------------------------------------------------------- /doc/ProvisioningInformation.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This document contains information about the resource requirements 4 | needed for running an instance of this server. 5 | 6 | **Note**: A pending issue ([#13][issue13]) is currently preventing the load of a complete 7 | [planet][osmplanet]. The data below is therefore for a subset. 8 | 9 | ## Membase 10 | 11 | * Membase version: 1.6.5 (i386), running on a laptop with 3GB RAM, 12 | running Ubuntu GNU/Linux: 13 | * Source tree at commit [7bcb49c807f39fbb9989](https://github.com/MapQuest/mapquest-osm-server/commit/7bcb49c807f39fbb998958e3cfc14496077b065e). 14 | * Extract: `india.osm.bz2` from `download.geofabrik.de`, dated 15 | 2011-04-11: 16 | * Size: 53387268 bytes bzipped. 17 | * Containing 0 changesets, 3568521 nodes, 215498 ways, and 933 relations. 18 | * Resource usage (Membase): 19 | * Reported disk usage: 920MB. 20 | * Reported RAM usage: 882MB (seems high?). 21 | * 245137 unique keys in the data store (using the default slab settings). 22 | * Representative timings using the default configuration, with both Membase and front-end running on `localhost`: 23 | * First fetch of a node (i.e., with a 'cold' element cache):
24 | `[I 110413 21:53:53 web:849] 200 GET /api/0.6/node/15382163 (127.0.0.1) 15.50ms` 25 | * First fetch of the ways for a node:
26 | `[I 110413 21:53:57 web:849] 200 GET /api/0.6/node/15382163/ways (127.0.0.1) 5.40ms` 27 | * Subsequent re-fetch of the ways for the same node:
28 | `[I 110413 21:54:00 web:849] 200 GET /api/0.6/node/15382163/ways (127.0.0.1) 0.99ms` 29 | * A re-fetch of the same node:
30 | `[I 110413 21:54:10 web:849] 200 GET /api/0.6/node/15382163 (127.0.0.1) 0.68ms` 31 | 32 | I do not have scaling numbers under load (yet). 33 | 34 | ## Related Tickets 35 | 36 | * Ticket [#9][issue9] tracks efforts to reduce the data storage requirements for map data. 37 | * Ticket [#13][issue13] tracks efforts to speed up ingestion of a full planet dump. 38 | 39 | 40 | 41 | [issue9]: https://github.com/MapQuest/mapquest-osm-server/issues/9 42 | [issue13]: https://github.com/MapQuest/mapquest-osm-server/issues/13 43 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "OSM Planet" 44 | 45 | -------------------------------------------------------------------------------- /doc/Overview.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This document presents an overview of this map server. 4 | 5 | ## Goals 6 | 7 | The goal of this project is to explore an implementation of an 8 | OSM-like map server using a scalable, distributed, key/value system 9 | for data storage. 10 | 11 | Access to map data would be using the [APIs][osmapi] defined by the 12 | OSM project. Currently, this prototype supports a read-only subset of 13 | the [OSM APIs][osmapi]. 14 | 15 | ## Architecture 16 | 17 | The server has three major components. 18 | 19 | * The "data store". 20 | 21 | The data store is a persistent store of map data. Map data from 22 | "[planet.osm][osmplanet]" snapshots is processed by an ingestion tool 23 | (see below) and is stored in key/value form in the data store. 24 | 25 | The key/value store needs to be able to deal with a large number 26 | of keys; the current prototype uses [membase][]. 27 | 28 | * The "front end". 29 | 30 | The front end responds to HTTP requests of the form defined by the 31 | [OSM API][osmapi]. The list of supported requests may be found in 32 | [SupportedRequests][]. 33 | 34 | * The data store manager. 
35 | 36 | This tool is used to ingest [planet.osm][osmplanet] and 37 | [OSM change][osmchange] files published by the [openstreetmap][] 38 | project into the data store. 39 | 40 | ## Configuration 41 | 42 | Most aspects of the operation of the server are controlled by a 43 | configuration file; see the file [osm-api-server.cfg][configsrc]. 44 | 45 | ## See Also 46 | 47 | * [DesignMistakes][] -- Alternative designs that were tried, but 48 | which did not work out well. 49 | * [Improvements][] -- (Near term) improvements to the design. 50 | 51 | 52 | 53 | [configsrc]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/config/osm-api-server.cfg "Configuration file" 54 | [DesignMistakes]: DesignMistakes.md 55 | [Improvements]: Improvements.md 56 | [membase]: http://www.membase.org/ "Membase" 57 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM v0.6 API" 58 | [osmchange]: http://wiki.openstreetmap.org/wiki/OsmChange "OSM Change" 59 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "Planet.OSM" 60 | [openstreetmap]: http://www.openstreetmap.org/ "Open Street Map" 61 | [SupportedRequests]: SupportedRequests.md 62 | -------------------------------------------------------------------------------- /src/python/tests/test_geohash.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """Test the 'datastore.geohash' utility module.""" 24 | 25 | import pytest 26 | 27 | import apiserver.const as C 28 | from apiserver.osmelement import new_osm_element 29 | from datastore.ds_geohash import init_geohash, geohash_key_for_element 30 | 31 | _GHKEYLENGTH = 5 32 | _SCALEFACTOR = 10000000 33 | 34 | def test_geokeys(): 35 | "Test geo hash keys returned for various coordinates." 36 | 37 | init_geohash(_GHKEYLENGTH, _SCALEFACTOR) 38 | expected = [ 39 | (0.0, 0.0, 's0000'), 40 | (89, 0.0, 'upb42'), 41 | (89.999999999999992, 0.0, 'upbpb'), # Max lat supported. 
42 | (-90, 0.0, 'h0000'), 43 | (-90, -180, '00000'), 44 | (-90, +180, '00000'), 45 | (-90, +90, 'n0000'), 46 | (-90, -90, '40000'), 47 | (-45, -45, '70000'), 48 | (-45, 45, 'm0000'), 49 | (45, 45, 'v0000'), 50 | (45, -45, 'g0000') 51 | ] 52 | 53 | for (lat, lon, ghkey) in expected: 54 | elem = new_osm_element(C.NODE, '0') 55 | elem[C.LAT] = lat * _SCALEFACTOR 56 | elem[C.LON] = lon * _SCALEFACTOR 57 | res = geohash_key_for_element(elem) 58 | 59 | assert res == ghkey 60 | -------------------------------------------------------------------------------- /doc/SupportedRequests.md: -------------------------------------------------------------------------------- 1 | ## Supported HTTP requests 2 | 3 | This server currently supports the following subset of the [OSM v0.6 API](http://wiki.openstreetmap.org/wiki/API_v0.6). 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 |
Operation/URIDescription
GET /Return information about this server instance.
GET /api/capabilitiesRetrieve server information.
GET /api/0.6/capabilitiesRetrieve server information.
GET /api/0.6/map?bbox=l,b,r,tRetrieve information by a bounding box.
GET /api/0.6/node/NNNNRetrieve node `NNNN`.
GET /api/0.6/way/NNNNRetrieve way `NNNN`.
GET /api/0.6/relation/NNNNRetrieve relation `NNNN`.
GET /api/0.6/nodes?nodes=#,#,#,...Retrieve multiple nodes in one request.
GET /api/0.6/ways?ways=#,#,#,...Retrieve multiple ways in one request.
GET /api/0.6/relations?relations=#,#,#,...Retrieve multiple relations in one request.
GET /api/0.6/nodes/NNNN/relationsRetrieve relations for a node.
GET /api/0.6/ways/NNNN/relationsRetrieve relations for a way.
GET /api/0.6/relations/NNNN/relationsRetrieve relations for a relation.
GET /api/0.6/node/NNNN/waysRetrieve ways for a node.
GET /api/0.6/way/NNNN/fullRetrieve a way and all nodes referenced by the way.
GET /api/0.6/relation/NNNN/fullRetrieve a relation, all nodes and ways that are its members, and all nodes referenced by the ways being returned.
78 | -------------------------------------------------------------------------------- /doc/Improvements.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This page describes enhancements to the current design of the API 4 | server. 5 | 6 | ## Speeding Up Ingestion 7 | 8 | Ingestion of a planet dump by the [ingestion tool][dbmgr] needs to be 9 | sped up. This issue is being tracked in [issue #13][issue13]. 10 | 11 | Currently: 12 | 13 | * When processing `nodes`, the tool appears to be limited by Python's 14 | bytecode interpretation overhead---I/O does not seem to introducing 15 | a bottleneck. 16 | * When processing `ways` and `relations` in the planet dump, the 17 | program becomes bound by I/O latencies. In particular, 18 | * The current design of the element cache is inefficient for 19 | nodes (see below). 20 | * The program processes one way or relation element at a time 21 | (i.e., in a single-threaded fashion). 22 | 23 | ## Improving Cache Efficiencies 24 | 25 | The upstream [OSM API][osmapi] numbers new map elements (nodes, ways 26 | and relations) sequentially, as and when they are created. This means 27 | that elements that are geographically 'close' can have ids that are 28 | far apart in numeric value. 29 | 30 | In the current design elements are [grouped into 'slabs'][slabutil.py] 31 | by element id. API queries however, tend to be for OSM elements which 32 | are geographically 'close' to each other. For such queries, the 33 | current scheme is inefficient both from the point of view of I/O 34 | traffic and (cache) RAM consumption. 35 | 36 | A better scheme would therefore be: 37 | 38 | * Group elements into geographically keyed slabs; elements in each 39 | slab would be "close by" in terms of geographical distance. 40 | * For direct lookups of elements via the API, use a mapping from 41 | element ids to the slabs holding element's definition. 
42 | 43 | In this new scheme, direct lookups of elements would need two key 44 | retrievals from the data store, compared to one retrieval in the 45 | current scheme. However, the improvements to the efficiency of the 46 | element cache should compensate for this additional overhead. 47 | 48 | See also: Issue [#16][issue16]. 49 | 50 | 51 | 52 | [dbmgr]: https://github.com/MapQuest/mapquest-osm-server/tree/master/src/python/dbmgr 53 | [issue13]: https://github.com/MapQuest/mapquest-osm-server/issues/13 54 | [issue16]: https://github.com/MapQuest/mapquest-osm-server/issues/16 55 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM API v0.6" 56 | [slabutil.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/slabutil.py 57 | -------------------------------------------------------------------------------- /doc/DesignMistakes.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This document contains information about implementation approaches 4 | that were tried but which did not work well. 5 | 6 | ## CouchDB's 'views' (slow responses) 7 | 8 | In an earlier version of this server I had used a [CouchDB][] backend 9 | to store map data. The `/map` API was implemented by using CouchDB's 10 | [views][couchdbviews]. 11 | 12 | The reasons I abandoned this approach were: 13 | 14 | 1. CouchDB's [views][couchdbviews] turned out to be slow, causing the 15 | `/map` call to take several hundreds of milliseconds to complete. This 16 | was well over my design goal. 17 | 2. [CouchDB][]'s on-disk storage scheme seemed to need a large amount 18 | of disk space. Given that the size of the OSM dataset is already 19 | large (over one billion nodes, nearly a hundred million ways, and 20 | growing), these high overheads were a concern. 21 | 3. [CouchDB][] uses HTTP based access; every data store access was 22 | thus high overhead. 
23 | 24 | ## Vanilla Membase (high memory overheads) 25 | 26 | In the initial design of the [Membase based data store][dsmembase.py] 27 | I mapped each node, way and relation one to one to a Membase key. 28 | While this approach is simple, it does not scale well: [Membase][] as 29 | of the current version (v1.6.5), has an 30 | [overhead of 120 bytes][membasesizing] per key. Thus we would need 31 | 120G of RAM to store _just the keys_ for the current OSM data set. 32 | 33 | My current design [groups keys into "slabs"][slabutil.py]. This 34 | brings down the number of (membase) keys needed to manageable levels. 35 | I/O is done in terms of slabs, and a local 36 | ["cache" with LRU semantics][lrucache.py] is used to reduce the number 37 | of I/O requests sent to the Membase server. 38 | 39 | 40 | 41 | [couchdb]: http://couchdb.apache.org/ "Apache CouchDB" 42 | [couchdbviews]: http://wiki.apache.org/couchdb/Introduction_to_CouchDB_views "CouchDB Views" 43 | [ds.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/ds.py 44 | [dsmembase.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/ds_membase.py 45 | [lrucache.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/lrucache.py 46 | [membase]: http://www.membase.org/ "Membase" 47 | [membasesizing]: http://techzone.couchbase.com/wiki/display/membase/Sizing+Guidelines "Sizing Guidelines" 48 | [slabutil.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/slabutil.py 49 | -------------------------------------------------------------------------------- /doc/Roadmap.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This page lists the proposed evolution of the server. 4 | 5 | ## Current Status 6 | 7 | * The code is functional: [planet dumps][osmplanet] can be ingested and their data retrieved using the [API][osmapi]. 
8 | * Serving data via the API is quite fast (see [ProvisioningInformation][]), but ingesting a full planet is slow. 9 | * Modules have unit tests. 10 | * External documentation (i.e., the `doc/` directory) is up-to-date. 11 | * The supported data store is: [Membase][]. 12 | 13 | ## Future work 14 | 15 | * We need to support 'full' [Planet dumps][fullosmplanet], along with 16 | retrieval of changesets, element history and prior versions of elements (tickets [#4][issue4] and [#14][issue14]). 17 | * Performance improvements that have been identified so far could be addressed: 18 | * The `/map` API call could be further sped up by grouping nodes and ways based on geographical proximity. 19 | * The ingestion tool needs to be sped up ([#13][issue13]) and possibly rewritten in a non-interpreted language. 20 | * Storage efficiency can be improved: 21 | * A separate string table for frequently used strings could cut down storage needs. 22 | * Slabs could be coded more efficiently ([#9][issue9]). 23 | * The "front-end" needs to be made fully asynchronous ([#2][issue2]). 24 | * System tests that verify end-to-end integrity of the ingestion process are needed. 25 | * More supported data stores: possibly [Riak][] ([#6][issue6]) for a scalable backend, or perhaps [BerkeleyDB][] for a single machine configuration. 
26 | 27 | 28 | 29 | [BerkeleyDB]: http://www.oracle.com/technetwork/database/berkeleydb/overview/index.html "Berkeley DB" 30 | [fullosmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm/full "Full OSM Planet" 31 | [issue2]: https://github.com/MapQuest/mapquest-osm-server/issues/2 32 | [issue4]: https://github.com/MapQuest/mapquest-osm-server/issues/4 33 | [issue6]: https://github.com/MapQuest/mapquest-osm-server/issues/6 34 | [issue9]: https://github.com/MapQuest/mapquest-osm-server/issues/9 35 | [issue13]: https://github.com/MapQuest/mapquest-osm-server/issues/13 36 | [issue14]: https://github.com/MapQuest/mapquest-osm-server/issues/14 37 | [membase]: http://www.membase.org/ "Membase" 38 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM v0.6 API" 39 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "OSM Planet" 40 | [ProvisioningInformation]: ProvisioningInformation.md 41 | [riak]: http://www.basho.com/ "Riak" 42 | [wiki]: https://github.com/MapQuest/mapquest-osm-server/wiki "Wiki" 43 | -------------------------------------------------------------------------------- /src/python/frontend/capabilities.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ## Support retrieval of the server's capabilities. 24 | 25 | import tornado.web 26 | 27 | from lxml import etree as ET 28 | 29 | import apiserver.const as C 30 | from apiserver.osmelement import new_osm_response 31 | from util import response_to_xml 32 | 33 | # Sample output: 34 | # 35 | # 36 | # 37 | # 38 | # 39 | # 40 | # 41 | # 42 | # 43 | # 44 | # 45 | 46 | class CapabilitiesHandler(tornado.web.RequestHandler): 47 | "Handle requests for server capabilities." 48 | 49 | def initialize(self, cfg): 50 | self.cfg = cfg 51 | 52 | def get(self): 53 | self.set_header(C.CONTENT_TYPE, C.TEXT_XML) 54 | 55 | def _get(name): 56 | return self.cfg.get(C.FRONT_END, name) 57 | 58 | osm = new_osm_response() 59 | 60 | api = ET.SubElement(osm, "api") 61 | version = ET.SubElement(api, "version") 62 | version.attrib['minimum'] = _get(C.API_VERSION_MINIMUM) 63 | version.attrib['maximum'] = _get(C.API_VERSION_MAXIMUM) 64 | area = ET.SubElement(api, "area") 65 | area.attrib['maximum'] = _get(C.AREA_MAX) 66 | 67 | tracepoints = ET.SubElement(api, "tracepoints") 68 | tracepoints.attrib['per_page'] = _get(C.TRACEPOINTS_PER_PAGE) 69 | 70 | waynodes = ET.SubElement(api, "waynodes") 71 | waynodes.attrib['maximum'] = _get(C.WAYNODES_MAX) 72 | 73 | changesets = ET.SubElement(api, "changesets") 74 | changesets.attrib['maximum_elements'] = _get(C.CHANGESETS_MAX) 75 | 76 | timeout = ET.SubElement(api, "timeout") 77 | timeout.attrib['seconds'] = _get(C.API_CALL_TIMEOUT) 78 
| 79 | self.write(response_to_xml(osm)) 80 | -------------------------------------------------------------------------------- /src/python/dbmgr/dbm_stats.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """Manage statistics. 24 | 25 | """ 26 | 27 | import sys 28 | import threading 29 | 30 | import apiserver.const as C 31 | 32 | _timer_delay = 1 # Number of seconds between reports. 33 | _stats = {} # Hash maps tracking the collected statistics. 34 | _prevstats = {} 35 | 36 | _timer = None # Timer object. 37 | _is_active = None # Run state. 
38 | _lock = None 39 | 40 | def _display_stats(): 41 | "Display statistics" 42 | global _lock, _prevstats 43 | 44 | def _format(prefix, absval, incr): 45 | """Helper function.""" 46 | s = "" 47 | if absval: 48 | s += ("%s: %%(_%sv)d" % (prefix.upper(), prefix)) 49 | if incr: 50 | s += ("(+%%(_%s)d)" % prefix) 51 | s += " " 52 | return s 53 | 54 | # Retrieve the previous and current counts. 55 | _c = _prevstats[C.CHANGESET] 56 | _n = _prevstats[C.NODE] 57 | _w = _prevstats[C.WAY] 58 | _r = _prevstats[C.RELATION] 59 | 60 | _lock.acquire() 61 | _cv = _stats[C.CHANGESET] 62 | _nv = _stats[C.NODE] 63 | _wv = _stats[C.WAY] 64 | _rv = _stats[C.RELATION] 65 | _prevstats.update(_stats) 66 | _lock.release() 67 | 68 | # Compute incremental changes. 69 | _c = _cv - _c 70 | _n = _nv - _n 71 | _w = _wv - _w 72 | _r = _rv - _r 73 | 74 | # Compute the format string. 75 | s = _format('c', _cv, _c) 76 | s += _format('n', _nv, _n) 77 | s += _format('w', _wv, _w) 78 | s += _format('r', _rv, _r) 79 | 80 | print s % locals() 81 | 82 | 83 | def _stats_timer(): 84 | "Invoke the actual display helper and re-arm the timer." 85 | 86 | _display_stats() 87 | 88 | global _timer 89 | if _is_active: 90 | _timer = threading.Timer(_timer_delay, _stats_timer) 91 | _timer.start() 92 | 93 | 94 | def init_statistics(config, options): 95 | "Initialize the module." 96 | global _stats, _prevstats 97 | 98 | for n in [C.CHANGESET, C.NODE, C.WAY, C.RELATION]: 99 | _stats[n] = _prevstats[n] = 0 100 | 101 | global _lock 102 | _lock = threading.Lock() 103 | 104 | if options.verbose: 105 | global _is_active, _timer 106 | 107 | _is_active = True 108 | _timer = threading.Timer(_timer_delay, _stats_timer) 109 | _timer.daemon = True 110 | _timer.start() 111 | 112 | 113 | def fini_statistics(options): 114 | "Shutdown the module." 
115 | global _is_active 116 | _is_active = False 117 | 118 | if _timer: 119 | _timer.cancel() 120 | 121 | if options.verbose: 122 | _display_stats() 123 | 124 | 125 | def increment_stats(namespace): 126 | global _lock, _stats 127 | 128 | _lock.acquire() 129 | _stats[namespace] = _stats[namespace] + 1 130 | _lock.release() 131 | -------------------------------------------------------------------------------- /src/python/config/osm-api-server.cfg: -------------------------------------------------------------------------------- 1 | ## Configuration information for the OSM API server. 2 | # 3 | 4 | ## Defaults 5 | # 6 | # area-max - Max width of a requested area. 7 | # changesets-max - Max changes in a changeset. 8 | # project-wiki - URL to the project wiki. 9 | # source-repository - URL to the opensource repository for the server 10 | # tracepoints-per-page - Max tracepoints returned for a query. 11 | # waynodes-max - Maximum nodes in a way. 12 | 13 | [DEFAULT] 14 | area-max = 180.0 15 | changesets-max = 50000 16 | project-doc = %(source-repository)s/blob/master/doc/Home.md 17 | source-repository = https://github.com/Mapquest/mapquest-osm-server 18 | tracepoints-per-page = 5000 19 | waynodes-max = 2000 20 | 21 | 22 | ## Datastore related 23 | # 24 | # changesets-inline-size - Max size for a changeset residing in a slab. 25 | # changesets-per-slab - The number of changesets in a slab. 26 | # datastore-backend - The kind of datastore to use. 27 | # One of: "couchdb", "membase" or "riak". 28 | # datastore-encoding - Encoding to be used for elements in the datastore. 29 | # One of: "json", "native" (for CouchDB) or "protobuf". 30 | # geodoc-lru-size - The size of the geodoc LRU buffer. 31 | # geodoc-lru-threads - The number of threads used to write geodoc information. 32 | # geohash-length - Controls the granularity of documents containing 33 | # geographical information. 34 | # nodes-inline-size - Max size for a node residing in a slab. 
35 | # nodes-per-slab - The number of nodes in a slab. 36 | # relations-inline-size - Max size for a relation residing in a slab. 37 | # relations-per-slab - The number of relations in a slab. 38 | # scale-factor - For converting fractional lat/lon values to integers 39 | # slab-lru-size - Number of slabs in an LRU buffer. 40 | # ways-inline-size - Max size for a way residing in a slab. 41 | # ways-per-slab - The number of ways in a slab. 42 | # 43 | # Note that the front end server reads the values of the 44 | # 'changesets-per-slab', 'nodes-per-slab', 'relations-per-slab' and 45 | # 'ways-per-slab' configuration items from the data store. 46 | 47 | [datastore] 48 | changesets-inline-size = 256 49 | changesets-per-slab = 256 50 | datastore-backend = membase 51 | datastore-encoding = json 52 | geodoc-lru-size = 4096 53 | geodoc-lru-threads = 4 54 | geohash-length = 5 55 | nodes-inline-size = 256 56 | nodes-per-slab = 256 57 | relations-inline-size = 1024 58 | relations-per-slab = 64 59 | scale-factor = 10000000 60 | slab-lru-size = 1024 61 | slab-lru-threads = 8 62 | ways-inline-size = 1024 63 | ways-per-slab = 64 64 | 65 | ## Database manager utility 66 | # 67 | # changeset-server - Upstream server to get missing changesets from 68 | 69 | [dbmgr] 70 | changeset-server = http://api.openstreetmap.org/ 71 | 72 | 73 | ## Configuration information for the front-end 74 | # 75 | # api-version - The current API version supported, reported by 76 | # the 'version' attribute of API responses. 77 | # api-version-{min,max}imum - The version range supported. 78 | # api-call-timeout - Timeout for API calls, in seconds. 79 | # port - TCP port on which to listen for API requests. 80 | # server-name - Name reported by the API server. 
81 | # server-version - Version number for the prototype 82 | 83 | [front-end] 84 | api-version = 0.6 85 | api-version-minimum = %(api-version)s 86 | api-version-maximum = %(api-version)s 87 | api-call-timeout = 300 88 | port = 80 89 | server-name = OSM API Server Prototype %(server-version)s 90 | server-version = 0.6 91 | 92 | 93 | ## Configuration information for backends 94 | 95 | ## CouchDB 96 | # 97 | # dbname - Prefix used for each kind of couchdb database 98 | # dburl - Location of the CouchDB server. 99 | 100 | [couchdb] 101 | dbname = osm 102 | dburl = http://localhost:5984/ 103 | 104 | ## Membase 105 | # 106 | # dbadminport - Administration port used by membase. 107 | # dbadminpw - Administrative password. 108 | # dbadminuser - Administrative user. 109 | # dbhost - Datastore host. 110 | # dbport - Datastore bucket port. 111 | # dbname - Name of the membase 'bucket' to use. 112 | 113 | [membase] 114 | dbadminport = 8091 115 | dbadminpw = osmapiserver 116 | dbadminuser = Administrator 117 | dbhost = localhost 118 | dbname = default 119 | dbport = 11211 120 | 121 | ## Riak 122 | # 123 | # dburl - Location of the Riak server. 124 | 125 | [riak] 126 | dburl = http://localhost:8091/riak/ 127 | -------------------------------------------------------------------------------- /doc/Dbmgr.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This document describes the `dbmgr` ingestion tool. 4 | 5 | ## What the tool does 6 | 7 | The `dbmgr` tool is used to: 8 | 9 | 1. initialize the data store, 10 | 2. to load data into the data store, 11 | 3. to incrementally change existing information in the data store. 12 | 13 | ## Requirements 14 | 15 | ### Initialization 16 | 17 | A command line option would specify that the data store needs to be 18 | reset. In the current code, this is the `-I` option. 19 | 20 | ### Input 21 | 22 | The following input formats are proposed to be accepted by the tool: 23 | 24 | 1. 
An OSM planet file in XML format. 25 | 26 | This would be used for initializing the data store from a regular 27 | [planet dump][planetdump]. 28 | 29 | 2. A "full" planet dump in XML format. 30 | 31 | See: task [#4][issue4]. 32 | 33 | This would be used for initializing the data store from a 34 | [full planet dump][fullplanetdump]. 35 | 36 | 3. `osmChange` files in XML format. 37 | 38 | See: task [#14][issue14]. 39 | 40 | These would be used for incremental updates to the data store, 41 | see the [planet.osm diffs page][planetdiffs]. 42 | 43 | 4. A planet file in [PBF format][planetpbf]. 44 | 45 | See: task [#3][issue3]. 46 | 47 | The PBF format has the same content as the OSM planet format, 48 | but is smaller and faster to process. 49 | 50 | Note that while "full" planet dumps include \ information, 51 | the [osmChange][] incremental format does not include information 52 | about new \s. Thus, if a full planet dump is being 53 | incrementally updated, additional changeset information for the change 54 | would need to be downloaded separately from the main OSM server. 55 | 56 | No data transfer format seems to support transfer of GPS tracks or of 57 | user information. 58 | 59 | ### Backends 60 | 61 | The following backends are planned to be supported (in approximate 62 | order of priority): 63 | 64 | 1. A Membase based backend. 65 | 2. A CouchDB/BigCouch based backend. 66 | 3. A Riak based backend. 67 | 68 | The code is to be structured in such a way that supporting another 69 | distributed key/value store should be easy. 70 | 71 | ## Live Updates 72 | 73 | The tool should be able to change data in the data store without 74 | cluster downtime. 75 | 76 | ## Non-requirements 77 | 78 | 1. Retrieval of diffs from 'planet.openstreetmap.org'. 79 | 80 | The tool does not automate the process of downloading 81 | minutely/hourly/daily diffs from . 82 | 83 | ## Handling multiple backends 84 | 85 | Code to support each type of backend (CouchDB, Membase, etc.) 
86 | resides in a separate Python module (e.g., `datastore/ds_membase.py`). 87 | 88 | The specifically module needed is loaded in dynamically (using 89 | `__import__`); the module is expected to provide a class `Datastore` 90 | that implements the required backend. 91 | 92 | This approach avoids (package) dependencies on support code for unused 93 | backend modules. 94 | 95 | ## Sizing Numbers 96 | 97 | An analysis of `swales-101025.osm.bz2`. This subset contains: 98 | 99 | * 816036 nodes 100 | * 80690 ways 101 | * 382 relations 102 | 103 | ### Element sizes with JSON based storage 104 | 105 | The OSM elements in the `swales-101025.osm.bz2` subset were stored in 106 | the data store in JSON encoded form. The size distribution seen was 107 | as follows: 108 | 109 | * Nodes 110 | * Average size 202 bytes 111 | * 12157 (1.489%) nodes exceed 256 bytes of JSON 112 | * 2538 (0.311%) nodes exceed 512 bytes of JSON 113 | * Ways 114 | * Average size 351 bytes 115 | * 7134 (8.8%) exceed 512 bytes 116 | * 1267 (1.6%) exceed 1024 bytes 117 | * Relations 118 | * Average size 1477 bytes 119 | * 90 (23.6%) exceed 2048 bytes 120 | * 44 (11.5%) exceed 3072 bytes 121 | * 25 (6.5%) exceed 4096 bytes 122 | 123 | 124 | 125 | [fullplanetdump]: http://wiki.openstreetmap.org/wiki/Planet.osm/full 126 | [issue3]: https://github.com/MapQuest/mapquest-osm-server/issues/3 127 | [issue4]: https://github.com/MapQuest/mapquest-osm-server/issues/4 128 | [issue14]: https://github.com/MapQuest/mapquest-osm-server/issues/14 129 | [osmChange]: http://wiki.openstreetmap.org/wiki/OsmChange 130 | [planetdiffs]: http://wiki.openstreetmap.org/wiki/Planet.osm/diffs 131 | [planetdump]: http://wiki.openstreetmap.org/wiki/Planet.osm 132 | [planetpbf]: http://wiki.openstreetmap.org/wiki/PBF 133 | -------------------------------------------------------------------------------- /src/python/apiserver/const.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 
AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ## Define "constants", so that Python catches mis-spellings early. 
24 | 25 | _ID = '_id' 26 | API = 'api' 27 | API_CALL_TIMEOUT = 'api-call-timeout' 28 | API_VERSION = 'api-version' 29 | API_VERSION_MAXIMUM = 'api-version-maximum' 30 | API_VERSION_MINIMUM = 'api-version-minimum' 31 | AREA = 'area' 32 | AREA_MAX = 'area-max' 33 | BBOX = 'bbox' 34 | BOUNDS = 'bounds' 35 | CFGSLAB = 'cfgslab' 36 | CFGVERSION = 1 37 | CHANGESET = 'changeset' 38 | CHANGESETS = 'changesets' 39 | CHANGESETS_INLINE_SIZE = 'changesets-inline-size' 40 | CHANGESETS_PER_SLAB = 'changesets-per-slab' 41 | CHANGESETS_MAX = 'changesets-max' 42 | CONFIGURATION_SCHEMA_VERSION = 'configuration-schema-version' 43 | CONTENT_TYPE = 'Content-Type' 44 | COUCHDB = 'couchdb' 45 | DATASTORE = 'datastore' 46 | DATASTORE_BACKEND = 'datastore-backend' 47 | DATASTORE_CONFIG = 'datastore-config' 48 | DATASTORE_ENCODING = 'datastore-encoding' 49 | DBHOST = 'dbhost' 50 | DBJOB_ADDELEM = 'dbjob-add-elem' 51 | DBJOB_QUIT = 'dbjob-quit' 52 | DBNAME = 'dbname' 53 | DBNAME_SUFFIXES = 'cgnrw' # changesets, geodocs, nodes, relations, ways 54 | DBPORT = 'dbport' 55 | DBURL = 'dburl' 56 | DEFAULT = 'DEFAULT' 57 | ELEMENT = 'element' 58 | FRONT_END = 'front-end' 59 | GENERATOR = 'generator' 60 | GEODOC = 'geodoc' 61 | GEODOC_LRU_SIZE = 'geodoc-lru-size' 62 | GEODOC_LRU_THREADS = 'geodoc-lru-threads' 63 | GEOHASH_LENGTH = 'geohash-length' 64 | ID = 'id' 65 | JSON = 'json' 66 | K = 'k' 67 | LAT = 'lat' 68 | LAT_MAX = +90.0 69 | LAT_MIN = -90.0 70 | LON = 'lon' 71 | LON_MAX = +180.0 72 | LON_MIN = -180.0 73 | MAXIMUM = 'maximum' 74 | MAXIMUM_ELEMENTS = 'maximum_elements' 75 | MAXGHLAT = 89.999999999999992 76 | MAXLAT = 'maxlat' 77 | MAXLON = 'maxlon' 78 | MEMBASE = 'membase' 79 | MEMBASE_MAX_VALUE_LENGTH = 20 * 1024 * 1024 80 | MEMBER = 'member' 81 | MEMBERS = 'members' 82 | MINIMUM = 'minimum' 83 | MINLAT = 'minlat' 84 | MINLON = 'minlon' 85 | ND = 'nd' 86 | NODE = 'node' 87 | NODES = 'nodes' 88 | NODES_INLINE_SIZE = 'nodes-inline-size' 89 | NODES_PER_SLAB = 'nodes-per-slab' 90 | OSM = 'osm' 
91 | PER_PAGE = 'per_page' 92 | PORT = 'port' 93 | PROJECT_DOC = 'project-doc' 94 | PROTOBUF = 'protobuf' 95 | REF = 'ref' 96 | REFERENCES = 'references' 97 | RELATION = 'relation' 98 | RELATIONS = 'relations' 99 | RELATIONS_INLINE_SIZE = 'relations-inline-size' 100 | RELATIONS_PER_SLAB = 'relations-per-slab' 101 | ROLE = 'role' 102 | SCALE_FACTOR = 'scale-factor' 103 | SECONDS = 'seconds' 104 | SERVER_NAME = 'server-name' 105 | SERVER_VERSION = 'server-version' 106 | SLAB_INDIRECT = 1 # Element 107 | SLAB_INLINE = 0 # Element is present inline. 108 | SLAB_LRU_SIZE = 'slab-lru-size' 109 | SLAB_LRU_THREADS = 'slab-lru-threads' 110 | SLAB_NOT_PRESENT = 2 # Element is not present in the slab. 111 | SOURCE_REPOSITORY = 'source-repository' 112 | STATUS = 'status' 113 | TAG = 'tag' 114 | TAGS = 'tags' 115 | TEXT_XML = 'text/xml' 116 | TIMEOUT = 'timeout' 117 | TRACEPOINTS = 'tracepoints' 118 | TRACEPOINTS_PER_PAGE = 'tracepoints-per-page' 119 | TYPE = 'type' 120 | UTF8 = 'utf-8' 121 | V = 'v' 122 | VERSION = 'version' 123 | WAY = 'way' 124 | WAYS = 'ways' 125 | WAYS_INLINE_SIZE = 'ways-inline-size' 126 | WAYS_PER_SLAB = 'ways-per-slab' 127 | WAYNODES = 'waynodes' 128 | WAYNODES_MAX = 'waynodes-max' 129 | -------------------------------------------------------------------------------- /src/python/dbmgr/dbm_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """Operations on an OSM database.""" 24 | 25 | import apiserver.const as C 26 | 27 | from apiserver.osmelement import new_osm_element 28 | from dbmgr.dbm_stats import increment_stats 29 | from dbmgr.dbm_geotables import GeoGroupTable 30 | 31 | def make_backreference(namespace, elemid): 32 | """Create a backreference string. 33 | 34 | namespace -- The OSM namespace for the element. 35 | elemid -- Element ID in the namespace. 36 | """ 37 | 38 | return namespace[0].upper() + elemid 39 | 40 | class DBOps: 41 | """This class implements the semantics of adding OSM elements and 42 | changesets to the backend.""" 43 | 44 | def __init__(self, config, options, db): 45 | "Initialize an operations structure." 
46 | self.db = db 47 | self.verbose = options.verbose 48 | self.geotable = GeoGroupTable(config, options, db) 49 | 50 | def add_element(self, elem): 51 | "Add an element to the datastore." 52 | 53 | self.db.store(elem) 54 | 55 | # If the element is a node, add it to the appropriate geodoc. 56 | ns = elem.namespace 57 | backreference = make_backreference(ns, elem.id) 58 | 59 | if self.verbose: 60 | increment_stats(ns) 61 | 62 | # Do element-specific processing. 63 | if ns == C.NODE: 64 | # Add the element to the appropriate geodoc. 65 | self.geotable.add(elem) 66 | 67 | elif ns == C.WAY: 68 | # Backlink referenced nodes to the current way. 69 | for (rstatus, node_or_key) in \ 70 | self.db.fetch_keys(C.NODE, map(str, elem[C.NODES])): 71 | if rstatus: 72 | node = node_or_key 73 | else: 74 | node = new_osm_element(C.NODE, node_or_key) 75 | node[C.REFERENCES].add(backreference) 76 | self.db.store(node) 77 | 78 | elif ns == C.RELATION: 79 | # If the element is a relation, backlink referenced ways & 80 | # relations. 81 | 82 | def _retrieve(selector, members): 83 | return [str(mref) for (mref, mrole, mtype) in members 84 | if mtype == selector] 85 | 86 | members = elem[C.MEMBERS] 87 | 88 | elements = [] 89 | for ns in [C.NODE, C.WAY, C.RELATIONS]: 90 | elements.append((ns, _retrieve(ns, members))) 91 | 92 | for (ns, refs) in elements: 93 | if len(refs) == 0: 94 | continue 95 | for (rstatus, node_or_key) in self.db.fetch_keys(ns, refs): 96 | # Retrieve all elements referenced by the relation. 97 | if rstatus: 98 | elem = node_or_key 99 | else: 100 | elem = new_osm_element(ns, node_or_key) 101 | 102 | # Add a backreference to the element being 103 | # referenced by this relation. 104 | elem[C.REFERENCES].add(backreference) 105 | self.db.store(elem) 106 | 107 | def add_changeset(self, changeset): 108 | "Add a changeset to the database." 
109 | raise NotImplementedError 110 | 111 | def finish(self): 112 | """Signal the end of DB operations.""" 113 | 114 | # Push out all pending geodoc changes. 115 | self.geotable.flush() 116 | 117 | # Request the underlying database to wind up operation. 118 | self.db.finalize() 119 | -------------------------------------------------------------------------------- /src/python/frontend/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ## The script entry point for the front-end server. 

import os.path
import sys

# BUG FIX: main() uses tornado.httpserver and tornado.ioloop, but the
# original imported only tornado.options.  Importing one submodule does
# not make its siblings available; the code only worked if some other
# import (presumably 'fe') pulled them in transitively.  Import what we
# use explicitly.
import tornado.httpserver
import tornado.ioloop
import tornado.options

import apiserver.const as C
from fe import OSMFrontEndServer
from apiserver.osmelement import init_osm_factory
from datastore.slabutil import init_slabutil

# Where to find configuration information.
default_config_directory = "config"
default_config_file = "osm-api-server.cfg"

tornado.options.define("backend", default=None,
                       type=str, metavar="BACKEND",
                       help="datastore backend to use")
tornado.options.define("config", default=default_config_file,
                       type=str, metavar="FILE",
                       help="configuration file to use")
tornado.options.define("encoding", default=None,
                       type=str, metavar="ENCODING",
                       help="Encoding used for values")
tornado.options.define("verbose", default=False,
                       type=bool, metavar="BOOLEAN",
                       help="Control verbosity")

def error(message):
    "Print an error message to stderr and exit with a non-zero status."
    sys.stderr.write("Error: " + message + "\n")
    sys.exit(1)

##
## Script entry point.
##
def main():
    """Launch the API server."""

    # Parse command line options if present.
    tornado.options.parse_command_line()
    options = tornado.options.options

    # Bring in (server-wide) configuration information.
    try:
        import configparser         # Python 3.0
    except ImportError:
        import ConfigParser as configparser

    # Read configuration information.  ConfigParser.read() silently
    # ignores missing files, hence the explicit sanity check below.
    configfiles = [options.config,
                   os.path.join(sys.path[0], default_config_directory,
                                default_config_file)]
    cfg = configparser.ConfigParser()
    cfg.read(configfiles)

    # Sanity check.
    if not cfg.has_section(C.FRONT_END):
        error("Incomplete configuration information, tried:\n\t" +
              "\n\t".join(configfiles))

    # Allow command-line options to override the configuration file.
    if options.backend:
        cfg.set(C.DATASTORE, C.DATASTORE_BACKEND, options.backend)
    if options.encoding:
        cfg.set(C.DATASTORE, C.DATASTORE_ENCODING, options.encoding)

    # Load the desired interface to the datastore.
    backend = cfg.get(C.DATASTORE, C.DATASTORE_BACKEND)
    try:
        module = __import__('datastore.ds_' + backend, fromlist=['Datastore'])
        datastore = module.Datastore(cfg)
    except ImportError as x:
        error("Could not initialize datastore of type \"%s\": %s" %
              (backend, str(x)))

    # Initialize the OSM element factory and other modules.
    init_slabutil(cfg)
    init_osm_factory(cfg)

    # Create an instance of the front-end server.
    port = cfg.getint(C.FRONT_END, C.PORT)
    feserver = OSMFrontEndServer(cfg, options, datastore)
    http_server = tornado.httpserver.HTTPServer(feserver.application)
    http_server.listen(port)

    # Start the server; ^C terminates it cleanly.
    try:
        tornado.ioloop.IOLoop.instance().start()
    except KeyboardInterrupt:
        if options.verbose:
            pass                # Print statistics etc.

#
# Invoke main()
#
if __name__ == "__main__":
    main()
# ---------------------------------------------------------------------
# src/python/tests/test_slabutil.py
# Copyright (c) 2011 AOL Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import apiserver.const as C
from datastore.slabutil import *
from ConfigParser import ConfigParser

INLINE_SIZE = 256
PER_SLAB = 1024
SLAB_LRU_SIZE = 8

def pytest_funcarg__config(request):
    "Build a ConfigParser seeded with the slab settings used below."

    cfg = ConfigParser()
    cfg.add_section(C.DATASTORE)

    inline_options = (C.CHANGESETS_INLINE_SIZE, C.NODES_INLINE_SIZE,
                      C.RELATIONS_INLINE_SIZE, C.WAYS_INLINE_SIZE)
    perslab_options = (C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
                       C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB)

    for option in inline_options:
        cfg.set(C.DATASTORE, option, str(INLINE_SIZE))
    for option in perslab_options:
        cfg.set(C.DATASTORE, option, str(PER_SLAB))

    return cfg


def test_use_slab(config):
    "Check that the expected namespaces use slabs."

    slabutil_init(config)

    # Changesets, nodes, relations and ways are slab-backed...
    for namespace in (C.CHANGESET, C.NODE, C.RELATION, C.WAY):
        assert slabutil_use_slab(namespace) is True

    # ...while geodocs currently are not.
    for namespace in (C.GEODOC,):
        assert slabutil_use_slab(namespace) is False

def test_get_config(config):
    "Check the return values from the 'slabutil_get_config()' method."

    slabutil_init(config)
    for namespace in (C.CHANGESET, C.NODE, C.RELATION, C.WAY):
        size, count = slabutil_get_config(namespace)
        assert size == INLINE_SIZE
        assert count == PER_SLAB

def test_groupkeys(config):
    "Check the expected return values from the 'group_keys()' API."

    slabutil_init(config)

    def expected_slabset(prefix):
        "Expected grouping of the key list below for a namespace prefix."
        return {
            prefix + "L0": set(['0', '1', '511', '1023']),
            prefix + "L1024": set(['1024', '1025', '2047']),
            prefix + "L2048": set(['2048']),
        }

    keys = map(str, [0, 1, 511, 1023, 1024, 1025, 2047, 2048])

    assert slabutil_group_keys(C.NODE, keys) == expected_slabset("N")
    assert slabutil_group_keys(C.WAY, keys) == expected_slabset("W")
    assert slabutil_group_keys(C.RELATION, keys) == expected_slabset("R")

def test_groupkeys_nonnumeric(config):
    "Check grouping of non-numeric (geohash) keys by 'group_keys()'."

    slabutil_init(config)

    keys = ['tdr4t', 's0000']
    expected = {
        "GLtdr4t": set(["tdr4t"]),
        "GLs0000": set(["s0000"]),
    }

    assert slabutil_group_keys(C.GEODOC, keys) == expected

def test_make_slabkey(config):
    "Test the make_slabkey() API."

    slabutil_init(config)

    cases = [
        (C.CHANGESET, '4567', 'CL4096'),
        (C.GEODOC, 'tdr4t', 'GLtdr4t'),
        (C.NODE, '1234', 'NL1024'),
        (C.RELATION, '16385', 'RL16384'),
        (C.WAY, '2345', 'WL2048'),
    ]

    for (namespace, key, expected) in cases:
        assert slabutil_make_slabkey(namespace, key) == expected
# ---------------------------------------------------------------------
# src/python/dbmgr/dbm_input.py
# Copyright (c) 2011 AOL Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""Turn input sources into iterables.

Exported functions:

makesource -- Turn a file into an iterable that returns OSM elements.
"""

import gzip
import os

from lxml.etree import iterparse
from pipes import quote

from apiserver.osmelement import encode_coordinate, new_osm_element
import apiserver.const as C

def _make_osm_iterator(config, f):
    """Return a generator yielding OSM elements parsed from 'f'.

    'f' is a file-like object containing an OSM XML document; one
    complete changeset/node/way/relation element is yielded at a time.

    Raises ValueError if the document's root tag is not <osm>.
    """

    # NOTE(review): the original read C.SCALE_FACTOR into an unused
    # local here; coordinate scaling happens inside encode_coordinate().
    # The dead read has been removed.

    parser = iter(iterparse(f, events=('start', 'end')))
    event, root = parser.next()
    if root.tag != u'osm':
        raise ValueError("Unexpected root tag: %s" % root.tag)

    depth = 0
    doc = None
    ignored_elements = ('bound', 'bounds')
    processed_elements = ('changeset', 'node', 'way', 'relation')

    # Parse the input file.
    for event, elem in parser:

        element_name = elem.tag
        if element_name in ignored_elements:
            continue

        if event == 'start':
            if element_name in processed_elements:
                assert depth == 0

                # Start of the element.  Copy 'standard' attributes,
                # translating them to native values where possible.
                doc = new_osm_element(element_name.lower(), elem.get('id'))
                for k, v in elem.items():
                    if k == 'visible':
                        # BUG FIX: the attribute value is the string
                        # "true" or "false"; bool() is True for any
                        # non-empty string, so the original mapped
                        # visible="false" to True.
                        v = (v == 'true')
                    elif k == 'version' or k == 'uid':
                        v = int(v)
                    elif k == 'lat' or k == 'lon':
                        v = encode_coordinate(v)
                    doc[k] = v

            elif element_name == 'tag':
                # Each 'tag' has a key/value associated with it.
                doc.setdefault('tags', {})[elem.get('k')] = elem.get('v')

            elif element_name == 'nd':
                # 'nd' elements contain node references of a way.
                doc['nodes'].add(int(elem.get('ref')))

            elif element_name == 'member':
                # Collect the list of (ref, role, type) tuples.
                doc.setdefault('members', []).append((elem.get('ref'),
                                                      elem.get('role'),
                                                      elem.get('type')))
            depth = depth + 1

        elif event == 'end':
            depth = depth - 1
            if depth == 0:
                yield doc       # Return a complete element to the caller.

            root.clear()        # Keep memory usage down.


def makesource(config, options, fn):
    """Return an iterator over the OSM elements contained in 'fn'.

    Transparently uncompresses '.bz2' and '.gz' files.  Raises
    NotImplementedError for recognized-but-unsupported formats and
    ValueError for unrecognized ones.
    """

    # Determine the uncompression technique needed.
    basefn, ext = os.path.splitext(fn)

    if ext in (".bz2", ".gz"):
        if ext == ".bz2":
            f = os.popen("bzcat %s" % quote(fn), 'r')
        else:
            # 'basefn' already holds the name minus the compression
            # suffix; the original recomputed it redundantly here.
            f = gzip.GzipFile(fn, mode='r')
    else:
        basefn = fn
        f = open(fn, mode='r')

    # Determine the file format.
    if basefn.endswith(".osc"):
        raise NotImplementedError("OsmChange input")
    if basefn.endswith(".pbf"):
        raise NotImplementedError("PBF input")
    if basefn.endswith(".xml") or basefn.endswith(".osm"):
        return _make_osm_iterator(config, f)

    # BUG FIX: the original fell off the end and returned None for
    # unrecognized formats, deferring the failure to the caller; fail
    # loudly here instead.
    raise ValueError("Unrecognized input format: %s" % fn)
# ---------------------------------------------------------------------
# src/python/tests/test_dsmembase.py
# Copyright (c) 2011 AOL Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import apiserver.const as C
import memcache
import json
import pytest

from datastore.ds_membase import Datastore
from datastore.slabutil import slabutil_init
from ConfigParser import ConfigParser
import apiserver.osmelement as O

__BADNAMESPACE = 'badnamespace'
__DBHOST = 'localhost'
__DBPORT = '11211'
__INLINE_SIZE = 256
__NOSUCHKEY = '__NOSUCHKEY__'
__NOSUCHSLABELEMKEY = '-1'
__PER_SLAB = 8
__SLAB_LRU_SIZE = 8
__SLAB_LRU_THREADS = 4

# Helpers that talk to the membase instance directly, bypassing the
# datastore layer under test.
def _client():
    "Return a memcache client connected to the test server."
    return memcache.Client(['%s:%s' % (__DBHOST, __DBPORT)])

def insert_key(key, value):
    "Store 'value' under 'key' directly in membase."
    _client().set(key, value)

def retrieve_key(key):
    "Read 'key' directly from membase."
    return _client().get(key)

def pytest_funcarg__datastore(request):
    "Return a membase-backed Datastore configured for these tests."

    cfg = ConfigParser()
    cfg.add_section(C.DATASTORE)

    for option in (C.CHANGESETS_INLINE_SIZE, C.NODES_INLINE_SIZE,
                   C.RELATIONS_INLINE_SIZE, C.WAYS_INLINE_SIZE):
        cfg.set(C.DATASTORE, option, str(__INLINE_SIZE))

    for option in (C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
                   C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB):
        cfg.set(C.DATASTORE, option, str(__PER_SLAB))

    cfg.set(C.DATASTORE, C.DATASTORE_ENCODING, 'json')
    cfg.set(C.DATASTORE, C.SLAB_LRU_SIZE, str(__SLAB_LRU_SIZE))
    cfg.set(C.DATASTORE, C.SLAB_LRU_THREADS, str(__SLAB_LRU_THREADS))

    cfg.add_section(C.MEMBASE)
    cfg.set(C.MEMBASE, C.DBHOST, __DBHOST)
    cfg.set(C.MEMBASE, C.DBPORT, __DBPORT)

    slabutil_init(cfg)

    return Datastore(cfg)

def test_datastore_wrong_namespace(datastore):
    "An access to an unknown namespace must be rejected."
    with pytest.raises(KeyError):
        datastore.fetch(__BADNAMESPACE, "0")

def test_datastore_direct_fetch(datastore):
    "Directly fetchable (non-slabbed) elements can be read back."
    key = 'Gs0000'
    value = O.new_osm_element(C.GEODOC, key[1:])
    insert_key(key, O.encode_json(value))

    assert datastore.fetch(C.GEODOC, key[1:]) == value

def test_datastore_failed_direct_fetch(datastore):
    "Fetching a non-existent element returns None."
    assert datastore.fetch(C.GEODOC, __NOSUCHKEY) is None

def test_datastore_failed_slab_fetch(datastore):
    "Fetching a non-existent element from a slab returns None."
    assert datastore.fetch(C.NODE, __NOSUCHSLABELEMKEY) is None

def test_datastore_slab_inline_fetch(datastore):
    "Elements stored inline in a slab are fetched correctly."

    slab_key = 'NL8'
    slab_start = __PER_SLAB
    keys = range(slab_start, slab_start + __PER_SLAB)

    # Build a slab holding inline nodes for the even keys only.
    slab = []
    for key in keys:
        if key % 2 == 0:
            node = O.new_osm_element(C.NODE, str(key))
            slab.append((C.SLAB_INLINE, node))

    insert_key(slab_key, datastore.encode(slab))

    # Even keys come back as the stored nodes; odd keys are absent.
    inline = 0
    for key in keys:
        fetched = datastore.fetch(C.NODE, str(key))
        if key % 2 == 0:
            _, stored = slab[inline]
            assert fetched == stored
            inline += 1
        else:
            assert fetched is None

def test_datastore_write_element(datastore):
    "Test the store_element() entry point."

    key = 'Gs0000'
    value = O.new_osm_element(C.GEODOC, key[1:])

    datastore.store_element(C.GEODOC, key[1:], value)

    assert retrieve_key(key) == O.encode_json(value)
# ---------------------------------------------------------------------
# src/python/tests/test_osmelement.py
# Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | import math 24 | 25 | from ConfigParser import ConfigParser 26 | 27 | import apiserver.const as C 28 | import apiserver.osmelement as O 29 | 30 | from datastore.slabutil import slabutil_init 31 | 32 | def pytest_funcarg__config(request): 33 | "Prepare a configuration parser object." 
34 | 35 | cfg = ConfigParser() 36 | 37 | cfg.add_section(C.DATASTORE) 38 | cfg.set(C.DATASTORE, C.SCALE_FACTOR, '10000000') 39 | cfg.set(C.DATASTORE, C.CHANGESETS_INLINE_SIZE, '1024') 40 | cfg.set(C.DATASTORE, C.CHANGESETS_PER_SLAB, '8') 41 | cfg.set(C.DATASTORE, C.NODES_INLINE_SIZE, '1024') 42 | cfg.set(C.DATASTORE, C.NODES_PER_SLAB, '8') 43 | cfg.set(C.DATASTORE, C.RELATIONS_INLINE_SIZE, '1024') 44 | cfg.set(C.DATASTORE, C.RELATIONS_PER_SLAB, '8') 45 | cfg.set(C.DATASTORE, C.WAYS_INLINE_SIZE, '1024') 46 | cfg.set(C.DATASTORE, C.WAYS_PER_SLAB, '8') 47 | 48 | cfg.add_section(C.FRONT_END) 49 | cfg.set(C.FRONT_END, C.SERVER_VERSION, '0.6') 50 | cfg.set(C.FRONT_END, C.SERVER_NAME, 'Test') 51 | 52 | slabutil_init(cfg) 53 | 54 | return cfg 55 | 56 | 57 | def test_new_node(config): 58 | "Test the creation a element." 59 | 60 | O.init_osm_factory(config) 61 | 62 | nodeid = '42' 63 | n = O.new_osm_element(C.NODE, nodeid) 64 | 65 | # Check the 'id' field. 66 | assert n.id == str(nodeid) 67 | # Check that C.REFERENCES field exists, and is an empty set. 68 | assert n[C.REFERENCES] == set() 69 | 70 | 71 | def test_new_way(config): 72 | "Test creation of a element." 73 | 74 | O.init_osm_factory(config) 75 | wayid = '42' 76 | w = O.new_osm_element(C.WAY, wayid) 77 | 78 | # Check the "id", NODES and REFERENCES attributes. 79 | assert w.id == str(wayid) 80 | assert w[C.REFERENCES] == set() 81 | assert w[C.NODES] == set() 82 | 83 | 84 | def test_new_relation(config): 85 | "Test creation of a element." 86 | 87 | O.init_osm_factory(config) 88 | relid = '42' 89 | r = O.new_osm_element(C.RELATION, relid) 90 | 91 | # Check the "id", MEMBER and REFERENCES attributes. 92 | assert r.id == str(relid) 93 | assert r[C.REFERENCES] == set() 94 | assert r[C.MEMBERS] == [] 95 | 96 | 97 | def test_new_geodoc(config): 98 | "Test the creation of a geodoc element." 
99 | 100 | O.init_osm_factory(config) 101 | georegion = 'szmyg' # lat, long == 42, 42 102 | g = O.new_osm_element(C.GEODOC, georegion) 103 | 104 | # Check the "id" field. 105 | 106 | assert g.id == georegion 107 | assert g[C.NODES] == set() 108 | 109 | bbox = g[C.BBOX] 110 | assert set(bbox.keys()) == set(['n', 's', 'e', 'w']) 111 | 112 | 113 | def test_encode_coordinate(config): 114 | "Test encoding of a coordinate string." 115 | 116 | O.init_osm_factory(config) 117 | 118 | # The following tests assume that the scale factor in use is 10^7. 119 | _sf = config.getint(C.DATASTORE, C.SCALE_FACTOR) 120 | assert _sf == 10000000 121 | 122 | # 123 | # Test encoding of strings. 124 | # 125 | inputlist = [ ('0', 0), # Zero. 126 | ('0.00123456', 12345), # Tiny 127 | ('0.12345678', 1234567), # Fraction only 128 | ('1.23456789', 12345678), # Normal, small 129 | ('12.3456789', 123456789), # Normal 130 | ('123.456789', 1234567890), # Normal, large 131 | ('1', 10000000), # Integral, small 132 | ('12', 120000000), # Integral 133 | ('123', 1230000000) # Integral, large 134 | ] 135 | for (strval, refval) in inputlist: 136 | v = O.encode_coordinate(strval) 137 | assert refval == v 138 | 139 | 140 | # 141 | # Test encoding of floating point values. 142 | # 143 | inputlist = [ (0.0, 0), 144 | (0.123456, 1234560), 145 | (0.1234567, 1234567), 146 | (1.0, 10000000), 147 | (1.23456, 12345600), 148 | (12.3455899, 123455899) 149 | ] 150 | for (flval, refval) in inputlist: 151 | v = O.encode_coordinate(flval) 152 | assert v == refval 153 | -------------------------------------------------------------------------------- /doc/DeploymentInstructions.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | This document describes how to install and maintain an instance of 4 | this server. 
5 | 6 | ## Note 7 | 8 | Currently, the 'front-end server' and the 'ingestion tool' (the 9 | document [Overview][] describes what these are) work "in-place" in the 10 | source tree. An install-friendly package is yet to be created; see 11 | ticket [#5][issue5]. 12 | 13 | ## Software Dependencies 14 | 15 | The server uses the following software packages: 16 | 17 | 1. The [Python][], programming language. 18 | 1. [Tornado][], a [Python][] web server framework, for the front-end. 19 | 1. [lxml][], a [Python][] XML parsing library, used by both the 20 | front-end and the ingestion tool. 21 | 1. The [cjson][] JSON (en|de)coder module. 22 | 1. [Membase][], a scalable, distributed key/value store, used as the 23 | data store. 24 | 1. [Python Geohash][pygeohash], a geohashing library. 25 | 1. [Python Memcache][pymemcache], a [Memcache][] interface for [Python][], 26 | used to connect to the [Membase][] server, in compatibility mode. 27 | 1. [Py.Test][pytest], a test framework. 28 | 29 | ### Installation on Ubuntu 10.04 LTS 30 | 31 | To install these dependencies on an Ubuntu GNU/Linux v10.04 LTS system, do: 32 | 33 | 1. Install [Membase][]: 34 | 1. Download the `.deb` file appropriate for your computer architecture 35 | from the project's [download page][membasedownload]. 36 | 1. Install the download `.deb` package using the **dpkg** utility. 37 | For example:
38 | `% sudo dpkg -i membase-server-community_x86_1.6.5.3.deb`
39 | Repeat this on all the machines that you wish to run your 40 | [Membase][] cluster on. 41 | 1. Using your browser, login to the membase console at 42 | http://*hostname*:8091, and create a default bucket of type 43 | 'membase' listening on port 11211. If you have multiple machines 44 | in your [Membase][] cluster, you would need to login and setup 45 | each of these. 46 | 47 | *Note*: By default [Membase][] will listen and accept protocol requests 48 | on *all* network interfaces. On an internet-facing server, you would 49 | need to adjust your firewall rules to prevent the world+dog from accessing 50 | your membase instance. 51 | 2. Install pre-packaged binaries:
52 | `% sudo apt-get install git-core gcc g++`
53 | `% sudo apt-get install python2.6 python2.6-dev python-lxml python-setuptools python-memcache python-cjson` 54 | 3. Install additional Python libraries and tools 55 | 1. Install [Python geohash][pygeohash] using `easy_install`:
56 | `% sudo easy_install python-geohash` 57 | 1. Install [Tornado][]:
58 | `% git clone https://github.com/facebook/tornado.git`
59 | `% cd tornado`
60 | `% sudo python setup.py install` 61 | 4. Optional stuff: 62 | 1. Install `py.test`, if you wish to run the tests:
63 | `% sudo easy_install pytest` 64 | 65 | ## Setup 66 | 67 | The procedure to bring up the server is as follows. 68 | 69 | 1. Install the dependencies listed above. 70 | 1. Checkout the server source from [GitHub][].
71 | `% git clone git://github.com/MapQuest/mapquest-osm-server.git` 72 | 1. Edit the file `src/python/config/osm-api-server.cfg`, and change 73 | the `dbhost` configuration item in section `membase` to point to 74 | where your [Membase][] instance lives. The default configuration 75 | assumes that your membase server is running on localhost. 76 | 1. Download a [planet.osm][osmplanet] dump or subset thereof, for 77 | example, from . 78 | 1. Load in the downloaded planet file using the `db-mgr` tool:
79 | `% cd src/python`
80 | `% ./db-mgr PATH-TO-THE-DOWNLOADED-PLANET` 81 | 1. Run the front-end of the server:
82 | `% sudo ./front-end`
83 | The server listens for API requests on port 80 by default. The 84 | configuration item `port` in the configuration section `front-end` 85 | can be used to change this. 86 | 1. Check operation of the server. Assuming the default 87 | configuration, you could try the `api/capabilities` URL as 88 | below:<br>
89 |
 90 |     % curl http://localhost/api/capabilities
 91 |     <?xml version='1.0' encoding='utf-8'?>
 92 |     <osm version="0.6" generator="OSM API Server Prototype 0.6">
 93 |      <api>
 94 |        <version minimum="0.6" maximum="0.6"/>
 95 |         <area maximum="180.0"/>
 96 |         <tracepoints per_page="5000"/>
 97 |         <waynodes maximum="2000"/>
 98 |         <changesets maximum_elements="50000"/>
 99 |         <timeout seconds="300"/>
100 |      </api>
101 |     </osm>
102 |     
103 | The document [SupportedRequests][] lists the current set of APIs supported. 104 | 105 | 106 | 107 | [github]: http://www.github.com/ "GitHub" 108 | [issue5]: https://github.com/MapQuest/mapquest-osm-server/issues/5 "Issue 5" 109 | [lxml]: http://lxml.de/ "XML Processing Library" 110 | [Membase]: http://www.membase.org/ "Membase" 111 | [membasedownload]: http://www.couchbase.com/downloads/membase-server/community 112 | [memcache]: http://memcached.org/ "Memcache" 113 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "OSM Planet" 114 | [Overview]: Overview.md 115 | [pygeohash]: http://pypi.python.org/pypi/python-geohash "Geohashing library" 116 | [pymemcache]: http://pypi.python.org/pypi/python-memcached/ "Memcache interface" 117 | [pytest]: http://www.pytest.org/ "Py.Test" 118 | [Python]: http://www.python.org/ "The Python Programming Language" 119 | [SupportedRequests]: SupportedRequests.md 120 | [Tornado]: http://www.tornadoweb.org/ "The Tornado Web Server" 121 | [cjson]: http://pypi.python.org/pypi/python-cjson "The cjson JSON en/decoder library" 122 | -------------------------------------------------------------------------------- /doc/Slabs.org: -------------------------------------------------------------------------------- 1 | SLABS -*- mode: org; -*- 2 | 3 | * Motivation 4 | OSM data has a large number of independently addressable elements 5 | such as nodes, elements, ways and changesets. Each element is named 6 | using a decimal string. There are today over a billion (10^9) 7 | elements in the OSM database. 8 | 9 | If directly expressed in key/value form, this means that the 10 | datastore needs to be able to deal with about a billion keys. OSM 11 | key sizes are of the order of 10 bytes; OSM values are a few hundred 12 | bytes on the average. 13 | 14 | The Membase datastore keeps all its keys in RAM by design. Membase 15 | also has an overhead of 120 bytes per key. 
Thus a straightforward 16 | mapping of element IDs under Membase's design leads to very large RAM 17 | requirements. 18 | 19 | Grouping multiple OSM elements into "slabs" is a work-around for 20 | this issue. Each "slab" is addressed using a Membase key. 21 | * Design 22 | ** Basic Design 23 | - Each element gets its own slab type (ways, nodes, changesets, 24 | and relations). 25 | - Elements are grouped into slabs. Elements can be in the following 26 | states in a slab: 27 | 1. Present in the datastore, and inline in the slab. Used for 28 | elements that are 'small' (for a configurable value of 29 | 'small'). 30 | 2. Present in datastore, but not 'inline' in the slab. 31 | These elements are "oversized" and are stored separately. 32 | They are retrieved using an independent fetch from the 33 | datastore. 34 | 3. Not present in the datastore. Such elements may be 35 | `negatively cached', as an optimization. 36 | - Each kind of slab has two configuration variables: 37 | - The number of elements per slab (configuration variables: 38 | {nodes|ways|relations}-per-slab). 39 | - The max "inline" size of an element that resides in a slab. 40 | (configuration variables: {nodes|ways|relations}-inline-size). 41 | ** Size limits 42 | - Membase has a max size of approximately 20MB for each value. 43 | This sets the maximum size for the wire representation of each 44 | slab. 45 | - Membase keys are limited to 256 bytes. This limit is not 46 | expected to be a problem in the current design. 47 | ** Dealing with too-large elements 48 | If the total size for a slab is larger than some configurable 49 | limit, elements larger than the configuration limit 50 | (\*-inline-size) that are part of the slab can be made 51 | 'standalone'. 52 | ** I/O operations 53 | - I/O operations are done one slab at a time. Batching of slab I/O 54 | operations is not necessary since each slab would already be of a 55 | substantial size. 
56 | ** Interaction with caching 57 | - OSM elements are cached locally so as to improve request 58 | handling latencies and to reduce the I/O transfer needs of 59 | the system. 60 | - The cache will hold all the elements for a given slab, or will 61 | hold none of them. 62 | - A cache element can be in one of the following states: 63 | - 'present' => present in the cache 64 | - 'not present' => not in the cache, but could be in the data 65 | store. 66 | - 'negatively cached' => definitely missing from the data store. 67 | - Slabs are managed by a buffer with 'least recently used' 68 | semantics. 69 | - Whenever an element in the cache is accessed, the slab to which 70 | the cache belongs is moved to the most-recently-used position 71 | in the slab LRU. 72 | - When the cache becomes 'full', the least recently used slab 73 | is ejected from the cache, along with all its contents. 74 | *** Reads of cache elements 75 | - A read miss causes the associated slab to be fetched and 76 | inserted into the most-recently-used end of the slab LRU buffer. 77 | All elements present in the slab will be inserted into the 78 | cache. 79 | - If I/O is in progress for the cache element/slab, then the 80 | thread of control performing the read will wait. 81 | - A read hit of a cache element causes its associated slab to move 82 | to the most-recently-used end of the slab LRU buffer. 83 | *** Writes of cache elements 84 | - A 'store' of a cache element will move its associated slab 85 | descriptor to the most-recently-used end of the slab LRU buffer. 86 | - If I/O is in progress for the slab associated with a cache 87 | element, the thread of control performing the write will wait 88 | for the I/O to complete. 89 | *** Reads of slabs 90 | - The current implementation handles one read request for a slab 91 | at a time. 92 | - When the I/O completes, all the elements in the slab are added 93 | to the cache. 
94 | - Elements that would fall into the slab but are not present are 95 | marked as 'negatively cached'. 96 | - When performing a read of a slab: 97 | 1. Atomically mark the slab as I/O-in-progress. This causes 98 | subsequent retrievals of cache elements in the slab to block. 99 | 2. Issue the read. 100 | 3. Vivify elements based on the slab's contents, converting from 101 | the wire representation used (JSON/protobuf/whatever), and 102 | insert them into the cache. 103 | 4. If some elements in the slab are not 'inline', issue reads 104 | for these and vivify them. 105 | 5. Release the slab from the I/O-in-progress state, and insert 106 | it into the most-recently-used end of the slab LRU buffer. 107 | *** Writes of slabs 108 | - Slabs are scheduled to be written out in LRU order. 109 | - All 'inline' elements in a slab will be written out together 110 | (as part of the slab). 111 | - 'Non inline' elements are written back at the same time, but as 112 | individual objects. 113 | - All elements in the slab are removed from the cache when the 114 | slab is written to the data store. 115 | - Slabs that are to be written out are marked as 'I/O in progress' 116 | till the I/O completes. This is to prevent another thread from 117 | accessing an element/slab that is undergoing I/O. 118 | - When performing a write of slab: 119 | 1. Atomically mark the slab as 'I/O in progress'. This causes 120 | subsequent retrievals of cache elements referenced by the 121 | slab to block. 122 | 2. Collect all cache elements needed for creating the slab, 123 | and create the wire representation (JSON/protobuf/other) of 124 | the slab object. 125 | 3. Issue the write request. 126 | 4. When the write request completes, remove all the elements 127 | in the slab from the cache. 128 | 5. Finally, remove the slab from the slab LRU buffer. 
129 | -------------------------------------------------------------------------------- /src/python/dbmgr/__main__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ## Script entry point for the database management tool. 
import os.path
import sys
import itertools

## Tool configuration.
devconfigdir = 'config'                  # Development-tree config directory.
devconfigfilename = 'osm-api-server.cfg'

toolname = 'dbmgr'
toolversion = '0.1'
toolconfig = '/etc/openstreetmap/osm-api-server.cfg'  # Installed config path.

from datastore.ds_geohash import init_geohash
from datastore.slabutil import init_slabutil
from dbmgr.dbm_input import makesource
from dbmgr.dbm_ops import DBOps
from dbmgr.dbm_stats import fini_statistics, init_statistics

import apiserver.const as C
from apiserver.osmelement import init_osm_factory

#
#
# SCRIPT ENTRY POINT
#

usage = '''%prog [--I|--init] [options]
%prog [options] [files]...

Manage an OSM database.

Use option -h/--help for help on usage.'''

def main():
    """Manage the OSM DB during development.

    Parses command-line options, reads configuration, initializes the
    configured datastore backend and supporting modules, and then feeds
    every OSM element found in the input files into the datastore.

    Exits via OptionParser.error()/exit() on configuration or input
    errors.
    """
    from optparse import OptionParser

    parser = OptionParser(usage=usage, prog=toolname,
                          version='%prog ' + toolversion)
    parser.add_option('-b', '--buffering', dest='buffering', metavar="NUMBER",
                      default=64, type="int",
                      help="Buffer size in KB for *zip uncompression " +
                      "[%default]")
    parser.add_option('-B', '--backend', dest='backend', metavar='DBTYPE',
                      default=None,
                      help="Type of backend to use [from configuration file]")
    parser.add_option('-C', '--config', dest='config', metavar="FILENAME",
                      default=toolconfig,
                      help="Path to configuration information [%default]")
    parser.add_option('-E', '--encoding', dest='datastore_encoding',
                      metavar='ENCODING', default=None, type="str",
                      help="Encoding for use for values [%default]")
    parser.add_option('-I', '--init', dest='doinit', action='store_true',
                      default=False, help='(Re-)initialize the backend')
    parser.add_option('-n', '--dryrun', dest='dryrun', metavar="BOOLEAN",
                      default=False, action="store_true",
                      help="Parse, but do not upload data [%default]")
    parser.add_option('-T', '--nothreading', dest='nothreading',
                      metavar="BOOLEAN", default=False, action="store_true",
                      help="Do not use threads [%default]")
    parser.add_option('-v', '--verbose', dest='verbose', metavar="BOOLEAN",
                      default=False, action="store_true",
                      help="Be verbose")
    parser.add_option("-x", '--nochangesets', dest="nochangesets",
                      action="store_true", default=False,
                      help="Skip retrieval of changeset information "
                      "[%default]")

    options, args = parser.parse_args()

    # Read configuration information: the path given on the command line
    # (or the installed default), falling back to the in-tree dev config.
    configfiles = [options.config, os.path.join(sys.path[0], devconfigdir,
                                                devconfigfilename)]
    from ConfigParser import ConfigParser
    cfg = ConfigParser()
    cfg.read(configfiles)

    # Sanity check: a usable config must at least have the front-end section.
    if not cfg.has_section(C.FRONT_END):
        parser.error("Incomplete configuration, tried:\n\t" +
                     "\n\t".join(configfiles))

    # Command-line options override values from the configuration file.
    if options.datastore_encoding:
        cfg.set(C.DATASTORE, C.DATASTORE_ENCODING, options.datastore_encoding)
    if options.backend:
        cfg.set(C.DATASTORE, C.DATASTORE_BACKEND, options.backend)

    # Initialize statistics.
    init_statistics(cfg, options)

    # Load in the desired interface to the datastore.
    backend = cfg.get(C.DATASTORE, C.DATASTORE_BACKEND)
    try:
        module = __import__('datastore.ds_' + backend,
                            fromlist=['Datastore'])
    except ImportError as x:
        parser.exit("Error: Could not initialize backend of type \"%s\": %s" %
                    (backend, str(x)))

    db = module.Datastore(cfg, not options.nothreading, True)

    if options.doinit:
        db.initialize()

    ops = DBOps(cfg, options, db)

    # Initialize the geohash module.
    init_geohash(cfg.getint(C.DATASTORE, C.GEOHASH_LENGTH),
                 cfg.getint(C.DATASTORE, C.SCALE_FACTOR))

    # Initialize the OSM element factory and related modules.
    init_slabutil(cfg)
    init_osm_factory(cfg)

    # Turn file names into iterators that deliver an element at a time.
    try:
        iterlist = [makesource(cfg, options, fn) for fn in args]
        inputelements = itertools.chain(*iterlist)
    except Exception as x:
        parser.exit("Error: " + str(x))

    for elem in inputelements:
        # Add basic elements.
        if elem.namespace in [C.CHANGESET, C.NODE, C.RELATION, C.WAY]:
            ops.add_element(elem)
        else:
            # BUG FIX: this message previously used 'elem.kind', an
            # attribute not used anywhere else in this tool; the check
            # above is on 'elem.namespace'. (TODO confirm against
            # apiserver.osmelement that 'kind' never existed.)
            raise NotImplementedError("Element type: %s" % elem.namespace)

    ops.finish()
    fini_statistics(options)

if __name__ == '__main__':
    main()
"""An interface to a Membase based backend store."""

import apiserver.const as C

import memcache  # Use Memcache bindings (for now).
memcache.SERVER_MAX_VALUE_LENGTH = C.MEMBASE_MAX_VALUE_LENGTH  # Update limit.

import types
import threading

from apiserver.osmelement import new_osm_element, OSMElement
from datastore.ds import DatastoreBase
from datastore.slabutil import *

class DatastoreMembase(DatastoreBase):
    "An interface to a Membase (www.membase.org) datastore."

    # Configuration keys persisted in (writeback mode) or restored from
    # (read mode) the datastore itself, so the front end and the loader
    # agree on slab geometry.
    SLAB_CONFIGURATION_KEYS = [C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
                               C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB]

    def __init__(self, config, usethreads=False, writeback=False):
        """Initialize the datastore.

        Parameters:

            config     - ConfigParser instance with datastore settings.
            usethreads - if true, register the helper threads created by
                         the base class in addition to the current thread.
            writeback  - if true, this instance will write slab
                         configuration to the store; otherwise the
                         configuration is read back and validated.
        """

        # One memcache client per registered thread, keyed by thread name
        # (the client objects are not shared across threads).
        self.conndb = {}

        DatastoreBase.__init__(self, config, usethreads, writeback)

        dbhosts = config.get(C.MEMBASE, C.DBHOST)
        dbport = config.get(C.MEMBASE, C.DBPORT)

        self.membasehosts = [h + ':' + dbport for h in dbhosts.split()]

        threads = [threading.currentThread()]
        if usethreads:
            threads.extend(self.threads)

        self.register_threads(threads)

        if writeback:
            # Store slab configuration information for subsequent
            # retrieval by the front end.
            slabconfig = new_osm_element(C.DATASTORE_CONFIG, C.CFGSLAB)
            for k in DatastoreMembase.SLAB_CONFIGURATION_KEYS:
                slabconfig[k] = config.get(C.DATASTORE, k)
            slabconfig[C.CONFIGURATION_SCHEMA_VERSION] = C.CFGVERSION
            self.slabconfig = slabconfig
        else:
            # Read slab configuration information from the data store
            # and propagate it into the live configuration.
            self.slabconfig = slabconfig = \
                self.retrieve_element(C.DATASTORE_CONFIG, C.CFGSLAB)
            if slabconfig is not None:
                schema_version = slabconfig.get(C.CONFIGURATION_SCHEMA_VERSION)
                if schema_version != C.CFGVERSION:
                    raise ValueError(
                        "Datastore schema version mismatch: expected %s, "
                        "actual %s." %
                        (str(C.CFGVERSION), str(schema_version)))
                for (k, v) in slabconfig.items():
                    if k in DatastoreMembase.SLAB_CONFIGURATION_KEYS:
                        config.set(C.DATASTORE, k, v)
            else:
                raise ValueError(
                    "Datastore is missing configuration information.")

    def _get_connection(self):
        "Return the memcache client registered for the calling thread."
        return self.conndb[threading.currentThread().name]

    def register_threads(self, threads):
        "Register threads with the datastore module."
        for t in threads:
            c = memcache.Client(self.membasehosts, debug=1)
            self.conndb[t.name] = c

    def retrieve_element(self, namespace, key):
        """Return the element for a key, or None if absent.

        Parameters:

            namespace - namespace for element.
            key       - the key to retrieve.
        """

        # Datastore keys are the upper-cased namespace initial + element key.
        dskey = namespace[0].upper() + key

        db = self._get_connection()
        wirebits = db.get(dskey)

        if wirebits is None:
            return None
        n = new_osm_element(namespace, key)
        n.from_mapping(self.decode(wirebits))
        return n

    def store_element(self, namespace, key, value):
        """Store an element at a key."""

        assert isinstance(value, OSMElement)

        dskey = namespace[0].upper() + key
        db = self._get_connection()
        db.set(dskey, self.encode(value.as_mapping()))

    def retrieve_slab(self, namespace, slabkey):
        """Return a slab of elements as a list of (id, element) pairs,
        or None if the slab is not in the datastore."""

        db = self._get_connection()
        wirebits = db.get(slabkey)

        if wirebits is None:
            return None

        slab = []
        for (st, kv) in self.decode(wirebits):
            if st == C.SLAB_NOT_PRESENT:
                continue

            if st == C.SLAB_INDIRECT:
                # Oversized element stored standalone; fetch it separately.
                elem = self.retrieve_element(namespace, kv)
                assert elem is not None, "Missing indirect element"
            elif st == C.SLAB_INLINE:
                elem = new_osm_element(namespace, kv[C.ID])
                elem.from_mapping(kv)
            else:
                # BUG FIX: this assertion previously referenced the
                # undefined name 'status' (a NameError at failure time);
                # the loop variable is 'st'.
                assert False, "Unknown status %d" % st
            slab.append((elem.id, elem))

        return slab

    def store_slab(self, namespace, slabkey, slabelems):
        """Store a slab's worth of contents."""

        _, nperslab = slabutil_get_config(namespace)
        assert len(slabelems) == nperslab

        slab = []
        for (st, e) in slabelems.items():
            if st:
                # Todo ... INDIRECT elements.
                slab.append((C.SLAB_INLINE, e.as_mapping()))

        rawbits = self.encode(slab)
        db = self._get_connection()
        db.set(slabkey, rawbits)

    def initialize(self):
        "Initialize the database."
        # Flush all existing elements.
        self.conndb[threading.currentThread().name].flush_all()

        # Save the current slab configuration.
        self.store_element(C.DATASTORE_CONFIG, C.CFGSLAB, self.slabconfig)


Datastore = DatastoreMembase
22 | 23 | """Utility functions used for managing slab based access.""" 24 | 25 | import collections 26 | 27 | import apiserver.const as C 28 | 29 | __all__ = [ 'init_slabutil', 'slabutil_get_config', 'slabutil_group_keys', 30 | 'slabutil_key_to_start_index', 'slabutil_make_slabkey', 31 | 'slabutil_make_slab', 'slabutil_use_slab' ] 32 | 33 | _slab_config = {} 34 | 35 | def _make_numeric_slabkey(ns, nperslab, elemid): 36 | slabno = (int(elemid) / nperslab) * nperslab 37 | return "%sL%d" % (ns, slabno) 38 | 39 | def _make_nonnumeric_slabkey(ns, elemid): 40 | return "%sL%s" % (ns, elemid) 41 | 42 | class _Slab: 43 | def __init__(self, namespace, slabkey): 44 | self.namespace = namespace 45 | self.slabkey = slabkey 46 | 47 | 48 | class _AlphabeticKeySlab(_Slab): 49 | def __init__(self, namespace, key, item): 50 | slabkey = _make_nonnumeric_slabkey(namespace[0].upper(), key) 51 | _Slab.__init__(self, namespace, slabkey) 52 | self._value = item 53 | self._key = key 54 | 55 | def __len__(self): 56 | return 1 57 | 58 | def items(self): 59 | return [(self._key, self._value)] 60 | 61 | def keys(self): 62 | return [self._key] 63 | 64 | def get(self, key): 65 | if key == self._key: 66 | return (True, self._value) 67 | return (False, key) 68 | 69 | def add(self, key, element): 70 | if key == self._key: 71 | assert element == self._value 72 | else: 73 | raise ValueError, "add() invoked multiple times." 74 | 75 | class _NumericKeySlab(_Slab): 76 | def __init__(self, namespace, items): 77 | if len(items) == 0 or not isinstance(items, list): 78 | raise ValueError, "items should be non-empty list." 
79 | k, _ = items[0] 80 | _, nperslab = _slab_config[namespace] 81 | 82 | slabkey = _make_numeric_slabkey(namespace[0].upper(), nperslab, k) 83 | start = slabutil_key_to_start_index(namespace, slabkey) 84 | 85 | _Slab.__init__(self, namespace, slabkey) 86 | 87 | self._nperslab = nperslab 88 | self._start = start 89 | self._contents = [None] * nperslab 90 | for (k,v) in items: 91 | index = int(k) 92 | if index >= start + nperslab: 93 | raise ValueError, \ 94 | "Index too large %s (start: %d, index: %d)" % \ 95 | (slabkey, start, index) 96 | index = index % nperslab 97 | if self._contents[index]: 98 | raise ValueError, \ 99 | "Repeated insertion at %s:%d" % (slabkey, index) 100 | self._contents[index] = v 101 | 102 | def __len__(self): 103 | return len(self._contents) 104 | 105 | def keys(self): 106 | return map(str, range(self._start, self._start + self._nperslab)) 107 | 108 | def items(self): 109 | elements = [] 110 | for i in range(self._nperslab): 111 | v = self._contents[i] 112 | if v is not None: 113 | elements.append((True, v)) 114 | else: 115 | elements.append((False, str(self._start + i))) 116 | return elements 117 | 118 | def get(self, key): 119 | "Retrieve an object from the slab." 120 | index = int(key) % self._nperslab 121 | v = self._contents[index] 122 | if v is not None: 123 | return (True, v) 124 | return (False, key) 125 | 126 | 127 | def add(self, key, value): 128 | "Add an object at index." 129 | index = int(key) % self._nperslab 130 | self._contents[index] = value 131 | 132 | def init_slabutil(config): 133 | "Initialize the module." 
134 | _slab_config[C.CHANGESET] = ( 135 | config.getint(C.DATASTORE, C.CHANGESETS_INLINE_SIZE), 136 | config.getint(C.DATASTORE, C.CHANGESETS_PER_SLAB)) 137 | _slab_config[C.NODE] = ( 138 | config.getint(C.DATASTORE, C.NODES_INLINE_SIZE), 139 | config.getint(C.DATASTORE, C.NODES_PER_SLAB)) 140 | _slab_config[C.RELATION] = ( 141 | config.getint(C.DATASTORE, C.RELATIONS_INLINE_SIZE), 142 | config.getint(C.DATASTORE, C.RELATIONS_PER_SLAB)) 143 | _slab_config[C.WAY] = ( 144 | config.getint(C.DATASTORE, C.WAYS_INLINE_SIZE), 145 | config.getint(C.DATASTORE, C.WAYS_PER_SLAB)) 146 | 147 | def slabutil_use_slab(namespace): 148 | "Return true of the given namespace uses slabs." 149 | return namespace in _slab_config 150 | 151 | def slabutil_make_slabkey(namespace, elemid): 152 | "Prepare a slab key for a given element and namespace." 153 | nsk = namespace[0].upper() 154 | if _slab_config.has_key(namespace): 155 | _, nperslab = _slab_config[namespace] 156 | return _make_numeric_slabkey(nsk, nperslab, elemid) 157 | else: 158 | return _make_nonnumeric_slabkey(nsk, elemid) 159 | 160 | def slabutil_group_keys(namespace, keys): 161 | "Group keys according to slabs." 162 | 163 | slabset = collections.defaultdict(set) 164 | nsk = namespace[0].upper() 165 | 166 | if slabutil_use_slab(namespace): 167 | _, nperslab = _slab_config[namespace] 168 | for k in keys: 169 | sk = _make_numeric_slabkey(nsk, nperslab, k) 170 | slabset[sk].add(k) 171 | else: 172 | for k in keys: 173 | sk = _make_nonnumeric_slabkey(nsk, k) 174 | slabset[sk].add(k) 175 | 176 | return slabset 177 | 178 | def slabutil_get_config(namespace): 179 | "Return the configuration for a given slab." 
180 | return _slab_config[namespace] 181 | 182 | def slabutil_key_to_start_index(namespace, slabkey): 183 | """Return the start index of elements in a slab.""" 184 | assert slabkey[1] == 'L' 185 | if slabutil_use_slab(namespace): 186 | return int(slabkey[2:]) 187 | else: 188 | return slabkey[2:] 189 | 190 | def slabutil_make_slab(namespace, items): 191 | """Return a populated slab of the appropriate kind.""" 192 | 193 | if slabutil_use_slab(namespace): 194 | return _NumericKeySlab(namespace, items) 195 | else: 196 | return _AlphabeticKeySlab(namespace, items) 197 | -------------------------------------------------------------------------------- /src/python/dbmgr/dbm_geotables.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | 23 | """Group OSM nodes by their (lat, lon) coordinates. 24 | 25 | Exported classes: 26 | 27 | class GeoGroupTable -- track a set of geographical groupings. 28 | 29 | Usage: 30 | 31 | 1. Allocate a new table 32 | >>> gt = GeoGroupTable() 33 | 34 | 2. Add nodes to the table 35 | >>> for n in nodelist: gt.add(n) 36 | 37 | 3. Iterate over the unique groups 38 | >>> for k in gt.keys(): 39 | ... print "Key:", k, "nodes:", gt[k] 40 | >>> 41 | """ 42 | 43 | import geohash 44 | import collections 45 | import threading 46 | from Queue import Queue 47 | 48 | import apiserver.const as C 49 | from apiserver.osmelement import new_osm_element 50 | from datastore.lrucache import BoundedLRUBuffer 51 | from datastore.ds_geohash import geohash_key_for_element 52 | 53 | class NodeGroup: 54 | '''A set of OSM nodes, and their coordinates. 55 | ''' 56 | def __init__(self): 57 | '''Initialize a node group.''' 58 | self.nodecoords = {} 59 | 60 | def __contains__(self, nodeid): 61 | return nodeid in self.nodecoords 62 | 63 | def add(self, osmnode): 64 | '''Add a node to the group.''' 65 | nodeid = osmnode.id 66 | assert nodeid not in self.nodecoords, \ 67 | "Duplicate insertion of %s" % nodeid 68 | self.nodecoords[nodeid] = (osmnode[C.LAT], osmnode[C.LON]) 69 | 70 | def update(self, nodelist): 71 | '''Update a nodegroup from a nodelist.''' 72 | for (nid, lat, lon) in nodelist: 73 | if nid not in self.nodecoords: 74 | self.nodecoords[nid] = (lat, lon) 75 | else: 76 | assert (lat, lon) == self.nodecoords[nid] 77 | 78 | def aslist(self): 79 | '''Return the list representation of a nodegroup.''' 80 | return [(nodeid, lat, lon) for (nodeid, (lat, lon)) in 81 | self.nodecoords.items()] 82 | 83 | class GeoGroupTable: 84 | '''Group OSM nodes by their geographical coordinates. 85 | 86 | The coordinates of the globe are partitioned into disjoint areas. 87 | Each partition is named by the geohash code of its (n,w) corner. 
88 | 89 | Grouping of nodes is implemented by restricting the length of 90 | the geohash codes used. 91 | ''' 92 | 93 | def __init__(self, config, options, db): 94 | '''Initialize the table. 95 | 96 | Keyword arguments: 97 | config - A ConfigParser instance. 98 | options - An optparse.OptionParser structure. 99 | db - A DB object supporting 'get()' and 'store()' 100 | methods. 101 | ''' 102 | self.geodb = collections.defaultdict(NodeGroup) 103 | self.db = db 104 | 105 | lrusize = config.getint(C.DATASTORE, C.GEODOC_LRU_SIZE) 106 | self.lru = BoundedLRUBuffer(bound=lrusize, callback=self._cb) 107 | 108 | if options.nothreading: 109 | nthreads = 0 110 | else: 111 | nthreads = config.getint(C.DATASTORE, C.GEODOC_LRU_THREADS) 112 | self.nthreads = max(0, nthreads) 113 | if self.nthreads: 114 | self.wrthreads = [] 115 | self.wrqueue = Queue(self.nthreads) 116 | self.wrcond = threading.Condition() 117 | self.wrpending = [] 118 | for n in range(self.nthreads): 119 | t = threading.Thread(target=self._worker) 120 | t.name = "GeoWB-%d" % n 121 | t.daemon = True 122 | self.wrthreads.append(t) 123 | t.start() 124 | 125 | db.register_threads(self.wrthreads) 126 | 127 | def _cb(self, key, value): 128 | "Callback called when an LRU item is ejected." 129 | nodeset = self.geodb.pop(key) 130 | if self.nthreads: # Defer processing to a worker thread. 131 | self.wrqueue.put((key, nodeset)) 132 | else: # Synchronous operation. 133 | self._write_geodoc(key, nodeset) 134 | 135 | def _worker(self): 136 | "Helper method, used by worker threads." 137 | while True: 138 | # Retrieve a work item. 139 | v = self.wrqueue.get() 140 | if v is None: # Exit the thread. 141 | self.wrqueue.task_done() 142 | return 143 | 144 | # Unpack the work item. 145 | key, nodeset = v 146 | 147 | # Mark the item as "I/O in progress". 
148 | with self.wrcond: 149 | while key in self.wrpending: 150 | self.wrcond.wait() 151 | 152 | assert key not in self.wrpending 153 | self.wrpending.append(key) 154 | 155 | # Process this node set. 156 | self._write_geodoc(key, nodeset) 157 | 158 | # Remove the "I/O in progress" marker. 159 | with self.wrcond: 160 | assert key in self.wrpending 161 | self.wrpending.remove(key) 162 | self.wrcond.notifyAll() 163 | 164 | self.wrqueue.task_done() 165 | 166 | def _write_geodoc(self, key, nodegroup): 167 | "Merge in a group of nodes into a geodoc." 168 | assert isinstance(nodegroup, NodeGroup) 169 | 170 | geodoc = self.db.retrieve_element(C.GEODOC, key) 171 | if geodoc is None: # New document. 172 | geodoc = new_osm_element(C.GEODOC, key) 173 | nodegroup.update(geodoc[C.NODES]) 174 | geodoc[C.NODES] = nodegroup.aslist() 175 | self.db.store_element(C.GEODOC, key, geodoc) 176 | 177 | def add(self, elem): 178 | '''Add information about a node 'elem' to the geo table. 179 | 180 | Usage: 181 | >>> gt = GeoGroupTable() 182 | >>> gt = gt.add(elem) 183 | 184 | The node 'elem' should have a 'lat' and 'lon' fields that 185 | encode its latitude and longitude respectively. The 'id' 186 | field specifies the node's "id". 187 | ''' 188 | 189 | assert elem.namespace == C.NODE, "elem is not a node: %s" % str(elem) 190 | 191 | # Determine the geo-key for the node. 192 | ghkey = geohash_key_for_element(elem) 193 | # Retrieve the partition covering this location. 194 | ghdoc = self.geodb[ghkey] 195 | 196 | elemid = elem.id 197 | if elemid not in ghdoc: 198 | ghdoc.add(elem) 199 | self.lru[ghkey] = ghdoc 200 | 201 | def flush(self): 202 | "Wait pending I/Os" 203 | 204 | # Flush items from the LRU. 205 | self.lru.flush() 206 | 207 | if self.nthreads: 208 | # Wait for the work queue to drain. 
209 | self.wrqueue.join() 210 | -------------------------------------------------------------------------------- /src/python/tests/test_lrucache.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | import pytest 24 | from ConfigParser import ConfigParser 25 | 26 | import apiserver.const as C 27 | from datastore.lrucache import LRUCache 28 | from datastore.slabutil import slabutil_make_slab, slabutil_init 29 | 30 | _INLINE_SIZE = 256 31 | _LRUSZ = 8 32 | _SLABSZ = 16 33 | _NOSUCHKEY = '__nosuchkey__' 34 | _KEY = '_key' 35 | _NS = 'node' 36 | _NS1 = 'way' 37 | 38 | def pytest_funcarg__lrucache(request): 39 | "Prepare a pre-initialized cache object." 
def pytest_funcarg__lrucache(request):
    "Prepare a fresh, empty cache object for each test."
    return LRUCache(_LRUSZ)

def pytest_funcarg__slabutil(request):
    """Prepare a slab configuration with fixed inline and slab sizes.

    All namespaces use _INLINE_SIZE for the inline size and _SLABSZ
    elements per slab, so tests can fill exactly one slab with
    range(_SLABSZ)."""
    cfg = ConfigParser()
    cfg.add_section(C.DATASTORE)

    for k in [C.CHANGESETS_INLINE_SIZE, C.NODES_INLINE_SIZE,
              C.RELATIONS_INLINE_SIZE, C.WAYS_INLINE_SIZE]:
        cfg.set(C.DATASTORE, k, str(_INLINE_SIZE))

    for k in [C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
              C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB]:
        cfg.set(C.DATASTORE, k, str(_SLABSZ))

    slabutil_init(cfg)
    return cfg

def test_empty(slabutil):
    "Test the properties of an empty cache object."

    lc = LRUCache(_LRUSZ)
    assert len(lc) == 0
    assert lc.get(_NS, _KEY) is None


def test_get(lrucache, slabutil):
    "Test insert and retrieval of one slab descriptor."

    values = range(_SLABSZ)
    keys = map(str, values)
    slabitems = zip(keys, values)

    slabdesc = slabutil_make_slab(_NS, slabitems)
    lrucache.insert_slab(slabdesc)

    for i in xrange(_SLABSZ):
        st, v = lrucache.get(_NS, str(i))
        assert st
        assert v == values[i]


def test_duplicate_slabdesc(lrucache, slabutil):
    "Test insertion of a duplicate slab descriptor."

    values = range(_SLABSZ)
    keys = map(str, values)
    slabitems = zip(keys, values)
    slabdesc = slabutil_make_slab(_NS, slabitems)

    lrucache.insert_slab(slabdesc)
    with pytest.raises(ValueError):
        lrucache.insert_slab(slabdesc)


def test_duplicate_values(lrucache, slabutil):
    "Test insertion of duplicate values in a namespace."

    values = range(_SLABSZ)
    keys = map(str, values)
    slabitems = zip(keys, values)
    slabdesc = slabutil_make_slab(_NS, slabitems)
    lrucache.insert_slab(slabdesc)

    # A distinct descriptor covering the same keys must be rejected.
    slabdesc2 = slabutil_make_slab(_NS, slabitems)
    with pytest.raises(ValueError):
        lrucache.insert_slab(slabdesc2)

def test_namespaces(lrucache, slabutil):
    "Test that different namespaces are distinct."
    values = range(_SLABSZ)
    slabdesc1 = slabutil_make_slab(_NS, zip(map(str, values), values))
    lrucache.insert_slab(slabdesc1)

    # Same keys in another namespace map to different values (squares).
    slabdesc2 = slabutil_make_slab(_NS1, zip(map(str, values),
                                             map(lambda x: x*x, values)))
    lrucache.insert_slab(slabdesc2)

    for i in xrange(_SLABSZ):
        st, v1 = lrucache.get(_NS, str(i))
        assert st
        assert v1 == i
        st, v2 = lrucache.get(_NS1, str(i))
        assert st
        assert v2 == i*i


def test_get_nonexistent(lrucache, slabutil):
    "Test that unknown keys are rejected."
    values = range(_SLABSZ)
    keys = map(str, values)
    slabitems = zip(keys, values)
    slabdesc = slabutil_make_slab(_NS, slabitems)
    lrucache.insert_slab(slabdesc)

    assert lrucache.get(_NS+_NS, '0') is None # Invalid namespace, valid key
    # Valid namespace, out-of-slab key
    assert lrucache.get(_NS, _SLABSZ+1) is None


def test_get_nonexistent_element(lrucache, slabutil):
    "Test that a missing key is shown as not-present."
    values = range(0, _SLABSZ, 2)       # Alternate elements.
    keys = map(str, values)
    slabitems = zip(keys, values)
    slabdesc = slabutil_make_slab(_NS, slabitems)
    lrucache.insert_slab(slabdesc)

    # The slab covers key '1', but holds no value for it: the cache
    # reports (False, key) rather than None.
    st, v = lrucache.get(_NS, '1')      # Valid namespace, missing key
    assert not st
    assert v == '1'

def test_remove(lrucache, slabutil):
    "Test the remove_slab() method."

    values1 = range(_SLABSZ)
    slabdesc1 = slabutil_make_slab(_NS, zip(map(str, values1), values1))
    lrucache.insert_slab(slabdesc1)

    values2 = range(_SLABSZ, 2*_SLABSZ)
    slabdesc2 = slabutil_make_slab(_NS, zip(map(str, values2), values2))
    lrucache.insert_slab(slabdesc2)

    # Remove the first slab.
    lrucache.remove_slab(slabdesc1)

    # Items in the original slab should be missing.
    for i in xrange(_SLABSZ):
        st = lrucache.get(_NS, str(i))
        assert st is None

    # Items in the second slab should be present.
    for i in xrange(_SLABSZ, 2 * _SLABSZ):
        st, v = lrucache.get(_NS, str(i))
        assert st
        assert v == i


def test_non_overflow(slabutil):
    "Test that slabs do not overflow upto the slab LRU size."
    slabs = []
    def _mkslab(i):
        v = [i * _SLABSZ]
        return slabutil_make_slab(_NS, zip(map(str, v), v))

    # The eviction callback must never fire while the cache holds at
    # most _LRUSZ slabs.  (Fixed: the callback previously declared a
    # stray 'self' parameter copied from a method definition; the
    # cache invokes it with (slabkey, slabdesc), as in the other tests.)
    def _cb(slabkey, slabdesc):
        assert False

    lc = LRUCache(_LRUSZ, _cb)
    for i in xrange(_LRUSZ):
        sl = _mkslab(i)
        slabs.append(sl)
        lc.insert_slab(sl)


def test_overflow(slabutil):
    "Test that slabs overflow in LRU sequence."

    slabs = []
    def _mkslab(i):
        v = [i * _SLABSZ]
        return slabutil_make_slab(_NS, zip(map(str, v), v))

    # 'seq' is a deliberate mutable default used as an accumulator: it
    # tracks how many evictions have happened across invocations.
    def _cb(slabkey, slabdesc, seq=[0]):
        n = seq[0]
        assert slabdesc is slabs[n]     # Evictions occur in LRU order.
        seq[0] = n + 1

    lc = LRUCache(_LRUSZ, _cb)
    for i in xrange(2*_LRUSZ):
        sl = _mkslab(i)
        slabs.append(sl)
        lc.insert_slab(sl)


def test_flush(slabutil):
    "Test that flush presents slabs in sequence."

    slabs = []
    def _mkslab(i):
        v = [i * _SLABSZ]
        return slabutil_make_slab(_NS, zip(map(str, v), v))

    seen = [False]
    def _cb(slabkey, slabdesc, seq=[0], seen=seen):
        seen[0] = True
        n = seq[0]
        assert slabdesc is slabs[n]     # Flush occurs in insertion order.
        seq[0] = n + 1

    lc = LRUCache(_LRUSZ, _cb)
    for i in xrange(_LRUSZ):
        sl = _mkslab(i)
        slabs.append(sl)
        lc.insert_slab(sl)


    lc.flush()
    assert seen[0] is True              # The callback was exercised.
# --------------------------------------------------------------------
# src/python/frontend/fe.py
# --------------------------------------------------------------------
# Copyright (c) 2011 AOL Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
22 | 23 | import tornado.httpserver 24 | import tornado.ioloop 25 | import tornado.web 26 | 27 | import platform 28 | import sys 29 | 30 | # 31 | # Local modules 32 | # 33 | import apiserver.const as C # 'constants' 34 | from capabilities import CapabilitiesHandler 35 | from maphandler import MapHandler 36 | from osmelement import OsmElementHandler, OsmElementRelationsHandler, \ 37 | OsmFullQueryHandler, OsmMultiElementHandler, OsmWaysForNodeHandler 38 | 39 | # 40 | # Handling access to '/'. 41 | # 42 | class RootHandler(tornado.web.RequestHandler): 43 | """Handle requests for "/". 44 | 45 | Print a message pointing the user to the right API calls.""" 46 | 47 | default_message = """\ 48 | 49 | 50 | A scalable, read-only, OSM API Server 51 | 52 | 53 |

Welcome

54 | 55 |

You have reached an experimental implementation of an API server 56 | serving map data from the OpenStreetMap project.

58 | 59 |

API Version / Operations Supported

60 |

This server supports read queries conforming to the v%(apiversion)s 62 | OSM API.

63 |

OSM API calls that change map data are not supported.

64 | 65 |

Examples of API use

66 |
    67 |
  • To retrieve the capabilities of this server, use: 68 |
    %% curl http://%(hostname)s/api/capabilities
    69 |
  • 70 |
  • To retrieve the contents of node 15382126 from the command-line, use: 71 |
    %% curl http://%(hostname)s/api/%(apiversion)s/node/15382126
    72 |
  • 73 |
  • To retrieve the ways for node 15382126, use: 74 |
    %% curl http://%(hostname)s/api/%(apiversion)s/node/15382126/ways
    75 |
  • 76 |
77 | 78 |

More Information

79 |

This server is being developed as an open-source project.

80 | 86 | 87 | """ 88 | 89 | def initialize(self, cfg): 90 | self.usagemessage = RootHandler.default_message % dict( 91 | apiversion=cfg.get(C.FRONT_END, C.API_VERSION), 92 | hostname=platform.node(), 93 | projectdoc=cfg.get(C.DEFAULT, C.PROJECT_DOC), 94 | sourcerepository=cfg.get(C.DEFAULT, C.SOURCE_REPOSITORY)) 95 | 96 | def get(self): 97 | self.write(self.usagemessage) 98 | 99 | 100 | class ReadOnlyHandler(tornado.web.RequestHandler): 101 | """Return an error for URLs that a read-only server does not support.""" 102 | 103 | def initialize(self, cfg=None): 104 | pass 105 | 106 | 107 | class NotImplementedHandler(tornado.web.RequestHandler): 108 | """Return an error for URIs that are unimplemented.""" 109 | 110 | def initialize(self, cfg=None): 111 | pass 112 | 113 | def get(self, request): 114 | raise tornado.web.HTTPError(501) # Not Implemented 115 | 116 | # 117 | # The OSM front end server. 118 | # 119 | class OSMFrontEndServer: 120 | """The OSM Front End. 121 | 122 | This wrapper class encapsulates an instance of a Tornado 123 | 'Application' implementing the front end server, and its 124 | associated configuration information. 125 | 126 | Example: 127 | >> cfg = ConfigParser.ConfigParser() 128 | >> cfg.read(my-config-file) 129 | >> db = 130 | >> frontend = OSMFrontEndServer(cfg, options, db) 131 | 132 | Attributes: 133 | 134 | application The Tornado 'Application' for this server 135 | instance. 136 | config Configuration information for this instance. 137 | datastore Datastore in use. 138 | """ 139 | 140 | def __init__(self, cfg, options, datastore): 141 | """Initialize an OSMFrontEnd. 142 | 143 | Parameters: 144 | 145 | config Configuration information. 146 | options Command line options. 147 | datastore Datastore in use. 148 | """ 149 | 150 | osm_api_version = cfg.get(C.FRONT_END, C.API_VERSION) 151 | 152 | # Link URLs to their handlers. 
153 | application = tornado.web.Application([ 154 | (r"/api/%s/map" % osm_api_version, MapHandler, 155 | dict(cfg=cfg, datastore=datastore)), 156 | (r"/api/%s/capabilities" % osm_api_version, CapabilitiesHandler, 157 | dict(cfg=cfg)), 158 | (r"/api/%s/changeset/([0-9]+)/close" % osm_api_version, 159 | NotImplementedHandler, dict(cfg=cfg)), 160 | (r"/api/%s/changeset/([0-9]+)/download" % osm_api_version, 161 | NotImplementedHandler, dict(cfg=cfg)), 162 | (r"/api/%s/changeset/([0-9]+)/expand_bbox" % osm_api_version, 163 | NotImplementedHandler, dict(cfg=cfg)), 164 | (r"/api/%s/changeset/([0-9]+)/upload" % osm_api_version, 165 | NotImplementedHandler, dict(cfg=cfg)), 166 | (r"/api/%s/changesets" % osm_api_version, 167 | NotImplementedHandler, dict(cfg=cfg)), 168 | (r"/api/%s/node/([0-9]+)/ways" % osm_api_version, 169 | OsmWaysForNodeHandler, dict(datastore=datastore)), 170 | (r"/api/%s/(nodes|ways|relations)" % osm_api_version, 171 | OsmMultiElementHandler, dict(datastore=datastore)), 172 | (r"/api/%s/(node|way|relation)/create" % osm_api_version, 173 | ReadOnlyHandler, dict(cfg=cfg)), 174 | (r"/api/%s/(node|way|relation)/([0-9]+)/history" % 175 | osm_api_version, NotImplementedHandler, 176 | dict(cfg=cfg)), 177 | (r"/api/%s/(node|way|relation)/([0-9]+)/([0-9]+)" % 178 | osm_api_version, NotImplementedHandler, 179 | dict(cfg=cfg)), 180 | (r"/api/%s/(node|way|relation)/([0-9]+)/relations" % 181 | osm_api_version, OsmElementRelationsHandler, 182 | dict(datastore=datastore)), 183 | (r"/api/%s/(changeset|node|way|relation)/([0-9]+)" % 184 | osm_api_version, OsmElementHandler, dict(datastore=datastore)), 185 | (r"/api/%s/(way|relation)/([0-9]+)/full" % osm_api_version, 186 | OsmFullQueryHandler, dict(datastore=datastore)), 187 | (r"/api/capabilities", CapabilitiesHandler, dict(cfg=cfg)), 188 | (r"/", RootHandler, dict(cfg=cfg)) 189 | ]) 190 | 191 | self._application = application 192 | self._config = cfg 193 | self._datastore = datastore 194 | 195 | def 
_get_application(self): 196 | return self._application 197 | def _get_config(self): 198 | return self._config 199 | def _get_datastore(self): 200 | return self._datastore 201 | 202 | application = property(_get_application) 203 | config = property(_get_config) 204 | datastore = property(_get_datastore) 205 | -------------------------------------------------------------------------------- /src/python/datastore/ds.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
"""An interface to the datastore."""

import threading
from Queue import Queue

import apiserver.const as C

from apiserver.osmelement import decode_json, decode_protobuf, encode_json, \
    encode_protobuf
from datastore.lrucache import LRUIOCache
from datastore.slabutil import *

class DatastoreBase:
    """Base class for accessing a data store.

    Concrete subclasses supply the storage primitives listed under
    "Abstract methods" (retrieve_element, store_slab, ...); this class
    layers an LRU slab cache and optional threaded write-back on top.
    """

    VALID_NAMESPACES = [
        C.CHANGESET, C.GEODOC, C.NODE, C.RELATION, C.WAY
    ]

    def __init__(self, config, usethreads=False, writeback=False):
        """Initialize the datastore.

        Parameters:

            config      ConfigParser-style configuration.
            usethreads  Use a pool of write-back threads (only
                        meaningful when 'writeback' is True).
            writeback   Write dirty slabs back to the backing store
                        when they are evicted from the cache.
        """
        encoding = config.get(C.DATASTORE, C.DATASTORE_ENCODING)
        if encoding == C.JSON:
            self.decode = decode_json
            self.encode = encode_json
        elif encoding == C.PROTOBUF:
            self.decode = decode_protobuf
            self.encode = encode_protobuf
        else:
            # Fail early: previously an unknown encoding left
            # self.encode/self.decode unset, deferring the failure to
            # an obscure AttributeError at first use.
            raise ValueError("Unsupported encoding %r" % encoding)

        bound = config.getint(C.DATASTORE, C.SLAB_LRU_SIZE)
        if bound <= 0:
            raise ValueError("Illegal SLAB LRU size %d" % bound)

        # 'nthreads' is consulted by _cbwrite() and finalize(); it must
        # exist even for a read-only (no write-back) datastore.
        # Previously it was only assigned on the writeback path, so
        # finalize() raised AttributeError on read-only instances.
        self.nthreads = 0

        if writeback:
            if usethreads:
                self.nthreads = config.getint(C.DATASTORE,
                                              C.SLAB_LRU_THREADS)
            if self.nthreads:
                # Start a daemon thread pool draining a bounded queue.
                self.threads = []
                self.workqueue = Queue(self.nthreads)
                for n in xrange(self.nthreads):
                    t = threading.Thread(target=self._worker)
                    self.threads.append(t)
                    t.daemon = True
                    t.name = "DS-%d" % n
                    t.start()
                callback = self._cbthreaded
            else:
                # Synchronous write-back on eviction.
                callback = self._cbwrite
        else:
            callback = None
        self.cache = LRUIOCache(bound=bound, callback=callback)

    def _worker(self):
        "Thread body: write back slabs queued by _cbthreaded()."
        while True:
            slabkey, slabdesc = self.workqueue.get()
            self._cbwrite(slabkey, slabdesc)

    def _cbthreaded(self, slabkey, slabdesc):
        "Call back for the threaded case: add job to the work queue."
        self.workqueue.put((slabkey, slabdesc))


    def _cbwrite(self, slabkey, slabdesc):
        "Write back a slab, and retire its pending-I/O marker."
        self.store_slab(slabdesc.namespace, slabkey, slabdesc)
        if self.nthreads:
            assert self.cache.isiopending(slabkey)
            self.workqueue.task_done()
            self.cache.iodone(slabkey)

    def fetch_keys(self, namespace, keys, cacheable=True):
        """Return an iterator yielding (status, value) pairs for keys.

        For each key, yields (True, element) if the element exists, or
        (False, key) if it is known to be absent.

        Parameters:

            namespace   - element namespace
            keys        - a list of keys to retrieve.
            cacheable   - True if values from the data store are to
                          be cached.
        """

        assert namespace in DatastoreBase.VALID_NAMESPACES

        # Retrieve the requested keys from the cache, if present
        # there.
        keys_to_retrieve = set()
        elements = []

        for k in keys:
            assert isinstance(k, basestring)
            v = self.cache.get(namespace, k)
            if v:               # Status is known (hit or known-absent).
                assert len(v) == 2
                assert isinstance(v, tuple)
                elements.append(v)
            else:               # Status is unknown.
                keys_to_retrieve.add(k)

        # Return elements that were present in the cache.
        for elem in elements:
            yield elem

        if len(keys_to_retrieve) == 0:
            return

        # Retrieve elements that were not in cache from the backing
        # store.
        if slabutil_use_slab(namespace):
            # Group the missing keys by the slab that holds them, and
            # bring in whole slabs at a time.
            slabkeyset = slabutil_group_keys(namespace, keys_to_retrieve)

            while len(slabkeyset) > 0:
                elements = []
                sk, keys = slabkeyset.popitem()

                # Read in the slab from the data store.
                items = self.retrieve_slab(namespace, sk)

                # Nothing to do if the entire slab is missing; the
                # trailing loop reports these keys as absent.
                if items is None:
                    continue

                # Prepare a slab descriptor, insert its contents into
                # the cache.
                slabdesc = slabutil_make_slab(namespace, items)
                self.cache.insert_slab(slabdesc)

                # Bring in elements via the cache just populated.
                for k in keys:
                    try:
                        elements.append(self.cache.get(namespace, k))
                        keys_to_retrieve.remove(k)
                    except KeyError:
                        assert False, "Element %s:%s not in cache" % (sk, k)

                # Return elements from this slab.
                for elem in elements:
                    yield elem
        else:
            # Non-slabbed namespace: fetch each element individually.
            for k in keys_to_retrieve:
                elem = self.retrieve_element(namespace, k)
                if elem is None:
                    yield (False, k)
                else:
                    yield (True, elem)
            return

        # Return status information for keys that were missing in the
        # data store.
        for k in keys_to_retrieve:
            yield (False, k)

    def fetch(self, namespace, key):
        """Retrieve one value from the datastore, or None if absent."""

        # Accept unicode keys too, matching the basestring check in
        # fetch_keys() (previously this insisted on 'str' only).
        assert isinstance(key, basestring)

        if namespace not in DatastoreBase.VALID_NAMESPACES:
            raise KeyError(namespace)

        elems = [e for e in self.fetch_keys(namespace, [key])]

        # Only one value should be returned for a given key.
        assert len(elems) == 1, \
            'Multiple values for ns,key="%s","%s": %s' % \
            (namespace, key, elems)

        rstatus, elem = elems[0]
        if rstatus:
            return elem
        else:
            return None

    def store(self, elem):
        "Create a new element in the data store (via the cache)."

        ns = elem.namespace
        elemid = elem.id
        slabdesc = self.cache.get_slab(ns, elemid)
        if slabdesc is None:    # New slab.
            slabdesc = slabutil_make_slab(ns, [(elemid, elem)])
            self.cache.insert_slab(slabdesc)
        else:
            slabdesc.add(elemid, elem)

    def _abort(self, *args, **kw):
        "Placeholder bound to each abstract method below."
        raise TypeError("Abstract method invoked")

    def finalize(self):
        "Write back caches and finish pending I/Os."
        self.cache.flush()
        if self.nthreads:
            self.workqueue.join()

    # Abstract methods.
220 | register_threads = _abort 221 | retrieve_element = _abort 222 | retrieve_slab = _abort 223 | store_element = _abort 224 | store_slab = _abort 225 | -------------------------------------------------------------------------------- /src/python/frontend/maphandler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | ## Support retrieval of the map data in a bounding box. 
import geohash
import tornado.web

from lxml import etree as ET

import apiserver.const as C
from apiserver.osmelement import encode_coordinate, new_osm_response

from util import filter_references, response_to_xml

def _filter_in_bbox(bbox, geodocs):
    "Return the list of nodes that fall into the given bounding box."
    # bbox is (w, s, e, n); encode to the same representation the
    # geodocs use for lat/lon so the comparisons below are valid.
    w,s,e,n = map(encode_coordinate, bbox)

    nodeset = set()
    for gd in geodocs:
        for (nid, lat, lon) in gd.get_node_info():
            # NOTE(review): half-open comparison — nodes exactly on
            # the east/north edge are excluded; presumably to avoid
            # duplicates between adjacent boxes — confirm.
            if w <= lon < e and s <= lat < n:
                nodeset.add(nid)
    return nodeset


class MapHandler(tornado.web.RequestHandler):
    "Handle requests for the /map API."

    def initialize(self, cfg, datastore):
        # Datastore handle and geohash precision (key length) used to
        # partition the world into geodocs.
        self.datastore = datastore
        self.precision = cfg.getint(C.DATASTORE, C.GEOHASH_LENGTH)

    def get(self, *args, **kwargs):
        '''Service a GET request to the '/map' URI.

        The 'bbox' parameter contains 4 coordinates "l" (w), "b" (s),
        "r" (e) and "t" (n).'''

        # Sanity check the input.
        bbox_arg = self.get_argument('bbox', None)
        if not bbox_arg:
            raise tornado.web.HTTPError(400) # Bad Syntax
        bbox = bbox_arg.split(',')
        if len(bbox) != 4:
            raise tornado.web.HTTPError(400)
        try:
            w,s,e,n = map(float, bbox)
        except ValueError:
            raise tornado.web.HTTPError(400)

        # Check the "l,b,r,t" coordinates passed in for sanity:
        # within world bounds, and not an inverted box.
        if w < C.LON_MIN or w > C.LON_MAX or \
           e < C.LON_MIN or e > C.LON_MAX or \
           s < C.LAT_MIN or s > C.LAT_MAX or \
           n < C.LAT_MIN or n > C.LAT_MAX or \
           n < s or e < w:
            raise tornado.web.HTTPError(400)

        nodelist, ways, relations = self.handle_map(bbox)
        response = self.build_bbox_response(nodelist, ways, relations, bbox)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(response))

    def build_bbox_response(self, nodes, ways, relations, bbox):
        """Build an OSM response for the query."""

        # Create a new response element.
        osm = new_osm_response()

        # Add a <bounds> element echoing the requested box.
        bb = ET.SubElement(osm, C.BOUNDS)
        (bb.attrib[C.MINLON], bb.attrib[C.MINLAT],
         bb.attrib[C.MAXLON], bb.attrib[C.MAXLAT]) = map(str, bbox)

        # Add nodes, ways and relations in that order.
        for n in nodes:
            n.build_response(osm)
        for w in ways:
            w.build_response(osm)
        for r in relations:
            r.build_response(osm)

        return osm

    def handle_map(self, bbox):
        """Implementation of the /map API.

        Returns a (nodelist, ways, relations) triple.

        Parameters:

        bbox -- Bounding box coordinates.
        """

        nodelist = []
        relations = []
        ways = []

        # This implementation follows the current implementation of
        # the API server at api.openstreetmap.org (the 'rails' port).

        # Look up the geo coded documents covering the desired bbox.
        gckeys = self.get_geocodes(bbox)
        geodocs = self.datastore.fetch_keys(C.GEODOC, gckeys)

        # Step 1: Get the list of nodes contained in the given
        # bounding box.  fetch_keys() yields (status, value) pairs;
        # only successfully retrieved geodocs are considered.
        nodeset = _filter_in_bbox(bbox,
                                  [gd for (st, gd) in geodocs if st])
        if len(nodeset) == 0:
            return (nodelist, ways, relations)

        nodelist = [z for (st, z) in self.datastore.fetch_keys(
                C.NODE, [n for n in nodeset]) if st]

        # Step 2: Retrieve all ways that reference at least one node
        # in the given bounding box.
        wayset = filter_references(C.WAY, nodelist)


        # Step 3: Retrieve any additional nodes referenced by the ways
        # retrieved.
        waynodeset = set()

        for (st,w) in self.datastore.fetch_keys(C.WAY, [w for w in wayset]):
            if st:
                ways.append(w)
                waynodeset.update(w.get_node_ids())

        # Nodes referenced by ways but outside the bbox.
        extranodeset = waynodeset - nodeset
        nodelist.extend([n for (st,n) in
                         self.datastore.fetch_keys(C.NODE,
                                                   [n for n in extranodeset])
                         if st])
        nodeset = nodeset | extranodeset

        # Step 4: Retrieve the relations associated with these nodes.

        # ... all relations that reference nodes being returned.
        relset = filter_references(C.RELATION, nodelist)

        # ... and relations that reference one of the ways in the wayset.
        relset.update(filter_references(C.RELATION, ways))

        # ... retrieve relations from the data store.
        relations = [xr for (st,xr) in
                     self.datastore.fetch_keys(C.RELATION,
                                               [r for r in relset])
                     if st]

        # ... and relations referenced by existing relations
        # (one-pass only; nested relation references beyond one level
        # are deliberately not chased).
        extrarelset = filter_references(C.RELATION, relations)
        newrelset = extrarelset - relset

        newrels = [nr for (st, nr) in
                   self.datastore.fetch_keys(C.RELATION,
                                             [r for r in newrelset])
                   if st]
        relations.extend(newrels)

        return (nodelist, ways, relations)


    def get_geocodes(self, bbox):
        """Return a list of geohash keys covering a given area.

        Parameters:

        bbox -- Bounding box of the desired region.
        """

        # TODO: Make this more efficient for sparse areas of the map.
        w, s, e, n = map(float, bbox)

        n = min(C.MAXGHLAT, n)  # work around a geohash library
        s = min(C.MAXGHLAT, s)  # limitation

        assert(w <= e and s <= n)

        gcset = set()
        gc = geohash.encode(s, w, self.precision)

        bl = geohash.bbox(gc)   # Box containing point (s,w).

        # Walk the grid of geohash cells from the south-west corner,
        # using each cell's bounding box to find its neighbour.
        s_ = bl['s'];
        while s_ < n:           # Step south to north.
            w_ = bl['w']

            gc = geohash.encode(s_, w_, self.precision)
            bb_sn = geohash.bbox(gc) # bounding box in S->N direction

            while w_ < e:       # Step west to east.
                gcset.add(gc)

                bb_we = geohash.bbox(gc) # in W->E direction
                w_ = bb_we['e']  # advance to the next cell eastwards

                gc = geohash.encode(s_, w_, self.precision)

            s_ = bb_sn['n']     # advance one row northwards

        assert(len(gcset) > 0)

        return [gc for gc in gcset]
# --------------------------------------------------------------------
# src/python/frontend/osmelement.py
# --------------------------------------------------------------------
# Copyright (c) 2011 AOL Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

## Handle API requests for nodes, ways and elements.

import tornado.web

import apiserver.const as C
from apiserver.osmelement import new_osm_response
from util import filter_references, response_to_xml

class OsmElementHandler(tornado.web.RequestHandler):
    "Handle requests for the (changeset|node|way|relation)/ API."

    def initialize(self, datastore):
        self.datastore = datastore

    def delete(self, *args):
        """Handle a DELETE HTTP request.

        Accepts any number of path groups: the matching routes capture
        two groups (namespace, ident), so the previous single-argument
        signature raised TypeError (HTTP 500) instead of the intended
        501."""

        raise tornado.web.HTTPError(501) # Not Implemented.

    def get(self, namespace, ident):
        "Return the XML representation of one element, or 404."
        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)

        elem = self.datastore.fetch(namespace, ident)
        if elem is None:
            raise tornado.web.HTTPError(404)

        self.write(response_to_xml(elem.build_response(new_osm_response())))

    def put(self, *args):
        """Handle a PUT HTTP request (see delete() for the *args
        rationale)."""

        raise tornado.web.HTTPError(501) # Not Implemented.

class OsmMultiElementHandler(tornado.web.RequestHandler):
    """Handle requests for the (nodes|ways|relations) API."""

    def initialize(self, datastore):
        """Initialize the handler."""
        self.datastore = datastore

    def get(self, element):
        """Retrieve multiple elements.

        The elements are specified by (nodes|ways|relations) parameter
        to the request, as a comma separated list of element IDs.
        """

        if element not in [C.NODES, C.WAYS, C.RELATIONS]:
            # Programming error.
            raise tornado.web.HTTPError(500)

        # Determine the name space to use.
        if element == C.NODES:
            namespace = C.NODE
        elif element == C.WAYS:
            namespace = C.WAY
        elif element == C.RELATIONS:
            namespace = C.RELATION
        else:
            assert False, "Unexpected element '%s'" % element

        # The name of the parameter (i.e., one of "nodes", "ways" or
        # "relations") match the last component of the URI.
        params = self.get_argument(element, None)
        if not params:
            raise tornado.web.HTTPError(400)

        # Create a new response.
        osm = new_osm_response()

        # Add elements to the response; IDs that could not be fetched
        # (status False) are silently skipped.
        for (st,r) in self.datastore.fetch_keys(namespace, params.split(",")):
            if st:
                r.build_response(osm)

        # Send the XML representation back to the client.
        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))


class OsmElementRelationsHandler(tornado.web.RequestHandler):
    """Retrieve relations for a node or way element."""

    def initialize(self, datastore):
        """Initialize the handler."""
        self.datastore = datastore

    def get(self, namespace, ident):
        """Retrieve relations for an element.

        The element can be a 'node' or 'way'.
        """

        if namespace not in [C.NODE, C.WAY, C.RELATION]:
            raise tornado.web.HTTPError(500)

        elem = self.datastore.fetch(namespace, ident)

        osm = new_osm_response()

        # An unknown element yields an empty (but valid) response.
        if elem:
            relset = filter_references(C.RELATION, [elem])
            if len(relset) > 0:
                relations = self.datastore.fetch_keys(C.RELATION,
                                                      [r for r in relset])
                for (st,r) in relations:
                    if st:
                        r.build_response(osm)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))

class OsmWaysForNodeHandler(tornado.web.RequestHandler):
    """Retrieve ways associated with a node."""

    def initialize(self, datastore):
        self.datastore = datastore

    def get(self, nodeid):
        "Retrieve the ways associated with a node."

        elem = self.datastore.fetch(C.NODE, nodeid)
        if elem is None:
            raise tornado.web.HTTPError(404)

        osm = new_osm_response()

        wayset = filter_references(C.WAY, [elem])
        if len(wayset) > 0:
            ways = self.datastore.fetch_keys(C.WAY,
                                             [w for w in wayset])
            for (st,w) in ways:
                if st:
                    w.build_response(osm)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))

class OsmFullQueryHandler(tornado.web.RequestHandler):
    """Handle a `full' query for a way or relation."""

    def initialize(self, datastore):
        self.datastore = datastore

    def get(self, namespace, elemid):
        """Implement a 'GET' operation.

        For a way:
        - Return the way itself,
        - Return the full XML of all nodes referenced by the
          way.
        For a relation:
        - Return the relation itself,
        - All nodes and ways that are members of the relation.
        - All nodes referenced from the ways above.
        """

        # Retrieve the element.
        element = self.datastore.fetch(namespace, elemid)
        if element is None:
            raise tornado.web.HTTPError(404)

        nodes = []
        ways = []
        relations = []

        if namespace == C.RELATION:
            # Retrieve nodes directly referenced by the relation.
            nodeset = element.get_member_ids(C.NODE)
            nodes.extend([z for (st,z) in
                          self.datastore.fetch_keys(C.NODE, [n for n in nodeset])
                          if st])
            # Retrieve way IDs directly referenced by the relation.
            wayset = element.get_member_ids(C.WAY)
            # Include the relation itself.
            relations.append(element)
        else:
            # A way: only the way itself and its nodes are returned.
            nodeset = set()
            wayset = set([elemid])

        # Fetch all ways.
        if len(wayset) > 0:
            ways.extend([z for (st, z) in
                         self.datastore.fetch_keys(C.WAY, [w for w in wayset])
                         if st])

            # Fetch additional nodes referenced by the ways in the
            # way set.
            additional_nodes = set()
            for w in ways:
                additional_nodes.update(w.get_node_ids())

            # Avoid re-fetching nodes already retrieved above.
            additional_nodes = additional_nodes - nodeset
            nodes.extend([z for (st, z) in
                          self.datastore.fetch_keys(C.NODE, [n for n in additional_nodes])
                          if st])

        # Build and return a response.
        osm = new_osm_response()
        for n in nodes:
            n.build_response(osm)
        for w in ways:
            w.build_response(osm)
        for r in relations:
            r.build_response(osm)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))
# --------------------------------------------------------------------
# src/python/apiserver/osmelement.py
# --------------------------------------------------------------------
# Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | # 3 | # Permission is hereby granted, free of charge, to any person 4 | # obtaining a copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be 12 | # included in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | """Describe OSM elements. 24 | 25 | Exported functions: 26 | 27 | new_osm_element -- factory function to create a new OSM element. 28 | init_osm_factory -- initialize the factory. 
29 | """ 30 | 31 | import geohash 32 | import math 33 | import types 34 | 35 | import cjson 36 | 37 | from lxml import etree as ET 38 | 39 | import apiserver.const as C 40 | from datastore.slabutil import slabutil_make_slabkey 41 | 42 | _server_version = None 43 | _server_name = None 44 | _scale_factor = None 45 | _fraction_width = None 46 | _coordinate_format = None 47 | 48 | def init_osm_factory(config): 49 | """Initialize the factory for OSM elements.""" 50 | 51 | global _coordinate_format, _fraction_width, _scale_factor, _server_name 52 | global _server_version 53 | 54 | _scale_factor = config.getint(C.DATASTORE, C.SCALE_FACTOR) 55 | _fraction_width = math.trunc(math.log10(_scale_factor)) 56 | _coordinate_format = "%%d.%%0%dd" % _fraction_width 57 | 58 | _server_version = config.get(C.FRONT_END, C.SERVER_VERSION) 59 | _server_name = config.get(C.FRONT_END, C.SERVER_NAME) 60 | 61 | def encode_coordinate(coordinate): 62 | """Encode a latitude or longitude as an integral value. 63 | 64 | Parameters: 65 | 66 | coordinate -- An OSM latitude or longitude as numeric value, or 67 | a string representation of a number. 
68 | """ 69 | 70 | coordinate_type = type(coordinate) 71 | 72 | if coordinate_type in types.StringTypes: 73 | try: 74 | integral, fractional = coordinate.split(".") 75 | fractional = fractional[0:_fraction_width] 76 | except ValueError: 77 | integral = coordinate 78 | fractional = "0" 79 | 80 | fractional = fractional.ljust(_fraction_width, "0") 81 | return int(integral) * _scale_factor + int(fractional) 82 | 83 | elif coordinate_type == types.FloatType: 84 | fractional, integral = map(lambda x: int(x * _scale_factor), 85 | math.modf(coordinate)) 86 | return integral + fractional 87 | 88 | else: 89 | raise ValueError, \ 90 | "Unsupported conversion from '%s'" % coordinate_type 91 | 92 | def decode_coordinate(encodedvalue): 93 | """Decode an integral quantity into a OSM latitude or longitude.""" 94 | 95 | integral = encodedvalue / _scale_factor 96 | fractional = encodedvalue - (integral * _scale_factor) 97 | 98 | return _coordinate_format % (integral, fractional) 99 | 100 | def new_osm_response(): 101 | "Create an (empty) XML element." 102 | 103 | osm = ET.Element(C.OSM) 104 | osm.attrib[C.VERSION] = _server_version 105 | osm.attrib[C.GENERATOR] = _server_name 106 | 107 | return osm 108 | 109 | 110 | class OSMElement(dict): 111 | """A representation of an OSM Element""" 112 | 113 | ignoredkeys = [C.TAGS, C.REFERENCES] 114 | 115 | def __init__(self, namespace, elemid): 116 | """Initialize an OSMElement object. 117 | 118 | namespace -- the OSM namespace for the element. 119 | elemid -- the element id in the namespace. 
120 | """ 121 | 122 | assert namespace in _namespace_to_factory.keys() 123 | assert isinstance(elemid, basestring) 124 | 125 | super(OSMElement, self).__init__() 126 | super(OSMElement, self).__setitem__(C.ID, elemid) 127 | super(OSMElement, self).__setitem__(C.REFERENCES, set()) 128 | self.namespace = namespace 129 | self.id = elemid 130 | self.slabkey = slabutil_make_slabkey(namespace, elemid) 131 | 132 | def __repr__(self): 133 | 'Return a human-friendly representation of an OSMElement.' 134 | docstr = super(OSMElement, self).__repr__() 135 | return "OSMElement<%s>%s" % (self.namespace, docstr) 136 | 137 | def from_mapping(self, d): 138 | "Translate between a mapping to an OSM element." 139 | setter = super(OSMElement, self).__setitem__ 140 | for k in d: 141 | if k == C.ID: 142 | assert self.id == str(d[k]) 143 | continue 144 | if k == C.REFERENCES: 145 | v = set(d[k]) 146 | else: 147 | v = d[k] 148 | setter(k, v) 149 | 150 | def as_mapping(self): 151 | "Translate to a Python mapping." 152 | d = {} 153 | for (k,v) in self.items(): 154 | if isinstance(v, set): # Convert sets to lists. 155 | v = [r for r in v] 156 | d[k] = v 157 | return d 158 | 159 | def build_response(self, element): 160 | "Return an XML representation of an element." 161 | raise TypeError, "Abstract method was invoked." 162 | 163 | def add_attributes(self, element, ignoredkeys=[]): 164 | "Translate from dictionary keys to XML attributes." 165 | for (k, v) in self.items(): 166 | if k in ignoredkeys: 167 | continue 168 | if k in OSMElement.ignoredkeys: 169 | continue 170 | element.attrib[k] = str(v) 171 | 172 | def add_tags(self, element): 173 | "Add children to an XML element." 
174 | for (k, v) in self.get(C.TAGS, {}).items(): 175 | t = ET.SubElement(element, C.TAG) 176 | t.attrib[C.K] = k 177 | t.attrib[C.V] = v 178 | 179 | class OSMChangeSet(OSMElement): 180 | def __init__(self, elemid): 181 | super(OSMChangeSet, self).__init__(C.CHANGESET, elemid) 182 | 183 | def build_response(self, osm): 184 | """Return the XML representation for a .""" 185 | 186 | changeset = ET.SubElement(osm, C.CHANGESET) 187 | self.add_attributes(changeset) 188 | self.add_tags(changeset) 189 | 190 | return osm 191 | 192 | class OSMDatastoreConfig(OSMElement): 193 | def __init__(self, elemid): 194 | OSMElement.__init__(self, C.DATASTORE_CONFIG, elemid) 195 | 196 | class OSMGeoDoc(OSMElement): 197 | """A geodoc references nodes which fall into a given geographic area.""" 198 | def __init__(self, region): 199 | super(OSMGeoDoc, self).__init__(C.GEODOC, region) 200 | # Fill in default values for 'standard' fields. 201 | self.__setitem__(C.NODES, set()) 202 | self.__setitem__(C.BBOX, geohash.bbox(region)) 203 | 204 | def build_response(self, element): 205 | raise TypeError, "GeoDocuments have no XML representation." 206 | 207 | def get_node_info(self): 208 | "Return node ids and (lat, lon) coordinates in this document." 209 | return self[C.NODES] 210 | 211 | class OSMNode(OSMElement): 212 | 213 | special_attributes = [C.LAT, C.LON] 214 | 215 | def __init__(self, elemid): 216 | super(OSMNode, self).__init__(C.NODE, elemid) 217 | 218 | def build_response(self, osm): 219 | "Return an XML representation for a ." 
220 | 221 | node = ET.SubElement(osm, C.NODE) 222 | 223 | self.add_attributes(node, ignoredkeys=OSMNode.special_attributes) 224 | 225 | for k in OSMNode.special_attributes: 226 | node.attrib[k] = decode_coordinate(self.get(k)) 227 | 228 | self.add_tags(node) 229 | 230 | return osm 231 | 232 | class OSMWay(OSMElement): 233 | def __init__(self, elemid): 234 | super(OSMWay, self).__init__(C.WAY, elemid) 235 | super(OSMWay, self).__setitem__(C.NODES, set()) 236 | 237 | def build_response(self, osm): 238 | "Incorporate an XML representation for a ." 239 | 240 | way = ET.SubElement(osm, C.WAY) 241 | 242 | self.add_attributes(way, ignoredkeys=[C.NODES]) 243 | 244 | nodes = self.get(C.NODES, []) 245 | for n in nodes: 246 | node = ET.SubElement(way, C.ND) 247 | node.attrib[C.REF] = str(n) 248 | 249 | self.add_tags(way) 250 | 251 | return osm 252 | 253 | def get_node_ids(self): 254 | "Return ids for the nodes associated with a way." 255 | return [str(n) for n in self[C.NODES]] 256 | 257 | class OSMRelation(OSMElement): 258 | def __init__(self, elemid): 259 | super(OSMRelation, self).__init__(C.RELATION, elemid) 260 | super(OSMRelation, self).__setitem__(C.MEMBERS, []) 261 | 262 | def build_response(self, osm): 263 | "Incorporate an XML representation for a ." 264 | 265 | rel = ET.SubElement(osm, C.RELATION) 266 | 267 | self.add_attributes(rel, ignoredkeys=[C.MEMBERS]) 268 | 269 | members = self.get(C.MEMBERS, []) 270 | for m in members: 271 | member = ET.SubElement(rel, C.MEMBER) 272 | (member.attrib[C.REF], member.attrib[C.ROLE], 273 | member.attrib[C.TYPE]) = m 274 | 275 | self.add_tags(rel) 276 | 277 | return osm 278 | 279 | def get_member_ids(self, namespace): 280 | "Return a set of members in the specified namespace." 281 | 282 | return set([str(mid) for (mid, mrole, mtype) in self[C.MEMBERS]]) 283 | 284 | 285 | # 286 | # Factory function. 
287 | # 288 | 289 | _namespace_to_factory = { 290 | C.CHANGESET: OSMChangeSet, 291 | C.DATASTORE_CONFIG: OSMDatastoreConfig, 292 | C.GEODOC: OSMGeoDoc, 293 | C.NODE: OSMNode, 294 | C.WAY: OSMWay, 295 | C.RELATION: OSMRelation 296 | } 297 | 298 | def new_osm_element(namespace, elemid): 299 | '''Create an OSM element. 300 | 301 | namespace -- the OSM namespace. 302 | elemid -- element id for the element. 303 | ''' 304 | 305 | factory = _namespace_to_factory[namespace] 306 | 307 | return factory(elemid) 308 | 309 | # 310 | # JSON representation of an OSM element. 311 | # 312 | 313 | def decode_json(jsonvalue): 314 | "Returns a Python object, given its JSON representation." 315 | return cjson.decode(jsonvalue) 316 | 317 | def encode_json(obj): 318 | "Returns the JSON representation for a Python object." 319 | return cjson.encode(obj) 320 | 321 | # 322 | # Protobuf handling. 323 | # 324 | 325 | try: 326 | import protobuf 327 | 328 | def _notimplemented(_): 329 | raise NotImplementedError, "Protobuf support has not been written" 330 | 331 | decode_protobuf = _notimplemented 332 | encode_protobuf = _notimplemented 333 | 334 | except ImportError: 335 | 336 | def _noprotobufs(pbuf): 337 | "Returns an OSM element given its Protobuf representation." 338 | raise NotImplementedError, "Protobuf libraries are not present" 339 | 340 | decode_protobuf = _noprotobufs 341 | encode_protobuf = _noprotobufs 342 | -------------------------------------------------------------------------------- /src/python/datastore/lrucache.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved. 
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""A single-threaded cache supporting:
- slab-based insertion of elements,
- multiple namespaces,
- lookups of individual elements in slabs,
- LRU overflow of slabs.
"""

import collections
import threading
import types

from .slabutil import slabutil_make_slabkey

class BoundedLRUBuffer(collections.MutableMapping):
    """A bounded buffer with least-recently-used semantics.

    This buffer acts like a mapping with a bounded size. Key/value
    pairs can be added to the buffer as with a conventional mapping.
    Once the buffer reaches its size bound, additional inserts of
    key/value pairs will cause the least recently used key/value pair
    contained in the buffer to be ejected.

    The size of the bound and an optional callback for handling
    ejected items may be specified at buffer creation time.

    >>> b = BoundedLRUBuffer(bound=16, callback=None)

    Key/value pairs are added to buffer as for a conventional mapping.

    >>> b['key'] = 'value'
    >>> len(b)
    1

    Normal mapping operations are supported.

    >>> 'key' in b
    True

    The 'pop()' method retrieves the least recently used key/value
    pair from the buffer.

    >> (k,v) = b.pop() # Returns the least recently used pair.

    Lookups and assignments of keys mark them as being most recently
    used.

    >>> v = b['key'] # 'key' becomes most recently used.
    >>> b['key'] = 'newvalue' # 'key' becomes most recently used.

    If a 'callback' had been specified at object creation time, it
    will be invoked with the ejected key/value pair as arguments.

    >>> def handle_overflow(key, value):
    ...     # Handle overflow here.
    ...     pass
    >>> b = BoundedLRUBuffer(callback=handle_overflow)

    The 'flush()' method may be used to empty the buffer.

    >>> b.flush()
    >>> len(b)
    0
    """

    # Methods implementing the mapping protocol.

    def __init__(self, bound=65536, callback=None):
        # 'bound' is the maximum number of entries held before inserts
        # start ejecting the least recently used entry; 'callback', if
        # not None, is invoked as callback(key, value) on ejection.
        assert type(bound) is types.IntType
        self.bound = bound              # Max size.

        self.callback = callback
        # Recency is tracked with monotonically increasing integer
        # indices: a larger index means more recently used.  Removed
        # entries leave holes in 'values' that are skipped over.
        self.indices = {}               # Map of keys to indices
        self.values = {}                # Map of indices to values.
        self.count = 0                  # The number of entries in the buffer.
        self.first = -1                 # Smallest index in use.
        self.next = 0                   # Next index to use.

    def __str__(self):
        return "BoundedLRUBuffer(%d){%s}" % \
            (self.bound, ",".join(self.indices.keys()))

    def __contains__(self, key):
        return key in self.indices

    def __delitem__(self, key):
        index = self.indices[key]
        self._remove(index)

    def __getitem__(self, key):
        """Retrieve the item named by 'key' from the buffer.

        The value returned is pushed to the head of the buffer."""

        entry_index = self.indices[key]

        (_, entry) = self._remove(entry_index)

        # Re-insert the entry at a fresh, most-recently-used index.
        next_index = self._next_index(entry_index)
        self._insert(key, entry, next_index)

        return entry

    def __iter__(self):
        return iter(self.indices)

    def __len__(self):
        "Compute the number of items in the buffer."
        v = self.count
        assert v == len(self.indices)
        assert v == len(self.values)
        return v

    def __setitem__(self, key, value):
        """Store an item indexed by argument 'key'."""

        # Remove an existing entry first so the key is re-inserted at
        # the most-recently-used position.
        if key in self.indices:
            index = self.indices[key]
            self._remove(index)
        else:
            index = None
        next_index = self._next_index(index)
        self._insert(key, value, next_index)
        ejected = self._maybe_eject()

        if self.callback and ejected is not None:
            self.callback(*ejected)

    def pop(self):
        "Return the first item in the LRU buffer."
        # NOTE: unlike dict.pop(), this takes no key argument; it
        # always removes the least recently used (key, value) pair.
        k, v = self._pop()
        return (k, v)


    # Additional method.

    def flush(self):
        "Write back the contents of the LRU buffer."
        # Drain in LRU order, handing each pair to the callback (if
        # any); the buffer is empty afterwards.
        while self.count > 0:
            k, v = self._pop()
            if self.callback:
                self.callback(k, v)


    # Internal helper functions.

    def _insert(self, key, value, lru):
        "Insert a key/value pair at the specified LRU index."
        self.values[lru] = (key, value)
        self.indices[key] = lru
        self.count += 1

    def _remove(self, index):
        "Remove and return the (key, value) entry at LRU index 'index'."
        key, value = self.values.pop(index)
        assert index == self.indices[key]
        self.indices.pop(key)
        self.count -= 1
        return (key, value)

    def _maybe_eject(self):
        # Eject the least recently used entry if the buffer has grown
        # past its bound; returns the ejected pair or None.
        if self.count <= self.bound:
            return None

        # Find the least recently used item, skipping the holes left
        # by removed entries.
        while self.first < self.next and not (self.first in self.values):
            self.first += 1
        assert self.first < self.next, "Empty buffer"
        return self._remove(self.first)

    def _next_index(self, index=None):
        "Compute an optimal index number for storing an element."
        # Optimize the case where we overwrite the most recently added
        # value.
        if index is not None and index == self.next - 1:
            return index
        index = self.next
        self.next += 1
        return index

    def _pop(self):
        "Remove and return the least recently used (key, value) pair."
        # Find the first live index (removed entries leave holes).
        while not (self.first in self.values) and \
              self.first < self.next:
            self.first += 1
        # Look for an empty buffer.
        if self.first == self.next:
            raise IndexError, "pop from empty buffer"
        return self._remove(self.first)


class LRUCache:
    """An LRU cache of slabs, addressable by individual item keys.

    Slab descriptors are held in a BoundedLRUBuffer keyed by their
    slab key; 'lru_key' maps every (namespace, item key) pair to the
    key of the slab holding the item.  When the buffer ejects a slab,
    its items are dropped from 'lru_key' and the ejection is passed on
    to 'callback', if one was supplied.
    """

    def __init__(self, bound=65536, callback=None):
        self.bound = bound
        self.lru_cache = BoundedLRUBuffer(bound, self._lrucb)
        self.lru_key = {}
        self.callback = callback

    def __len__(self):
        return len(self.lru_key)

    def _lrucb(self, slabkey, slabdesc):
        # Ejection handler for the underlying buffer: drop the
        # per-item index entries, then notify the user's callback.
        assert slabkey not in self.lru_cache
        self._remove_slab_items(slabdesc)
        if self.callback:
            self.callback(slabkey, slabdesc)

    def _remove_slab_items(self, slabdesc):
        # Remove every (namespace, key) index entry for this slab.
        ns = slabdesc.namespace
        for k in slabdesc.keys():
            del self.lru_key[(ns,k)]

    def get(self, namespace, key):
        """Look up an item by (namespace, key).

        Returns None when the key is unknown, the slab's result for
        'key' when the slab is cached, or (False, key) when the index
        knows the key but the slab is no longer in the buffer.
        """
        try:
            lrukey = self.lru_key[(namespace,key)]
        except KeyError: # No such slab.
            return None
        slabdesc = self.lru_cache.get(lrukey)
        if slabdesc:
            return slabdesc.get(key) # Get item in the slab.
        else:
            return (False, key) # No such slab.

    def get_slab(self, namespace, key):
        "Return the slab descriptor for a key."
        try:
            slabkey = self.lru_key[(namespace, key)]
        except KeyError:
            return None
        return self.lru_cache[slabkey]


    def insert_slab(self, slabdesc):
        "Insert items from a slab."
        slabkey = slabdesc.slabkey
        if slabkey in self.lru_cache:
            raise ValueError, "Duplicate insertion of slab: %s" % str(slabkey)
        self.lru_cache[slabkey] = slabdesc
        ns = slabdesc.namespace
        # Index every item of the slab back to its slab key.
        for k in slabdesc.keys():
            itemkey = (ns,k)
            if itemkey in self.lru_key:
                raise KeyError, "Duplicate insertion of (%s,%s)" % (ns,k)
            self.lru_key[itemkey] = slabkey

    def remove_slab(self, slabdesc):
        "Remove a slab from the cache."

        slabkey = slabdesc.slabkey
        assert slabkey in self.lru_cache
        self._remove_slab_items(slabdesc)
        del self.lru_cache[slabkey]

    def flush(self):
        "Flush the contents of the cache."

        # Flushing the buffer invokes _lrucb() per slab, which empties
        # 'lru_key' as a side effect.
        self.lru_cache.flush()

        assert len(self.lru_cache) == 0
        assert len(self.lru_key) == 0


class LRUIOCache(LRUCache):
    """An LRU cache that tracks I/O-in-flight progress of items."""

    def __init__(self, bound=65536, callback=None):
        LRUCache.__init__(self, bound, self._iocb)
        self.iocallback = callback
        self.iocond = threading.Condition()     # Guards 'iopending'.
        self.iopending = []     # Slab keys with I/O in flight.

    def _iocb(self, slabkey, slabdesc):
        # Ejection handler: record the slab as having I/O pending
        # before passing the ejection on to the user's callback.
        assert slabkey == slabdesc.slabkey
        with self.iocond:
            assert slabkey not in self.iopending
            self.iopending.append(slabkey)
        if self.iocallback:
            self.iocallback(slabkey, slabdesc)

    def get(self, namespace, key):
        """Retrieve an item from the cache.

        If an item is missing from the cache, wait for pending I/O to
        complete.
        """
        v = LRUCache.get(self, namespace, key)
        if v is None:
            # Block until any in-flight write-back of the slab that
            # would contain this key has completed; iodone() wakes us.
            # NOTE(review): the cache is not re-queried after the
            # wait, so None is still returned — presumably the caller
            # retries the lookup; confirm against callers.
            slabkey = slabutil_make_slabkey(namespace, key)
            with self.iocond:
                while slabkey in self.iopending:
                    self.iocond.wait()
        return v

    def isiopending(self, slabkey):
        "Return True if I/O is pending on a slab."
        with self.iocond:
            status = slabkey in self.iopending
        return status

    def iodone(self, slabkey):
        "Mark I/O on a slabkey as completed."
        with self.iocond:
            assert slabkey in self.iopending
            self.iopending.remove(slabkey)
            self.iocond.notifyAll()
--------------------------------------------------------------------------------