├── .gitignore
├── README.md
├── LICENSE
├── src
└── python
│ ├── frontend
│ ├── __init__.py
│ ├── util.py
│ ├── capabilities.py
│ ├── __main__.py
│ ├── fe.py
│ ├── maphandler.py
│ └── osmelement.py
│ ├── apiserver
│ ├── __init__.py
│ ├── const.py
│ └── osmelement.py
│ ├── datastore
│ ├── __init__.py
│ ├── ds_couchdb.py
│ ├── ds_geohash.py
│ ├── ds_membase.py
│ ├── slabutil.py
│ ├── ds.py
│ └── lrucache.py
│ ├── dbmgr
│ ├── __init__.py
│ ├── dbm_stats.py
│ ├── dbm_ops.py
│ ├── dbm_input.py
│ ├── __main__.py
│ └── dbm_geotables.py
│ ├── README.md
│ ├── tests
│ ├── __init__.py
│ ├── test_geohash.py
│ ├── test_slabutil.py
│ ├── test_dsmembase.py
│ ├── test_osmelement.py
│ └── test_lrucache.py
│ ├── db-mgr
│ ├── front-end
│ └── config
│ └── osm-api-server.cfg
└── doc
├── Home.md
├── ProvisioningInformation.md
├── Overview.md
├── SupportedRequests.md
├── Improvements.md
├── DesignMistakes.md
├── Roadmap.md
├── Dbmgr.md
├── DeploymentInstructions.md
└── Slabs.org
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | *.pyo
4 | .sconsign.dblite
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # README
2 |
3 | This is an experimental implementation of an API service that supports
4 | a (read-only) subset of the [OSM v0.6 API][osmapi].
5 |
6 | The goal for this project is to explore an implementation of the
7 | [OSM API][osmapi] built over a distributed key/value store (i.e., a
8 | "NoSQL" backend). The service has been designed to be easy to scale
9 | horizontally.
10 |
11 | The implementation currently uses [Membase][membase] for the data
12 | store; however its design should work with other key/value systems.
13 |
14 | ## Current Status
15 |
16 | This repository contains a working snapshot of the service.
17 | The server only supports read queries on map data.
18 |
19 | ## Further Information
20 |
21 | Information on how to use this software package may be found in the
22 | project's [documentation][].
23 |
24 |
25 |
26 | [membase]: http://www.membase.org/ "Membase"
27 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM v0.6 API"
28 | [documentation]: https://github.com/MapQuest/mapquest-osm-server/blob/master/doc/Home.md
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This software is distributed under the following MIT License.
2 |
3 | Copyright (c) 2011 AOL Inc. All Rights Reserved.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/python/frontend/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | '''Front end for the OSM API Server.'''
24 |
--------------------------------------------------------------------------------
/src/python/apiserver/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 | #
23 |
24 | '''An API Server for the OpenStreetMap project.'''
25 |
--------------------------------------------------------------------------------
/src/python/datastore/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """A package containing datastore implementations.
24 |
25 | Each datastore module exports a class 'DB' whose methods implement
26 | the interface to the datastore.
27 | """
28 |
--------------------------------------------------------------------------------
/src/python/datastore/ds_couchdb.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """An interface to a CouchDB based backend store."""
24 |
class Datastore:
    """Placeholder for a CouchDB-backed datastore (not yet implemented).

    NOTE(review): the datastore package docstring says each backend
    module exports a class named 'DB'; this module exposes 'Datastore'
    instead -- confirm which name the loader actually expects.
    """

    def __init__(self, config):
        """Create the datastore; 'config' is accepted but currently unused."""
        pass
29 |
30 |
--------------------------------------------------------------------------------
/doc/Home.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This is an experimental API server for [Open Street Map][osm] map
4 | data.
5 |
6 | - The server supports most of the read operations on map data defined by
7 | [version 0.6][osmapi] of the OSM API (see [SupportedRequests][] for the
8 | precise list).
9 | - For its data store, the server currently uses [Membase][membase], a
10 | scalable distributed key/value store. Support for other scalable
11 | key/value stores should be easy to add.
12 | - The server has been designed to be easy to scale out horizontally.
13 |
14 | ## Further Reading
15 |
16 | * [Overview][] -- An overview of the implementation.
17 | * [DeploymentInstructions][] -- How to deploy the server.
18 | * [ProvisioningInformation][] -- Sizing information for running a server.
19 | * [Roadmap][] -- The steps going forward.
20 | * [SupportedRequests][] -- The list of supported HTTP requests.
21 |
22 |
23 |
24 | [DeploymentInstructions]: DeploymentInstructions.md
25 | [membase]: http://www.membase.org/ "Membase"
26 | [osm]: http://www.openstreetmap.org/ "Open Street Map"
27 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM API v0.6"
28 | [Overview]: Overview.md
29 | [python]: http://www.python.org/ "The Python Language"
30 | [ProvisioningInformation]: ProvisioningInformation.md
31 | [Roadmap]: Roadmap.md
32 | [SupportedRequests]: SupportedRequests.md
33 |
--------------------------------------------------------------------------------
/src/python/dbmgr/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Database Manager.
24 |
25 | This tool allows OSM data to be loaded into a variety of backends.
26 | Currently supported backends include:
27 |
28 | * CouchDB
29 | * Membase
30 | * Riak
31 | """
32 |
--------------------------------------------------------------------------------
/src/python/README.md:
--------------------------------------------------------------------------------
1 | # README
2 |
3 | This directory contains a [Python][python] implementation of a
4 | scalable API server for OSM map data.
5 |
6 | ## Directory Contents
7 |
8 | * `apiserver/`
9 |
10 | Common definitions and utilities.
11 |
12 | * `datastore/`
13 |
14 | Interfaces to various key/value stores.
15 |
16 | * `dbmgr/`
17 |
18 | Code for the data store management utility.
19 |
20 | * `frontend/`
21 |
22 | Code for the 'frontend' of the API server.
23 |
24 | * `tests/`
25 |
26 | Test code.
27 |
28 | ## Running the code 'in-place'
29 |
30 | * Configuration information for these tools is specified in the file
31 | `config/osm-api-server.cfg`.
32 | * The script `front-end` starts the server. With the default
33 | configuration, this server would need to be run as root since it
34 | listens for API requests on port 80. The listening port may be
35 | changed using the configuration file (see section `front-end`,
36 | configuration item `port`).
37 |
38 | % sudo ./front-end
39 |
40 | * The script `db-mgr` invokes the ingestion tool. For example:
41 | * To initialize the data store, use:
42 |
43 | % ./db-mgr -I
44 |
45 | * To load a "planet.osm" planet dump into the data store, use:
46 |
47 | % ./db-mgr PLANET.OSM
48 |
49 | Both scripts support a `--help` option.
50 |
51 |
52 |
53 | [python]: http://www.python.org/ "The Python Programming Language"
54 |
--------------------------------------------------------------------------------
/src/python/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Functional tests for the OSM API server.
24 |
25 | This Python package contains tests for the OSM API server and
26 | associated utilities.
27 |
28 | Note: The presence of this file causes the `py.test` test discovery
29 | tool to use the parent directory as its `basedir` (see py.test
30 | documentation for more information).
31 | """
32 |
--------------------------------------------------------------------------------
/src/python/db-mgr:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
4 | #
5 | # Permission is hereby granted, free of charge, to any person
6 | # obtaining a copy of this software and associated documentation files
7 | # (the "Software"), to deal in the Software without restriction,
8 | # including without limitation the rights to use, copy, modify, merge,
9 | # publish, distribute, sublicense, and/or sell copies of the Software,
10 | # and to permit persons to whom the Software is furnished to do so,
11 | # subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be
14 | # included in all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | # SOFTWARE.
24 |
25 | """Manage data in the backend of the API server."""
26 |
import runpy
import sys

package = 'dbmgr'

version = sys.version_info[0:2]

# Only Python 2.6 is known to work with this tool; refuse to run on
# any other version rather than fail in obscure ways later.
if version != (2, 6):
    raise NotImplementedError("Needs Python version 2.6")

runpy.run_module('%s.__main__' % package, run_name='__main__',
                 alter_sys=True)
40 |
--------------------------------------------------------------------------------
/src/python/front-end:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
4 | #
5 | # Permission is hereby granted, free of charge, to any person
6 | # obtaining a copy of this software and associated documentation files
7 | # (the "Software"), to deal in the Software without restriction,
8 | # including without limitation the rights to use, copy, modify, merge,
9 | # publish, distribute, sublicense, and/or sell copies of the Software,
10 | # and to permit persons to whom the Software is furnished to do so,
11 | # subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be
14 | # included in all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | # SOFTWARE.
24 |
25 | """Execute the frontend of the API server."""
26 |
import runpy
import sys

package = 'frontend'

version = sys.version_info[0:2]

# Check for the specific Python version that we know works.
# BUG FIX: this previously tested 'version <= (2, 6)', which would
# also accept unsupported older interpreters (2.5, 2.4, ...).  The
# companion 'db-mgr' script requires exactly 2.6, so do the same here.
if version == (2, 6):
    runpy.run_module('%s.__main__' % package, run_name='__main__',
                     alter_sys=True)
else:
    raise NotImplementedError("Needs Python version 2.6")
40 |
--------------------------------------------------------------------------------
/src/python/datastore/ds_geohash.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Convenience routines for managing geo-hashes."""
24 |
25 | import geohash
26 |
27 | import apiserver.const as C
28 |
# Module-level configuration; both values are populated by
# init_geohash() before any geohash keys are generated.
__GHKEYLENGTH = None  # precision (character count) of generated geohash keys
__SCALEFACTOR = None  # divisor converting stored fixed-point coordinates to degrees
31 |
def init_geohash(ghkeylength, scalefactor):
    """Record the module-wide geohash parameters.

    'ghkeylength' is the precision (character count) of generated keys;
    'scalefactor' converts stored fixed-point coordinates to degrees.
    Must be called before geohash_key_for_element() is used.
    """
    global __GHKEYLENGTH, __SCALEFACTOR
    __GHKEYLENGTH, __SCALEFACTOR = ghkeylength, scalefactor
38 |
def geohash_key_for_element(elem):
    """Return the geohash-based datastore key for node element 'elem'.

    The element's stored C.LAT/C.LON values are fixed-point integers;
    dividing by the configured scale factor recovers degrees.  The
    latitude is capped at C.MAXGHLAT, the largest latitude the geohash
    encoder is known to handle.
    """
    latitude = float(elem.get(C.LAT)) / __SCALEFACTOR
    if latitude > C.MAXGHLAT:
        latitude = C.MAXGHLAT
    longitude = float(elem.get(C.LON)) / __SCALEFACTOR
    return geohash.encode(latitude, longitude, precision=__GHKEYLENGTH)
46 |
--------------------------------------------------------------------------------
/src/python/frontend/util.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | ## Utility functions.
24 |
25 | from lxml import etree as ET
26 |
27 | import apiserver.const as C
28 |
def response_to_xml(elem):
    'Serialize element tree \'elem\' as a pretty-printed XML response.'
    # Emit an XML declaration and encode with the project-wide UTF-8
    # encoding name from the constants module.
    options = dict(encoding=C.UTF8, pretty_print=True,
                   xml_declaration=True)
    return ET.tostring(elem, **options)
33 |
def filter_references(namespace, items):
    """Return the set of ids referenced from 'items' in 'namespace'.

    Each element's C.REFERENCES value is a list of strings of the form
    '<P><id>', where '<P>' is the upper-cased first letter of the
    referencing namespace (e.g. 'W123' for way 123).  References whose
    prefix matches 'namespace' are returned with the prefix stripped.
    """
    prefix = namespace[0].upper()
    # Generator expression instead of the original nested
    # map/filter/lambda; wrapping it in set() (rather than using
    # set-comprehension syntax) keeps this Python 2.6 compatible.
    return set(ref[1:]
               for i in items
               for ref in i.get(C.REFERENCES, [])
               if ref[0] == prefix)
43 |
--------------------------------------------------------------------------------
/doc/ProvisioningInformation.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This document contains information about the resource requirements
4 | needed for running an instance of this server.
5 |
6 | **Note**: A pending issue ([#13][issue13]) is currently preventing the load of a complete
7 | [planet][osmplanet]. The data below is therefore for a subset.
8 |
9 | ## Membase
10 |
11 | * Membase version: 1.6.5 (i386), running on a laptop with 3GB RAM,
12 | running Ubuntu GNU/Linux:
13 | * Source tree at commit [7bcb49c807f39fbb9989](https://github.com/MapQuest/mapquest-osm-server/commit/7bcb49c807f39fbb998958e3cfc14496077b065e).
14 | * Extract: `india.osm.bz2` from `download.geofabrik.de`, dated
15 | 2011-04-11:
16 | * Size: 53387268 bytes bzipped.
17 | * Containing 0 changesets, 3568521 nodes, 215498 ways, and 933 relations.
18 | * Resource usage (Membase):
19 | * Reported disk usage: 920MB.
20 | * Reported RAM usage: 882MB (seems high?).
21 | * 245137 unique keys in the data store (using the default slab settings).
22 | * Representative timings using the default configuration, with both Membase and front-end running on `localhost`:
23 | * First fetch of a node (i.e., with a 'cold' element cache):
24 | `[I 110413 21:53:53 web:849] 200 GET /api/0.6/node/15382163 (127.0.0.1) 15.50ms`
25 | * First fetch of the ways for a node:
26 | `[I 110413 21:53:57 web:849] 200 GET /api/0.6/node/15382163/ways (127.0.0.1) 5.40ms`
27 | * Subsequent re-fetch of the ways for the same node:
28 | `[I 110413 21:54:00 web:849] 200 GET /api/0.6/node/15382163/ways (127.0.0.1) 0.99ms`
29 | * A re-fetch of the same node:
30 | `[I 110413 21:54:10 web:849] 200 GET /api/0.6/node/15382163 (127.0.0.1) 0.68ms`
31 |
32 | I do not have scaling numbers under load (yet).
33 |
34 | ## Related Tickets
35 |
36 | * Ticket [#9][issue9] tracks efforts to reduce the data storage requirements for map data.
37 | * Ticket [#13][issue13] tracks efforts to speed up ingestion of a full planet dump.
38 |
39 |
40 |
41 | [issue9]: https://github.com/MapQuest/mapquest-osm-server/issues/9
42 | [issue13]: https://github.com/MapQuest/mapquest-osm-server/issues/13
43 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "OSM Planet"
44 |
45 |
--------------------------------------------------------------------------------
/doc/Overview.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This document presents an overview of this map server.
4 |
5 | ## Goals
6 |
7 | The goal of this project is to explore an implementation of an
8 | OSM-like map server using a scalable, distributed, key/value system
9 | for data storage.
10 |
11 | Access to map data would be using the [APIs][osmapi] defined by the
12 | OSM project. Currently, this prototype supports a read-only subset of
13 | the [OSM APIs][osmapi].
14 |
15 | ## Architecture
16 |
17 | The server has three major components.
18 |
19 | * The "data store".
20 |
21 | The data store is a persistent store of map data. Map data from
22 | "[planet.osm][osmplanet]" snapshots is processed by an ingestion tool
23 | (see below) and is stored in key/value form in the data store.
24 |
25 | The key/value store needs to be able to deal with a large number
26 | of keys; the current prototype uses [membase][].
27 |
28 | * The "front end".
29 |
30 | The front end responds to HTTP requests of the form defined by the
31 | [OSM API][osmapi]. The list of supported requests may be found in
32 | [SupportedRequests][].
33 |
34 | * The data store manager.
35 |
36 | This tool is used to ingest [planet.osm][osmplanet] and
37 | [OSM change][osmchange] files published by the [openstreetmap][]
38 | project into the data store.
39 |
40 | ## Configuration
41 |
42 | Most aspects of the operation of the server are controlled by a
43 | configuration file, see the file [osm-api-server.cfg][configsrc].
44 |
45 | ## See Also
46 |
47 | * [DesignMistakes][] -- Alternative designs that were tried, but
48 | which did not work out well.
49 | * [Improvements][] -- (Near term) improvements to the design.
50 |
51 |
52 |
53 | [configsrc]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/config/osm-api-server.cfg "Configuration file"
54 | [DesignMistakes]: DesignMistakes.md
55 | [Improvements]: Improvements.md
56 | [membase]: http://www.membase.org/ "Membase"
57 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM v0.6 API"
58 | [osmchange]: http://wiki.openstreetmap.org/wiki/OsmChange "OSM Change"
59 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "Planet.OSM"
60 | [openstreetmap]: http://www.openstreetmap.org/ "Open Street Map"
61 | [SupportedRequests]: SupportedRequests.md
62 |
--------------------------------------------------------------------------------
/src/python/tests/test_geohash.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Test the 'datastore.geohash' utility module."""
24 |
25 | import pytest
26 |
27 | import apiserver.const as C
28 | from apiserver.osmelement import new_osm_element
29 | from datastore.ds_geohash import init_geohash, geohash_key_for_element
30 |
_GHKEYLENGTH = 5  # geohash key length (characters) used by these tests
_SCALEFACTOR = 10000000  # fixed-point scale: stored coordinate = degrees * 1e7
33 |
def test_geokeys():
    "Check the geohash keys produced for a spread of coordinates."

    init_geohash(_GHKEYLENGTH, _SCALEFACTOR)

    # Each entry is (latitude, longitude, expected geohash key).
    cases = [
        (0.0, 0.0, 's0000'),
        (89, 0.0, 'upb42'),
        (89.999999999999992, 0.0, 'upbpb'),  # Max lat supported.
        (-90, 0.0, 'h0000'),
        (-90, -180, '00000'),
        (-90, +180, '00000'),
        (-90, +90, 'n0000'),
        (-90, -90, '40000'),
        (-45, -45, '70000'),
        (-45, 45, 'm0000'),
        (45, 45, 'v0000'),
        (45, -45, 'g0000'),
    ]

    for lat, lon, expected in cases:
        node = new_osm_element(C.NODE, '0')
        node[C.LAT] = lat * _SCALEFACTOR
        node[C.LON] = lon * _SCALEFACTOR
        assert geohash_key_for_element(node) == expected
60 |
--------------------------------------------------------------------------------
/doc/SupportedRequests.md:
--------------------------------------------------------------------------------
1 | ## Supported HTTP requests
2 |
3 | This server currently supports the following subset of the [OSM v0.6 API](http://wiki.openstreetmap.org/wiki/API_v0.6).
4 |
5 |
6 |
7 |
8 | | Operation/URI | Description |
9 |
10 |
11 |
12 |
13 | | GET / |
14 | Return information about this server instance. |
15 |
16 |
17 | | GET /api/capabilities |
18 | Retrieve server information. |
19 |
20 |
21 | | GET /api/0.6/capabilities |
22 | Retrieve server information. |
23 |
24 |
25 | | GET /api/0.6/map?bbox=l,b,r,t |
26 | Retrieve information by a bounding box. |
27 |
28 |
29 | | GET /api/0.6/node/NNNN |
30 | Retrieve node `NNNN`. |
31 |
32 |
33 | | GET /api/0.6/way/NNNN |
34 | Retrieve way `NNNN`. |
35 |
36 |
37 | | GET /api/0.6/relation/NNNN |
38 | Retrieve relation `NNNN`. |
39 |
40 |
41 | | GET /api/0.6/nodes?nodes=#,#,#,... |
42 | Retrieve multiple nodes in one request. |
43 |
44 |
45 | | GET /api/0.6/ways?ways=#,#,#,... |
46 | Retrieve multiple ways in one request. |
47 |
48 |
49 | | GET /api/0.6/relations?relations=#,#,#,... |
50 | Retrieve multiple relations in one request. |
51 |
52 |
53 | | GET /api/0.6/nodes/NNNN/relations |
54 | Retrieve relations for a node. |
55 |
56 |
57 | | GET /api/0.6/ways/NNNN/relations |
58 | Retrieve relations for a way. |
59 |
60 |
61 | | GET /api/0.6/relations/NNNN/relations |
62 | Retrieve relations for a relation. |
63 |
64 |
65 | | GET /api/0.6/node/NNNN/ways |
66 | Retrieve ways for a node. |
67 |
68 |
69 | | GET /api/0.6/way/NNNN/full |
70 | Retrieve a way and all nodes referenced by the way. |
71 |
72 |
73 | | GET /api/0.6/relation/NNNN/full |
74 | Retrieve a relation, all nodes and ways that are its members, and all nodes referenced by the ways being returned. |
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/doc/Improvements.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This page describes enhancements to the current design of the API
4 | server.
5 |
6 | ## Speeding Up Ingestion
7 |
8 | Ingestion of a planet dump by the [ingestion tool][dbmgr] needs to be
9 | sped up. This issue is being tracked in [issue #13][issue13].
10 |
11 | Currently:
12 |
13 | * When processing `nodes`, the tool appears to be limited by Python's
  bytecode interpretation overhead---I/O does not seem to introduce
  a bottleneck.
16 | * When processing `ways` and `relations` in the planet dump, the
17 | program becomes bound by I/O latencies. In particular,
18 | * The current design of the element cache is inefficient for
19 | nodes (see below).
20 | * The program processes one way or relation element at a time
21 | (i.e., in a single-threaded fashion).
22 |
23 | ## Improving Cache Efficiencies
24 |
25 | The upstream [OSM API][osmapi] numbers new map elements (nodes, ways
26 | and relations) sequentially, as and when they are created. This means
27 | that elements that are geographically 'close' can have ids that are
28 | far apart in numeric value.
29 |
30 | In the current design elements are [grouped into 'slabs'][slabutil.py]
31 | by element id. API queries however, tend to be for OSM elements which
32 | are geographically 'close' to each other. For such queries, the
33 | current scheme is inefficient both from the point of view of I/O
34 | traffic and (cache) RAM consumption.
35 |
36 | A better scheme would therefore be:
37 |
38 | * Group elements into geographically keyed slabs; elements in each
39 | slab would be "close by" in terms of geographical distance.
40 | * For direct lookups of elements via the API, use a mapping from
41 | element ids to the slabs holding element's definition.
42 |
43 | In this new scheme, direct lookups of elements would need two key
44 | retrievals from the data store, compared to one retrieval in the
45 | current scheme. However, the improvements to the efficiency of the
46 | element cache should compensate for this additional overhead.
47 |
48 | See also: Issue [#16][issue16].
49 |
50 |
51 |
52 | [dbmgr]: https://github.com/MapQuest/mapquest-osm-server/tree/master/src/python/dbmgr
53 | [issue13]: https://github.com/MapQuest/mapquest-osm-server/issues/13
54 | [issue16]: https://github.com/MapQuest/mapquest-osm-server/issues/16
55 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM API v0.6"
56 | [slabutil.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/slabutil.py
57 |
--------------------------------------------------------------------------------
/doc/DesignMistakes.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This document contains information about implementation approaches
4 | that were tried but which did not work well.
5 |
6 | ## CouchDB's 'views' (slow responses)
7 |
8 | In an earlier version of this server I had used a [CouchDB][] backend
9 | to store map data. The `/map` API was implemented by using CouchDB's
10 | [views][couchdbviews].
11 |
12 | The reasons I abandoned this approach were:
13 |
14 | 1. CouchDB's [views][couchdbviews] turned out to be slow, causing the
15 | `/map` call to take several hundreds of milliseconds to complete. This
16 | was well over my design goal.
17 | 2. [CouchDB][]'s on-disk storage scheme seemed to need a large amount
18 | of disk space. Given that the size of the OSM dataset is already
19 | large (over one billion nodes, nearly a hundred million ways, and
20 | growing), these high overheads were a concern.
21 | 3. [CouchDB][] uses HTTP based access; every data store access was
22 | thus high overhead.
23 |
24 | ## Vanilla Membase (high memory overheads)
25 |
26 | In the initial design of the [Membase based data store][dsmembase.py]
27 | I mapped each node, way and relation one to one to a Membase key.
28 | While this approach is simple, it does not scale well: [Membase][] as
29 | of the current version (v1.6.5), has an
30 | [overhead of 120 bytes][membasesizing] per key. Thus we would need
31 | 120G of RAM to store _just the keys_ for the current OSM data set.
32 |
33 | My current design [groups keys into "slabs"][slabutil.py]. This
34 | brings down the number of (membase) keys needed to manageable levels.
35 | I/O is done in terms of slabs, and a local
36 | ["cache" with LRU semantics][lrucache.py] is used to reduce the number
37 | of I/O requests sent to the Membase server.
38 |
39 |
40 |
41 | [couchdb]: http://couchdb.apache.org/ "Apache CouchDB"
42 | [couchdbviews]: http://wiki.apache.org/couchdb/Introduction_to_CouchDB_views "CouchDB Views"
43 | [ds.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/ds.py
44 | [dsmembase.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/ds_membase.py
45 | [lrucache.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/lrucache.py
46 | [membase]: http://www.membase.org/ "Membase"
47 | [membasesizing]: http://techzone.couchbase.com/wiki/display/membase/Sizing+Guidelines "Sizing Guidelines"
48 | [slabutil.py]: https://github.com/MapQuest/mapquest-osm-server/blob/master/src/python/datastore/slabutil.py
49 |
--------------------------------------------------------------------------------
/doc/Roadmap.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This page lists the proposed evolution of the server.
4 |
5 | ## Current Status
6 |
7 | * The code is functional: [planet dumps][osmplanet] can be ingested and their data retrieved using the [API][osmapi].
8 | * Serving data via the API is quite fast (see [ProvisioningInformation][]), but ingesting a full planet is slow.
9 | * Modules have unit tests.
* External documentation (i.e., the `doc/` directory) is up-to-date.
11 | * The supported data store is: [Membase][].
12 |
13 | ## Future work
14 |
15 | * We need to support 'full' [Planet dumps][fullosmplanet] dumps, along with
16 | retrieval of changesets, element history and prior versions of elements (tickets [#4][issue4] and [#14][issue14]).
17 | * Performance improvements that have been identified so far could be addressed:
    * The `/map` API call could be further sped up by grouping nodes and ways based on geographical proximity.
    * The ingestion tool needs to be sped up ([#13][issue13]) and possibly rewritten in a non-interpreted language.
20 | * Storage efficiency can be improved:
21 | * A separate string table for frequently used strings could cut down storage needs.
22 | * Slabs could be coded more efficiently ([#9][issue9]).
23 | * The "front-end" needs to be made fully asynchronous ([#2][issue2]).
24 | * System tests that verify end-to-end integrity of the ingestion process are needed.
25 | * More supported data stores: possibly [Riak][] ([#6][issue6]) for a scalable backend, or perhaps [BerkeleyDB][] for a single machine configuration.
26 |
27 |
28 |
29 | [BerkeleyDB]: http://www.oracle.com/technetwork/database/berkeleydb/overview/index.html "Berkeley DB"
30 | [fullosmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm/full "Full OSM Planet"
31 | [issue2]: https://github.com/MapQuest/mapquest-osm-server/issues/2
32 | [issue4]: https://github.com/MapQuest/mapquest-osm-server/issues/4
33 | [issue6]: https://github.com/MapQuest/mapquest-osm-server/issues/6
34 | [issue9]: https://github.com/MapQuest/mapquest-osm-server/issues/9
35 | [issue13]: https://github.com/MapQuest/mapquest-osm-server/issues/13
36 | [issue14]: https://github.com/MapQuest/mapquest-osm-server/issues/14
37 | [membase]: http://www.membase.org/ "Membase"
38 | [osmapi]: http://wiki.openstreetmap.org/wiki/API_v0.6 "OSM v0.6 API"
39 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "OSM Planet"
40 | [ProvisioningInformation]: ProvisioningInformation.md
41 | [riak]: http://www.basho.com/ "Riak"
42 | [wiki]: https://github.com/MapQuest/mapquest-osm-server/wiki "Wiki"
43 |
--------------------------------------------------------------------------------
/src/python/frontend/capabilities.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | ## Support retrieval of the server's capabilities.
24 |
25 | import tornado.web
26 |
27 | from lxml import etree as ET
28 |
29 | import apiserver.const as C
30 | from apiserver.osmelement import new_osm_response
31 | from util import response_to_xml
32 |
33 | # Sample output:
34 | #
35 | #
36 | #
37 | #
38 | #
39 | #
40 | #
41 | #
42 | #
43 | #
44 | #
45 |
class CapabilitiesHandler(tornado.web.RequestHandler):
    "Handle requests for server capabilities."

    def initialize(self, cfg):
        # Stash the parsed server configuration for use by get().
        self.cfg = cfg

    def get(self):
        "Write an XML document describing the server's capabilities."
        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)

        def _cfgval(name):
            "Look up 'name' in the front-end section of the configuration."
            return self.cfg.get(C.FRONT_END, name)

        osm = new_osm_response()
        api = ET.SubElement(osm, "api")

        # Children of the <api> element: (tag, [(attribute, config item)]).
        spec = [
            ("version", [("minimum", C.API_VERSION_MINIMUM),
                         ("maximum", C.API_VERSION_MAXIMUM)]),
            ("area", [("maximum", C.AREA_MAX)]),
            ("tracepoints", [("per_page", C.TRACEPOINTS_PER_PAGE)]),
            ("waynodes", [("maximum", C.WAYNODES_MAX)]),
            ("changesets", [("maximum_elements", C.CHANGESETS_MAX)]),
            ("timeout", [("seconds", C.API_CALL_TIMEOUT)]),
        ]

        for (tag, attrs) in spec:
            child = ET.SubElement(api, tag)
            for (attrname, cfgitem) in attrs:
                child.attrib[attrname] = _cfgval(cfgitem)

        self.write(response_to_xml(osm))
80 |
--------------------------------------------------------------------------------
/src/python/dbmgr/dbm_stats.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Manage statistics.
24 |
25 | """
26 |
27 | import sys
28 | import threading
29 |
30 | import apiserver.const as C
31 |
_timer_delay = 1   # Number of seconds between reports.
_stats = {}        # Hash maps tracking the collected statistics.
_prevstats = {}    # Snapshot of _stats taken at the previous report.

_timer = None      # Timer object.
_is_active = None  # Run state.
_lock = None       # Serializes access to _stats.
39 |
def _display_stats():
    """Print a one-line statistics report to stdout.

    For each element namespace with a non-zero count, the report shows
    the absolute count and, when non-zero, the increment since the
    previous report, e.g. "N: 1000(+50) W: 20 ".
    """
    global _lock, _prevstats

    def _format(prefix, absval, incr):
        """Build a %-format fragment for one namespace.

        'prefix' is a single letter naming the namespace ('c', 'n',
        'w' or 'r').  The fragment contains '%(name)d' conversions
        resolved against locals() at the end of _display_stats():
        '_<prefix>v' is the absolute count and '_<prefix>' is the
        increment.  An empty string is returned when the absolute
        count is zero.
        """
        s = ""
        if absval:
            s += ("%s: %%(_%sv)d" % (prefix.upper(), prefix))
            if incr:
                s += ("(+%%(_%s)d)" % prefix)
            s += " "
        return s

    # Retrieve the previous and current counts.
    _c = _prevstats[C.CHANGESET]
    _n = _prevstats[C.NODE]
    _w = _prevstats[C.WAY]
    _r = _prevstats[C.RELATION]

    # Snapshot the live counters under the lock, and remember them as
    # the baseline for the next report.
    _lock.acquire()
    _cv = _stats[C.CHANGESET]
    _nv = _stats[C.NODE]
    _wv = _stats[C.WAY]
    _rv = _stats[C.RELATION]
    _prevstats.update(_stats)
    _lock.release()

    # Compute incremental changes.
    _c = _cv - _c
    _n = _nv - _n
    _w = _wv - _w
    _r = _rv - _r

    # Compute the format string; its '%(name)d' conversions refer to
    # the local variables computed above, filled in via locals().
    s = _format('c', _cv, _c)
    s += _format('n', _nv, _n)
    s += _format('w', _wv, _w)
    s += _format('r', _rv, _r)

    print s % locals()
81 |
82 |
83 | def _stats_timer():
84 | "Invoke the actual display helper and re-arm the timer."
85 |
86 | _display_stats()
87 |
88 | global _timer
89 | if _is_active:
90 | _timer = threading.Timer(_timer_delay, _stats_timer)
91 | _timer.start()
92 |
93 |
def init_statistics(config, options):
    """Initialize statistics tracking.

    config  -- server configuration (currently unused).
    options -- parsed command-line options; 'options.verbose' enables
               the periodic statistics report.
    """
    global _is_active, _lock, _prevstats, _stats, _timer

    # Zero the counters for every tracked element namespace.
    for namespace in (C.CHANGESET, C.NODE, C.WAY, C.RELATION):
        _stats[namespace] = _prevstats[namespace] = 0

    _lock = threading.Lock()

    if options.verbose:
        # Start the periodic reporting timer; mark it as a daemon so
        # it cannot keep the process alive at exit.
        _is_active = True
        _timer = threading.Timer(_timer_delay, _stats_timer)
        _timer.daemon = True
        _timer.start()
111 |
112 |
def fini_statistics(options):
    """Shut the module down, printing a final report when verbose."""
    global _is_active

    _is_active = False          # Stop the timer callback from re-arming.
    if _timer:
        _timer.cancel()

    if options.verbose:
        _display_stats()
123 |
124 |
def increment_stats(namespace):
    """Atomically increment the counter for 'namespace'.

    namespace -- one of the element namespaces registered by
                 init_statistics() (e.g. C.NODE).
    """
    # Use the lock as a context manager so it is released even if the
    # lookup raises (e.g. for an unregistered namespace); the previous
    # explicit acquire()/release() pair would leave the lock held.
    with _lock:
        _stats[namespace] += 1
131 |
--------------------------------------------------------------------------------
/src/python/config/osm-api-server.cfg:
--------------------------------------------------------------------------------
1 | ## Configuration information for the OSM API server.
2 | #
3 |
4 | ## Defaults
5 | #
6 | # area-max - Max width of a requested area.
7 | # changesets-max - Max changes in a changeset.
8 | # project-wiki - URL to the project wiki.
9 | # source-repository - URL to the opensource repository for the server
10 | # tracepoints-per-page - Max tracepoints returned for a query.
11 | # waynodes-max - Maximum nodes in a way.
12 |
13 | [DEFAULT]
14 | area-max = 180.0
15 | changesets-max = 50000
16 | project-doc = %(source-repository)s/blob/master/doc/Home.md
17 | source-repository = https://github.com/Mapquest/mapquest-osm-server
18 | tracepoints-per-page = 5000
19 | waynodes-max = 2000
20 |
21 |
22 | ## Datastore related
23 | #
24 | # changesets-inline-size - Max size for a changeset residing in a slab.
25 | # changesets-per-slab - The number of changesets in a slab.
26 | # datastore-backend - The kind of datastore to use.
27 | # One of: "couchdb", "membase" or "riak".
28 | # datastore-encoding - Encoding to be used for elements in the datastore.
29 | # One of: "json", "native" (for CouchDB) or "protobuf".
30 | # geodoc-lru-size - The size of the geodoc LRU buffer.
31 | # geodoc-lru-threads - The number of threads used to write geodoc information.
32 | # geohash-length - Controls the granularity of documents containing
33 | # geographical information.
34 | # nodes-inline-size - Max size for a node residing in a slab.
35 | # nodes-per-slab - The number of nodes in a slab.
36 | # relations-inline-size - Max size for a relation residing in a slab.
37 | # relations-per-slab - The number of relations in a slab.
38 | # scale-factor - For converting fractional lat/lon values to integers
39 | # slab-lru-size - Number of slabs in an LRU buffer.
40 | # ways-inline-size - Max size for a way residing in a slab.
41 | # ways-per-slab - The number of ways in a slab.
42 | #
43 | # Note that the front end server reads the values of the
44 | # 'changesets-per-slab', 'nodes-per-slab', 'relations-per-slab' and
45 | # 'ways-per-slab' configuration items from the data store.
46 |
47 | [datastore]
48 | changesets-inline-size = 256
49 | changesets-per-slab = 256
50 | datastore-backend = membase
51 | datastore-encoding = json
52 | geodoc-lru-size = 4096
53 | geodoc-lru-threads = 4
54 | geohash-length = 5
55 | nodes-inline-size = 256
56 | nodes-per-slab = 256
57 | relations-inline-size = 1024
58 | relations-per-slab = 64
59 | scale-factor = 10000000
60 | slab-lru-size = 1024
61 | slab-lru-threads = 8
62 | ways-inline-size = 1024
63 | ways-per-slab = 64
64 |
65 | ## Database manager utility
66 | #
# changeset-server - Upstream server to get missing changesets from
68 |
69 | [dbmgr]
70 | changeset-server = http://api.openstreetmap.org/
71 |
72 |
73 | ## Configuration information for the front-end
74 | #
# api-version - The current API version supported, reported in the
#               'version' attribute of API responses.
# api-version-{min,max}imum - The version range supported.
# api-call-timeout - Timeout, in seconds, for an API call.
79 | # port - TCP port on which to listen for API requests.
80 | # server-name - Name reported by the API server.
81 | # server-version - Version number for the prototype
82 |
83 | [front-end]
84 | api-version = 0.6
85 | api-version-minimum = %(api-version)s
86 | api-version-maximum = %(api-version)s
87 | api-call-timeout = 300
88 | port = 80
89 | server-name = OSM API Server Prototype %(server-version)s
90 | server-version = 0.6
91 |
92 |
93 | ## Configuration information for backends
94 |
95 | ## CouchDB
96 | #
97 | # dbname - Prefix used for each kind of couchdb database
98 | # dburl - Location of the CouchDB server.
99 |
100 | [couchdb]
101 | dbname = osm
102 | dburl = http://localhost:5984/
103 |
104 | ## Membase
105 | #
106 | # dbadminport - Administration port used by membase.
107 | # dbadminpw - Administrative password.
108 | # dbadminuser - Administrative user.
109 | # dbhost - Datastore host.
110 | # dbport - Datastore bucket port.
111 | # dbname - Name of the membase 'bucket' to use.
112 |
113 | [membase]
114 | dbadminport = 8091
115 | dbadminpw = osmapiserver
116 | dbadminuser = Administrator
117 | dbhost = localhost
118 | dbname = default
119 | dbport = 11211
120 |
121 | ## Riak
122 | #
123 | # dburl - Location of the Riak server.
124 |
125 | [riak]
126 | dburl = http://localhost:8091/riak/
127 |
--------------------------------------------------------------------------------
/doc/Dbmgr.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This document describes the `dbmgr` ingestion tool.
4 |
5 | ## What the tool does
6 |
7 | The `dbmgr` tool is used to:
8 |
9 | 1. initialize the data store,
10 | 2. to load data into the data store,
11 | 3. to incrementally change existing information in the data store.
12 |
13 | ## Requirements
14 |
15 | ### Initialization
16 |
17 | A command line option would specify that the data store needs to be
18 | reset. In the current code, this is the `-I` option.
19 |
20 | ### Input
21 |
22 | The following input formats are proposed to be accepted by the tool:
23 |
24 | 1. An OSM planet file in XML format.
25 |
26 | This would be used for initializing the data store from a regular
27 | [planet dump][planetdump].
28 |
29 | 2. A "full" planet dump in XML format.
30 |
31 | See: task [#4][issue4].
32 |
33 | This would be used for initializing the data store from a
34 | [full planet dump][fullplanetdump].
35 |
36 | 3. `osmChange` files in XML format.
37 |
38 | See: task [#14][issue14].
39 |
40 | These would be used for incremental updates to the data store,
41 | see the [planet.osm diffs page][planetdiffs].
42 |
43 | 4. A planet file in [PBF format][planetpbf].
44 |
45 | See: task [#3][issue3].
46 |
47 | The PBF format has the same content as the OSM planet format,
48 | but is smaller and faster to process.
49 |
Note that while "full" planet dumps include changeset information,
the [osmChange][] incremental format does not include information
about new changesets. Thus, if a full planet dump is being
53 | incrementally updated, additional changeset information for the change
54 | would need to be downloaded separately from the main OSM server.
55 |
56 | No data transfer format seems to support transfer of GPS tracks or of
57 | user information.
58 |
59 | ### Backends
60 |
61 | The following backends are planned to be supported (in approximate
62 | order of priority):
63 |
64 | 1. A Membase based backend.
65 | 2. A CouchDB/BigCouch based backend.
66 | 3. A Riak based backend.
67 |
68 | The code is to be structured in such a way that supporting another
69 | distributed key/value store should be easy.
70 |
71 | ## Live Updates
72 |
73 | The tool should be able to change data in the data store without
74 | cluster downtime.
75 |
76 | ## Non-requirements
77 |
78 | 1. Retrieval of diffs from 'planet.openstreetmap.org'.
79 |
    The tool does not automate the process of downloading
    minutely/hourly/daily diffs from [planet.openstreetmap.org][planetdiffs].
82 |
83 | ## Handling multiple backends
84 |
85 | Code to support each type of backend (CouchDB, Membase, etc.)
86 | resides in a separate Python module (e.g., `datastore/ds_membase.py`).
87 |
The specific module needed is loaded dynamically (using
89 | `__import__`); the module is expected to provide a class `Datastore`
90 | that implements the required backend.
91 |
92 | This approach avoids (package) dependencies on support code for unused
93 | backend modules.
94 |
95 | ## Sizing Numbers
96 |
97 | An analysis of `swales-101025.osm.bz2`. This subset contains:
98 |
99 | * 816036 nodes
100 | * 80690 ways
101 | * 382 relations
102 |
103 | ### Element sizes with JSON based storage
104 |
105 | The OSM elements in the `swales-101025.osm.bz2` subset were stored in
106 | the data store in JSON encoded form. The size distribution seen was
107 | as follows:
108 |
109 | * Nodes
110 | * Average size 202 bytes
111 | * 12157 (1.489%) nodes exceed 256 bytes of JSON
112 | * 2538 (0.311%) nodes exceed 512 bytes of JSON
113 | * Ways
114 | * Average size 351 bytes
115 | * 7134 (8.8%) exceed 512 bytes
116 | * 1267 (1.6%) exceed 1024 bytes
117 | * Relations
118 | * Average size 1477 bytes
119 | * 90 (23.6%) exceed 2048 bytes
120 | * 44 (11.5%) exceed 3072 bytes
121 | * 25 (6.5%) exceed 4096 bytes
122 |
123 |
124 |
125 | [fullplanetdump]: http://wiki.openstreetmap.org/wiki/Planet.osm/full
126 | [issue3]: https://github.com/MapQuest/mapquest-osm-server/issues/3
127 | [issue4]: https://github.com/MapQuest/mapquest-osm-server/issues/4
128 | [issue14]: https://github.com/MapQuest/mapquest-osm-server/issues/14
129 | [osmChange]: http://wiki.openstreetmap.org/wiki/OsmChange
130 | [planetdiffs]: http://wiki.openstreetmap.org/wiki/Planet.osm/diffs
131 | [planetdump]: http://wiki.openstreetmap.org/wiki/Planet.osm
132 | [planetpbf]: http://wiki.openstreetmap.org/wiki/PBF
133 |
--------------------------------------------------------------------------------
/src/python/apiserver/const.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | ## Define "constants", so that Python catches mis-spellings early.
24 |
# Most of these constants name configuration items, element namespaces,
# or XML tag/attribute strings used throughout the server.
_ID = '_id'
API = 'api'
API_CALL_TIMEOUT = 'api-call-timeout'
API_VERSION = 'api-version'
API_VERSION_MAXIMUM = 'api-version-maximum'
API_VERSION_MINIMUM = 'api-version-minimum'
AREA = 'area'
AREA_MAX = 'area-max'
BBOX = 'bbox'
BOUNDS = 'bounds'
CFGSLAB = 'cfgslab'
CFGVERSION = 1  # Configuration schema version number.
CHANGESET = 'changeset'
CHANGESETS = 'changesets'
CHANGESETS_INLINE_SIZE = 'changesets-inline-size'
CHANGESETS_PER_SLAB = 'changesets-per-slab'
CHANGESETS_MAX = 'changesets-max'
CONFIGURATION_SCHEMA_VERSION = 'configuration-schema-version'
CONTENT_TYPE = 'Content-Type'
COUCHDB = 'couchdb'
DATASTORE = 'datastore'
DATASTORE_BACKEND = 'datastore-backend'
DATASTORE_CONFIG = 'datastore-config'
DATASTORE_ENCODING = 'datastore-encoding'
DBHOST = 'dbhost'
DBJOB_ADDELEM = 'dbjob-add-elem'
DBJOB_QUIT = 'dbjob-quit'
DBNAME = 'dbname'
DBNAME_SUFFIXES = 'cgnrw' # changesets, geodocs, nodes, relations, ways
DBPORT = 'dbport'
DBURL = 'dburl'
DEFAULT = 'DEFAULT'
ELEMENT = 'element'
FRONT_END = 'front-end'
GENERATOR = 'generator'
GEODOC = 'geodoc'
GEODOC_LRU_SIZE = 'geodoc-lru-size'
GEODOC_LRU_THREADS = 'geodoc-lru-threads'
GEOHASH_LENGTH = 'geohash-length'
ID = 'id'
JSON = 'json'
K = 'k'
LAT = 'lat'
LAT_MAX = +90.0
LAT_MIN = -90.0
LON = 'lon'
LON_MAX = +180.0
LON_MIN = -180.0
MAXIMUM = 'maximum'
MAXIMUM_ELEMENTS = 'maximum_elements'
MAXGHLAT = 89.999999999999992  # Maximum latitude supported by the geohash code.
MAXLAT = 'maxlat'
MAXLON = 'maxlon'
MEMBASE = 'membase'
MEMBASE_MAX_VALUE_LENGTH = 20 * 1024 * 1024  # Largest single membase value (20 MB).
MEMBER = 'member'
MEMBERS = 'members'
MINIMUM = 'minimum'
MINLAT = 'minlat'
MINLON = 'minlon'
ND = 'nd'
NODE = 'node'
NODES = 'nodes'
NODES_INLINE_SIZE = 'nodes-inline-size'
NODES_PER_SLAB = 'nodes-per-slab'
OSM = 'osm'
PER_PAGE = 'per_page'
PORT = 'port'
PROJECT_DOC = 'project-doc'
PROTOBUF = 'protobuf'
REF = 'ref'
REFERENCES = 'references'
RELATION = 'relation'
RELATIONS = 'relations'
RELATIONS_INLINE_SIZE = 'relations-inline-size'
RELATIONS_PER_SLAB = 'relations-per-slab'
ROLE = 'role'
SCALE_FACTOR = 'scale-factor'
SECONDS = 'seconds'
SERVER_NAME = 'server-name'
SERVER_VERSION = 'server-version'
# Slab element states (cf. SLAB_INLINE / SLAB_NOT_PRESENT below).
SLAB_INDIRECT = 1 # Element is stored indirectly (outside the slab).
SLAB_INLINE = 0 # Element is present inline.
SLAB_LRU_SIZE = 'slab-lru-size'
SLAB_LRU_THREADS = 'slab-lru-threads'
SLAB_NOT_PRESENT = 2 # Element is not present in the slab.
SOURCE_REPOSITORY = 'source-repository'
STATUS = 'status'
TAG = 'tag'
TAGS = 'tags'
TEXT_XML = 'text/xml'
TIMEOUT = 'timeout'
TRACEPOINTS = 'tracepoints'
TRACEPOINTS_PER_PAGE = 'tracepoints-per-page'
TYPE = 'type'
UTF8 = 'utf-8'
V = 'v'
VERSION = 'version'
WAY = 'way'
WAYS = 'ways'
WAYS_INLINE_SIZE = 'ways-inline-size'
WAYS_PER_SLAB = 'ways-per-slab'
WAYNODES = 'waynodes'
WAYNODES_MAX = 'waynodes-max'
129 |
--------------------------------------------------------------------------------
/src/python/dbmgr/dbm_ops.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Operations on an OSM database."""
24 |
25 | import apiserver.const as C
26 |
27 | from apiserver.osmelement import new_osm_element
28 | from dbmgr.dbm_stats import increment_stats
29 | from dbmgr.dbm_geotables import GeoGroupTable
30 |
def make_backreference(namespace, elemid):
    """Encode a back-reference to element 'elemid' in 'namespace'.

    The reference is the upper-cased first letter of the namespace
    (e.g. 'N' for nodes) followed by the element id.
    """
    tag = namespace[0].upper()
    return tag + elemid
39 |
class DBOps:
    """This class implements the semantics of adding OSM elements and
    changesets to the backend."""

    def __init__(self, config, options, db):
        """Initialize an operations structure.

        config -- parsed configuration information.
        options -- parsed command-line options (``verbose`` is read).
        db -- handle to the backing datastore.
        """
        self.db = db
        self.verbose = options.verbose
        self.geotable = GeoGroupTable(config, options, db)

    def add_element(self, elem):
        """Add an element to the datastore.

        Nodes are additionally registered with the geo table; ways and
        relations cause a back-reference to be recorded on each element
        they refer to.
        """

        self.db.store(elem)

        # If the element is a node, add it to the appropriate geodoc.
        ns = elem.namespace
        backreference = make_backreference(ns, elem.id)

        if self.verbose:
            increment_stats(ns)

        # Do element-specific processing.
        if ns == C.NODE:
            # Add the element to the appropriate geodoc.
            self.geotable.add(elem)

        elif ns == C.WAY:
            # Backlink referenced nodes to the current way.
            for (rstatus, node_or_key) in \
                    self.db.fetch_keys(C.NODE, map(str, elem[C.NODES])):
                if rstatus:
                    node = node_or_key
                else:
                    # The node is not in the store yet; create a
                    # placeholder element for it.
                    node = new_osm_element(C.NODE, node_or_key)
                node[C.REFERENCES].add(backreference)
                self.db.store(node)

        elif ns == C.RELATION:
            # If the element is a relation, backlink referenced nodes,
            # ways & relations.

            def _retrieve(selector, members):
                "Return the IDs of members whose type matches 'selector'."
                return [str(mref) for (mref, mrole, mtype) in members
                        if mtype == selector]

            members = elem[C.MEMBERS]

            # BUG FIX: the original iterated over C.RELATIONS (a
            # configuration-key constant) instead of the C.RELATION
            # namespace used everywhere else in this module.  Local
            # names below are also distinct from 'ns'/'elem' so the
            # relation being processed is not clobbered.
            elements = []
            for member_ns in [C.NODE, C.WAY, C.RELATION]:
                elements.append((member_ns, _retrieve(member_ns, members)))

            for (member_ns, refs) in elements:
                if len(refs) == 0:
                    continue
                # Retrieve all elements referenced by the relation.
                for (rstatus, elem_or_key) in self.db.fetch_keys(member_ns,
                                                                refs):
                    if rstatus:
                        member_elem = elem_or_key
                    else:
                        member_elem = new_osm_element(member_ns, elem_or_key)

                    # Add a backreference to the element being
                    # referenced by this relation.
                    member_elem[C.REFERENCES].add(backreference)
                    self.db.store(member_elem)

    def add_changeset(self, changeset):
        "Add a changeset to the database.  Not implemented yet."
        raise NotImplementedError

    def finish(self):
        """Signal the end of DB operations."""

        # Push out all pending geodoc changes.
        self.geotable.flush()

        # Request the underlying database to wind up operation.
        self.db.finalize()
119 |
--------------------------------------------------------------------------------
/src/python/frontend/__main__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | ## The script entry point for the front-end server.
24 |
import os.path
import sys

import tornado.httpserver
import tornado.ioloop
import tornado.options

import apiserver.const as C
from apiserver.osmelement import init_osm_factory
from datastore.slabutil import init_slabutil
from fe import OSMFrontEndServer
33 |
# Where to find configuration information.
default_config_directory = "config"
default_config_file = "osm-api-server.cfg"

# Command-line options understood by the front-end server.  Where a
# value is also present in the configuration file, the command-line
# setting wins (see main()).
tornado.options.define("backend", default=None,
                       type=str, metavar="BACKEND",
                       help="datastore backend to use")
tornado.options.define("config", default=default_config_file,
                       type=str, metavar="FILE",
                       help="configuration file to use")
tornado.options.define("encoding", default=None,
                       type=str, metavar="ENCODING",
                       help="Encoding used for values")
tornado.options.define("verbose", default=False,
                       type=bool, metavar="BOOLEAN",
                       help="Control verbosity")
50 |
def error(message):
    """Report a fatal error on stderr and terminate with exit code 1."""
    text = "Error: %s\n" % message
    sys.stderr.write(text)
    sys.exit(1)
55 |
##
## Script entry point.
##
def main():
    """Launch the API server.

    Reads configuration, instantiates the configured datastore
    backend, and starts a Tornado HTTP server on the configured port.
    Exits via error() on configuration problems.
    """
    # Parse command line options if present.
    tornado.options.parse_command_line()
    options = tornado.options.options

    # Bring in (server-wide) configuration information.
    try:
        import configparser  # Python 3.x
    except ImportError:
        import ConfigParser as configparser  # Python 2.x

    # Read configuration information.  A file named on the command
    # line takes precedence over the installed default.
    configfiles = [options.config,
                   os.path.join(sys.path[0], default_config_directory,
                                default_config_file)]
    cfg = configparser.ConfigParser()
    cfg.read(configfiles)

    # Sanity check.
    if not cfg.has_section(C.FRONT_END):
        error("Incomplete configuration information, tried:\n\t" +
              "\n\t".join(configfiles))

    # Allow command-line options to override the configuration file.
    if options.backend:
        cfg.set(C.DATASTORE, C.DATASTORE_BACKEND, options.backend)
    if options.encoding:
        cfg.set(C.DATASTORE, C.DATASTORE_ENCODING, options.encoding)

    # Load the desired interface to the datastore.
    backend = cfg.get(C.DATASTORE, C.DATASTORE_BACKEND)
    try:
        module = __import__('datastore.ds_' + backend, fromlist=['Datastore'])
        datastore = module.Datastore(cfg)
    # BUG FIX: 'except ImportError, x' is Python-2-only syntax; the
    # 'as' form works on Python 2.6+ and Python 3.
    except ImportError as x:
        error("Could not initialize datastore of type \"%s\": %s" %
              (backend, str(x)))

    # Initialize the OSM element factory and other modules.
    init_slabutil(cfg)
    init_osm_factory(cfg)

    # Create an instance of the front-end server.  Note that
    # tornado.httpserver and tornado.ioloop must be imported
    # explicitly at the top of the file; importing tornado.options
    # alone does not make them available.
    port = cfg.getint(C.FRONT_END, C.PORT)
    feserver = OSMFrontEndServer(cfg, options, datastore)
    http_server = tornado.httpserver.HTTPServer(feserver.application)
    http_server.listen(port)

    # Start the server.
    try:
        tornado.ioloop.IOLoop.instance().start()
    except KeyboardInterrupt:
        if options.verbose:
            pass  # Print statistics etc.

#
# Invoke main()
#
if __name__ == "__main__":
    main()
120 |
--------------------------------------------------------------------------------
/src/python/tests/test_slabutil.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | import apiserver.const as C
24 | from datastore.slabutil import *
25 | from ConfigParser import ConfigParser
26 |
27 | INLINE_SIZE = 256
28 | PER_SLAB = 1024
29 | SLAB_LRU_SIZE = 8
30 |
def pytest_funcarg__config(request):
    "Prepare a configuration parser object"

    cfg = ConfigParser()
    cfg.add_section(C.DATASTORE)

    # Configure identical inline-size and per-slab limits for every
    # slabbed namespace.
    inline_keys = (C.CHANGESETS_INLINE_SIZE, C.NODES_INLINE_SIZE,
                   C.RELATIONS_INLINE_SIZE, C.WAYS_INLINE_SIZE)
    per_slab_keys = (C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
                     C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB)

    for key in inline_keys:
        cfg.set(C.DATASTORE, key, str(INLINE_SIZE))
    for key in per_slab_keys:
        cfg.set(C.DATASTORE, key, str(PER_SLAB))

    return cfg
46 |
47 |
def test_use_slab(config):
    "Check that the expected namespaces use slabs."

    slabutil_init(config)

    slabbed = (C.CHANGESET, C.NODE, C.RELATION, C.WAY)
    unslabbed = (C.GEODOC,)

    # These namespaces are stored in slabs...
    for namespace in slabbed:
        assert slabutil_use_slab(namespace) is True

    # ...while these are currently stored directly.
    for namespace in unslabbed:
        assert slabutil_use_slab(namespace) is False
60 |
def test_get_config(config):
    "Check the return values from the 'slabutil_get_config()' method."
    slabutil_init(config)
    for namespace in (C.CHANGESET, C.NODE, C.RELATION, C.WAY):
        inline, per_slab = slabutil_get_config(namespace)
        assert (inline, per_slab) == (INLINE_SIZE, PER_SLAB)
68 |
def test_groupkeys(config):
    "Check the expected return values from the 'group_keys()' API."

    slabutil_init(config)

    # With PER_SLAB == 1024, keys fall into slabs starting at 0, 1024
    # and 2048; only the slab-key prefix differs per namespace.
    base_slabset = {
        "0": set(['0', '1', '511', '1023']),
        "1024": set(['1024', '1025', '2047']),
        "2048": set(['2048'])
    }

    def expected_for(prefix):
        "Build the expected slabset for a namespace slab-key prefix."
        return dict((prefix + suffix, members)
                    for (suffix, members) in base_slabset.items())

    keys = [str(k) for k in (0, 1, 511, 1023, 1024, 1025, 2047, 2048)]

    assert slabutil_group_keys(C.NODE, keys) == expected_for("NL")
    assert slabutil_group_keys(C.WAY, keys) == expected_for("WL")
    assert slabutil_group_keys(C.RELATION, keys) == expected_for("RL")
102 |
def test_groupkeys_nonnumeric(config):
    "Check the expected return values from the 'group_keys()' API."

    slabutil_init(config)

    keys = ['tdr4t', 's0000']

    # Non-numeric (geodoc) keys map to one singleton slab per key.
    expected = dict(("GL" + key, set([key])) for key in keys)

    assert slabutil_group_keys(C.GEODOC, keys) == expected
116 |
def test_make_slabkey(config):
    "Test the make_slabkey() API."

    slabutil_init(config)

    # (namespace, element key) -> expected slab key.
    cases = {
        (C.CHANGESET, '4567'): 'CL4096',
        (C.GEODOC, 'tdr4t'): 'GLtdr4t',
        (C.NODE, '1234'): 'NL1024',
        (C.RELATION, '16385'): 'RL16384',
        (C.WAY, '2345'): 'WL2048',
    }

    for ((namespace, key), slabkey) in cases.items():
        assert slabutil_make_slabkey(namespace, key) == slabkey
133 |
--------------------------------------------------------------------------------
/src/python/dbmgr/dbm_input.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Turn input sources into iterables.
24 |
25 | Exported functions:
26 |
27 | makesource -- Turn a file into an iterable that returns OSM elements.
28 | """
29 |
30 | import gzip
31 | import os
32 |
33 | from lxml.etree import iterparse
34 | from pipes import quote
35 |
36 | from apiserver.osmelement import encode_coordinate, new_osm_element
37 | import apiserver.const as C
38 |
def _make_osm_iterator(config, f):
    """Return a generator yielding OSM elements parsed from 'f'.

    config -- parsed configuration information.
    f -- file-like object containing an OSM planet XML stream.

    Raises ValueError if the stream's root element is not <osm>.
    """

    # Validate the configured scale factor up front.  NOTE(review):
    # the value itself is not used in this function; coordinate
    # scaling appears to happen inside encode_coordinate() -- confirm.
    scalefactor = config.getint(C.DATASTORE, C.SCALE_FACTOR)

    parser = iter(iterparse(f, events=('start', 'end')))
    # next(parser) works on both Python 2 and 3, unlike parser.next().
    event, root = next(parser)
    if root.tag != u'osm':
        raise ValueError("Unexpected root tag: %s" % root.tag)

    depth = 0
    doc = None
    ignored_elements = ['bound', 'bounds']
    processed_elements = ('changeset', 'node', 'way', 'relation')

    # Parse the input file.
    for event, elem in parser:

        element_name = elem.tag
        if element_name in ignored_elements:
            continue

        if event == 'start':
            if element_name in processed_elements:
                assert depth == 0

                # Start of the element.  Copy 'standard' attributes,
                # translating them to native values where possible.
                doc = new_osm_element(element_name.lower(), elem.get('id'))
                for k, v in elem.items():
                    if k == 'visible':
                        # BUG FIX: the original used bool(v), but
                        # bool('false') is True for any non-empty
                        # string; compare against the literal instead.
                        v = (v == 'true')
                    elif k == 'version' or k == 'uid':
                        v = int(v)
                    elif k == 'lat' or k == 'lon':
                        v = encode_coordinate(v)
                    doc[k] = v

            elif element_name == 'tag':
                # Each 'tag' has a key/value associated with it.
                doc.setdefault('tags', {})[elem.get('k')] = elem.get('v')

            elif element_name == 'nd':
                # <nd> elements contain node references of a way.
                doc['nodes'].add(int(elem.get('ref')))

            elif element_name == 'member':
                # Collect the list of (ref, role, type) tuples.
                doc.setdefault('members', []).append((elem.get('ref'),
                                                      elem.get('role'),
                                                      elem.get('type')))
            depth = depth + 1

        elif event == 'end':
            depth = depth - 1
            if depth == 0:
                yield doc  # Return a complete element to the caller.

            root.clear()  # Keep memory usage down.
98 |
99 |
def makesource(config, options, fn):
    """Return an iterator over the OSM elements contained in file 'fn'.

    The file may be compressed (.bz2 or .gz); after stripping the
    compression suffix, the remaining extension selects the parser.

    Raises NotImplementedError for recognized-but-unsupported formats
    (.osc, .pbf) and ValueError for unrecognized ones.
    """

    # Determine the uncompression technique needed.
    basefn, ext = os.path.splitext(fn)

    if ext == ".bz2":
        f = os.popen("bzcat %s" % quote(fn), 'r')
    elif ext == ".gz":
        f = gzip.GzipFile(fn, mode='r')
    else:
        # Not compressed: the format extension is still attached.
        basefn = fn
        f = open(fn, mode='r')

    # Determine the file format.
    if basefn.endswith(".osc"):
        raise NotImplementedError("OsmChange input")
    if basefn.endswith(".pbf"):
        raise NotImplementedError("PBF input")
    if basefn.endswith(".xml") or basefn.endswith(".osm"):
        return _make_osm_iterator(config, f)

    # BUG FIX: an unrecognized format previously fell off the end and
    # silently returned None; fail loudly instead.
    raise ValueError("Unrecognized input format: %s" % fn)
123 |
--------------------------------------------------------------------------------
/src/python/tests/test_dsmembase.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | import apiserver.const as C
24 | import memcache
25 | import json
26 | import pytest
27 |
28 | from datastore.ds_membase import Datastore
29 | from datastore.slabutil import slabutil_init
30 | from ConfigParser import ConfigParser
31 | import apiserver.osmelement as O
32 |
33 | __BADNAMESPACE = 'badnamespace'
34 | __DBHOST = 'localhost'
35 | __DBPORT = '11211'
36 | __INLINE_SIZE = 256
37 | __NOSUCHKEY = '__NOSUCHKEY__'
38 | __NOSUCHSLABELEMKEY = '-1'
39 | __PER_SLAB = 8
40 | __SLAB_LRU_SIZE = 8
41 | __SLAB_LRU_THREADS = 4
42 |
# Helper function.
def insert_key(key, value):
    "Store 'value' under 'key' directly via the memcache protocol."
    client = memcache.Client(['%s:%s' % (__DBHOST, __DBPORT)])
    client.set(key, value)
47 |
def retrieve_key(key):
    "Fetch the raw value stored under 'key' via the memcache protocol."
    client = memcache.Client(['%s:%s' % (__DBHOST, __DBPORT)])
    return client.get(key)
51 |
def pytest_funcarg__datastore(request):
    "Prepare a configuration parser object"

    cfg = ConfigParser()
    cfg.add_section(C.DATASTORE)

    # Identical inline-size and per-slab limits for each namespace.
    inline_keys = (C.CHANGESETS_INLINE_SIZE, C.NODES_INLINE_SIZE,
                   C.RELATIONS_INLINE_SIZE, C.WAYS_INLINE_SIZE)
    for key in inline_keys:
        cfg.set(C.DATASTORE, key, str(__INLINE_SIZE))

    per_slab_keys = (C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
                     C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB)
    for key in per_slab_keys:
        cfg.set(C.DATASTORE, key, str(__PER_SLAB))

    cfg.set(C.DATASTORE, C.DATASTORE_ENCODING, 'json')

    # Membase connection and LRU parameters.
    cfg.add_section(C.MEMBASE)
    cfg.set(C.MEMBASE, C.DBHOST, __DBHOST)
    cfg.set(C.MEMBASE, C.DBPORT, __DBPORT)
    cfg.set(C.DATASTORE, C.SLAB_LRU_SIZE, str(__SLAB_LRU_SIZE))
    cfg.set(C.DATASTORE, C.SLAB_LRU_THREADS, str(__SLAB_LRU_THREADS))

    slabutil_init(cfg)

    return Datastore(cfg)
77 |
def test_datastore_wrong_namespace(datastore):
    "Verify that an access to an unknown namespace is rejected."
    with pytest.raises(KeyError):
        datastore.fetch(__BADNAMESPACE, "0")
82 |
def test_datastore_direct_fetch(datastore):
    "Verify that directly fetchable elements can be read."
    key = 'Gs0000'
    geodoc_id = key[1:]
    expected = O.new_osm_element(C.GEODOC, geodoc_id)
    insert_key(key, O.encode_json(expected))

    assert datastore.fetch(C.GEODOC, geodoc_id) == expected
91 |
def test_datastore_failed_direct_fetch(datastore):
    "Verify that a non-existent element cannot be fetched."
    assert datastore.fetch(C.GEODOC, __NOSUCHKEY) is None
96 |
def test_datastore_failed_slab_fetch(datastore):
    "Verify that a non-existent element in a slab cannot be fetched."
    assert datastore.fetch(C.NODE, __NOSUCHSLABELEMKEY) is None
101 |
def test_datastore_slab_inline_fetch(datastore):
    """Verify that inline elements in a slab are fetched.

    Builds a slab containing inline nodes for the even-numbered keys
    only, then checks that even keys are retrieved and odd keys are
    reported as absent.
    """
    _slab_key = 'NL8'
    _slab_start = __PER_SLAB

    # Create a slab holding inline entries for the even keys only.
    slab = []
    slabkeys = range(_slab_start, _slab_start + __PER_SLAB)
    for key in slabkeys:
        if key % 2 == 0:
            n = O.new_osm_element(C.NODE, str(key))
            slab.append((C.SLAB_INLINE, n))

    insert_key(_slab_key, datastore.encode(slab))

    # NOTE: the original kept an unused counter 'c'; removed.
    i = 0
    for key in slabkeys:
        if key % 2 == 0:
            se, sn = slab[i]
            n = datastore.fetch(C.NODE, str(key))
            assert n == sn
            i += 1
        else:
            # Odd keys were never inserted.
            v = datastore.fetch(C.NODE, str(key))
            assert v is None
129 |
def test_datastore_write_element(datastore):
    "Test the store_element() entry point."

    key = 'Gs0000'
    geodoc_id = key[1:]
    geodoc = O.new_osm_element(C.GEODOC, geodoc_id)

    datastore.store_element(C.GEODOC, geodoc_id, geodoc)

    # The stored value must round-trip through the raw protocol.
    assert retrieve_key(key) == O.encode_json(geodoc)
140 |
--------------------------------------------------------------------------------
/src/python/tests/test_osmelement.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | import math
24 |
25 | from ConfigParser import ConfigParser
26 |
27 | import apiserver.const as C
28 | import apiserver.osmelement as O
29 |
30 | from datastore.slabutil import slabutil_init
31 |
def pytest_funcarg__config(request):
    "Prepare a configuration parser object."

    cfg = ConfigParser()

    # Datastore settings: scale factor plus slab sizing per namespace.
    datastore_settings = [
        (C.SCALE_FACTOR, '10000000'),
        (C.CHANGESETS_INLINE_SIZE, '1024'),
        (C.CHANGESETS_PER_SLAB, '8'),
        (C.NODES_INLINE_SIZE, '1024'),
        (C.NODES_PER_SLAB, '8'),
        (C.RELATIONS_INLINE_SIZE, '1024'),
        (C.RELATIONS_PER_SLAB, '8'),
        (C.WAYS_INLINE_SIZE, '1024'),
        (C.WAYS_PER_SLAB, '8'),
    ]
    cfg.add_section(C.DATASTORE)
    for (key, value) in datastore_settings:
        cfg.set(C.DATASTORE, key, value)

    cfg.add_section(C.FRONT_END)
    cfg.set(C.FRONT_END, C.SERVER_VERSION, '0.6')
    cfg.set(C.FRONT_END, C.SERVER_NAME, 'Test')

    slabutil_init(cfg)

    return cfg
55 |
56 |
def test_new_node(config):
    "Test the creation of a node element."

    O.init_osm_factory(config)

    nodeid = '42'
    node = O.new_osm_element(C.NODE, nodeid)

    # The 'id' field matches the requested ID...
    assert node.id == nodeid
    # ...and the C.REFERENCES field exists and is an empty set.
    assert node[C.REFERENCES] == set()
69 |
70 |
def test_new_way(config):
    "Test creation of a way element."

    O.init_osm_factory(config)
    wayid = '42'
    way = O.new_osm_element(C.WAY, wayid)

    # A new way has its ID set, and empty reference and node sets.
    assert way.id == wayid
    assert way[C.REFERENCES] == set()
    assert way[C.NODES] == set()
82 |
83 |
def test_new_relation(config):
    "Test creation of a relation element."

    O.init_osm_factory(config)
    relid = '42'
    relation = O.new_osm_element(C.RELATION, relid)

    # A new relation has its ID set, no back-references, no members.
    assert relation.id == relid
    assert relation[C.REFERENCES] == set()
    assert relation[C.MEMBERS] == []
95 |
96 |
def test_new_geodoc(config):
    "Test the creation of a geodoc element."

    O.init_osm_factory(config)
    georegion = 'szmyg'  # lat, long == 42, 42
    geodoc = O.new_osm_element(C.GEODOC, georegion)

    # A fresh geodoc carries its region ID, an empty node set, and a
    # bounding box with all four compass points.
    assert geodoc.id == georegion
    assert geodoc[C.NODES] == set()

    bbox = geodoc[C.BBOX]
    assert set(bbox.keys()) == set(['n', 's', 'e', 'w'])
111 |
112 |
def test_encode_coordinate(config):
    "Test encoding of a coordinate string."

    O.init_osm_factory(config)

    # The following tests assume that the scale factor in use is 10^7.
    assert config.getint(C.DATASTORE, C.SCALE_FACTOR) == 10000000

    # String inputs, from zero through large integral values.
    string_cases = [
        ('0', 0),                    # Zero.
        ('0.00123456', 12345),       # Tiny
        ('0.12345678', 1234567),     # Fraction only
        ('1.23456789', 12345678),    # Normal, small
        ('12.3456789', 123456789),   # Normal
        ('123.456789', 1234567890),  # Normal, large
        ('1', 10000000),             # Integral, small
        ('12', 120000000),           # Integral
        ('123', 1230000000),         # Integral, large
    ]

    # Floating point inputs.
    float_cases = [
        (0.0, 0),
        (0.123456, 1234560),
        (0.1234567, 1234567),
        (1.0, 10000000),
        (1.23456, 12345600),
        (12.3455899, 123455899),
    ]

    for (value, expected) in string_cases + float_cases:
        assert O.encode_coordinate(value) == expected
153 |
--------------------------------------------------------------------------------
/doc/DeploymentInstructions.md:
--------------------------------------------------------------------------------
1 | ## About
2 |
3 | This document describes how to install and maintain an instance of
4 | this server.
5 |
6 | ## Note
7 |
8 | Currently, the 'front-end server' and the 'ingestion tool' (the
9 | document [Overview][] describes what these are) work "in-place" in the
10 | source tree. An install-friendly package is yet to be created; see
11 | ticket [#5][issue5].
12 |
13 | ## Software Dependencies
14 |
15 | The server uses the following software packages:
16 |
17 | 1. The [Python][], programming language.
18 | 1. [Tornado][], a [Python][] web server framework, for the front-end.
19 | 1. [lxml][], a [Python][] XML parsing library, used by both the
20 | front-end and the ingestion tool.
21 | 1. The [cjson][] JSON (en|de)coder module.
22 | 1. [Membase][], a scalable, distributed key/value store, used as the
23 | data store.
24 | 1. [Python Geohash][pygeohash], a geohashing library.
25 | 1. [Python Memcache][pymemcache], a [Memcache][] interface for [Python][],
26 | used to connect to the [Membase][] server, in compatibility mode.
27 | 1. [Py.Test][pytest], a test framework.
28 |
29 | ### Installation on Ubuntu 10.04 LTS
30 |
31 | To install these dependencies on an Ubuntu GNU/Linux v10.04 LTS system, do:
32 |
33 | 1. Install [Membase][]:
34 | 1. Download the `.deb` file appropriate for your computer architecture
35 | from the project's [download page][membasedownload].
1. Install the downloaded `.deb` package using the **dpkg** utility.
37 | For example:
38 | `% sudo dpkg -i membase-server-community_x86_1.6.5.3.deb`
39 | Repeat this on all the machines that you wish to run your
40 | [Membase][] cluster on.
41 | 1. Using your browser, login to the membase console at
42 | http://*hostname*:8091, and create a default bucket of type
43 | 'membase' listening on port 11211. If you have multiple machines
44 | in your [Membase][] cluster, you would need to login and setup
45 | each of these.
46 |
47 | *Note*: By default [Membase][] will listen and accept protocol requests
48 | on *all* network interfaces. On an internet-facing server, you would
49 | need to adjust your firewall rules to prevent the world+dog from accessing
50 | your membase instance.
51 | 2. Install pre-packaged binaries:
52 | `% sudo apt-get install git-core gcc g++`
53 | `% sudo apt-get install python2.6 python2.6-dev python-lxml python-setuptools python-memcache python-cjson`
54 | 3. Install additional Python libraries and tools
55 | 1. Install [Python geohash][pygeohash] using `easy_install`:
56 | `% sudo easy_install python-geohash`
57 | 1. Install [Tornado][]:
58 | `% git clone https://github.com/facebook/tornado.git`
59 | `% cd tornado`
60 | `% sudo python setup.py install`
61 | 4. Optional stuff:
62 | 1. Install `py.test`, if you wish to run the tests:
63 | `% sudo easy_install pytest`
64 |
65 | ## Setup
66 |
67 | The procedure to bring up the server is as follows.
68 |
69 | 1. Install the dependencies listed above.
70 | 1. Checkout the server source from [GitHub][].
71 | `% git clone git://github.com/MapQuest/mapquest-osm-server.git`
72 | 1. Edit the file `src/python/config/osm-api-server.cfg`, and change
73 | the `dbhost` configuration item in section `membase` to point to
74 | where your [Membase][] instance lives. The default configuration
75 | assumes that your membase server is running on localhost.
1. Download a [planet.osm][osmplanet] dump, or a subset thereof, for
   example from one of the mirrors listed on the [planet.osm][osmplanet]
   wiki page.
78 | 1. Load in the downloaded planet file using the `db-mgr` tool:
79 | `% cd src/python`
80 | `% ./db-mgr PATH-TO-THE-DOWNLOADED-PLANET`
81 | 1. Run the front-end of the server:
82 | `% sudo ./front-end`
83 | The server listens for API requests on port 80 by default. The
84 | configuration item `port` in the configuration section `front-end`
85 | can be used to change this.
1. Check operation of the server.  Assuming the default
   configuration, you could try the `api/capabilities` URL as
   below:
89 |
90 | % curl http://localhost/api/capabilities
91 | <?xml version='1.0' encoding='utf-8'?>
92 | <osm version="0.6" generator="OSM API Server Prototype 0.6">
93 | <api>
94 | <version minimum="0.6" maximum="0.6"/>
95 | <area maximum="180.0"/>
96 | <tracepoints per_page="5000"/>
97 | <waynodes maximum="2000"/>
98 | <changesets maximum_elements="50000"/>
99 | <timeout seconds="300"/>
100 | </api>
101 | </osm>
102 |
103 | The document [SupportedRequests][] lists the current set of APIs supported.
104 |
105 |
106 |
107 | [github]: http://www.github.com/ "GitHub"
108 | [issue5]: https://github.com/MapQuest/mapquest-osm-server/issues/5 "Issue 5"
109 | [lxml]: http://lxml.de/ "XML Processing Library"
110 | [Membase]: http://www.membase.org/ "Membase"
111 | [membasedownload]: http://www.couchbase.com/downloads/membase-server/community
112 | [memcache]: http://memcached.org/ "Memcache"
113 | [osmplanet]: http://wiki.openstreetmap.org/wiki/Planet.osm "OSM Planet"
114 | [Overview]: Overview.md
115 | [pygeohash]: http://pypi.python.org/pypi/python-geohash "Geohashing library"
116 | [pymemcache]: http://pypi.python.org/pypi/python-memcached/ "Memcache interface"
117 | [pytest]: http://www.pytest.org/ "Py.Test"
118 | [Python]: http://www.python.org/ "The Python Programming Language"
119 | [SupportedRequests]: SupportedRequests.md
120 | [Tornado]: http://www.tornadoweb.org/ "The Tornado Web Server"
121 | [cjson]: http://pypi.python.org/pypi/python-cjson "The cjson JSON en/decoder library"
122 |
--------------------------------------------------------------------------------
/doc/Slabs.org:
--------------------------------------------------------------------------------
1 | SLABS -*- mode: org; -*-
2 |
3 | * Motivation
  OSM data has a large number of independently addressable elements
  such as nodes, ways, relations and changesets. Each element is named
6 | using a decimal string. There are today over a billion (10^9)
7 | elements in the OSM database.
8 |
9 | If directly expressed in key/value form, this means that the
10 | datastore needs to be able to deal with about a billion keys. OSM
11 | key sizes are of the order of 10 bytes; OSM values are a few hundred
12 | bytes on the average.
13 |
14 | The Membase datastore keeps all its keys in RAM by design. Membase
15 | also has an overhead of 120 bytes per key. Thus a straightforward
  mapping of element IDs to Membase keys leads to very large RAM
  requirements.
18 |
19 | Grouping multiple OSM elements into "slabs" is a work-around for
20 | this issue. Each "slab" is addressed using a Membase key.
21 | * Design
22 | ** Basic Design
23 | - Each element gets its own slab type (ways, nodes, changesets,
24 | and relations).
25 | - Elements are grouped into slabs. Elements can be in the following
26 | states in a slab:
27 | 1. Present in the datastore, and inline in the slab. Used for
28 | elements that are 'small' (for a configurable value of
29 | 'small').
30 | 2. Present in datastore, but not 'inline' in the slab.
      These elements are "oversized" and are stored separately.
32 | They are retrieved using an independent fetch from the
33 | datastore.
34 | 3. Not present in the datastore. Such elements may be
35 | `negatively cached', as an optimization.
36 | - Each kind of slab has two configuration variables:
37 | - The number of elements per slab (configuration variables:
38 | {nodes|ways|relations}-per-slab).
39 | - The max "inline" size of an element that resides in a slab.
40 | (configuration variables: {nodes|ways|relations}-inline-size).
41 | ** Size limits
42 | - Membase has a max size of approximately 20MB for each value.
43 | This sets the maximum size for the wire representation of each
44 | slab.
45 | - Membase keys are limited to 256 bytes. This limit is not
46 | expected to be a problem in the current design.
47 | ** Dealing with too-large elements
48 | If the total size for a slab is larger than some configurable
49 | limit, elements larger than the configuration limit
50 | (\*-inline-size) that are part of the slab can be made
51 | 'standalone'.
52 | ** I/O operations
53 | - I/O operations are done one slab at a time. Batching of slab I/O
54 | operations is not necessary since each slab would already be of a
55 | substantial size.
56 | ** Interaction with caching
57 | - OSM elements are cached locally so as to improve request
58 | handling latencies and to reduce the I/O transfer needs of
59 | the system.
60 | - The cache will hold all the elements for a given slab, or will
61 | hold none of them.
62 | - A cache element can be in one of the following states:
63 | - 'present' => present in the cache
64 | - 'not present' => not in the cache, but could be in the data
65 | store.
66 | - 'negatively cached' => definitely missing from the data store.
67 | - Slabs are managed by a buffer with 'least recently used'
68 | semantics.
69 | - Whenever an element in the cache is accessed, the slab to which
70 | the cache belongs is moved to the most-recently-used position
71 | in the slab LRU.
72 | - When the cache becomes 'full', the least recently used slab
73 | is ejected from the cache, along with all its contents.
74 | *** Reads of cache elements
75 | - A read miss causes the associated slab to be fetched and
76 | inserted into the most-recently-used end of the slab LRU buffer.
77 | All elements present in the slab will be inserted into the
78 | cache.
79 | - If I/O is in progress for the cache element/slab, then the
80 | thread of control performing the read will wait.
81 | - A read hit of a cache element causes its associated slab to move
82 | to the most-recently-used end of the slab LRU buffer.
83 | *** Writes of cache elements
84 | - A 'store' of a cache element will move its associated slab
85 | descriptor to the most-recently-used end of the slab LRU buffer.
86 | - If I/O is in progress for the slab associated with a cache
87 | element, the thread of control performing the write will wait
88 | for the I/O to complete.
89 | *** Reads of slabs
90 | - The current implementation handles one read request for a slab
91 | at a time.
92 | - When the I/O completes, all the elements in the slab are added
93 | to the cache.
94 | - Elements that would fall into the slab but are not present are
95 | marked as 'negatively cached'.
96 | - When performing a read of a slab:
97 | 1. Atomically mark the slab as I/O-in-progress. This causes
98 | subsequent retrievals of cache elements in the slab to block.
99 | 2. Issue the read.
100 | 3. Vivify elements based on the slab's contents, converting from
101 | the wire representation used (JSON/protobuf/whatever), and
102 | insert them into the cache.
103 | 4. If some elements in the slab are not 'inline', issue reads
104 | for these and vivify them.
105 | 5. Release the slab from the I/O-in-progress state, and insert
106 | it into the most-recently-used end of the slab LRU buffer.
107 | *** Writes of slabs
108 | - Slabs are scheduled to be written out in LRU order.
109 | - All 'inline' elements in a slab will be written out together
110 | (as part of the slab).
111 | - 'Non inline' elements are written back at the same time, but as
112 | individual objects.
113 | - All elements in the slab are removed from the cache when the
114 | slab is written to the data store.
115 | - Slabs that are to be written out are marked as 'I/O in progress'
116 | till the I/O completes. This is to prevent another thread from
117 | accessing an element/slab that is undergoing I/O.
118 | - When performing a write of slab:
119 | 1. Atomically mark the slab as 'I/O in progress'. This causes
120 | subsequent retrievals of cache elements referenced by the
121 | slab to block.
122 | 2. Collect all cache elements needed for creating the slab,
123 | and create the wire representation (JSON/protobuf/other) of
124 | the slab object.
125 | 3. Issue the write request.
126 | 4. When the write request completes, remove all the elements
127 | in the slab from the cache.
128 | 5. Finally, remove the slab from the slab LRU buffer.
129 |
--------------------------------------------------------------------------------
/src/python/dbmgr/__main__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | ## Script entry point for the database management tool.
24 |
25 | import os.path
26 | import sys
27 | import itertools
28 |
29 | ## Tool configuration
30 | devconfigdir = 'config'
31 | devconfigfilename = 'osm-api-server.cfg'
32 |
33 | toolname = 'dbmgr'
34 | toolversion = '0.1'
35 | toolconfig = '/etc/openstreetmap/osm-api-server.cfg'
36 |
37 | from datastore.ds_geohash import init_geohash
38 | from datastore.slabutil import init_slabutil
39 | from dbmgr.dbm_input import makesource
40 | from dbmgr.dbm_ops import DBOps
41 | from dbmgr.dbm_stats import fini_statistics, init_statistics
42 |
43 | import apiserver.const as C
44 | from apiserver.osmelement import init_osm_factory
45 |
46 | #
47 | #
48 | # SCRIPT ENTRY POINT
49 | #
50 |
51 | usage = '''%prog [--I|--init] [options]
52 | %prog [options] [files]...
53 |
54 | Manage an OSM database.
55 |
56 | Use option -h/--help for help on usage.'''
57 |
58 | def main():
59 | 'Manage the OSM DB during development.'
60 | from optparse import OptionParser
61 |
62 | parser = OptionParser(usage=usage, prog=toolname,
63 | version='%prog ' + toolversion)
64 | parser.add_option('-b', '--buffering', dest='buffering', metavar="NUMBER",
65 | default=64, type="int",
66 | help="Buffer size in KB for *zip uncompression " +
67 | "[%default]")
68 | parser.add_option('-B', '--backend', dest='backend', metavar='DBTYPE',
69 | default=None,
70 | help="Type of backend to use [from configuration file]"),
71 | parser.add_option('-C', '--config', dest='config', metavar="FILENAME",
72 | default=toolconfig,
73 | help="Path to configuration information [%default]")
74 | parser.add_option('-E', '--encoding', dest='datastore_encoding',
75 | metavar='ENCODING', default=None, type="str",
76 | help="Encoding for use for values [%default]"),
77 | parser.add_option('-I', '--init', dest='doinit', action='store_true',
78 | default=False, help='(Re-)initialize the backend'),
79 | parser.add_option('-n', '--dryrun', dest='dryrun', metavar="BOOLEAN",
80 | default=False, action="store_true",
81 | help="Parse, but do not upload data [%default]")
82 | parser.add_option('-T', '--nothreading', dest='nothreading',
83 | metavar="BOOLEAN", default=False, action="store_true",
84 | help="Do not use threads [%default]"),
85 | parser.add_option('-v', '--verbose', dest='verbose', metavar="BOOLEAN",
86 | default=False, action="store_true",
87 | help="Be verbose")
88 | parser.add_option("-x", '--nochangesets', dest="nochangesets",
89 | action="store_true", default=False,
90 | help="Skip retrieval of changeset information "
91 | "[%default]")
92 |
93 | options, args = parser.parse_args()
94 |
95 | # Read configuration information.
96 | configfiles = [options.config, os.path.join(sys.path[0], devconfigdir,
97 | devconfigfilename)]
98 | from ConfigParser import ConfigParser
99 | cfg = ConfigParser()
100 | cfg.read(configfiles)
101 |
102 | # Sanity check.
103 | if not cfg.has_section(C.FRONT_END):
104 | parser.error("Incomplete configuration, tried:\n\t" +
105 | "\n\t".join(configfiles))
106 |
107 | # Override configuration options specified on the command line.
108 | if options.datastore_encoding:
109 | cfg.set(C.DATASTORE, C.DATASTORE_ENCODING, options.datastore_encoding)
110 | if options.backend:
111 | cfg.set(C.DATASTORE, C.DATASTORE_BACKEND, options.backend)
112 |
113 | # Initialize statistics.
114 | init_statistics(cfg, options)
115 |
116 | # Load in the desired interface to the datastore.
117 | backend = cfg.get(C.DATASTORE, C.DATASTORE_BACKEND)
118 | try:
119 | module = __import__('datastore.ds_' + backend,
120 | fromlist=['Datastore'])
121 | except ImportError, x:
122 | parser.exit("Error: Could not initialize backend of type \"%s\": %s" %
123 | (backend, str(x)))
124 |
125 | db = module.Datastore(cfg, not options.nothreading, True)
126 |
127 | if options.doinit:
128 | db.initialize()
129 |
130 | ops = DBOps(cfg, options, db)
131 |
132 | # Initialize the geohash module.
133 | init_geohash(cfg.getint(C.DATASTORE, C.GEOHASH_LENGTH),
134 | cfg.getint(C.DATASTORE, C.SCALE_FACTOR))
135 |
136 | # Initialize the OSM element factory and related modules.
137 | init_slabutil(cfg)
138 | init_osm_factory(cfg)
139 |
140 | # Turn file names into iterators that deliver an element at a time.
141 | try:
142 | iterlist = map(lambda fn: makesource(cfg, options, fn), args)
143 | inputelements = itertools.chain(*iterlist)
144 | except Exception, x:
145 | parser.exit("Error: " + str(x))
146 |
147 | for elem in inputelements:
148 | # Add basic elements
149 | if elem.namespace in [C.CHANGESET, C.NODE, C.RELATION, C.WAY]:
150 | ops.add_element(elem)
151 | else:
152 | raise NotImplementedError, "Element type: %s" % elem.kind
153 |
154 | ops.finish()
155 | fini_statistics(options)
156 |
157 | if __name__ == '__main__':
158 | main()
159 |
--------------------------------------------------------------------------------
/src/python/datastore/ds_membase.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """An interface to a Membase based backend store."""
24 |
25 | import apiserver.const as C
26 |
27 | import memcache # Use Memcache bindings (for now).
28 | memcache.SERVER_MAX_VALUE_LENGTH = C.MEMBASE_MAX_VALUE_LENGTH # Update limit.
29 |
30 | import types
31 | import threading
32 |
33 | from apiserver.osmelement import new_osm_element, OSMElement
34 | from datastore.ds import DatastoreBase
35 | from datastore.slabutil import *
36 |
37 | class DatastoreMembase(DatastoreBase):
38 | "An interface to a Membase (www.membase.org) datastore."
39 |
40 | SLAB_CONFIGURATION_KEYS = [C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
41 | C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB]
42 |
43 | def __init__(self, config, usethreads=False, writeback=False):
44 | "Initialize the datastore."
45 |
46 | self.conndb = {}
47 |
48 | DatastoreBase.__init__(self, config, usethreads, writeback)
49 |
50 | dbhosts = config.get(C.MEMBASE, C.DBHOST)
51 | dbport = config.get(C.MEMBASE, C.DBPORT)
52 |
53 | self.membasehosts = [h + ':' + dbport for h in dbhosts.split()]
54 |
55 | threads = [threading.currentThread()]
56 | if usethreads:
57 | threads.extend(self.threads)
58 |
59 | self.register_threads(threads)
60 |
61 | if writeback:
62 | # Store slab configuration information for subsequent
63 | # retrieval by the front end.
64 | slabconfig = new_osm_element(C.DATASTORE_CONFIG, C.CFGSLAB)
65 | for k in DatastoreMembase.SLAB_CONFIGURATION_KEYS:
66 | slabconfig[k] = config.get(C.DATASTORE, k)
67 | slabconfig[C.CONFIGURATION_SCHEMA_VERSION] = C.CFGVERSION
68 | self.slabconfig = slabconfig
69 | else:
70 | # Read slab configuration information from the data store.
71 | self.slabconfig = slabconfig = \
72 | self.retrieve_element(C.DATASTORE_CONFIG, C.CFGSLAB)
73 | if slabconfig is not None:
74 | schema_version = slabconfig.get(C.CONFIGURATION_SCHEMA_VERSION)
75 | if schema_version != C.CFGVERSION:
76 | raise ValueError, \
77 | "Datastore schema version mismatch: expected %s, " \
78 | "actual %s." % \
79 | (str(C.CFGVERSION), str(schema_version))
80 | for (k,v) in slabconfig.items():
81 | if k in DatastoreMembase.SLAB_CONFIGURATION_KEYS:
82 | config.set(C.DATASTORE, k, v)
83 | else:
84 | raise ValueError, \
85 | "Datastore is missing configuration information."
86 |
87 |
88 | def _get_connection(self):
89 | return self.conndb[threading.currentThread().name]
90 |
91 | def register_threads(self, threads):
92 | "Register threads with the datastore module."
93 | for t in threads:
94 | c = memcache.Client(self.membasehosts, debug=1)
95 | self.conndb[t.name] = c
96 |
97 | def retrieve_element(self, namespace, key):
98 | """Return the element for a key.
99 |
100 | Parameters:
101 |
102 | namespace - namespace for element.
103 | key - the key to retrieve.
104 | """
105 |
106 | dskey = namespace[0].upper() + key
107 |
108 | db = self._get_connection()
109 | wirebits = db.get(dskey)
110 |
111 | if wirebits is None:
112 | return None
113 | n = new_osm_element(namespace, key)
114 | n.from_mapping(self.decode(wirebits))
115 | return n
116 |
117 | def store_element(self, namespace, key, value):
118 | """Store an element at a key."""
119 |
120 | assert isinstance(value, OSMElement)
121 |
122 | dskey = namespace[0].upper() + key
123 | db = self._get_connection()
124 | db.set(dskey, self.encode(value.as_mapping()))
125 |
126 | def retrieve_slab(self, namespace, slabkey):
127 | """Return a slab of elements."""
128 |
129 | db = self._get_connection()
130 | wirebits = db.get(slabkey)
131 |
132 | if wirebits is None:
133 | return None
134 |
135 | slab = []
136 | for (st, kv) in self.decode(wirebits):
137 | if st == C.SLAB_NOT_PRESENT:
138 | continue
139 |
140 | if st == C.SLAB_INDIRECT:
141 | elem = self.retrieve_element(namespace, kv)
142 | assert elem is not None, "Missing indirect element"
143 | elif st == C.SLAB_INLINE:
144 | elem = new_osm_element(namespace, kv[C.ID])
145 | elem.from_mapping(kv)
146 | else:
147 | assert False, "Unknown status %d" % status
148 | slab.append((elem.id, elem))
149 |
150 | return slab
151 |
152 | def store_slab(self, namespace, slabkey, slabelems):
153 | """Store a slab's worth of contents."""
154 |
155 | _, nperslab = slabutil_get_config(namespace)
156 | assert len(slabelems) == nperslab
157 |
158 | slab = []
159 | for (st, e) in slabelems.items():
160 | if st:
161 | # Todo ... INDIRECT elements.
162 | slab.append((C.SLAB_INLINE, e.as_mapping()))
163 |
164 | rawbits = self.encode(slab)
165 | db = self._get_connection()
166 | db.set(slabkey, rawbits)
167 |
168 | def initialize(self):
169 | "Initialize the database."
170 | # Flush all existing elements.
171 | self.conndb[threading.currentThread().name].flush_all()
172 |
173 | # Save the current slab configuration.
174 | self.store_element(C.DATASTORE_CONFIG, C.CFGSLAB, self.slabconfig)
175 |
176 |
177 | Datastore = DatastoreMembase
178 |
--------------------------------------------------------------------------------
/src/python/datastore/slabutil.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Utility functions used for managing slab based access."""
24 |
25 | import collections
26 |
27 | import apiserver.const as C
28 |
29 | __all__ = [ 'init_slabutil', 'slabutil_get_config', 'slabutil_group_keys',
30 | 'slabutil_key_to_start_index', 'slabutil_make_slabkey',
31 | 'slabutil_make_slab', 'slabutil_use_slab' ]
32 |
33 | _slab_config = {}
34 |
35 | def _make_numeric_slabkey(ns, nperslab, elemid):
36 | slabno = (int(elemid) / nperslab) * nperslab
37 | return "%sL%d" % (ns, slabno)
38 |
39 | def _make_nonnumeric_slabkey(ns, elemid):
40 | return "%sL%s" % (ns, elemid)
41 |
42 | class _Slab:
43 | def __init__(self, namespace, slabkey):
44 | self.namespace = namespace
45 | self.slabkey = slabkey
46 |
47 |
48 | class _AlphabeticKeySlab(_Slab):
49 | def __init__(self, namespace, key, item):
50 | slabkey = _make_nonnumeric_slabkey(namespace[0].upper(), key)
51 | _Slab.__init__(self, namespace, slabkey)
52 | self._value = item
53 | self._key = key
54 |
55 | def __len__(self):
56 | return 1
57 |
58 | def items(self):
59 | return [(self._key, self._value)]
60 |
61 | def keys(self):
62 | return [self._key]
63 |
64 | def get(self, key):
65 | if key == self._key:
66 | return (True, self._value)
67 | return (False, key)
68 |
69 | def add(self, key, element):
70 | if key == self._key:
71 | assert element == self._value
72 | else:
73 | raise ValueError, "add() invoked multiple times."
74 |
75 | class _NumericKeySlab(_Slab):
76 | def __init__(self, namespace, items):
77 | if len(items) == 0 or not isinstance(items, list):
78 | raise ValueError, "items should be non-empty list."
79 | k, _ = items[0]
80 | _, nperslab = _slab_config[namespace]
81 |
82 | slabkey = _make_numeric_slabkey(namespace[0].upper(), nperslab, k)
83 | start = slabutil_key_to_start_index(namespace, slabkey)
84 |
85 | _Slab.__init__(self, namespace, slabkey)
86 |
87 | self._nperslab = nperslab
88 | self._start = start
89 | self._contents = [None] * nperslab
90 | for (k,v) in items:
91 | index = int(k)
92 | if index >= start + nperslab:
93 | raise ValueError, \
94 | "Index too large %s (start: %d, index: %d)" % \
95 | (slabkey, start, index)
96 | index = index % nperslab
97 | if self._contents[index]:
98 | raise ValueError, \
99 | "Repeated insertion at %s:%d" % (slabkey, index)
100 | self._contents[index] = v
101 |
102 | def __len__(self):
103 | return len(self._contents)
104 |
105 | def keys(self):
106 | return map(str, range(self._start, self._start + self._nperslab))
107 |
108 | def items(self):
109 | elements = []
110 | for i in range(self._nperslab):
111 | v = self._contents[i]
112 | if v is not None:
113 | elements.append((True, v))
114 | else:
115 | elements.append((False, str(self._start + i)))
116 | return elements
117 |
118 | def get(self, key):
119 | "Retrieve an object from the slab."
120 | index = int(key) % self._nperslab
121 | v = self._contents[index]
122 | if v is not None:
123 | return (True, v)
124 | return (False, key)
125 |
126 |
127 | def add(self, key, value):
128 | "Add an object at index."
129 | index = int(key) % self._nperslab
130 | self._contents[index] = value
131 |
132 | def init_slabutil(config):
133 | "Initialize the module."
134 | _slab_config[C.CHANGESET] = (
135 | config.getint(C.DATASTORE, C.CHANGESETS_INLINE_SIZE),
136 | config.getint(C.DATASTORE, C.CHANGESETS_PER_SLAB))
137 | _slab_config[C.NODE] = (
138 | config.getint(C.DATASTORE, C.NODES_INLINE_SIZE),
139 | config.getint(C.DATASTORE, C.NODES_PER_SLAB))
140 | _slab_config[C.RELATION] = (
141 | config.getint(C.DATASTORE, C.RELATIONS_INLINE_SIZE),
142 | config.getint(C.DATASTORE, C.RELATIONS_PER_SLAB))
143 | _slab_config[C.WAY] = (
144 | config.getint(C.DATASTORE, C.WAYS_INLINE_SIZE),
145 | config.getint(C.DATASTORE, C.WAYS_PER_SLAB))
146 |
147 | def slabutil_use_slab(namespace):
148 | "Return true of the given namespace uses slabs."
149 | return namespace in _slab_config
150 |
151 | def slabutil_make_slabkey(namespace, elemid):
152 | "Prepare a slab key for a given element and namespace."
153 | nsk = namespace[0].upper()
154 | if _slab_config.has_key(namespace):
155 | _, nperslab = _slab_config[namespace]
156 | return _make_numeric_slabkey(nsk, nperslab, elemid)
157 | else:
158 | return _make_nonnumeric_slabkey(nsk, elemid)
159 |
160 | def slabutil_group_keys(namespace, keys):
161 | "Group keys according to slabs."
162 |
163 | slabset = collections.defaultdict(set)
164 | nsk = namespace[0].upper()
165 |
166 | if slabutil_use_slab(namespace):
167 | _, nperslab = _slab_config[namespace]
168 | for k in keys:
169 | sk = _make_numeric_slabkey(nsk, nperslab, k)
170 | slabset[sk].add(k)
171 | else:
172 | for k in keys:
173 | sk = _make_nonnumeric_slabkey(nsk, k)
174 | slabset[sk].add(k)
175 |
176 | return slabset
177 |
178 | def slabutil_get_config(namespace):
179 | "Return the configuration for a given slab."
180 | return _slab_config[namespace]
181 |
182 | def slabutil_key_to_start_index(namespace, slabkey):
183 | """Return the start index of elements in a slab."""
184 | assert slabkey[1] == 'L'
185 | if slabutil_use_slab(namespace):
186 | return int(slabkey[2:])
187 | else:
188 | return slabkey[2:]
189 |
190 | def slabutil_make_slab(namespace, items):
191 | """Return a populated slab of the appropriate kind."""
192 |
193 | if slabutil_use_slab(namespace):
194 | return _NumericKeySlab(namespace, items)
195 | else:
196 | return _AlphabeticKeySlab(namespace, items)
197 |
--------------------------------------------------------------------------------
/src/python/dbmgr/dbm_geotables.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Group OSM nodes by their (lat, lon) coordinates.
24 |
25 | Exported classes:
26 |
27 | class GeoGroupTable -- track a set of geographical groupings.
28 |
29 | Usage:
30 |
31 | 1. Allocate a new table
32 | >>> gt = GeoGroupTable()
33 |
34 | 2. Add nodes to the table
35 | >>> for n in nodelist: gt.add(n)
36 |
37 | 3. Iterate over the unique groups
38 | >>> for k in gt.keys():
39 | ... print "Key:", k, "nodes:", gt[k]
40 | >>>
41 | """
42 |
43 | import geohash
44 | import collections
45 | import threading
46 | from Queue import Queue
47 |
48 | import apiserver.const as C
49 | from apiserver.osmelement import new_osm_element
50 | from datastore.lrucache import BoundedLRUBuffer
51 | from datastore.ds_geohash import geohash_key_for_element
52 |
53 | class NodeGroup:
54 | '''A set of OSM nodes, and their coordinates.
55 | '''
56 | def __init__(self):
57 | '''Initialize a node group.'''
58 | self.nodecoords = {}
59 |
60 | def __contains__(self, nodeid):
61 | return nodeid in self.nodecoords
62 |
63 | def add(self, osmnode):
64 | '''Add a node to the group.'''
65 | nodeid = osmnode.id
66 | assert nodeid not in self.nodecoords, \
67 | "Duplicate insertion of %s" % nodeid
68 | self.nodecoords[nodeid] = (osmnode[C.LAT], osmnode[C.LON])
69 |
70 | def update(self, nodelist):
71 | '''Update a nodegroup from a nodelist.'''
72 | for (nid, lat, lon) in nodelist:
73 | if nid not in self.nodecoords:
74 | self.nodecoords[nid] = (lat, lon)
75 | else:
76 | assert (lat, lon) == self.nodecoords[nid]
77 |
78 | def aslist(self):
79 | '''Return the list representation of a nodegroup.'''
80 | return [(nodeid, lat, lon) for (nodeid, (lat, lon)) in
81 | self.nodecoords.items()]
82 |
83 | class GeoGroupTable:
84 | '''Group OSM nodes by their geographical coordinates.
85 |
86 | The coordinates of the globe are partitioned into disjoint areas.
87 | Each partition is named by the geohash code of its (n,w) corner.
88 |
89 | Grouping of nodes is implemented by restricting the length of
90 | the geohash codes used.
91 | '''
92 |
93 | def __init__(self, config, options, db):
94 | '''Initialize the table.
95 |
96 | Keyword arguments:
97 | config - A ConfigParser instance.
98 | options - An optparse.OptionParser structure.
99 | db - A DB object supporting 'get()' and 'store()'
100 | methods.
101 | '''
102 | self.geodb = collections.defaultdict(NodeGroup)
103 | self.db = db
104 |
105 | lrusize = config.getint(C.DATASTORE, C.GEODOC_LRU_SIZE)
106 | self.lru = BoundedLRUBuffer(bound=lrusize, callback=self._cb)
107 |
108 | if options.nothreading:
109 | nthreads = 0
110 | else:
111 | nthreads = config.getint(C.DATASTORE, C.GEODOC_LRU_THREADS)
112 | self.nthreads = max(0, nthreads)
113 | if self.nthreads:
114 | self.wrthreads = []
115 | self.wrqueue = Queue(self.nthreads)
116 | self.wrcond = threading.Condition()
117 | self.wrpending = []
118 | for n in range(self.nthreads):
119 | t = threading.Thread(target=self._worker)
120 | t.name = "GeoWB-%d" % n
121 | t.daemon = True
122 | self.wrthreads.append(t)
123 | t.start()
124 |
125 | db.register_threads(self.wrthreads)
126 |
127 | def _cb(self, key, value):
128 | "Callback called when an LRU item is ejected."
129 | nodeset = self.geodb.pop(key)
130 | if self.nthreads: # Defer processing to a worker thread.
131 | self.wrqueue.put((key, nodeset))
132 | else: # Synchronous operation.
133 | self._write_geodoc(key, nodeset)
134 |
135 | def _worker(self):
136 | "Helper method, used by worker threads."
137 | while True:
138 | # Retrieve a work item.
139 | v = self.wrqueue.get()
140 | if v is None: # Exit the thread.
141 | self.wrqueue.task_done()
142 | return
143 |
144 | # Unpack the work item.
145 | key, nodeset = v
146 |
147 | # Mark the item as "I/O in progress".
148 | with self.wrcond:
149 | while key in self.wrpending:
150 | self.wrcond.wait()
151 |
152 | assert key not in self.wrpending
153 | self.wrpending.append(key)
154 |
155 | # Process this node set.
156 | self._write_geodoc(key, nodeset)
157 |
158 | # Remove the "I/O in progress" marker.
159 | with self.wrcond:
160 | assert key in self.wrpending
161 | self.wrpending.remove(key)
162 | self.wrcond.notifyAll()
163 |
164 | self.wrqueue.task_done()
165 |
166 | def _write_geodoc(self, key, nodegroup):
167 | "Merge in a group of nodes into a geodoc."
168 | assert isinstance(nodegroup, NodeGroup)
169 |
170 | geodoc = self.db.retrieve_element(C.GEODOC, key)
171 | if geodoc is None: # New document.
172 | geodoc = new_osm_element(C.GEODOC, key)
173 | nodegroup.update(geodoc[C.NODES])
174 | geodoc[C.NODES] = nodegroup.aslist()
175 | self.db.store_element(C.GEODOC, key, geodoc)
176 |
177 | def add(self, elem):
178 | '''Add information about a node 'elem' to the geo table.
179 |
180 | Usage:
181 | >>> gt = GeoGroupTable()
182 | >>> gt = gt.add(elem)
183 |
184 | The node 'elem' should have a 'lat' and 'lon' fields that
185 | encode its latitude and longitude respectively. The 'id'
186 | field specifies the node's "id".
187 | '''
188 |
189 | assert elem.namespace == C.NODE, "elem is not a node: %s" % str(elem)
190 |
191 | # Determine the geo-key for the node.
192 | ghkey = geohash_key_for_element(elem)
193 | # Retrieve the partition covering this location.
194 | ghdoc = self.geodb[ghkey]
195 |
196 | elemid = elem.id
197 | if elemid not in ghdoc:
198 | ghdoc.add(elem)
199 | self.lru[ghkey] = ghdoc
200 |
201 | def flush(self):
202 | "Wait pending I/Os"
203 |
204 | # Flush items from the LRU.
205 | self.lru.flush()
206 |
207 | if self.nthreads:
208 | # Wait for the work queue to drain.
209 | self.wrqueue.join()
210 |
--------------------------------------------------------------------------------
/src/python/tests/test_lrucache.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | import pytest
24 | from ConfigParser import ConfigParser
25 |
26 | import apiserver.const as C
27 | from datastore.lrucache import LRUCache
28 | from datastore.slabutil import slabutil_make_slab, slabutil_init
29 |
30 | _INLINE_SIZE = 256
31 | _LRUSZ = 8
32 | _SLABSZ = 16
33 | _NOSUCHKEY = '__nosuchkey__'
34 | _KEY = '_key'
35 | _NS = 'node'
36 | _NS1 = 'way'
37 |
def pytest_funcarg__lrucache(request):
    """Funcarg factory: a fresh, empty LRU cache of the standard test size."""
    cache = LRUCache(_LRUSZ)
    return cache
41 |
def pytest_funcarg__slabutil(request):
    """Funcarg factory: initialize the slab utilities for testing.

    Builds a minimal datastore configuration (inline sizes and
    per-slab counts for every element namespace), feeds it to
    slabutil_init(), and returns the configuration object.
    """
    config = ConfigParser()
    config.add_section(C.DATASTORE)

    inline_opts = (C.CHANGESETS_INLINE_SIZE, C.NODES_INLINE_SIZE,
                   C.RELATIONS_INLINE_SIZE, C.WAYS_INLINE_SIZE)
    for opt in inline_opts:
        config.set(C.DATASTORE, opt, str(_INLINE_SIZE))

    slab_opts = (C.CHANGESETS_PER_SLAB, C.NODES_PER_SLAB,
                 C.RELATIONS_PER_SLAB, C.WAYS_PER_SLAB)
    for opt in slab_opts:
        config.set(C.DATASTORE, opt, str(_SLABSZ))

    slabutil_init(config)
    return config
56 |
def test_empty(slabutil):
    "An empty cache has zero length and misses on any lookup."

    cache = LRUCache(_LRUSZ)
    assert len(cache) == 0
    assert cache.get(_NS, _KEY) is None
63 |
64 |
def test_get(lrucache, slabutil):
    "Insert one slab and read every element back."

    items = [(str(v), v) for v in range(_SLABSZ)]
    lrucache.insert_slab(slabutil_make_slab(_NS, items))

    for key, expected in items:
        status, value = lrucache.get(_NS, key)
        assert status
        assert value == expected
79 |
80 |
def test_duplicate_slabdesc(lrucache, slabutil):
    "Re-inserting the same slab descriptor raises ValueError."

    items = [(str(v), v) for v in range(_SLABSZ)]
    slab = slabutil_make_slab(_NS, items)

    lrucache.insert_slab(slab)
    with pytest.raises(ValueError):
        lrucache.insert_slab(slab)
92 |
93 |
def test_duplicate_values(lrucache, slabutil):
    "Inserting a second slab with the same keys raises ValueError."

    items = [(str(v), v) for v in range(_SLABSZ)]
    lrucache.insert_slab(slabutil_make_slab(_NS, items))

    duplicate = slabutil_make_slab(_NS, items)
    with pytest.raises(ValueError):
        lrucache.insert_slab(duplicate)
106 |
def test_namespaces(lrucache, slabutil):
    "Entries with equal keys in different namespaces stay distinct."

    base = range(_SLABSZ)
    lrucache.insert_slab(
        slabutil_make_slab(_NS, [(str(v), v) for v in base]))
    lrucache.insert_slab(
        slabutil_make_slab(_NS1, [(str(v), v * v) for v in base]))

    for i in base:
        key = str(i)
        st, plain = lrucache.get(_NS, key)
        assert st
        assert plain == i
        st, squared = lrucache.get(_NS1, key)
        assert st
        assert squared == i * i
124 |
125 |
def test_get_nonexistent(lrucache, slabutil):
    "Test that unknown keys are rejected."

    values = range(_SLABSZ)
    keys = map(str, values)
    slabitems = zip(keys, values)
    slabdesc = slabutil_make_slab(_NS, slabitems)
    lrucache.insert_slab(slabdesc)

    # Invalid namespace, valid key.
    assert lrucache.get(_NS+_NS, '0') is None
    # Valid namespace, out-of-slab key.  The key must be a string like
    # every other key in the cache: the previous version passed the
    # bare integer _SLABSZ+1, which exercised "wrong key type" rather
    # than "key outside the inserted slab".
    assert lrucache.get(_NS, str(_SLABSZ + 1)) is None
137 |
138 |
def test_get_nonexistent_element(lrucache, slabutil):
    "A key absent from an inserted slab reports as not-present."

    evens = range(0, _SLABSZ, 2)        # Alternate elements only.
    slab = slabutil_make_slab(_NS, [(str(v), v) for v in evens])
    lrucache.insert_slab(slab)

    # '1' falls inside the slab's key range but was never inserted.
    present, value = lrucache.get(_NS, '1')
    assert not present
    assert value == '1'
150 |
def test_remove(lrucache, slabutil):
    "remove_slab() drops exactly the elements of the removed slab."

    def _slab_for(lo, hi):
        vals = range(lo, hi)
        return slabutil_make_slab(_NS, zip(map(str, vals), vals))

    first = _slab_for(0, _SLABSZ)
    lrucache.insert_slab(first)

    second = _slab_for(_SLABSZ, 2 * _SLABSZ)
    lrucache.insert_slab(second)

    # Remove the first slab.
    lrucache.remove_slab(first)

    # Keys from the removed slab must now miss entirely.
    for i in xrange(_SLABSZ):
        assert lrucache.get(_NS, str(i)) is None

    # Keys from the surviving slab are still retrievable.
    for i in xrange(_SLABSZ, 2 * _SLABSZ):
        found, value = lrucache.get(_NS, str(i))
        assert found
        assert value == i
175 |
176 |
def test_non_overflow(slabutil):
    "Test that slabs do not overflow upto the slab LRU size."

    slabs = []
    def _mkslab(i):
        v = [i * _SLABSZ]
        return slabutil_make_slab(_NS, zip(map(str, v), v))

    # The eviction callback must never fire while the cache holds at
    # most _LRUSZ slabs.  The callback is invoked as
    # _cb(slabkey, slabdesc): the previous version declared a spurious
    # leading 'self' parameter (and an unused 'seq' default), so an
    # unexpected eviction would have raised a TypeError instead of the
    # intended AssertionError.
    def _cb(slabkey, slabdesc):
        assert False, "unexpected eviction of %s" % slabkey

    lc = LRUCache(_LRUSZ, _cb)
    for i in xrange(_LRUSZ):
        sl = _mkslab(i)
        slabs.append(sl)
        lc.insert_slab(sl)
192 |
193 |
def test_overflow(slabutil):
    "Evictions happen in insertion (LRU) order once the cache is full."

    slabs = []
    def _make(i):
        vals = [i * _SLABSZ]
        return slabutil_make_slab(_NS, zip(map(str, vals), vals))

    position = [0]
    def _on_evict(slabkey, slabdesc, _pos=position):
        # Each eviction must surface the oldest slab still cached.
        idx = _pos[0]
        assert slabdesc is slabs[idx]
        _pos[0] = idx + 1

    cache = LRUCache(_LRUSZ, _on_evict)
    for i in xrange(2 * _LRUSZ):
        slab = _make(i)
        slabs.append(slab)
        cache.insert_slab(slab)
212 |
213 |
def test_flush(slabutil):
    "flush() presents every cached slab to the callback, in order."

    slabs = []
    def _make(i):
        vals = [i * _SLABSZ]
        return slabutil_make_slab(_NS, zip(map(str, vals), vals))

    state = {'seen': False, 'next': 0}
    def _on_flush(slabkey, slabdesc, _state=state):
        _state['seen'] = True
        assert slabdesc is slabs[_state['next']]
        _state['next'] += 1

    cache = LRUCache(_LRUSZ, _on_flush)
    for i in xrange(_LRUSZ):
        slab = _make(i)
        slabs.append(slab)
        cache.insert_slab(slab)

    cache.flush()
    assert state['seen'] is True
238 |
--------------------------------------------------------------------------------
/src/python/frontend/fe.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | import tornado.httpserver
24 | import tornado.ioloop
25 | import tornado.web
26 |
27 | import platform
28 | import sys
29 |
30 | #
31 | # Local modules
32 | #
33 | import apiserver.const as C # 'constants'
34 | from capabilities import CapabilitiesHandler
35 | from maphandler import MapHandler
36 | from osmelement import OsmElementHandler, OsmElementRelationsHandler, \
37 | OsmFullQueryHandler, OsmMultiElementHandler, OsmWaysForNodeHandler
38 |
39 | #
40 | # Handling access to '/'.
41 | #
42 | class RootHandler(tornado.web.RequestHandler):
43 | """Handle requests for "/".
44 |
45 | Print a message pointing the user to the right API calls."""
46 |
47 | default_message = """\
48 |
49 |
50 | A scalable, read-only, OSM API Server
51 |
52 |
53 | Welcome
54 |
55 | You have reached an experimental implementation of an API server
56 | serving map data from the OpenStreetMap project.
58 |
59 | API Version / Operations Supported
60 | This server supports read queries conforming to the v%(apiversion)s
62 | OSM API.
63 | OSM API calls that change map data are not supported.
64 |
65 | Examples of API use
66 |
67 | - To retrieve the capabilities of this server, use:
68 |
%% curl http://%(hostname)s/api/capabilities
69 |
70 | - To retrieve the contents of node
15382126 from the command-line, use:
71 | %% curl http://%(hostname)s/api/%(apiversion)s/node/15382126
72 |
73 | - To retrieve the ways for node
15382126, use:
74 | %% curl http://%(hostname)s/api/%(apiversion)s/node/15382126/ways
75 |
76 |
77 |
78 | More Information
79 | This server is being developed as an open-source project.
80 |
86 |
87 | """
88 |
    def initialize(self, cfg):
        """Pre-render the usage message from configuration values.

        The class-level template is interpolated with the configured
        API version, this host's name, and the project documentation
        and source-repository URLs.
        """
        self.usagemessage = RootHandler.default_message % dict(
            apiversion=cfg.get(C.FRONT_END, C.API_VERSION),
            hostname=platform.node(),
            projectdoc=cfg.get(C.DEFAULT, C.PROJECT_DOC),
            sourcerepository=cfg.get(C.DEFAULT, C.SOURCE_REPOSITORY))
95 |
    def get(self):
        """Serve the usage message pre-rendered by initialize()."""
        self.write(self.usagemessage)
98 |
99 |
class ReadOnlyHandler(tornado.web.RequestHandler):
    """Return an error for URLs that a read-only server does not support.

    No HTTP verb methods are defined, so Tornado's default handling
    rejects every request to these routes with "405 Method Not
    Allowed".
    """

    def initialize(self, cfg=None):
        # The configuration object is accepted (all routes pass
        # cfg=...) but unused.
        pass
105 |
106 |
class NotImplementedHandler(tornado.web.RequestHandler):
    """Return HTTP 501 for URIs that are recognized but unimplemented."""

    def initialize(self, cfg=None):
        # Configuration is accepted for route-table uniformity; unused.
        pass

    def get(self, *args):
        # Accept any number of captured URL groups: several routes
        # mapped to this handler (e.g. ".../([0-9]+)/history") capture
        # two groups, which the previous single-parameter signature
        # could not receive -- Tornado's dispatch would raise a
        # TypeError (HTTP 500) instead of the intended 501.
        raise tornado.web.HTTPError(501) # Not Implemented
115 |
116 | #
117 | # The OSM front end server.
118 | #
class OSMFrontEndServer:
    """The OSM Front End.

    This wrapper class encapsulates an instance of a Tornado
    'Application' implementing the front end server, and its
    associated configuration information.

    Example:
    >> cfg = ConfigParser.ConfigParser()
    >> cfg.read(my-config-file)
    >> db = ...   # a datastore instance
    >> frontend = OSMFrontEndServer(cfg, options, db)

    Attributes:

    application The Tornado 'Application' for this server
                instance.
    config      Configuration information for this instance.
    datastore   Datastore in use.
    """

    def __init__(self, cfg, options, datastore):
        """Initialize an OSMFrontEnd.

        Parameters:

        cfg         Configuration information.
        options     Command line options (currently unused here).
        datastore   Datastore in use.
        """

        osm_api_version = cfg.get(C.FRONT_END, C.API_VERSION)

        # Link URLs to their handlers.  Tornado dispatches to the
        # first pattern matching the full request path, so more
        # specific routes (e.g. ".../ways") precede general ones.
        application = tornado.web.Application([
            (r"/api/%s/map" % osm_api_version, MapHandler,
             dict(cfg=cfg, datastore=datastore)),
            (r"/api/%s/capabilities" % osm_api_version, CapabilitiesHandler,
             dict(cfg=cfg)),
            (r"/api/%s/changeset/([0-9]+)/close" % osm_api_version,
             NotImplementedHandler, dict(cfg=cfg)),
            (r"/api/%s/changeset/([0-9]+)/download" % osm_api_version,
             NotImplementedHandler, dict(cfg=cfg)),
            (r"/api/%s/changeset/([0-9]+)/expand_bbox" % osm_api_version,
             NotImplementedHandler, dict(cfg=cfg)),
            (r"/api/%s/changeset/([0-9]+)/upload" % osm_api_version,
             NotImplementedHandler, dict(cfg=cfg)),
            (r"/api/%s/changesets" % osm_api_version,
             NotImplementedHandler, dict(cfg=cfg)),
            (r"/api/%s/node/([0-9]+)/ways" % osm_api_version,
             OsmWaysForNodeHandler, dict(datastore=datastore)),
            (r"/api/%s/(nodes|ways|relations)" % osm_api_version,
             OsmMultiElementHandler, dict(datastore=datastore)),
            (r"/api/%s/(node|way|relation)/create" % osm_api_version,
             ReadOnlyHandler, dict(cfg=cfg)),
            (r"/api/%s/(node|way|relation)/([0-9]+)/history" %
             osm_api_version, NotImplementedHandler,
             dict(cfg=cfg)),
            (r"/api/%s/(node|way|relation)/([0-9]+)/([0-9]+)" %
             osm_api_version, NotImplementedHandler,
             dict(cfg=cfg)),
            (r"/api/%s/(node|way|relation)/([0-9]+)/relations" %
             osm_api_version, OsmElementRelationsHandler,
             dict(datastore=datastore)),
            (r"/api/%s/(changeset|node|way|relation)/([0-9]+)" %
             osm_api_version, OsmElementHandler, dict(datastore=datastore)),
            (r"/api/%s/(way|relation)/([0-9]+)/full" % osm_api_version,
             OsmFullQueryHandler, dict(datastore=datastore)),
            (r"/api/capabilities", CapabilitiesHandler, dict(cfg=cfg)),
            (r"/", RootHandler, dict(cfg=cfg))
        ])

        self._application = application
        self._config = cfg
        self._datastore = datastore

    # Read-only accessors, exposed as properties below.
    def _get_application(self):
        return self._application
    def _get_config(self):
        return self._config
    def _get_datastore(self):
        return self._datastore

    application = property(_get_application)
    config = property(_get_config)
    datastore = property(_get_datastore)
205 |
--------------------------------------------------------------------------------
/src/python/datastore/ds.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """An interface to the datastore."""
24 |
25 | import threading
26 |
27 | import apiserver.const as C
28 |
29 | from apiserver.osmelement import decode_json, decode_protobuf, encode_json, \
30 | encode_protobuf
31 | from datastore.lrucache import LRUIOCache
32 | from datastore.slabutil import *
33 |
34 | import threading
35 | from Queue import Queue
36 |
class DatastoreBase:
    """Base class for accessing a data store.

    Concrete subclasses supply the storage primitives --
    retrieve_element(), retrieve_slab(), store_element(), store_slab()
    and register_threads() -- while this base class layers an LRU slab
    cache, optional write-back and optional worker threads on top.
    """

    # Element namespaces the datastore will accept.
    VALID_NAMESPACES = [
        C.CHANGESET, C.GEODOC, C.NODE, C.RELATION, C.WAY
    ]

    def __init__(self, config, usethreads=False, writeback=False):
        """Initialize the datastore.

        Parameters:

        config      ConfigParser-style configuration object.
        usethreads  Use worker threads for write-back I/O.
        writeback   Write dirty slabs back on cache eviction.

        Raises ValueError for an unsupported encoding or a
        non-positive slab LRU size.
        """
        encoding = config.get(C.DATASTORE, C.DATASTORE_ENCODING)
        if encoding == C.JSON:
            self.decode = decode_json
            self.encode = encode_json
        elif encoding == C.PROTOBUF:
            self.decode = decode_protobuf
            self.encode = encode_protobuf
        else:
            # Fail early: previously an unknown encoding left
            # self.encode/self.decode unset, causing an AttributeError
            # much later at first use.
            raise ValueError("Unsupported encoding: %r" % (encoding,))

        bound = config.getint(C.DATASTORE, C.SLAB_LRU_SIZE)
        if bound <= 0:
            raise ValueError("Illegal SLAB LRU size %d" % bound)

        # Always define nthreads: _cbwrite() and finalize() consult it
        # even when write-back is disabled.  (Previously it was set
        # only on the write-back path, so finalize() raised
        # AttributeError for read-only datastores.)
        self.nthreads = 0

        if writeback:
            if usethreads:
                self.nthreads = config.getint(C.DATASTORE,
                                              C.SLAB_LRU_THREADS)
            if self.nthreads:
                self.threads = []
                self.workqueue = Queue(self.nthreads)
                for n in xrange(self.nthreads):
                    t = threading.Thread(target=self._worker)
                    self.threads.append(t)
                    t.daemon = True
                    t.name = "DS-%d" % n
                    t.start()
                callback = self._cbthreaded
            else:
                callback = self._cbwrite
        else:
            callback = None
        self.cache = LRUIOCache(bound=bound, callback=callback)

    def _worker(self):
        "Worker-thread main loop: drain the write-back queue."
        while True:
            slabkey, slabdesc = self.workqueue.get()
            self._cbwrite(slabkey, slabdesc)

    def _cbthreaded(self, slabkey, slabdesc):
        "Eviction callback (threaded case): queue the slab for write-back."
        self.workqueue.put((slabkey, slabdesc))

    def _cbwrite(self, slabkey, slabdesc):
        "Write back a slab to the backing store."
        self.store_slab(slabdesc.namespace, slabkey, slabdesc)
        if self.nthreads:
            # In threaded mode, acknowledge the queue item and clear
            # the cache's I/O-pending marker for this slab.
            assert self.cache.isiopending(slabkey)
            self.workqueue.task_done()
            self.cache.iodone(slabkey)

    def fetch_keys(self, namespace, keys, cacheable=True):
        """Return an iterator yielding (status, value) pairs for keys.

        Each yielded pair is (True, element) for a key present in the
        cache or backing store, or (False, key) for a missing key.

        Parameters:

        namespace - element namespace
        keys      - a list of keys to retrieve.
        cacheable - True if values from the data store are to
                    be cached.
        """

        assert namespace in DatastoreBase.VALID_NAMESPACES

        # Retrieve the requested keys from the cache, if present
        # there.
        keys_to_retrieve = set()
        elements = []

        for k in keys:
            assert isinstance(k, basestring)
            v = self.cache.get(namespace, k)
            if v:               # Status is known.
                assert len(v) == 2
                assert isinstance(v, tuple)
                elements.append(v)
            else:               # Status is unknown.
                keys_to_retrieve.add(k)

        # Return elements that were present in the cache.
        for elem in elements:
            yield elem

        if len(keys_to_retrieve) == 0:
            return

        # Retrieve elements that were not in cache from the backing
        # store.
        if slabutil_use_slab(namespace):
            slabkeyset = slabutil_group_keys(namespace, keys_to_retrieve)

            while len(slabkeyset) > 0:
                elements = []
                sk, keys = slabkeyset.popitem()

                # Read in the slab from the data store.
                items = self.retrieve_slab(namespace, sk)

                # Nothing to do if the entire slab is missing; the
                # final loop below reports these keys as absent.
                if items is None:
                    continue

                # Prepare a slab descriptor, insert its contents into
                # the cache.
                slabdesc = slabutil_make_slab(namespace, items)
                self.cache.insert_slab(slabdesc)

                # Bring in elements.
                for k in keys:
                    try:
                        elements.append(self.cache.get(namespace, k))
                        keys_to_retrieve.remove(k)
                    except KeyError:
                        assert False, "Element %s:%s not in cache" % (sk, k)

                # Return elements from this slab.
                for elem in elements:
                    yield elem
        else:
            # Non-slabbed namespace: fetch each element individually.
            for k in keys_to_retrieve:
                elem = self.retrieve_element(namespace, k)
                if elem is None:
                    yield (False, k)
                else:
                    yield (True, elem)
            return

        # Return status information for keys that were missing in the
        # data store.
        for k in keys_to_retrieve:
            yield (False, k)

    def fetch(self, namespace, key):
        """Retrieve one value from the datastore, or None if absent.

        Raises KeyError for an invalid namespace.
        """

        # Consistent with fetch_keys(), which accepts any string type
        # (the previous 'type(key) == str' check rejected unicode).
        assert isinstance(key, basestring)

        if namespace not in DatastoreBase.VALID_NAMESPACES:
            raise KeyError(namespace)

        elems = [e for e in self.fetch_keys(namespace, [key])]

        # Only one value should be returned for a given key.
        assert len(elems) == 1, \
            'Multiple values for ns,key="%s","%s": %s' % \
            (namespace, key, elems)

        rstatus, elem = elems[0]
        if rstatus:
            return elem
        else:
            return None

    def store(self, elem):
        "Create a new element in the data store (via the slab cache)."

        ns = elem.namespace
        elemid = elem.id
        slabdesc = self.cache.get_slab(ns, elemid)
        if slabdesc is None:    # New slab.
            slabdesc = slabutil_make_slab(ns, [(elemid, elem)])
            self.cache.insert_slab(slabdesc)
        else:
            slabdesc.add(elemid, elem)

    def _abort(self, *args, **kw):
        "Placeholder for methods subclasses must override."
        raise TypeError("Abstract method invoked")

    def finalize(self):
        "Write back caches and finish pending I/Os."
        self.cache.flush()
        if self.nthreads:
            self.workqueue.join()

    # Abstract methods: concrete datastores must override these.
    register_threads = _abort
    retrieve_element = _abort
    retrieve_slab = _abort
    store_element = _abort
    store_slab = _abort
225 |
--------------------------------------------------------------------------------
/src/python/frontend/maphandler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | ## Support retrieval of the map data in a bounding box.
24 |
25 | import geohash
26 | import tornado.web
27 |
28 | from lxml import etree as ET
29 |
30 | import apiserver.const as C
31 | from apiserver.osmelement import encode_coordinate, new_osm_response
32 |
33 | from util import filter_references, response_to_xml
34 |
def _filter_in_bbox(bbox, geodocs):
    """Collect the ids of the nodes lying inside a bounding box.

    'bbox' holds (w, s, e, n) coordinates; 'geodocs' is a sequence of
    geo documents yielding (id, lat, lon) triples.  Inclusion is
    half-open: the west/south edges are inside, east/north are not.
    """
    west, south, east, north = map(encode_coordinate, bbox)

    matches = set()
    for doc in geodocs:
        matches.update(
            node_id
            for (node_id, lat, lon) in doc.get_node_info()
            if west <= lon < east and south <= lat < north)
    return matches
45 |
46 |
class MapHandler(tornado.web.RequestHandler):
    "Handle requests for the /map API."

    def initialize(self, cfg, datastore):
        """Bind the datastore and the configured geohash precision."""
        self.datastore = datastore
        self.precision = cfg.getint(C.DATASTORE, C.GEOHASH_LENGTH)

    def get(self, *args, **kwargs):
        '''Service a GET request to the '/map' URI.

        The 'bbox' parameter contains 4 coordinates "l" (w), "b" (s),
        "r" (e) and "t" (n).'''

        # Sanity check the input.
        bbox_arg = self.get_argument('bbox', None)
        if not bbox_arg:
            raise tornado.web.HTTPError(400) # Bad Request
        bbox = bbox_arg.split(',')
        if len(bbox) != 4:
            raise tornado.web.HTTPError(400)
        try:
            w, s, e, n = map(float, bbox)
        except ValueError:
            raise tornado.web.HTTPError(400)

        # Check the "l,b,r,t" coordinates passed in for sanity.
        if w < C.LON_MIN or w > C.LON_MAX or \
           e < C.LON_MIN or e > C.LON_MAX or \
           s < C.LAT_MIN or s > C.LAT_MAX or \
           n < C.LAT_MIN or n > C.LAT_MAX or \
           n < s or e < w:
            raise tornado.web.HTTPError(400)

        nodelist, ways, relations = self.handle_map(bbox)
        response = self.build_bbox_response(nodelist, ways, relations, bbox)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(response))

    def build_bbox_response(self, nodes, ways, relations, bbox):
        """Build an OSM response document for the query.

        Emits a bounds element describing 'bbox' followed by the
        given nodes, ways and relations, in that order.
        """

        # Create a new response element.
        osm = new_osm_response()

        # Add a bounds element.
        bb = ET.SubElement(osm, C.BOUNDS)
        (bb.attrib[C.MINLON], bb.attrib[C.MINLAT],
         bb.attrib[C.MAXLON], bb.attrib[C.MAXLAT]) = map(str, bbox)

        # Add nodes, ways and relations in that order.
        for n in nodes:
            n.build_response(osm)
        for w in ways:
            w.build_response(osm)
        for r in relations:
            r.build_response(osm)

        return osm

    def handle_map(self, bbox):
        """Implementation of the /map API.

        Parameters:

            bbox -- Bounding box coordinates.

        Returns a (nodelist, ways, relations) tuple.  The steps below
        follow the current implementation of the API server at
        api.openstreetmap.org (the 'rails' port).
        """

        nodelist = []
        relations = []
        ways = []

        # Look up the geo coded documents covering the desired bbox.
        gckeys = self.get_geocodes(bbox)
        geodocs = self.datastore.fetch_keys(C.GEODOC, gckeys)

        # Step 1: Get the list of nodes contained in the given
        # bounding box.
        nodeset = _filter_in_bbox(bbox,
                                  [gd for (st, gd) in geodocs if st])
        if len(nodeset) == 0:
            return (nodelist, ways, relations)

        nodelist = [z for (st, z) in self.datastore.fetch_keys(
            C.NODE, list(nodeset)) if st]

        # Step 2: Retrieve all ways that reference at least one node
        # in the given bounding box.
        wayset = filter_references(C.WAY, nodelist)

        # Step 3: Retrieve any additional nodes referenced by the ways
        # retrieved.
        waynodeset = set()

        for (st, w) in self.datastore.fetch_keys(C.WAY, list(wayset)):
            if st:
                ways.append(w)
                waynodeset.update(w.get_node_ids())

        extranodeset = waynodeset - nodeset
        nodelist.extend([n for (st, n) in
                         self.datastore.fetch_keys(C.NODE,
                                                   list(extranodeset))
                         if st])
        nodeset = nodeset | extranodeset

        # Step 4: Retrieve the relations associated with these nodes.

        # ... all relations that reference nodes being returned.
        relset = filter_references(C.RELATION, nodelist)

        # ... and relations that reference one of the ways in the wayset.
        relset.update(filter_references(C.RELATION, ways))

        # ... retrieve relations from the data store.
        relations = [xr for (st, xr) in
                     self.datastore.fetch_keys(C.RELATION, list(relset))
                     if st]

        # ... and relations referenced by existing relations
        # (one-pass only).
        extrarelset = filter_references(C.RELATION, relations)
        newrelset = extrarelset - relset

        newrels = [nr for (st, nr) in
                   self.datastore.fetch_keys(C.RELATION, list(newrelset))
                   if st]
        relations.extend(newrels)

        return (nodelist, ways, relations)

    def get_geocodes(self, bbox):
        """Return a list of geohash keys covering a given area.

        Parameters:

            bbox -- Bounding box of the desired region.
        """

        # TODO: Make this more efficient for sparse areas of the map.
        w, s, e, n = map(float, bbox)

        n = min(C.MAXGHLAT, n) # work around a geohash library
        s = min(C.MAXGHLAT, s) # limitation

        assert(w <= e and s <= n)

        gcset = set()
        gc = geohash.encode(s, w, self.precision)

        bl = geohash.bbox(gc) # Box containing point (s,w).

        s_ = bl['s']
        while s_ < n: # Step south to north.
            w_ = bl['w']

            gc = geohash.encode(s_, w_, self.precision)
            bb_sn = geohash.bbox(gc) # bounding box in S->N direction

            while w_ < e: # Step west to east.
                gcset.add(gc)

                bb_we = geohash.bbox(gc) # in W->E direction
                w_ = bb_we['e']

                gc = geohash.encode(s_, w_, self.precision)

            s_ = bb_sn['n']

        assert(len(gcset) > 0)

        return list(gcset)
226 |
--------------------------------------------------------------------------------
/src/python/frontend/osmelement.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | ## Handle API requests for nodes, ways and elements.
24 |
25 | import tornado.web
26 |
27 | import apiserver.const as C
28 | from apiserver.osmelement import new_osm_response
29 | from util import filter_references, response_to_xml
30 |
class OsmElementHandler(tornado.web.RequestHandler):
    "Handle requests for the (changeset|node|way|relation)/ API."

    def initialize(self, datastore):
        """Bind the datastore used to serve element lookups."""
        self.datastore = datastore

    def delete(self, namespace, ident):
        """Handle a DELETE HTTP request (unsupported: read-only server).

        The mapped route captures two groups (namespace, id); the
        previous single-parameter signature made Tornado's dispatch
        raise a TypeError (HTTP 500) instead of the intended 501.
        """
        raise tornado.web.HTTPError(501) # Not Implemented.

    def get(self, namespace, ident):
        """Return the XML representation of one element, or 404."""
        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)

        elem = self.datastore.fetch(namespace, ident)
        if elem is None:
            raise tornado.web.HTTPError(404)

        self.write(response_to_xml(elem.build_response(new_osm_response())))

    def put(self, namespace, ident):
        """Handle a PUT HTTP request (unsupported: read-only server).

        Signature matches the two capture groups of the mapped route,
        for the same reason as delete() above.
        """
        raise tornado.web.HTTPError(501) # Not Implemented.
55 |
class OsmMultiElementHandler(tornado.web.RequestHandler):
    """Handle requests for the (nodes|ways|relations) API."""

    # Map the plural element name from the URI to its datastore
    # namespace.  This replaces a duplicated dispatch (a membership
    # test followed by an if/elif chain whose 'else' branch was
    # unreachable).
    _namespace_map = {
        C.NODES: C.NODE,
        C.WAYS: C.WAY,
        C.RELATIONS: C.RELATION,
    }

    def initialize(self, datastore):
        """Initialize the handler."""
        self.datastore = datastore

    def get(self, element):
        """Retrieve multiple elements.

        The elements are specified by (nodes|ways|relations) parameter
        to the request, as a comma separated list of element IDs.
        """

        try:
            namespace = OsmMultiElementHandler._namespace_map[element]
        except KeyError:
            # Programming error: routing should only pass known kinds.
            raise tornado.web.HTTPError(500)

        # The name of the parameter (i.e., one of "nodes", "ways" or
        # "relations") matches the last component of the URI.
        params = self.get_argument(element, None)
        if not params:
            raise tornado.web.HTTPError(400)

        # Create a new response and add the elements that exist.
        osm = new_osm_response()
        for (status, elem) in self.datastore.fetch_keys(namespace,
                                                        params.split(",")):
            if status:
                elem.build_response(osm)

        # Send the XML representation back to the client.
        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))
101 |
102 |
class OsmElementRelationsHandler(tornado.web.RequestHandler):
    """Retrieve relations for a node or way element."""

    def initialize(self, datastore):
        "Remember the datastore used for lookups."
        self.datastore = datastore

    def get(self, namespace, ident):
        """Return the relations referencing an element.

        The element can be a 'node' or 'way'.
        """

        if namespace not in (C.NODE, C.WAY, C.RELATION):
            raise tornado.web.HTTPError(500)

        osm = new_osm_response()

        element = self.datastore.fetch(namespace, ident)
        if element:
            relation_ids = filter_references(C.RELATION, [element])
            if relation_ids:
                fetched = self.datastore.fetch_keys(C.RELATION,
                                                    list(relation_ids))
                for (status, relation) in fetched:
                    if status:
                        relation.build_response(osm)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))
134 |
class OsmWaysForNodeHandler(tornado.web.RequestHandler):
    """Retrieve ways associated with a node."""

    def initialize(self, datastore):
        "Remember the datastore used for lookups."
        self.datastore = datastore

    def get(self, nodeid):
        "Return the ways that reference node 'nodeid'."

        node = self.datastore.fetch(C.NODE, nodeid)
        if node is None:
            raise tornado.web.HTTPError(404)

        osm = new_osm_response()

        way_ids = filter_references(C.WAY, [node])
        if way_ids:
            fetched = self.datastore.fetch_keys(C.WAY, list(way_ids))
            for (status, way) in fetched:
                if status:
                    way.build_response(osm)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))
160 |
class OsmFullQueryHandler(tornado.web.RequestHandler):
    """Handle a `full' query for a way or relation."""

    def initialize(self, datastore):
        "Remember the datastore used for lookups."
        self.datastore = datastore

    def _fetch_present(self, namespace, keys):
        "Fetch 'keys' from 'namespace', keeping only those that exist."
        return [elem for (ok, elem) in
                self.datastore.fetch_keys(namespace, list(keys))
                if ok]

    def get(self, namespace, elemid):
        """Implement a 'GET' operation.

        For a way:
        - Return the way itself,
        - Return the full XML of all nodes referenced by the
          way.
        For a relation:
        - Return the relation itself,
        - All nodes and ways that are members of the relation.
        - All nodes referenced from the ways above.
        """

        # Retrieve the element itself; without it there is no response.
        element = self.datastore.fetch(namespace, elemid)
        if element is None:
            raise tornado.web.HTTPError(404)

        nodes = []
        ways = []
        relations = []

        if namespace == C.RELATION:
            # Nodes directly referenced by the relation.
            nodeset = element.get_member_ids(C.NODE)
            nodes.extend(self._fetch_present(C.NODE, nodeset))
            # Way IDs directly referenced by the relation.
            wayset = element.get_member_ids(C.WAY)
            # Include the relation itself.
            relations.append(element)
        else:
            nodeset = set()
            wayset = set([elemid])

        if wayset:
            # Fetch all ways.
            ways.extend(self._fetch_present(C.WAY, wayset))

            # Fetch any nodes referenced by those ways that were not
            # already retrieved above.
            referenced = set()
            for way in ways:
                referenced.update(way.get_node_ids())
            nodes.extend(self._fetch_present(C.NODE, referenced - nodeset))

        # Build and return a response: nodes, then ways, then relations.
        osm = new_osm_response()
        for elem in nodes:
            elem.build_response(osm)
        for elem in ways:
            elem.build_response(osm)
        for elem in relations:
            elem.build_response(osm)

        self.set_header(C.CONTENT_TYPE, C.TEXT_XML)
        self.write(response_to_xml(osm))
231 |
--------------------------------------------------------------------------------
/src/python/apiserver/osmelement.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """Describe OSM elements.
24 |
25 | Exported functions:
26 |
27 | new_osm_element -- factory function to create a new OSM element.
28 | init_osm_factory -- initialize the factory.
29 | """
30 |
31 | import geohash
32 | import math
33 | import types
34 |
35 | import cjson
36 |
37 | from lxml import etree as ET
38 |
39 | import apiserver.const as C
40 | from datastore.slabutil import slabutil_make_slabkey
41 |
# Module-level settings, populated by init_osm_factory() from the
# server configuration before elements are created or serialized.
_server_version = None    # 'version' attribute emitted on <osm> responses.
_server_name = None       # 'generator' attribute emitted on <osm> responses.
_scale_factor = None      # Multiplier used to store coordinates as integers.
_fraction_width = None    # Number of fractional digits kept per coordinate.
_coordinate_format = None # Format used to render decoded coordinates.
47 |
def init_osm_factory(config):
    """Initialize module-level settings for the OSM element factory.

    config -- configuration object supporting get()/getint() lookups.
    """

    global _coordinate_format, _fraction_width, _scale_factor, _server_name
    global _server_version

    scale = config.getint(C.DATASTORE, C.SCALE_FACTOR)
    width = math.trunc(math.log10(scale))

    _scale_factor = scale
    _fraction_width = width
    # e.g. a width of 7 yields the format "%d.%07d".
    _coordinate_format = "%%d.%%0%dd" % width

    _server_version = config.get(C.FRONT_END, C.SERVER_VERSION)
    _server_name = config.get(C.FRONT_END, C.SERVER_NAME)
60 |
61 | def encode_coordinate(coordinate):
62 | """Encode a latitude or longitude as an integral value.
63 |
64 | Parameters:
65 |
66 | coordinate -- An OSM latitude or longitude as numeric value, or
67 | a string representation of a number.
68 | """
69 |
70 | coordinate_type = type(coordinate)
71 |
72 | if coordinate_type in types.StringTypes:
73 | try:
74 | integral, fractional = coordinate.split(".")
75 | fractional = fractional[0:_fraction_width]
76 | except ValueError:
77 | integral = coordinate
78 | fractional = "0"
79 |
80 | fractional = fractional.ljust(_fraction_width, "0")
81 | return int(integral) * _scale_factor + int(fractional)
82 |
83 | elif coordinate_type == types.FloatType:
84 | fractional, integral = map(lambda x: int(x * _scale_factor),
85 | math.modf(coordinate))
86 | return integral + fractional
87 |
88 | else:
89 | raise ValueError, \
90 | "Unsupported conversion from '%s'" % coordinate_type
91 |
def decode_coordinate(encodedvalue):
    """Decode an integral quantity into a OSM latitude or longitude."""

    # Split the scaled integer back into whole and fractional parts;
    # divmod() is equivalent to the floor division and subtraction
    # used previously.
    integral, fractional = divmod(encodedvalue, _scale_factor)
    return _coordinate_format % (integral, fractional)
99 |
def new_osm_response():
    "Create an empty <osm> response element."

    root = ET.Element(C.OSM)
    root.set(C.VERSION, _server_version)
    root.set(C.GENERATOR, _server_name)
    return root
108 |
109 |
110 | class OSMElement(dict):
111 | """A representation of an OSM Element"""
112 |
113 | ignoredkeys = [C.TAGS, C.REFERENCES]
114 |
115 | def __init__(self, namespace, elemid):
116 | """Initialize an OSMElement object.
117 |
118 | namespace -- the OSM namespace for the element.
119 | elemid -- the element id in the namespace.
120 | """
121 |
122 | assert namespace in _namespace_to_factory.keys()
123 | assert isinstance(elemid, basestring)
124 |
125 | super(OSMElement, self).__init__()
126 | super(OSMElement, self).__setitem__(C.ID, elemid)
127 | super(OSMElement, self).__setitem__(C.REFERENCES, set())
128 | self.namespace = namespace
129 | self.id = elemid
130 | self.slabkey = slabutil_make_slabkey(namespace, elemid)
131 |
132 | def __repr__(self):
133 | 'Return a human-friendly representation of an OSMElement.'
134 | docstr = super(OSMElement, self).__repr__()
135 | return "OSMElement<%s>%s" % (self.namespace, docstr)
136 |
137 | def from_mapping(self, d):
138 | "Translate between a mapping to an OSM element."
139 | setter = super(OSMElement, self).__setitem__
140 | for k in d:
141 | if k == C.ID:
142 | assert self.id == str(d[k])
143 | continue
144 | if k == C.REFERENCES:
145 | v = set(d[k])
146 | else:
147 | v = d[k]
148 | setter(k, v)
149 |
150 | def as_mapping(self):
151 | "Translate to a Python mapping."
152 | d = {}
153 | for (k,v) in self.items():
154 | if isinstance(v, set): # Convert sets to lists.
155 | v = [r for r in v]
156 | d[k] = v
157 | return d
158 |
159 | def build_response(self, element):
160 | "Return an XML representation of an element."
161 | raise TypeError, "Abstract method was invoked."
162 |
163 | def add_attributes(self, element, ignoredkeys=[]):
164 | "Translate from dictionary keys to XML attributes."
165 | for (k, v) in self.items():
166 | if k in ignoredkeys:
167 | continue
168 | if k in OSMElement.ignoredkeys:
169 | continue
170 | element.attrib[k] = str(v)
171 |
172 | def add_tags(self, element):
173 | "Add children to an XML element."
174 | for (k, v) in self.get(C.TAGS, {}).items():
175 | t = ET.SubElement(element, C.TAG)
176 | t.attrib[C.K] = k
177 | t.attrib[C.V] = v
178 |
class OSMChangeSet(OSMElement):
    "An OSM changeset element."

    def __init__(self, elemid):
        super(OSMChangeSet, self).__init__(C.CHANGESET, elemid)

    def build_response(self, osm):
        """Append the XML representation of this changeset to 'osm'."""

        element = ET.SubElement(osm, C.CHANGESET)
        self.add_attributes(element)
        self.add_tags(element)
        return osm
191 |
class OSMDatastoreConfig(OSMElement):
    "Configuration information held in the datastore."

    def __init__(self, elemid):
        # Use super() for consistency with the other OSMElement
        # subclasses in this module.
        super(OSMDatastoreConfig, self).__init__(C.DATASTORE_CONFIG, elemid)
195 |
196 | class OSMGeoDoc(OSMElement):
197 | """A geodoc references nodes which fall into a given geographic area."""
198 | def __init__(self, region):
199 | super(OSMGeoDoc, self).__init__(C.GEODOC, region)
200 | # Fill in default values for 'standard' fields.
201 | self.__setitem__(C.NODES, set())
202 | self.__setitem__(C.BBOX, geohash.bbox(region))
203 |
204 | def build_response(self, element):
205 | raise TypeError, "GeoDocuments have no XML representation."
206 |
207 | def get_node_info(self):
208 | "Return node ids and (lat, lon) coordinates in this document."
209 | return self[C.NODES]
210 |
class OSMNode(OSMElement):
    "An OSM node element."

    # Attributes that are stored scaled and need decoding on output.
    special_attributes = [C.LAT, C.LON]

    def __init__(self, elemid):
        super(OSMNode, self).__init__(C.NODE, elemid)

    def build_response(self, osm):
        "Append the XML representation of this node to 'osm'."

        node = ET.SubElement(osm, C.NODE)
        self.add_attributes(node, ignoredkeys=OSMNode.special_attributes)

        # Latitude and longitude are stored as scaled integers; decode
        # them to their decimal form for the response.
        for attr in OSMNode.special_attributes:
            node.attrib[attr] = decode_coordinate(self.get(attr))

        self.add_tags(node)
        return osm
231 |
class OSMWay(OSMElement):
    "An OSM way element."

    def __init__(self, elemid):
        super(OSMWay, self).__init__(C.WAY, elemid)
        super(OSMWay, self).__setitem__(C.NODES, set())

    def build_response(self, osm):
        "Append the XML representation of this way to 'osm'."

        way = ET.SubElement(osm, C.WAY)
        self.add_attributes(way, ignoredkeys=[C.NODES])

        # Emit one <nd> child per referenced node.
        for node_id in self.get(C.NODES, []):
            nd = ET.SubElement(way, C.ND)
            nd.attrib[C.REF] = str(node_id)

        self.add_tags(way)
        return osm

    def get_node_ids(self):
        "Return ids for the nodes associated with a way."
        return map(str, self[C.NODES])
256 |
class OSMRelation(OSMElement):
    "An OSM relation element."

    def __init__(self, elemid):
        super(OSMRelation, self).__init__(C.RELATION, elemid)
        super(OSMRelation, self).__setitem__(C.MEMBERS, [])

    def build_response(self, osm):
        "Append the XML representation of this relation to 'osm'."

        rel = ET.SubElement(osm, C.RELATION)

        self.add_attributes(rel, ignoredkeys=[C.MEMBERS])

        # Members are stored as (ref, role, type) triples.
        members = self.get(C.MEMBERS, [])
        for m in members:
            member = ET.SubElement(rel, C.MEMBER)
            (member.attrib[C.REF], member.attrib[C.ROLE],
             member.attrib[C.TYPE]) = m

        self.add_tags(rel)

        return osm

    def get_member_ids(self, namespace):
        """Return a set of member ids in the specified namespace.

        Bug fix: 'namespace' was previously ignored, so a request for
        e.g. node members also returned way and relation ids.
        """

        # NOTE(review): assumes each member's type field holds the same
        # strings as the namespace constants (C.NODE/C.WAY/C.RELATION);
        # confirm against the input loader.
        return set([str(mid) for (mid, mrole, mtype) in self[C.MEMBERS]
                    if mtype == namespace])
283 |
284 |
285 | #
286 | # Factory function.
287 | #
288 |
# Map each OSM namespace to the class implementing its elements; used
# by new_osm_element() and the sanity check in OSMElement.__init__().
_namespace_to_factory = {
    C.CHANGESET: OSMChangeSet,
    C.DATASTORE_CONFIG: OSMDatastoreConfig,
    C.GEODOC: OSMGeoDoc,
    C.NODE: OSMNode,
    C.WAY: OSMWay,
    C.RELATION: OSMRelation
}
297 |
def new_osm_element(namespace, elemid):
    '''Create an OSM element of the kind named by 'namespace'.

    namespace -- the OSM namespace.
    elemid -- element id for the element.

    Raises KeyError for an unknown namespace.
    '''

    return _namespace_to_factory[namespace](elemid)
308 |
309 | #
310 | # JSON representation of an OSM element.
311 | #
312 |
def decode_json(jsonvalue):
    "Parse the JSON text 'jsonvalue' into a Python object."
    return cjson.decode(jsonvalue)
316 |
def encode_json(obj):
    "Serialize the Python object 'obj' to its JSON text form."
    return cjson.encode(obj)
320 |
321 | #
322 | # Protobuf handling.
323 | #
324 |
# Probe for an optional protobuf library.  The encode/decode entry
# points are defined either way, but both currently raise
# NotImplementedError; only the error message differs.
try:
    import protobuf

    def _notimplemented(_):
        "Placeholder: the library is present but support is unwritten."
        raise NotImplementedError, "Protobuf support has not been written"

    decode_protobuf = _notimplemented
    encode_protobuf = _notimplemented

except ImportError:

    def _noprotobufs(pbuf):
        "Placeholder: the Protobuf libraries are not installed."
        raise NotImplementedError, "Protobuf libraries are not present"

    decode_protobuf = _noprotobufs
    encode_protobuf = _noprotobufs
342 |
--------------------------------------------------------------------------------
/src/python/datastore/lrucache.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2011 AOL Inc. All Rights Reserved.
2 | #
3 | # Permission is hereby granted, free of charge, to any person
4 | # obtaining a copy of this software and associated documentation files
5 | # (the "Software"), to deal in the Software without restriction,
6 | # including without limitation the rights to use, copy, modify, merge,
7 | # publish, distribute, sublicense, and/or sell copies of the Software,
8 | # and to permit persons to whom the Software is furnished to do so,
9 | # subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be
12 | # included in all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | # SOFTWARE.
22 |
23 | """A single-threaded cache supporting:
24 | - slab-based insertion of elements,
25 | - multiple namespaces,
26 | - lookups of individual elements in slabs,
27 | - LRU overflow of slabs.
28 | """
29 |
30 | import collections
31 | import threading
32 | import types
33 |
34 | from .slabutil import slabutil_make_slabkey
35 |
class BoundedLRUBuffer(collections.MutableMapping):
    """A bounded buffer with least-recently-used semantics.

    This buffer acts like a mapping with a bounded size. Key/value
    pairs can be added to the buffer as with a conventional mapping.
    Once the buffer reaches its size bound, additional inserts of
    key/value pairs will cause the least recently used key/value pair
    contained in the buffer to be ejected.

    The size of the bound and an optional callback for handling
    ejected items may be specified at buffer creation time.

    >>> b = BoundedLRUBuffer(bound=16, callback=None)

    Key/value pairs are added to buffer as for a conventional mapping.

    >>> b['key'] = 'value'
    >>> len(b)
    1

    Normal mapping operations are supported.

    >>> 'key' in b
    True

    The 'pop()' method retrieves the least recently used key/value
    pair from the buffer.

    >> (k,v) = b.pop() # Returns the least recently used pair.

    Lookups and assignments of keys mark them as being most recently
    used.

    >>> v = b['key'] # 'key' becomes most recently used.
    >>> b['key'] = 'newvalue' # 'key' becomes most recently used.

    If a 'callback' had been specified at object creation time, it
    will be invoked with the ejected key/value pair as arguments.

    >>> def handle_overflow(key, value):
    ...     # Handle overflow here.
    ...     pass
    >>> b = BoundedLRUBuffer(callback=handle_overflow)

    The 'flush()' method may be used to empty the buffer.

    >>> b.flush()
    >>> len(b)
    0
    """

    # Recency is tracked with monotonically increasing integer indices:
    # 'values' maps index -> (key, value), 'indices' maps key -> index.
    # The entry with the smallest live index is the least recently used.

    # Methods implementing the mapping protocol.

    def __init__(self, bound=65536, callback=None):
        """Create a buffer holding at most 'bound' entries.

        callback -- optional; invoked as callback(key, value) for each
                    pair ejected on overflow or written back by flush().
        """

        assert type(bound) is types.IntType
        self.bound = bound # Max size.

        self.callback = callback
        self.indices = {} # Map of keys to indices
        self.values = {} # Map of indices to values.
        self.count = 0 # The number of entries in the buffer.
        self.first = -1 # Smallest index in use.
        self.next = 0 # Next index to use.

    def __str__(self):
        "Return a debugging representation listing the buffered keys."
        return "BoundedLRUBuffer(%d){%s}" % \
            (self.bound, ",".join(self.indices.keys()))

    def __contains__(self, key):
        "Return True if 'key' is buffered (does not affect recency)."
        return key in self.indices

    def __delitem__(self, key):
        "Remove 'key' from the buffer; raises KeyError if absent."
        index = self.indices[key]
        self._remove(index)

    def __getitem__(self, key):
        """Retrieve the item named by 'key' from the buffer.

        The value returned is pushed to the head of the buffer."""

        entry_index = self.indices[key]

        # Re-insert under a fresh index to mark it most recently used.
        (_, entry) = self._remove(entry_index)

        next_index = self._next_index(entry_index)
        self._insert(key, entry, next_index)

        return entry

    def __iter__(self):
        "Iterate over the buffered keys (no recency update)."
        return iter(self.indices)

    def __len__(self):
        "Compute the number of items in the buffer."
        v = self.count
        assert v == len(self.indices)
        assert v == len(self.values)
        return v

    def __setitem__(self, key, value):
        """Store an item indexed by argument 'key'.

        May eject the least recently used pair; if a callback was
        configured it is invoked with the ejected (key, value).
        """

        if key in self.indices:
            index = self.indices[key]
            self._remove(index)
        else:
            index = None
        next_index = self._next_index(index)
        self._insert(key, value, next_index)
        ejected = self._maybe_eject()

        if self.callback and ejected is not None:
            self.callback(*ejected)

    def pop(self):
        "Return the least recently used (key, value) pair in the buffer."
        k, v = self._pop()
        return (k, v)


    # Additional method.

    def flush(self):
        "Write back the contents of the LRU buffer via the callback."
        while self.count > 0:
            k, v = self._pop()
            if self.callback:
                self.callback(k, v)


    # Internal helper functions.

    def _insert(self, key, value, lru):
        "Insert a key/value pair at the specified LRU index."
        self.values[lru] = (key, value)
        self.indices[key] = lru
        self.count += 1

    def _remove(self, index):
        "Remove and return the (key, value) pair at LRU index 'index'."
        key, value = self.values.pop(index)
        assert index == self.indices[key]
        self.indices.pop(key)
        self.count -= 1
        return (key, value)

    def _maybe_eject(self):
        "Eject and return the LRU pair if over bound, else None."
        if self.count <= self.bound:
            return None

        # Find the least recently used item; skip indices freed by
        # earlier removals.
        while self.first < self.next and not (self.first in self.values):
            self.first += 1
        assert self.first < self.next, "Empty buffer"
        return self._remove(self.first)

    def _next_index(self, index=None):
        "Compute an optimal index number for storing an element."
        # Optimize the case where we overwrite the most recently added
        # value.
        if index is not None and index == self.next - 1:
            return index
        index = self.next
        self.next += 1
        return index

    def _pop(self):
        "Remove and return the least recently used (key, value) pair."
        # Find the first live index.
        while not (self.first in self.values) and \
                  self.first < self.next:
            self.first += 1
        # Look for an empty buffer.
        if self.first == self.next:
            raise IndexError, "pop from empty buffer"
        return self._remove(self.first)
213 |
214 |
class LRUCache:
    """An LRU cache of slabs, addressable by individual item.

    Whole slabs (see datastore.slabutil) are inserted and ejected as
    units via a BoundedLRUBuffer, while get() looks up single items
    inside their containing slab.  An optional callback(slabkey,
    slabdesc) is invoked when a slab is ejected or flushed.
    """

    def __init__(self, bound=65536, callback=None):
        # Slab-key -> slab-descriptor buffer; ejections go via _lrucb().
        self.bound = bound
        self.lru_cache = BoundedLRUBuffer(bound, self._lrucb)
        # Map of (namespace, item key) -> slab key, for item lookups.
        self.lru_key = {}
        self.callback = callback

    def __len__(self):
        # The length is the number of cached *items*, not slabs.
        return len(self.lru_key)

    def _lrucb(self, slabkey, slabdesc):
        "Handle ejection of a slab: drop its items, notify the client."
        assert slabkey not in self.lru_cache
        self._remove_slab_items(slabdesc)
        if self.callback:
            self.callback(slabkey, slabdesc)

    def _remove_slab_items(self, slabdesc):
        "Remove the item-level index entries for every key in a slab."
        ns = slabdesc.namespace
        for k in slabdesc.keys():
            del self.lru_key[(ns,k)]

    def get(self, namespace, key):
        """Look up a single item.

        Returns None when the key maps to no known slab, (False, key)
        when the slab index exists but the slab is gone, or otherwise
        the result of the slab's own get() for the item key.
        """
        try:
            lrukey = self.lru_key[(namespace,key)]
        except KeyError: # No such slab.
            return None
        slabdesc = self.lru_cache.get(lrukey)
        if slabdesc:
            return slabdesc.get(key) # Get item in the slab.
        else:
            return (False, key) # No such slab.

    def get_slab(self, namespace, key):
        "Return the slab descriptor for a key, or None if not cached."
        try:
            slabkey = self.lru_key[(namespace, key)]
        except KeyError:
            return None
        return self.lru_cache[slabkey]


    def insert_slab(self, slabdesc):
        "Insert items from a slab; raises on duplicate slabs or items."
        slabkey = slabdesc.slabkey
        if slabkey in self.lru_cache:
            raise ValueError, "Duplicate insertion of slab: %s" % str(slabkey)
        self.lru_cache[slabkey] = slabdesc
        ns = slabdesc.namespace
        for k in slabdesc.keys():
            itemkey = (ns,k)
            if itemkey in self.lru_key:
                raise KeyError, "Duplicate insertion of (%s,%s)" % (ns,k)
            self.lru_key[itemkey] = slabkey

    def remove_slab(self, slabdesc):
        "Remove a slab from the cache."

        slabkey = slabdesc.slabkey
        assert slabkey in self.lru_cache
        self._remove_slab_items(slabdesc)
        del self.lru_cache[slabkey]

    def flush(self):
        "Flush the contents of the cache, invoking the callback per slab."

        self.lru_cache.flush()

        assert len(self.lru_cache) == 0
        assert len(self.lru_key) == 0
287 |
class LRUIOCache(LRUCache):
    """An LRU cache that tracks I/O-in-flight progress of items."""

    def __init__(self, bound=65536, callback=None):
        # Intercept slab ejections with _iocb() so the ejected slab is
        # recorded as having I/O (write-back) in flight.
        LRUCache.__init__(self, bound, self._iocb)
        self.iocallback = callback
        self.iocond = threading.Condition() # Guards 'iopending'.
        self.iopending = [] # Slab keys with I/O in flight.

    def _iocb(self, slabkey, slabdesc):
        "Mark an ejected slab as I/O-pending and notify the client."
        assert slabkey == slabdesc.slabkey
        with self.iocond:
            assert slabkey not in self.iopending
            self.iopending.append(slabkey)
        if self.iocallback:
            self.iocallback(slabkey, slabdesc)

    def get(self, namespace, key):
        """Retrieve an item from the cache.

        If an item is missing from the cache, wait for pending I/O to
        complete.

        NOTE(review): after the wait, 'v' is still the original None;
        callers presumably retry the lookup (e.g. against the backing
        store) once pending I/O has completed -- confirm.
        """
        v = LRUCache.get(self, namespace, key)
        if v is None:
            slabkey = slabutil_make_slabkey(namespace, key)
            with self.iocond:
                while slabkey in self.iopending:
                    self.iocond.wait()
        return v

    def isiopending(self, slabkey):
        "Return True if I/O is pending on a slab."
        with self.iocond:
            status = slabkey in self.iopending
        return status

    def iodone(self, slabkey):
        "Mark I/O on a slabkey as completed and wake any waiters."
        with self.iocond:
            assert slabkey in self.iopending
            self.iopending.remove(slabkey)
            self.iocond.notifyAll()
331 |
--------------------------------------------------------------------------------