├── .gitignore ├── Dockerfile ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── buildout.cfg ├── docs ├── conf.py ├── index.txt ├── install.txt └── queries │ ├── orm │ └── README.md │ └── sql │ ├── README.md │ └── vehicle_queries.sql ├── gtfsdb ├── __init__.py ├── api.py ├── config.py ├── configs │ └── app.ini ├── data │ ├── route_filter.txt │ └── route_type.txt ├── model │ ├── __init__.py │ ├── agency.py │ ├── base.py │ ├── block.py │ ├── calendar.py │ ├── db.py │ ├── fare.py │ ├── feed_info.py │ ├── frequency.py │ ├── gtfs.py │ ├── pattern.py │ ├── pattern_base.py │ ├── route.py │ ├── route_base.py │ ├── route_stop.py │ ├── route_stop_base.py │ ├── shape.py │ ├── stop.py │ ├── stop_base.py │ ├── stop_feature.py │ ├── stop_time.py │ ├── transfer.py │ ├── translation.py │ └── trip.py ├── scripts.py ├── tests │ ├── __init__.py │ ├── base.py │ ├── large-sample-feed.zip │ ├── multi-date-feed.zip │ ├── sample-feed.zip │ ├── test_current.py │ ├── test_dates.py │ ├── test_geom_queries.py │ ├── test_load.py │ └── test_model.py └── util.py ├── setup.py └── versions.cfg /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | [a-z].py 3 | *.py[cod] 4 | .settings 5 | COMMIT_EDITMSG 6 | .git/ 7 | .git/* 8 | *.old 9 | .*# 10 | .#* 11 | *# 12 | #* 13 | .idea 14 | *.iml 15 | gtfs.zip 16 | .DS_Store 17 | **/tests/*feed 18 | 19 | # C extensions 20 | *.so 21 | 22 | # Packages 23 | *.egg 24 | *.egg-info 25 | dist 26 | build 27 | eggs 28 | parts 29 | bin 30 | var 31 | sdist 32 | develop-eggs 33 | .installed.cfg 34 | lib 35 | lib64 36 | 37 | # Installer logs 38 | pip-log.txt 39 | 40 | # Unit test / coverage reports 41 | .coverage 42 | .tox 43 | nosetests.xml 44 | 45 | # Translations 46 | *.mo 47 | 48 | # Mr Developer 49 | .mr.developer.cfg 50 | .project 51 | .pydevproject 52 | .*~ 53 | *.bak 54 | *.db 55 | *.sqlite 56 | *.db-journal 57 | nohup.out 58 | [a-z].json 59 | .git 60 | .git/FETCH_HEAD 61 | gtfsdb.db.bkup 62 | x 63 | t 64 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | RUN pip install zc.buildout psycopg2-binary 4 | 5 | WORKDIR /app 6 | COPY . . 7 | 8 | RUN buildout install prod postgresql 9 | 10 | ENTRYPOINT ["bin/gtfsdb-load"] 11 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. 
"License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. 
Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. 
Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. 
Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. 
Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. 
However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include gtfsdb/configs * 2 | recursive-include gtfsdb/data * 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | GTFSDB 3 | =========== 4 | 5 | 6 | .. image:: https://badges.gitter.im/Join%20Chat.svg 7 | :alt: Join the chat at https://gitter.im/OpenTransitTools/gtfsdb 8 | :target: https://gitter.im/OpenTransitTools/gtfsdb?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 9 | 10 | 11 | Supported Databases 12 | ******************* 13 | 14 | * PostgreSQL (PostGIS for Geo tables) - preferred 15 | * Oracle - tested 16 | * MySQL - tested 17 | * SQLite - tested 18 | 19 | 20 | GTFS (General Transit Feed Specification) Database 21 | ************************************************** 22 | 23 | Python code that will load GTFS data into a relational database, and SQLAlchemy ORM bindings to the GTFS tables in the gtfsdb. The gtfsdb project's focus is on making GTFS data available in a programmatic context for software developers. The need for the gtfsdb project comes from the fact that a lot of developers start out a GTFS-related effort by first building some amount of code to read GTFS data (whether that's an in-memory loader, a database loader, etc...); GTFSDB can hopefully reduce the need for such drudgery, and give developers a starting point beyond the first step of dealing with GTFS in .csv file format. 24 | 25 | Available on pypi: https://pypi.python.org/pypi/gtfsdb 26 | 27 | 28 | Install from source via github (if you want the latest code) : 29 | ************************************************************** 30 | 31 | #. 
Install Python 3.x https://www.python.org/downloads/ (code also runs on 2.7 if you are stuck on that version) 32 | #. `pip install zc.buildout` - https://pypi.org/project/zc.buildout 33 | #. (optional step for **postgres users**: 'pip install psycopg2-binary') 34 | #. git clone https://github.com/OpenTransitTools/gtfsdb.git 35 | #. cd gtfsdb 36 | #. buildout install prod -- NOTE: if you're using postgres, do a 'buildout install prod postgresql' 37 | #. bin/gtfsdb-load --database_url 38 | #. examples: 39 | 40 | * bin/gtfsdb-load --database_url sqlite:///gtfs.db gtfsdb/tests/large-sample-feed.zip 41 | 42 | * bin/gtfsdb-load --database_url sqlite:///gtfs.db http://developer.trimet.org/schedule/gtfs.zip 43 | 44 | * bin/gtfsdb-load --database_url postgresql://postgres@localhost:5432 --is_geospatial http://developer.trimet.org/schedule/gtfs.zip 45 | 46 | .. note:: adding the `is_geospatial` cmdline flag, when paired with a spatial-database ala PostGIS (e.g., is_spatial is meaningless with sqlite), will take longer to load...but will create geometry columns for both rendering and calculating nearest distances, etc... 47 | 48 | #. view db ( example: https://sqliteonline.com ) 49 | 50 | The best way to get gtfsdb up and running is via the 'zc.buildout' tool. Highly recommended to first install 51 | buildout (e.g., pip install zc.buildout) before doing much of anything else. 52 | 53 | Postgres users, gtfsdb requires the psycopg2-binary database driver. Installing that via `pip install psycopg2-binary` will relieve gtfsdb from re-installing locally as part of the build. And if, after the fact, you see *exceptions* mentioning psycopg2, see the note below. 54 | 55 | .. note:: if you get the message "ImportError: No module named psycopg2", then 'pip install psycopg2-binary' should fix things. (Assumes you have postgres also installed on the machine you're trying to use the pg driver). 56 | 57 | 58 | Usage with Docker: 59 | ****************** 60 | 61 | #. Build the image with `docker build -t gtfsdb .` 62 | #. 
Run it with: 63 | 64 | .. code-block:: bash 65 | 66 | docker run gtfsdb --database_url 67 | 68 | .. note:: The entrypoint command is `bin/gtfsdb-load` so the arguments will be passed to it. 69 | 70 | 71 | Example Queries: 72 | **************** 73 | 74 | * get first stop time of each trip for route_id 1 75 | 76 | .. code-block:: sql 77 | 78 | select * 79 | from trips t, stop_times st 80 | where t.route_id = '1' 81 | and t.trip_id = st.trip_id 82 | and st.stop_sequence = 1 83 | 84 | * get agency name and number of routes 85 | 86 | .. code-block:: sql 87 | 88 | select a.agency_name, a.agency_id, count(r.route_id) 89 | from routes r, agency a 90 | where r.agency_id = a.agency_id 91 | group by a.agency_id, a.agency_name 92 | order by 3 desc 93 | -------------------------------------------------------------------------------- /buildout.cfg: -------------------------------------------------------------------------------- 1 | [buildout] 2 | extends = versions.cfg 3 | update-versions-file = versions.cfg 4 | parts = dev postgresql prod testrunner 5 | develop = . 
6 | app-egg-name = gtfsdb 7 | newest = false 8 | include-site-packages = true 9 | allowed-eggs-from-site-packages = psycopg2-binary Setuptools zc.buildout 10 | prefer-final = true 11 | 12 | [dev] 13 | recipe = zc.recipe.egg 14 | dependent-scripts = true 15 | eggs = gtfsdb[dev] 16 | interpreter = python 17 | 18 | [oracle] 19 | recipe = zc.recipe.egg 20 | dependent-scripts = true 21 | eggs = gtfsdb[oracle] 22 | interpreter = python 23 | 24 | [postgresql] 25 | recipe = zc.recipe.egg 26 | dependent-scripts = true 27 | eggs = gtfsdb[postgresql] 28 | interpreter = python 29 | 30 | [prod] 31 | recipe = zc.recipe.egg 32 | dependent-scripts = true 33 | eggs = gtfsdb 34 | interpreter = python 35 | 36 | [testrunner] 37 | recipe = zc.recipe.testrunner 38 | eggs = ${prod:eggs} 39 | script = test 40 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # GTFSDB documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Apr 21 13:47:38 2010. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.append(os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # Add any Sphinx extension module names here, as strings. 
They can be extensions 24 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 25 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.coverage'] 26 | 27 | # Add any paths that contain templates here, relative to this directory. 28 | templates_path = ['_templates'] 29 | 30 | # The suffix of source filenames. 31 | source_suffix = '.txt' 32 | 33 | # The encoding of source files. 34 | #source_encoding = 'utf-8' 35 | 36 | # The master toctree document. 37 | master_doc = 'index' 38 | 39 | # General information about the project. 40 | project = u'GTFSDB' 41 | copyright = u'2017, Mike Gilligan, Frank Purcell' 42 | 43 | # The version info for the project you're documenting, acts as replacement for 44 | # |version| and |release|, also used in various other places throughout the 45 | # built documents. 46 | # 47 | # The short X.Y version. 48 | version = '0.1' 49 | # The full version, including alpha/beta/rc tags. 50 | release = '0.1a1' 51 | 52 | # The language for content autogenerated by Sphinx. Refer to documentation 53 | # for a list of supported languages. 54 | #language = None 55 | 56 | # There are two options for replacing |today|: either, you set today to some 57 | # non-false value, then it is used: 58 | #today = '' 59 | # Else, today_fmt is used as the format for a strftime call. 60 | #today_fmt = '%B %d, %Y' 61 | 62 | # List of documents that shouldn't be included in the build. 63 | #unused_docs = [] 64 | 65 | # List of directories, relative to source directory, that shouldn't be searched 66 | # for source files. 67 | exclude_trees = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 
77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. Major themes that come with 93 | # Sphinx are currently 'default' and 'sphinxdoc'. 94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 
123 | #html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_use_modindex = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, an OpenSearch description file will be output, and all pages will 153 | # contain a tag referring to it. The value of this option must be the 154 | # base URL from which the finished HTML is served. 155 | #html_use_opensearch = '' 156 | 157 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 158 | #html_file_suffix = '' 159 | 160 | # Output file base name for HTML help builder. 161 | htmlhelp_basename = 'GTFSDBdoc' 162 | 163 | 164 | # -- Options for LaTeX output -------------------------------------------------- 165 | 166 | # The paper size ('letter' or 'a4'). 167 | #latex_paper_size = 'letter' 168 | 169 | # The font size ('10pt', '11pt' or '12pt'). 170 | #latex_font_size = '10pt' 171 | 172 | # Grouping the document tree into LaTeX files. List of tuples 173 | # (source start file, target name, title, author, documentclass [howto/manual]). 
174 | latex_documents = [ 175 | ('index', 'GTFSDB.tex', u'GTFSDB Documentation', 176 | u'Mike Gilligan, Frank Purcell', 'manual'), 177 | ] 178 | 179 | # The name of an image file (relative to this directory) to place at the top of 180 | # the title page. 181 | #latex_logo = None 182 | 183 | # For "manual" documents, if this is true, then toplevel headings are parts, 184 | # not chapters. 185 | #latex_use_parts = False 186 | 187 | # Additional stuff for the LaTeX preamble. 188 | #latex_preamble = '' 189 | 190 | # Documents to append as an appendix to all manuals. 191 | #latex_appendices = [] 192 | 193 | # If false, no module index is generated. 194 | #latex_use_modindex = True 195 | -------------------------------------------------------------------------------- /docs/index.txt: -------------------------------------------------------------------------------- 1 | .. GTFSDB documentation master file, created by 2 | sphinx-quickstart on Wed Apr 21 13:47:38 2010. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | GTFSDB Documentation 7 | ==================== 8 | 9 | There's not much here yet, but there will be soon! 10 | 11 | Contents: 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | 16 | install 17 | 18 | 19 | Indices and tables 20 | ================== 21 | 22 | * :ref:`genindex` 23 | * :ref:`modindex` 24 | * :ref:`search` 25 | -------------------------------------------------------------------------------- /docs/install.txt: -------------------------------------------------------------------------------- 1 | GTFSDB Installation 2 | =================== 3 | 4 | # ~/gtfsdb-0.1.6 $ 5 | python build.py build 6 | 7 | Install only for this user. Python packages install to $HOME/.local 8 | (looks like the local directory we used before, but its different, 9 | note the dot before local), so you'll need to add $HOME/.local/bin to 10 | your shell's path. 
11 | 12 | # ~/gtfsdb-0.1.6 $ 13 | export PATH=$PATH:$HOME/.local/bin 14 | 15 | # ~/gtfsdb-0.1.6 $ 16 | python build.py install --user 17 | 18 | Gtfsdb requires the psychopg2 database driver. If it is not available 19 | in your linux package system, download & install locally using the 20 | same technique as for gtfsdb. It will require the python and postgresql 21 | development packages in order to build. 22 | https://pypi.python.org/pypi/psycopg2/ 23 | 24 | Gtfsdb also requires geoalchemy 0.7.2, and sqlalchemy 0.8.4 (not 0.9.x 25 | which are the newest version) availabe at 26 | https://pypi.python.org/pypi/GeoAlchemy/0.7.2 and 27 | https://pypi.python.org/pypi/SQLAlchemy/0.8.4 respectively, and are 28 | both installed using the same "build", "install --user" technique. You 29 | may have to remove the SQLAlchemy version 0.9.x if that had been 30 | installed automatically by accident, in order for gtfsdb to find the 31 | correct 0.8.4 version. 32 | 33 | -------------------------------------------------------------------------------- /docs/queries/orm/README.md: -------------------------------------------------------------------------------- 1 | GTFSDB ORM Queries 2 | ================== 3 | 4 | Stop query examples: 5 | --- 6 | 1. X 7 | 1. Y 8 | 1. Z 9 | 10 | 11 | Vehicle Position query examples: 12 | --- 13 | 1. X 14 | 1. Y 15 | 1. Z 16 | 17 | -------------------------------------------------------------------------------- /docs/queries/sql/README.md: -------------------------------------------------------------------------------- 1 | GTFSDB SQL Queries 2 | ================== 3 | 4 | Stop query examples: 5 | --- 6 | 1. Simple stop /q: `select * from trimet.stops limit 1;` 7 | 1. 8 | 1. Z 9 | 10 | 11 | Vehicle Position query examples: 12 | --- 13 | 1. https://medium.com/@mondaymaps/making-an-animated-transit-map-with-gtfs-data-9764da09c08d 14 | 1. Y 15 | 1. 
Z 16 | 17 | -------------------------------------------------------------------------------- /docs/queries/sql/vehicle_queries.sql: -------------------------------------------------------------------------------- 1 | 2 | -- 3 | -- how to find stop time nearest to a date & time 4 | -- TODO - min / max / etc... ??? 5 | -- 6 | select * 7 | from trimet.trips t 8 | where t.service_id in (select service_id from trimet.universal_calendar uc where uc.date in ('2018-08-08', '2018-08-09')) 9 | and t.route_id = '20' 10 | and t.trip_id = st.trip_id 11 | and '12:13:00' > st.arrival_time 12 | limit 5; 13 | -------------------------------------------------------------------------------- /gtfsdb/__init__.py: -------------------------------------------------------------------------------- 1 | from gtfsdb.model.db import Database 2 | from gtfsdb.model.gtfs import GTFS 3 | 4 | from gtfsdb.model.agency import Agency # noqa 5 | from gtfsdb.model.calendar import * # noqa 6 | from gtfsdb.model.fare import * # noqa 7 | from gtfsdb.model.feed_info import FeedInfo # noqa 8 | from gtfsdb.model.frequency import Frequency # noqa 9 | from gtfsdb.model.route import * # noqa 10 | from gtfsdb.model.route_stop import * # noqa 11 | from gtfsdb.model.shape import * # noqa 12 | from gtfsdb.model.pattern import * # noqa 13 | from gtfsdb.model.pattern_base import * # noqa 14 | from gtfsdb.model.stop import * # noqa 15 | from gtfsdb.model.stop_feature import * # noqa 16 | from gtfsdb.model.stop_time import StopTime # noqa 17 | from gtfsdb.model.transfer import Transfer # noqa 18 | from gtfsdb.model.translation import Translation # noqa 19 | from gtfsdb.model.trip import Trip # noqa 20 | from gtfsdb.model.block import Block # noqa 21 | 22 | 23 | SORTED_CLASS_NAMES = [ 24 | RouteType.__name__, 25 | RouteFilter.__name__, 26 | FeedInfo.__name__, 27 | Agency.__name__, 28 | Block.__name__, 29 | Calendar.__name__, 30 | CalendarDate.__name__, 31 | Route.__name__, 32 | RouteDirection.__name__, 33 | Stop.__name__, 
34 | StopFeature.__name__, 35 | Transfer.__name__, 36 | Shape.__name__, 37 | Pattern.__name__, 38 | PatternBase.__name__, 39 | Trip.__name__, 40 | StopTime.__name__, 41 | RouteStop.__name__, 42 | Frequency.__name__, 43 | FareAttribute.__name__, 44 | FareRule.__name__, 45 | UniversalCalendar.__name__, 46 | Translation.__name__, 47 | ] 48 | 49 | 50 | CURRENT_CLASS_NAMES = [ 51 | CurrentRoutes.__name__, 52 | CurrentRouteStops.__name__, 53 | CurrentStops.__name__, 54 | ] -------------------------------------------------------------------------------- /gtfsdb/api.py: -------------------------------------------------------------------------------- 1 | from gtfsdb import Database, GTFS 2 | 3 | 4 | def database_load(filename, **kwargs): 5 | """ 6 | Basic API to load a GTFS zip file into a database 7 | 8 | arguments: 9 | filename: URL or local path to GTFS zip file 10 | 11 | keyword arguments: 12 | batch_size: record batch size for memory management 13 | is_geospatial: if database is support geo functions 14 | schema: database schema name 15 | tables: limited list of tables to load 16 | url: SQLAlchemy database url 17 | """ 18 | db = Database(**kwargs) 19 | db.create() 20 | gtfs = GTFS(filename) 21 | gtfs.load(db, **kwargs) 22 | return db 23 | -------------------------------------------------------------------------------- /gtfsdb/config.py: -------------------------------------------------------------------------------- 1 | try: 2 | from ConfigParser import ConfigParser 3 | except ImportError: 4 | from configparser import ConfigParser 5 | 6 | import os 7 | import logging.config 8 | from pkg_resources import resource_filename # @UnresolvedImport 9 | 10 | 11 | """ parse configuration file and setup logging """ 12 | config = ConfigParser() 13 | ini_file = os.path.join(resource_filename('gtfsdb', 'configs'), 'app.ini') 14 | config.read(ini_file) 15 | if config.has_section('loggers'): 16 | logging.config.fileConfig(ini_file, disable_existing_loggers=False) 17 | 18 | 19 | """ 
application defaults """ 20 | DEFAULT_BATCH_SIZE = 10000 21 | DEFAULT_DATABASE_URL = 'sqlite://' 22 | DEFAULT_IS_GEOSPATIAL = False 23 | DEFAULT_SCHEMA = None 24 | 25 | 26 | """ data source constants """ 27 | DATASOURCE_GTFS = 1 28 | DATASOURCE_LOOKUP = 2 29 | DATASOURCE_DERIVED = 3 30 | 31 | 32 | """ geometry constants """ 33 | SRID = 4326 34 | -------------------------------------------------------------------------------- /gtfsdb/configs/app.ini: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys = root 3 | 4 | [handlers] 5 | keys = console 6 | 7 | [formatters] 8 | keys = generic 9 | 10 | [logger_root] 11 | handlers = console 12 | level = DEBUG 13 | 14 | [handler_console] 15 | class = StreamHandler 16 | args = (sys.stdout,) 17 | formatter = generic 18 | 19 | [formatter_generic] 20 | datefmt = %H:%M:%S 21 | format = %(asctime)s,%(msecs)03d %(levelname)-5.5s [%(name)s] %(message)s 22 | -------------------------------------------------------------------------------- /gtfsdb/data/route_filter.txt: -------------------------------------------------------------------------------- 1 | route_id,agency_id,description 2 | XXX_TEST_XXX,*,RouteFilter (table) can be used to block a route(s) from the active list or routes 3 | -------------------------------------------------------------------------------- /gtfsdb/data/route_type.txt: -------------------------------------------------------------------------------- 1 | route_type,otp_type,route_type_name,route_type_desc 2 | 0,"TRAM","Tram, Streetcar, Light rail","Any light rail or street level system within a metropolitan area" 3 | 1,"SUBWAY","Subway, Metro","Any underground rail system within a metropolitan area" 4 | 2,"RAIL","Rail","Used for intercity or long-distance travel" 5 | 3,"BUS","Bus","Used for short- and long-distance bus routes" 6 | 4,"FERRY","Ferry","Used for short- and long-distance boat service" 7 | 5,"CABLE_CAR","Cable car","Used for street-level cable cars 
where the cable runs beneath the car" 8 | 6,"GONDOLA","Gondola, Suspended cable car","Typically used for aerial cable cars where the car is suspended from the cable" 9 | 7,"FUNICULAR","Funicular","Any rail system designed for steep inclines" -------------------------------------------------------------------------------- /gtfsdb/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTransitTools/gtfsdb/38c20e1e83577a2d0efb7b20ad3e262d27c88168/gtfsdb/model/__init__.py -------------------------------------------------------------------------------- /gtfsdb/model/agency.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, Sequence 2 | from sqlalchemy.types import Integer, String 3 | 4 | from gtfsdb import config 5 | from gtfsdb.model.base import Base 6 | 7 | 8 | class Agency(Base): 9 | datasource = config.DATASOURCE_GTFS 10 | filename = 'agency.txt' 11 | 12 | __tablename__ = 'agency' 13 | 14 | id = Column(Integer, Sequence(None, optional=True), primary_key=True, nullable=True) 15 | agency_id = Column(String(255), index=True, unique=True) 16 | agency_name = Column(String(255), nullable=False) 17 | agency_url = Column(String(255), nullable=False) 18 | agency_timezone = Column(String(50), nullable=False) 19 | agency_lang = Column(String(10)) 20 | agency_phone = Column(String(50)) 21 | agency_fare_url = Column(String(255)) 22 | agency_email = Column(String(255)) 23 | -------------------------------------------------------------------------------- /gtfsdb/model/base.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import resource_filename # @UnresolvedImport 2 | 3 | from gtfsdb import config, util 4 | from sqlalchemy.ext.declarative import declarative_base 5 | from sqlalchemy.orm import object_session 6 | 7 | import csv 8 | import datetime 9 | import os 10 | import 
sys 11 | import time 12 | 13 | import logging 14 | log = logging.getLogger(__name__) 15 | 16 | 17 | try: 18 | unicode = unicode 19 | except NameError: 20 | # 'unicode' is undefined, must be Python 3 21 | str = str 22 | unicode = str 23 | bytes = bytes 24 | basestring = (str, bytes) 25 | else: 26 | # 'unicode' exists, must be Python 2 27 | str = str 28 | unicode = unicode 29 | bytes = str 30 | basestring = basestring 31 | 32 | 33 | class _Base(object): 34 | 35 | filename = None 36 | 37 | @property 38 | def session(self): 39 | #import pdb; pdb.set_trace() 40 | ret_val = None 41 | try: 42 | ret_val = object_session(self) 43 | except: 44 | log.warning("can't get a session from object") 45 | return ret_val 46 | 47 | @classmethod 48 | def set_schema(cls, schema): 49 | cls.__table__.schema = schema 50 | 51 | @classmethod 52 | def get_schema(cls, def_val=None): 53 | ret_val = def_val 54 | if hasattr(cls, '__table__') and cls.__table__.schema: 55 | ret_val = cls.__table__.schema 56 | return ret_val 57 | 58 | @classmethod 59 | def make_geom_lazy(cls): 60 | from sqlalchemy.orm import deferred 61 | try: 62 | cls.__mapper__.add_property('geom', deferred(cls.__table__.c.geom)) 63 | except Exception as e: 64 | log.warning(e) 65 | 66 | @classmethod 67 | def from_dict(cls, attrs): 68 | clean_dict = cls.make_record(attrs) 69 | return cls(**clean_dict) 70 | 71 | @property 72 | def to_dict(self): 73 | """ 74 | convert a SQLAlchemy object into a dict that is serializable to JSON 75 | """ 76 | ret_val = self.__dict__.copy() 77 | 78 | """ not crazy about this hack, but ... the __dict__ on a SQLAlchemy 79 | object contains hidden crap that we delete from the class dict 80 | """ 81 | if set(['_sa_instance_state']).issubset(ret_val): 82 | del ret_val['_sa_instance_state'] 83 | 84 | """ we're using 'created' as the date parameter, so convert values 85 | to strings : better would be to detect date & datetime objects, 86 | and convert those... 
87 | """ 88 | if set(['created']).issubset(ret_val): 89 | ret_val['created'] = ret_val['created'].__str__() 90 | 91 | return ret_val 92 | 93 | def get_up_date_name(self, attribute_name): 94 | """ 95 | return attribute name of where we'll store an update variable 96 | """ 97 | return "{0}_update_utc".format(attribute_name) 98 | 99 | def is_cached_data_valid(self, attribute_name, max_age=2): 100 | """ 101 | we have to see both the attribute name exist in our object, as well as 102 | that object having a last update date (@see update_cached_data below) 103 | and that update date being less than 2 days ago... 104 | """ 105 | ret_val = False 106 | try: 107 | # import pdb; pdb.set_trace() 108 | if hasattr(self, attribute_name): 109 | attribute_update = self.get_up_date_name(attribute_name) 110 | if hasattr(self, attribute_update): 111 | epoch = datetime.datetime.utcfromtimestamp(0) 112 | delta = getattr(self, attribute_update) - epoch 113 | if delta.days <= max_age: 114 | ret_val = True 115 | except: 116 | log.warning("is_cached_data_valid(): saw a cache exception with attribute {0}".format(attribute_name)) 117 | ret_val = False 118 | 119 | return ret_val 120 | 121 | def update_cached_data(self, attribute_name): 122 | """ 123 | """ 124 | try: 125 | # import pdb; pdb.set_trace() 126 | attribute_update = self.get_up_date_name(attribute_name) 127 | setattr(self, attribute_update, datetime.datetime.now()) 128 | except: 129 | log.warning("update_cached_data(): threw an exception with attribute {0}".format(attribute_name)) 130 | 131 | @classmethod 132 | def load(cls, db, **kwargs): 133 | """ 134 | Load method for ORM 135 | 136 | arguments: 137 | db: instance of gtfsdb.Database 138 | 139 | keyword arguments: 140 | gtfs_directory: path to unzipped GTFS files 141 | batch_size: batch size for memory management 142 | """ 143 | 144 | # step 0: set up some vars, including setting the log output to show the child of base that we're processing 145 | start_time = time.time() 146 | 
batch_size = kwargs.get('batch_size', config.DEFAULT_BATCH_SIZE) 147 | log = logging.getLogger(cls.__module__) 148 | 149 | # step 1: check that we have elements of a file path (a file name and a directory) for the data we'll load 150 | if cls.filename is None: 151 | if cls.datasource is not config.DATASOURCE_DERIVED: 152 | log.info("{0} didn't specify a 'filename', so won't bohter trying to load() a null file (early exit from load()).".format(cls.__name__)) 153 | return # note early exit 154 | if cls.datasource is not config.DATASOURCE_GTFS and cls.datasource is not config.DATASOURCE_LOOKUP: 155 | log.info("{0}.datasource != DATASOURCE_GTFS or DATASOURCE_LOOKUP (exit load).".format(cls.__name__)) 156 | return # note early exit 157 | 158 | # step 2: load either a GTFS file from the unzipped file or a resource file (from a dir specified in config) 159 | directory = None 160 | if cls.datasource == config.DATASOURCE_GTFS: 161 | directory = kwargs.get('gtfs_directory') 162 | elif cls.datasource == config.DATASOURCE_LOOKUP: 163 | directory = resource_filename('gtfsdb', 'data') 164 | 165 | # step 3: load the file 166 | log.info("load {0}".format(cls.__name__)) 167 | records = [] 168 | file_path = os.path.join(directory, cls.filename) 169 | if os.path.exists(file_path): 170 | if sys.version_info >= (3, 0): 171 | f = open(file_path, 'rb') 172 | else: 173 | f = open(file_path, 'r') 174 | utf8_file = util.UTF8Recoder(f, 'utf-8-sig') 175 | reader = csv.DictReader(utf8_file) 176 | reader.fieldnames = [field.strip().lower() for field in reader.fieldnames] 177 | table = cls.__table__ 178 | try: 179 | db.engine.execute(table.delete()) 180 | except: 181 | log.debug("NOTE: couldn't delete this table") 182 | 183 | i = 0 184 | for row in reader: 185 | records.append(cls.make_record(row)) 186 | i += 1 187 | if i >= batch_size: 188 | db.engine.execute(table.insert(), records) 189 | sys.stdout.write('*') 190 | records = [] 191 | i = 0 192 | if len(records) > 0: 193 | 
db.engine.execute(table.insert(), records) 194 | f.close() 195 | 196 | # step 4: done... 197 | process_time = time.time() - start_time 198 | log.debug('{0}.load ({1:.0f} seconds)'.format(cls.__name__, process_time)) 199 | 200 | @classmethod 201 | def post_process(cls, db, **kwargs): 202 | """ 203 | Post-process processing method. This method is a placeholder 204 | that may be overridden in children... 205 | @see: stop_time.py or route.py 206 | """ 207 | pass 208 | 209 | @classmethod 210 | def make_record(cls, row): 211 | for k, v in row.copy().items(): 212 | if isinstance(v, basestring): 213 | row[k] = v.strip() 214 | 215 | try: 216 | if k: 217 | if (k not in cls.__table__.c): 218 | del row[k] 219 | elif not v: 220 | row[k] = None 221 | elif k.endswith('date'): 222 | row[k] = datetime.datetime.strptime(v, '%Y%m%d').date() 223 | else: 224 | log.info("I've got issues with your GTFS {0} data. I'll continue, but expect more errors...".format(cls.__name__)) 225 | except Exception as e: 226 | log.warning(e) 227 | 228 | """ if this is a geospatially enabled database, add a geom """ 229 | if hasattr(cls, 'geom') and hasattr(cls, 'add_geom_to_dict'): 230 | cls.add_geom_to_dict(row) 231 | 232 | """ post make_record gives the calling class a chance to fix things up prior to being sent down to database """ 233 | row = cls.post_make_record(row) 234 | return row 235 | 236 | @classmethod 237 | def post_make_record(cls, row): 238 | """ Base does nothing, but a derived class now has a chance to clean up the record prior to db commit """ 239 | return row 240 | 241 | 242 | Base = declarative_base(cls=_Base) 243 | -------------------------------------------------------------------------------- /gtfsdb/model/block.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | from sqlalchemy import Column, Sequence 5 | from sqlalchemy.orm import relationship 6 | from sqlalchemy.types import Integer, String 7 | 8 | from gtfsdb 
import config 9 | from gtfsdb.model.base import Base 10 | from gtfsdb.model.trip import Trip 11 | 12 | import logging 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | class Block(Base): 17 | """ 18 | This is really a BlockTripService table, in that we have entries for each Block / Trip pair, so that we can see 19 | the order of trips served by a given vehicle (block) for a particular service. 20 | 21 | One purpose is to know which trips might begin and end at a given stop .. we often don't want to show 22 | 'arrival stops' in either our list of RouteStops or Stop Schedule listings... 23 | """ 24 | datasource = config.DATASOURCE_DERIVED 25 | 26 | __tablename__ = 'blocks' 27 | 28 | id = Column(Integer, Sequence(None, optional=True), primary_key=True) 29 | sequence = Column(Integer) 30 | block_id = Column(String(255), index=True, nullable=False) 31 | service_id = Column(String(255), index=True, nullable=False) 32 | trip_id = Column(String(255), index=True, nullable=False) 33 | prev_trip_id = Column(String(255)) 34 | next_trip_id = Column(String(255)) 35 | start_stop_id = Column(String(255), index=True, nullable=False) 36 | end_stop_id = Column(String(255), index=True, nullable=False) 37 | 38 | universal_calendar = relationship( 39 | 'UniversalCalendar', 40 | primaryjoin='Block.service_id==UniversalCalendar.service_id', 41 | foreign_keys='(Block.service_id)', 42 | uselist=True, viewonly=True) 43 | 44 | trip = relationship( 45 | 'Trip', 46 | primaryjoin='Block.trip_id==Trip.trip_id', 47 | foreign_keys='(Block.trip_id)', 48 | uselist=False, viewonly=True) 49 | 50 | next_trip = relationship( 51 | 'Trip', 52 | primaryjoin='Block.next_trip_id==Trip.trip_id', 53 | foreign_keys='(Block.next_trip_id)', 54 | uselist=False, viewonly=True) 55 | 56 | prev_trip = relationship( 57 | 'Trip', 58 | primaryjoin='Block.prev_trip_id==Trip.trip_id', 59 | foreign_keys='(Block.prev_trip_id)', 60 | uselist=False, viewonly=True) 61 | 62 | start_stop = relationship( 63 | 'Stop', 64 | 
primaryjoin='Stop.stop_id==Block.start_stop_id', 65 | foreign_keys='(Block.start_stop_id)', 66 | uselist=False, viewonly=True) 67 | 68 | end_stop = relationship( 69 | 'Stop', 70 | primaryjoin='Stop.stop_id==Block.end_stop_id', 71 | foreign_keys='(Block.end_stop_id)', 72 | uselist=False, viewonly=True) 73 | 74 | def __init__(self, sequence, block_id, service_id, trip_id, prev_trip_id, next_trip_id, start_stop_id, end_stop_id): 75 | self.sequence = sequence 76 | self.block_id = block_id 77 | self.service_id = service_id 78 | self.trip_id = trip_id 79 | self.prev_trip_id = prev_trip_id 80 | self.next_trip_id = next_trip_id 81 | self.start_stop_id = start_stop_id 82 | self.end_stop_id = end_stop_id 83 | 84 | def is_arrival(self, stop_id=None): 85 | """ 86 | check whether two sequential trips running on this block first arrive and then depart at this stop... 87 | if this is an 'arrival' stop, then we probably don't want to show it, etc... 88 | """ 89 | ret_val = False 90 | 91 | # default is end_stop_id 92 | if stop_id is None: 93 | stop_id = self.end_stop_id 94 | 95 | if self.next_trip and self.next_trip.start_stop.stop_id == stop_id: 96 | # import pdb; pdb.set_trace() 97 | ret_val = True 98 | return ret_val 99 | 100 | @classmethod 101 | def load(cls, db, **kwargs): 102 | log.debug('{0}.load (loaded later in post_process)'.format(cls.__name__)) 103 | pass 104 | 105 | @classmethod 106 | def post_process(cls, db, **kwargs): 107 | ignore_blocks = kwargs.get('ignore_blocks', None) 108 | log.debug('{0} {1}.post_process'.format("skip" if ignore_blocks else "run", cls.__name__)) 109 | if not ignore_blocks: 110 | cls.populate(db) 111 | 112 | @classmethod 113 | def populate(cls, db): 114 | """ 115 | loop thru a full trip table and break things into buckets based on service key and block id 116 | """ 117 | start_time = time.time() 118 | batch_size = config.DEFAULT_BATCH_SIZE 119 | num_recs = 0 120 | 121 | # step 1: loop thru all trips, sorted by block and service key 122 | trips 
= db.session.query(Trip).order_by(Trip.block_id, Trip.service_id).all() 123 | i = 0 124 | while i < len(trips): 125 | # make sure the trip has a couple stops 126 | if not trips[i].is_valid: 127 | i = i + 1 128 | continue 129 | 130 | b = trips[i].block_id 131 | s = trips[i].service_id 132 | 133 | # need block (optional) and service id info ... if we don't have that, continue to next trip 134 | if b is None or s is None: 135 | i = i + 1 136 | continue 137 | 138 | # step 2: grab a batch of trips that have the same block and service id 139 | t = [] 140 | while i < len(trips): 141 | if not trips[i].is_valid: 142 | i = i + 1 143 | continue 144 | 145 | if trips[i].block_id != b or trips[i].service_id != s: 146 | break 147 | t.append(trips[i]) 148 | i = i + 1 149 | 150 | # step 3: sort our bucket 151 | sorted_blocks = sorted(t, key=lambda t: t.start_time) 152 | sb_len = len(sorted_blocks) - 1 153 | 154 | # step 4: create block objects 155 | for j, k in enumerate(sorted_blocks): 156 | prev = None 157 | next = None 158 | if j > 0: 159 | prev = sorted_blocks[j - 1].trip_id 160 | if j < sb_len: 161 | next = sorted_blocks[j + 1].trip_id 162 | block = Block( 163 | sequence=j + 1, 164 | block_id=b, 165 | service_id=s, 166 | trip_id=k.trip_id, 167 | prev_trip_id=prev, 168 | next_trip_id=next, 169 | start_stop_id=k.start_stop.stop_id, 170 | end_stop_id=k.end_stop.stop_id 171 | ) 172 | db.session.add(block) 173 | 174 | # step 5: insert in the db 175 | num_recs = num_recs + sb_len 176 | if num_recs >= batch_size: 177 | sys.stdout.write('*') 178 | db.session.flush() 179 | db.session.commit() 180 | num_recs = 0 181 | 182 | # step 5b: (final) insert into the db 183 | db.session.flush() 184 | db.session.commit() 185 | 186 | processing_time = time.time() - start_time 187 | log.debug('{0}.populate ({1:.0f} seconds)'.format(cls.__name__, processing_time)) 188 | 189 | @classmethod 190 | def start_stop_ids(cls, session): 191 | """ 192 | return an array of distinct starting stop_ids 193 | """ 
194 | ret_val = [] 195 | blocks = session.query(Block).all() 196 | for b in blocks: 197 | if b.start_stop_id not in ret_val: 198 | ret_val.append(b.start_stop_id) 199 | return ret_val 200 | 201 | @classmethod 202 | def end_stop_ids(cls, session): 203 | """ 204 | return an array of distinct ending stop_ids 205 | """ 206 | ret_val = [] 207 | blocks = session.query(Block).all() 208 | for b in blocks: 209 | if b.end_stop_id not in ret_val: 210 | ret_val.append(b.end_stop_id) 211 | return ret_val 212 | 213 | @classmethod 214 | def active_stop_ids(cls, session, limit=None): 215 | """ 216 | return an array of unique starting and ending stop_ids 217 | use the dict {'stop_id':id} format for return (compatible with Stops.active_stop_ids()) 218 | """ 219 | stops = cls.start_stop_ids(session) 220 | stops.extend(cls.end_stop_ids(session)) 221 | unique = set(stops) 222 | 223 | ret_val = [] 224 | for i, s in enumerate(unique): 225 | if limit and i > int(limit): 226 | break 227 | ret_val.append({'stop_id': s}) 228 | return ret_val 229 | 230 | @classmethod 231 | def blocks_by_stop_id(cls, session, stop_id, trip_id=None, service_keys=None, by_start_stop=False, by_end_stop=False): 232 | """ 233 | query blocks by stop id and service keys ... 
234 | """ 235 | q = session.query(Block) 236 | if trip_id: 237 | q = q.filter(Block.trip_id == trip_id) 238 | if by_start_stop: 239 | q = q.filter(Block.start_stop_id == stop_id) 240 | if by_end_stop: 241 | q = q.filter(Block.end_stop_id == stop_id) 242 | if service_keys: 243 | q = q.filter(Block.service_id.in_(service_keys)) 244 | blocks = q.all() 245 | return blocks 246 | 247 | @classmethod 248 | def blocks_by_start_stop_id(cls, session, stop_id, trip_id=None, service_keys=None): 249 | """ 250 | query blocks by the start stop 251 | """ 252 | return cls.blocks_by_stop_id(session, stop_id, trip_id=trip_id, service_keys=service_keys, by_start_stop=True) 253 | 254 | @classmethod 255 | def blocks_by_end_stop_id(cls, session, stop_id, trip_id=None, service_keys=None): 256 | """ 257 | query blocks by the end stop 258 | """ 259 | return cls.blocks_by_stop_id(session, stop_id, trip_id=trip_id, service_keys=service_keys, by_end_stop=True) 260 | 261 | @classmethod 262 | def blocks_by_trip_stop(cls, session, trip_id, stop_id, by_end_stop=True): 263 | """ 264 | query blocks by the end stop 265 | """ 266 | if by_end_stop: 267 | blocks = cls.blocks_by_end_stop_id(session, stop_id, trip_id=trip_id) 268 | else: 269 | blocks = cls.blocks_by_start_stop_id(session, stop_id, trip_id=trip_id) 270 | 271 | return blocks 272 | -------------------------------------------------------------------------------- /gtfsdb/model/calendar.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import time 4 | 5 | from sqlalchemy import Column, Index 6 | from sqlalchemy.ext.hybrid import hybrid_property 7 | from sqlalchemy.orm import relationship 8 | from sqlalchemy.types import Date, SmallInteger, Integer, String 9 | 10 | from gtfsdb import config 11 | from gtfsdb.model.base import Base 12 | 13 | 14 | __all__ = ['Calendar', 'CalendarDate', 'UniversalCalendar'] 15 | 16 | 17 | log = logging.getLogger(__name__) 18 | 19 | 20 | 
class Calendar(Base):
    """
    maps GTFS calendar.txt: one row per service_id, with a day-of-week
    service pattern plus a start/end date range
    """
    datasource = config.DATASOURCE_GTFS
    filename = 'calendar.txt'

    __tablename__ = 'calendar'
    __table_args__ = (Index('calendar_ix1', 'start_date', 'end_date'),)

    service_id = Column(String(255), primary_key=True, index=True, nullable=False)
    monday = Column(SmallInteger, nullable=False)
    tuesday = Column(SmallInteger, nullable=False)
    wednesday = Column(SmallInteger, nullable=False)
    thursday = Column(SmallInteger, nullable=False)
    friday = Column(SmallInteger, nullable=False)
    saturday = Column(SmallInteger, nullable=False)
    sunday = Column(SmallInteger, nullable=False)
    start_date = Column(Date, nullable=False)
    end_date = Column(Date, nullable=False)
    service_name = Column(String(255))  # Trillium extension, a human-readable name for the calendar.

    # day columns in datetime.date.weekday() order (Monday == 0 ... Sunday == 6)
    WEEKDAY_COLUMNS = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')

    def weekday_list(self):
        """
        :return: ascending list of datetime.weekday() indexes (0=Monday) this service runs on
        """
        # enumerating an ordered tuple replaces the old dict +
        # iteritems()/items() py2/py3 compatibility shim (whose ordering was
        # arbitrary on older pythons); same values, simpler and deterministic
        return [i for i, day in enumerate(self.WEEKDAY_COLUMNS) if getattr(self, day)]

    def to_date_list(self):
        """
        expand this calendar row into per-day dicts (service_id + date) for
        every date in [start_date, end_date] that falls on an active weekday

        TODO: we need better date limiting management here ... this routine could spin a long time w/forever dates
        TODO: for example, if the begin date is 1900 or end date is 9999, then that'll cause a major slowdown
        """
        date_list = []
        weekdays = self.weekday_list()
        diff = self.end_date - self.start_date
        for i in range(diff.days + 1):
            d = self.start_date + datetime.timedelta(days=i)
            if d.weekday() in weekdays:
                date_list.append(dict(service_id=self.service_id, date=d))
        return date_list


class CalendarDate(Base):
    """
    maps GTFS calendar_dates.txt: explicit per-date service additions/removals
    """
    datasource = config.DATASOURCE_GTFS
    filename = 'calendar_dates.txt'

    __tablename__ = 'calendar_dates'

    service_id = Column(String(255), primary_key=True, index=True, nullable=False)
    date = Column(Date, primary_key=True, index=True, nullable=False)
    exception_type = Column(Integer, nullable=False)  # 1 == service added, 2 == service removed

    @hybrid_property
    def is_addition(self):
        """True when this row adds service on the given date (exception_type 1)"""
        return self.exception_type == 1

    @hybrid_property
    def is_removal(self):
        """True when this row removes service on the given date (exception_type 2)"""
        return self.exception_type == 2
@property
def classes(self):
    """
    every Base subclass known to the model layer, optionally narrowed to
    the table names this Database was configured with ('tables' kwarg)
    """
    known = self.get_base_subclasses()
    if not self.tables:
        return known
    wanted = self.tables
    return [klass for klass in known if klass.__tablename__ in wanted]
def create_table(self, orm_class, check_first=True, drop_first=True):
    """
    (re)create a single ORM table on this database's engine
    :param orm_class: declarative class whose __table__ is dropped/created
    :param check_first: passed through as checkfirst= to SQLAlchemy drop/create
    :param drop_first: when True, drop any existing table before creating
    """
    log.debug("create table: {0}".format(orm_class.__table__))
    try:
        if drop_first:
            orm_class.__table__.drop(self.engine, checkfirst=check_first)
    except Exception as e:
        # was a bare 'except:', which also swallowed SystemExit/KeyboardInterrupt
        # and hid the actual reason the drop failed
        log.info("NOTE: couldn't *drop* table {0} (might not be a big deal)\n{1}".format(orm_class.__table__, e))
    try:
        orm_class.__table__.create(self.engine, checkfirst=check_first)
    except Exception as e:
        log.info("NOTE: couldn't *create* table {0} (could be a big deal)\n{1}".format(orm_class.__table__, e))
@classmethod
def prep_orm_class(cls, orm_cls, schema=None, is_geospatial=False):
    """
    ready one ORM class (see Base and its children) for a database that may
    be schema-qualified and/or geospatial, without registering the class in
    Database.classes()/sorted_classes() -- handy for classes that appear in
    queries but whose tables are not loaded by this Database instance
    """
    geo_capable = hasattr(orm_cls, 'add_geometry_column')
    if is_geospatial and geo_capable:
        orm_cls.add_geometry_column()
    if schema:
        orm_cls.set_schema(schema)
class FeedInfo(Base):
    """
    maps GTFS feed_info.txt: publisher / language / validity metadata
    (feed_publisher_name serves as the primary key)
    """
    datasource = config.DATASOURCE_GTFS
    filename = 'feed_info.txt'

    __tablename__ = 'feed_info'

    feed_publisher_name = Column(String(255), primary_key=True)
    feed_publisher_url = Column(String(255), nullable=False)
    feed_lang = Column(String(255), nullable=False)
    feed_start_date = Column(Date)
    feed_end_date = Column(Date)
    feed_version = Column(String(255))
    feed_license = Column(String(255))  # NOTE(review): looks like a non-standard extension column -- confirm producing feeds


class Frequency(Base):
    """
    maps GTFS frequencies.txt: headway-based service entries keyed by
    (trip_id, start_time)
    """
    datasource = config.DATASOURCE_GTFS
    filename = 'frequencies.txt'

    __tablename__ = 'frequencies'

    trip_id = Column(String(255), primary_key=True)
    start_time = Column(String(8), primary_key=True)  # stored as the raw GTFS time string (String(8))
    end_time = Column(String(8))
    headway_secs = Column(Integer)
    exact_times = Column(Integer)

    # parent trip for this frequency entry (read-only view joined on trip_id)
    trip = relationship(
        'Trip',
        primaryjoin='Frequency.trip_id==Trip.trip_id',
        foreign_keys='(Frequency.trip_id)',
        uselist=False, viewonly=True)
def load(self, db, **kwargs):
    """
    Load GTFS into database
    :param db: gtfsdb Database wrapper; its load_tables/postprocess_tables do the real work
    :param kwargs: passed through to the table loaders ('gtfs_directory' is injected here)
    """
    # import pdb; pdb.set_trace()
    start_time = time.time()
    log.debug('GTFS.load: {0}'.format(self.file))

    # step 1: load .txt files from GTFS.zip, as well as derived tables & lookup tables from gtfsdb/data
    gtfs_directory = self.unzip()
    kwargs['gtfs_directory'] = gtfs_directory
    db.load_tables(**kwargs)
    # NOTE(review): if load_tables() raises, this temp directory is never
    # removed -- confirm that leak is acceptable (no try/finally here)
    shutil.rmtree(gtfs_directory)

    # step 2: call post process routines...
    db.postprocess_tables(**kwargs)

    # step 3: finish
    process_time = time.time() - start_time
    log.debug('GTFS.load ({0:.0f} seconds)'.format(process_time))
class Pattern(Base, PatternBase):
    """
    derived table: one row per distinct shape_id, holding the shape's maximum
    travelled distance and (when geospatial support is enabled) a LINESTRING
    geometry built from the ordered Shape points
    """
    datasource = config.DATASOURCE_DERIVED

    __tablename__ = 'patterns'

    shape_id = Column(String(255), primary_key=True, index=True)
    pattern_dist = Column(Numeric(20, 10))  # max(shape_dist_traveled) observed for this shape

    # trips that reference this pattern's shape (read-only view)
    trips = relationship(
        'Trip',
        primaryjoin='Pattern.shape_id==Trip.shape_id',
        foreign_keys='(Pattern.shape_id)',
        uselist=True, viewonly=True)

    # the raw shape points making up this pattern (read-only view)
    shapes = relationship(
        'Shape',
        primaryjoin='Pattern.shape_id==Shape.shape_id',
        foreign_keys='(Shape.shape_id)',
        uselist=True, viewonly=True)

    @classmethod
    def load(cls, db, **kwargs):
        """
        populate the patterns table from Shape rows: group points by shape_id,
        record the max travelled distance, and -- if the geom column exists --
        build a linestring from the sequence-ordered points.
        """
        start_time = time.time()
        session = db.session
        # one (shape_id, dist) row per shape, dist = max distance travelled
        q = session.query(
            Shape.shape_id,
            func.max(Shape.shape_dist_traveled).label('dist')
        )
        shapes = q.group_by(Shape.shape_id)
        for shape in shapes:
            pattern = cls()
            pattern.shape_id = shape.shape_id
            pattern.pattern_dist = shape.dist
            # 'geom' only exists after add_geometry_column() ran (geospatial dbs)
            if hasattr(cls, 'geom'):
                q = session.query(Shape)
                q = q.filter(Shape.shape_id == shape.shape_id)
                q = q.order_by(Shape.shape_pt_sequence)
                pattern.geom_from_shape(q)
            session.add(pattern)
        session.commit()
        session.close()
        processing_time = time.time() - start_time
        log.debug('{0}.load ({1:.0f} seconds)'.format(cls.__name__, processing_time))
@classmethod
def get_geometry_geojson(cls, session, pattern_id, agency=None):
    """
    :returns a geojson dict for the pattern geometry (should be of type:LineString),
             or None when the pattern/geometry can't be queried
    """
    import json  # local import: json isn't imported at this module's top level
    ret_val = None
    try:
        pattern = cls.query_pattern(session, pattern_id, agency)
        dblist = session.query(func.st_asgeojson(pattern.geom)).one()
        # query result comes back as a 1-tuple holding one json string;
        # json.loads replaces the old eval(), which would have executed
        # arbitrary code if the db ever returned something unexpected (and
        # also choked on json literals like true/false/null)
        ret_val = json.loads(dblist[0])
    except Exception as e:
        log.info(e)
    return ret_val
filename = 'routes.txt'

__tablename__ = 'routes'

route_id = Column(String(255), primary_key=True, index=True, nullable=False)
agency_id = Column(String(255), index=True, nullable=True)
route_short_name = Column(String(255))
route_long_name = Column(String(255))
route_desc = Column(String(1023))
route_type = Column(Integer, index=True, nullable=False)  # joins to the RouteType lookup table
route_url = Column(String(255))
route_color = Column(String(6))       # 6-character color code
route_text_color = Column(String(6))  # 6-character color code
route_sort_order = Column(Integer, index=True)
min_headway_minutes = Column(Integer)  # Trillium extension.

# owning agency, eagerly joined (read-only)
agency = relationship(
    'Agency',
    primaryjoin='Route.agency_id==Agency.agency_id',
    foreign_keys='(Route.agency_id)',
    uselist=False, viewonly=True,
    lazy="joined",  # don't innerjoin ... causes unit test errors
)

# mode lookup row for this route (bus/rail/... via the RouteType table)
type = relationship(
    'RouteType',
    primaryjoin='Route.route_type==RouteType.route_type',
    foreign_keys='(Route.route_type)',
    uselist=False, viewonly=True,
    lazy="joined", innerjoin=True,
)

# all trips on this route (read-only list)
trips = relationship(
    'Trip',
    primaryjoin='Route.route_id==Trip.route_id',
    foreign_keys='(Route.route_id)',
    uselist=True, viewonly=True
)

# optional per-direction names from route_directions.txt (read-only list)
directions = relationship(
    'RouteDirection',
    primaryjoin='Route.route_id==RouteDirection.route_id',
    foreign_keys='(Route.route_id)',
    uselist=True, viewonly=True
)
def direction_name(self, direction_id, def_val=''):
    """
    return the human-readable name for one of this route's directions
    :param direction_id: GTFS direction_id to look up
    :param def_val: value returned when no matching named direction exists
    :note: self.directions is a plain relationship *list* (uselist=True, not a
           'dynamic' query), so the old self.directions.filter(...) call always
           raised AttributeError and this method always returned def_val
    """
    ret_val = def_val
    try:
        for d in self.directions:
            if d.direction_id == direction_id and d.direction_name:
                ret_val = d.direction_name
                break
    except Exception as e:
        log.debug(e)
    return ret_val
@classmethod
def load_geoms(cls, db):
    """ load derived geometries, currently only written for PostgreSQL """
    from gtfsdb.model.pattern import Pattern
    from gtfsdb.model.trip import Trip

    # requires postgis functions (st_collect / st_multi); no-op on other dbs
    if db.is_geospatial and db.is_postgresql:
        start_time = time.time()
        session = db.session
        routes = session.query(Route).all()
        for route in routes:
            # collect every pattern linestring used by this route's trips
            # into one MULTILINESTRING, returned as WKT
            s = func.st_collect(Pattern.geom)
            s = func.st_multi(s)
            s = func.st_astext(s).label('geom')
            q = session.query(s)
            q = q.filter(Pattern.trips.any((Trip.route == route)))
            route.geom = q.first().geom
            session.merge(route)
        session.commit()
        processing_time = time.time() - start_time
        log.debug('{0}.load_geoms ({1:.0f} seconds)'.format(cls.__name__, processing_time))

@classmethod
def post_process(cls, db, **kwargs):
    """ post-load hook: builds the derived route geometries """
    log.debug('{0}.post_process'.format(cls.__name__))
    cls.load_geoms(db)
@classmethod
def query_active_routes(cls, session, date=None):
    """
    wrap base active route query
    :return list of Route orm objects
    """
    if date:
        # a dated query can't use the precomputed table; fall back to Route
        log.warning("you're calling CurrentRoutes.active_routes with a date, which is slow...")
        return Route.query_active_routes(session, date)

    ret_val = []
    try:
        clist = session.query(CurrentRoutes).order_by(CurrentRoutes.route_sort_order).all()
        ret_val = [c.route for c in clist]
    except Exception as e:
        log.warning(e)
    return ret_val
class RouteDirection(Base):
    """
    maps route_directions.txt: a human-readable name per
    (route_id, direction_id) pair
    NOTE(review): presumably a non-standard GTFS extension file -- confirm
    which feed producers emit it
    """
    datasource = config.DATASOURCE_GTFS
    filename = 'route_directions.txt'

    __tablename__ = 'route_directions'

    route_id = Column(String(255), primary_key=True, index=True, nullable=False)
    direction_id = Column(Integer, primary_key=True, index=True, nullable=False)
    direction_name = Column(String(255))
def is_lower_priority(self, cmp_route_type):
    """
    arbitrary compare of route types, where a lower number means higher
    priority in terms of mode ranking -- except bus (type 3), which is
    always treated as lowest priority
    :return: True when cmp_route_type ranks below this route's type
    (fixed docstring typos 'abitrary'/'numbrer'; collapsed the nested-if /
    result-flag pattern into direct returns -- behavior unchanged)
    """
    if cmp_route_type == self.route_type:
        return False
    return cmp_route_type == 3 or cmp_route_type > self.route_type
q = session.query(cls) 26 | q = q.filter(cls.route_id == route_id) 27 | if detailed: 28 | # todo: some joined junk, ala what we see in stops? -- q = q.options(joinedload("stop_features")) 29 | pass 30 | ret_val = q.one() 31 | except Exception as e: 32 | log.info(e) 33 | return ret_val 34 | 35 | @classmethod 36 | def query_route_list(cls, session): 37 | """ 38 | :return list of *all* Route orm objects queried from the db 39 | """ 40 | # import pdb; pdb.set_trace() 41 | from .route import RouteFilter 42 | routes = session.query(cls)\ 43 | .filter(~cls.route_id.in_(session.query(RouteFilter.route_id)))\ 44 | .order_by(cls.route_sort_order)\ 45 | .all() 46 | return routes 47 | 48 | @classmethod 49 | def query_active_routes(cls, session, date=None): 50 | """ 51 | :return list of *active* Route orm objects queried from the db 52 | :note 'active' is based on date ... this routine won't deal with holes in the 53 | schedule (e.g., when a route is not active for a period of time, due to construction) 54 | """ 55 | # step 1: grab all routes 56 | routes = cls.query_route_list(session) 57 | 58 | # step 2: filter routes by active date 59 | ret_val = cls.filter_active_routes(routes, date) 60 | return ret_val 61 | 62 | @classmethod 63 | def filter_active_routes(cls, route_list, date=None): 64 | """ 65 | filter an input list of route (orm) objects via is_active 66 | :return new list of routes filtered by date 67 | """ 68 | # import pdb; pdb.set_trace() 69 | ret_val = [] 70 | for r in route_list: 71 | if r and r.is_active(date): 72 | ret_val.append(r) 73 | return ret_val 74 | 75 | @classmethod 76 | def query_nearest_routes(cls, session, geom): 77 | """ 78 | simple utility for quering a route from gtfsdb 79 | """ 80 | ret_val = None 81 | 82 | @classmethod 83 | def query_active_route_ids(cls, session): 84 | """ 85 | return an array of route_id / agency_id pairs 86 | {route_id:'2112', agency_id:'C-TRAN'} 87 | """ 88 | ret_val = [] 89 | routes = cls.query_active_routes(session) 90 | for 
r in routes: 91 | ret_val.append({"route_id": r.route_id, "agency_id": r.agency_id}) 92 | return ret_val 93 | 94 | @classmethod 95 | def make_route_short_name(cls, route, def_name=None): 96 | """ 97 | fix up the short name... 98 | """ 99 | ret_val = def_name 100 | try: 101 | ret_val = util.safe_get_any(route, ['route_short_name', 'short_name', 'route_long_name', 'name']) 102 | 103 | # strip off 'Line' from last word, ala MAX Blue Line == MAX Blue 104 | if ret_val and ret_val.startswith('MAX') and ret_val.endswith('Line'): 105 | ret_val = " ".join(ret_val.split()[:-1]) 106 | # special fix for Portland Streetcar 107 | if 'Portland Streetcar' in ret_val: 108 | ret_val = ret_val.replace('Portland Streetcar', 'PSC').strip() 109 | # fix WES 110 | if ret_val and ret_val.startswith('WES '): 111 | ret_val = "WES" 112 | # fix Portland Aerial Tram 113 | if ret_val and ret_val == 'Portland Aerial Tram': 114 | ret_val = "Aerial Tram" 115 | except Exception as e: 116 | log.warning(e) 117 | 118 | return ret_val 119 | 120 | -------------------------------------------------------------------------------- /gtfsdb/model/route_stop.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | 4 | from sqlalchemy import Column, Sequence 5 | from sqlalchemy.orm import relationship 6 | from sqlalchemy.sql import func 7 | from sqlalchemy.types import Date, Integer, String 8 | 9 | from gtfsdb import config, util 10 | from gtfsdb.model.base import Base 11 | from .route_stop_base import RouteStopBase 12 | 13 | import logging 14 | log = logging.getLogger(__name__) 15 | 16 | 17 | class RouteStop(Base, RouteStopBase): 18 | datasource = config.DATASOURCE_DERIVED 19 | __tablename__ = 'route_stops' 20 | 21 | id = Column(Integer, Sequence(None, optional=True), primary_key=True) 22 | route_id = Column(String(255), index=True, nullable=False) 23 | direction_id = Column(Integer, index=True, nullable=False) 24 | stop_id = Column(String(255), 
index=True, nullable=False) 25 | order = Column(Integer, index=True, nullable=False) 26 | start_date = Column(Date, index=True, nullable=False) 27 | end_date = Column(Date, index=True, nullable=False) 28 | 29 | route = relationship( 30 | 'Route', 31 | primaryjoin='RouteStop.route_id==Route.route_id', 32 | foreign_keys='(RouteStop.route_id)', 33 | uselist=False, viewonly=True, lazy='joined') 34 | 35 | stop = relationship( 36 | 'Stop', 37 | primaryjoin='RouteStop.stop_id==Stop.stop_id', 38 | foreign_keys='(RouteStop.stop_id)', 39 | uselist=False, viewonly=True, lazy='joined') 40 | 41 | direction = relationship( 42 | 'RouteDirection', 43 | primaryjoin='RouteStop.route_id==RouteDirection.route_id and RouteStop.direction_id==RouteDirection.direction_id', 44 | foreign_keys='(RouteStop.route_id, RouteStop.direction_id)', 45 | uselist=False, viewonly=True, lazy='joined') 46 | 47 | start_calendar = relationship( 48 | 'UniversalCalendar', 49 | primaryjoin='RouteStop.start_date==UniversalCalendar.date', 50 | foreign_keys='(RouteStop.start_date)', 51 | uselist=True, viewonly=True) 52 | 53 | end_calendar = relationship( 54 | 'UniversalCalendar', 55 | primaryjoin='RouteStop.end_date==UniversalCalendar.date', 56 | foreign_keys='(RouteStop.end_date)', 57 | uselist=True, viewonly=True) 58 | 59 | def is_active(self, date=None): 60 | """ 61 | :return False whenever we see that the route_stop's start and end date are 62 | outside the input date (where the input date defaults to 'today') 63 | """ 64 | _is_active = False 65 | if self.start_date and self.end_date: 66 | date = util.check_date(date) 67 | if self.start_date <= date <= self.end_date: 68 | _is_active = True 69 | return _is_active 70 | 71 | def is_valid(self): 72 | ret_val = True 73 | if self.start_date is None or self.end_date is None: 74 | ret_val = False 75 | return ret_val 76 | 77 | def get_id(self): 78 | ret_val = "r:{0} d:{1} s:{2}".format(self.route_id, self.direction_id, self.stop_id) 79 | return ret_val 80 | 81 | 
@classmethod 82 | def is_stop_active(cls, session, stop_id, agency_id=None, date=None): 83 | """ 84 | returns boolean whether given stop id is active for a given date 85 | """ 86 | ret_val = False 87 | rs = RouteStop.query_by_stop(session, stop_id, agency_id, date, 1) 88 | if rs and len(rs) > 0: 89 | ret_val = True 90 | return ret_val 91 | 92 | @classmethod 93 | def active_unique_routes_at_stop(cls, session, stop_id, agency_id=None, date=None, route_name_filter=False): 94 | """ 95 | to filter active routes, just provide a date to the above unique_routes_at_stop method 96 | """ 97 | ret_val = [] 98 | routes = cls.unique_routes_at_stop(session, stop_id, agency_id, date, route_name_filter) 99 | for r in routes: 100 | if r.is_active(date): 101 | ret_val.append(r) 102 | return ret_val 103 | 104 | @classmethod 105 | def query_active_stops(cls, session, route_id, direction_id=None, agency_id=None, date=None): 106 | """ 107 | returns list of routes that are seen as 'active' based on dates and filters 108 | """ 109 | # import pdb; pdb.set_trace() 110 | 111 | # step 1: default date 112 | date = util.check_date(date) 113 | 114 | # step 2a: query all route stops by route (and maybe direction and agency 115 | q = session.query(RouteStop).filter(RouteStop.route_id == route_id) 116 | if direction_id is not None: 117 | q = q.filter(RouteStop.direction_id == direction_id) 118 | if agency_id is not None: 119 | pass 120 | # TODO ... agency_id not in RouteStop -- should this even be here? 
121 | # q = q.filter(RouteStop.agency_id == agency_id) 122 | 123 | # step 2b: filter based on date 124 | q = q.filter(RouteStop.start_date <= date).filter(date <= RouteStop.end_date) 125 | 126 | # step 2c: add some stop order 127 | q = q.order_by(RouteStop.order) 128 | 129 | route_stops = q.all() 130 | return route_stops 131 | 132 | @classmethod 133 | def load(cls, db, **kwargs): 134 | log.debug('{0}.load (loaded later in post_process)'.format(cls.__name__)) 135 | pass 136 | 137 | @classmethod 138 | def post_process(cls, db, **kwargs): 139 | log.debug('{0}.post_process'.format(cls.__name__)) 140 | cls.populate(db.session) 141 | 142 | @classmethod 143 | def populate(cls, session): 144 | """ 145 | for each route/direction, find list of stop_ids for route/direction pairs 146 | 147 | the load is a two part process, where part A finds a list of unique stop ids, and 148 | part B creates the RouteStop (and potentially RouteDirections ... if not in GTFS) records 149 | """ 150 | from gtfsdb import Route, RouteDirection 151 | 152 | start_time = time.time() 153 | routes = session.query(Route).all() 154 | 155 | for r in routes: 156 | # step 0: figure out some info about the route 157 | create_directions = False 158 | if r.directions is None or len(r.directions) == 0: 159 | create_directions = True 160 | 161 | # step 1a: filter the list of trips down to only a trip with a unique pattern 162 | trips = [] 163 | shape_id_filter = [] 164 | for t in r.trips: 165 | # a bit of a speedup to filter trips that have the same shape 166 | if t.shape_id and t.shape_id in shape_id_filter: 167 | continue 168 | # store our trips 169 | shape_id_filter.append(t.shape_id) 170 | trips.append(t) 171 | 172 | # step 1b: sort our list of trips by length (note: for trips with two directions, ...) 
173 | trips = sorted(trips, key=lambda t: t.trip_len, reverse=True) 174 | 175 | # step 2: get a hash table of route stops with effective start and end dates 176 | stop_effective_dates = cls._find_route_stop_effective_dates(session, r.route_id) 177 | 178 | # PART A: we're going to just collect a list of unique stop ids for this route / directions 179 | for d in [0, 1]: 180 | unique_stops = [] 181 | 182 | # step 3: loop through all our trips and their stop times, pulling out a unique set of stops 183 | for t in trips: 184 | if t.direction_id == d: 185 | 186 | # step 4: loop through this trip's stop times, and find any/all stops that are in our stop list already 187 | # further, let's try to find the best position of that stop (e.g., look for where the stop patterns breaks) 188 | last_pos = None 189 | for i, st in enumerate(t.stop_times): 190 | # step 5a: make sure this stop that customers can actually board... 191 | if st.is_boarding_stop(): 192 | 193 | # step 5b: don't want arrival trips to influence route stop list 194 | if st.stop_id in unique_stops: 195 | last_pos = unique_stops.index(st.stop_id) 196 | else: 197 | # step 5b: add ths stop id to our unique list ... either in position, or appended to the end of the list 198 | if last_pos: 199 | last_pos += 1 200 | unique_stops.insert(last_pos, st.stop_id) 201 | else: 202 | unique_stops.append(st.stop_id) 203 | 204 | # PART B: add records to the database ... 
205 | if len(unique_stops) > 0: 206 | 207 | # step 6: if an entry for the direction doesn't exist, create a new 208 | # RouteDirection record and add it to this route 209 | if create_directions: 210 | rd = RouteDirection() 211 | rd.route_id = r.route_id 212 | rd.direction_id = d 213 | rd.direction_name = "Outbound" if d == 0 else "Inbound" 214 | session.add(rd) 215 | 216 | # step 7: create new RouteStop records 217 | for k, stop_id in enumerate(unique_stops): 218 | # step 7: create a RouteStop record 219 | rs = RouteStop() 220 | rs.route_id = r.route_id 221 | rs.direction_id = d 222 | rs.stop_id = stop_id 223 | rs.order = k + 1 224 | s, e = cls._get_stop_effective_dates(stop_effective_dates, stop_id) 225 | rs.start_date = s 226 | rs.end_date = e 227 | if rs.is_valid(): 228 | session.add(rs) 229 | else: 230 | log.info("{0} is not valid ... not adding to the database".format(rs.get_id())) 231 | 232 | # step 8: commit the new records to the db for this route... 233 | sys.stdout.write('*') 234 | session.commit() 235 | 236 | # step 9: final commit for any stragglers 237 | session.commit() 238 | session.flush() 239 | session.close() 240 | 241 | processing_time = time.time() - start_time 242 | log.debug('{0}.post_process ({1:.0f} seconds)'.format(cls.__name__, processing_time)) 243 | 244 | @classmethod 245 | def _find_route_stop_effective_dates(cls, session, route_id): 246 | """ 247 | find effective start date and end date for all stops of the input route, when 248 | queried against the trip and stop time tables. 
Below are a couple of pure SQL queries that 249 | perform what I'm doing to get said start and end dates: 250 | 251 | # query all route stops with start & end dates 252 | SELECT t.route_id, st.stop_id, min(date), max(date) 253 | FROM ott.universal_calendar u, ott.trips t, ott.stop_times st 254 | WHERE t.service_id = u.service_id 255 | AND t.trip_id = st.trip_id 256 | GROUP BY t.route_id, st.stop_id 257 | 258 | # query all stops start & end dates for a given route (used below in SQLAlchemy) 259 | SELECT st.stop_id, min(date), max(date) 260 | FROM ott.universal_calendar u, ott.trips t, ott.stop_times st 261 | WHERE t.service_id = u.service_id 262 | AND t.trip_id = st.trip_id 263 | AND st.stop_id = '1' 264 | GROUP BY st.stop_id 265 | 266 | :return hash table with stop_id as key, and tuple of (stop_id, start_date, end_date) for all route stops 267 | """ 268 | ret_val = {} 269 | 270 | # step 1: query the route/stop start and end dates, based on stop time table 271 | from gtfsdb import UniversalCalendar, StopTime, Trip 272 | q = session.query(StopTime.stop_id, func.min(UniversalCalendar.date), func.max(UniversalCalendar.date)) 273 | q = q.filter(UniversalCalendar.service_id == Trip.service_id) 274 | q = q.filter(Trip.trip_id == StopTime.trip_id) 275 | q = q.filter(Trip.route_id == route_id) 276 | q = q.group_by(StopTime.stop_id) 277 | stop_dates = q.all() 278 | 279 | # step 2: make a hash of these dates with the stop id as the key 280 | for d in stop_dates: 281 | ret_val[d[0]] = d 282 | 283 | return ret_val 284 | 285 | @classmethod 286 | def _get_stop_effective_dates(cls, effective_dates_list, stop_id): 287 | """ 288 | :return: start & end date from the route stop dates returned by method above 289 | :see: _find_route_stop_effective_dates 290 | """ 291 | start = None 292 | end = None 293 | try: 294 | start = effective_dates_list[stop_id][1] 295 | end = effective_dates_list[stop_id][2] 296 | except Exception as e: 297 | log.info(e) 298 | return start, end 299 | 300 | 301 
| class CurrentRouteStops(Base, RouteStopBase): 302 | """ 303 | this table is (optionally) used as a view into the currently active routes 304 | it is pre-calculated to list routes that are currently running service 305 | (GTFS can have multiple instances of the same route, with different aspects like name and direction) 306 | """ 307 | datasource = config.DATASOURCE_DERIVED 308 | __tablename__ = 'current_route_stops' 309 | 310 | id = Column(Integer, primary_key=True, index=True, nullable=False) 311 | rs = relationship( 312 | 'RouteStop', 313 | primaryjoin='CurrentRouteStops.id==RouteStop.id', 314 | foreign_keys='(RouteStop.id)', 315 | uselist=False, viewonly=True, lazy='joined' 316 | ) 317 | 318 | route_id = Column(String(255), index=True, nullable=False) 319 | route = relationship( 320 | 'Route', 321 | primaryjoin='Route.route_id==CurrentRouteStops.route_id', 322 | foreign_keys='(Route.route_id)', 323 | uselist=False, viewonly=True, lazy='joined' 324 | ) 325 | 326 | stop_id = Column(String(255), index=True, nullable=False) 327 | stop = relationship( 328 | 'Stop', 329 | primaryjoin='Stop.stop_id==CurrentRouteStops.stop_id', 330 | foreign_keys='(Stop.stop_id)', 331 | uselist=False, viewonly=True, lazy='joined' 332 | ) 333 | 334 | order = Column(Integer, index=True, nullable=False) 335 | 336 | def __init__(self, route_stop): 337 | self.id = route_stop.id 338 | self.route_id = route_stop.route_id 339 | self.stop_id = route_stop.stop_id 340 | self.order = route_stop.order 341 | 342 | @classmethod 343 | def query_by_stop(cls, session, stop_id, agency_id=None, date=None, count=None, sort=False): 344 | """ 345 | get all route stop records by looking for a given stop_id. 346 | further filtering can be had by providing an active date and agency id 347 | """ 348 | # step 1: query stop id 349 | q = session.query(CurrentRouteStops).filter(CurrentRouteStops.stop_id == stop_id) 350 | if agency_id is not None: 351 | pass 352 | # TODO ... 
agency_id not in RouteStop -- should this even be here? 353 | # q = q.filter(RouteStop.agency_id == agency_id) 354 | 355 | # step 2: sort the results based on order column 356 | if sort: 357 | q = q.order_by(CurrentRouteStops.order) 358 | 359 | # step 3: limit the number of objects returned by query 360 | if count: 361 | q = q.limit(count) 362 | 363 | ret_val = q.all() 364 | return ret_val 365 | 366 | @classmethod 367 | def post_process(cls, db, **kwargs): 368 | """ 369 | will update the current 'view' of this data 370 | """ 371 | session = db.session() 372 | try: 373 | session.query(CurrentRouteStops).delete() 374 | 375 | rs_list = session.query(RouteStop).all() 376 | for rs in rs_list: 377 | if rs.is_active(): 378 | c = CurrentRouteStops(rs) 379 | session.add(c) 380 | 381 | session.commit() 382 | session.flush() 383 | except Exception as e: 384 | log.warning(e) 385 | session.rollback() 386 | finally: 387 | session.flush() 388 | session.close() 389 | 390 | 391 | __all__ = [RouteStop.__name__, CurrentRouteStops.__name__] 392 | -------------------------------------------------------------------------------- /gtfsdb/model/route_stop_base.py: -------------------------------------------------------------------------------- 1 | from gtfsdb import util 2 | import logging 3 | log = logging.getLogger(__name__) 4 | 5 | 6 | class RouteStopBase(object): 7 | 8 | @classmethod 9 | def is_arrival(cls, session, trip_id, stop_id): 10 | """ 11 | :return True if it looks like this Trip / Stop pair is an arrival only 12 | NOTE: this routine might be EXPENSIVE since it is 13 | Further, this routine isn't well thought out...not sure block.is_arrival() works 14 | """ 15 | _is_arrival = False 16 | 17 | from gtfsdb import Block 18 | blocks = Block.blocks_by_trip_stop(session, trip_id, stop_id) 19 | if blocks: 20 | for b in blocks: 21 | if b.is_arrival(): 22 | _is_arrival = True 23 | break 24 | return _is_arrival 25 | 26 | 27 | @classmethod 28 | def query_route_short_names(cls, session, 
stop, filter_active=False): 29 | """ 30 | :return an array of short names and types 31 | """ 32 | from .route_stop import RouteStop 33 | 34 | # import pdb; pdb.set_trace() 35 | # step 1: create a short_names list 36 | short_names = [] 37 | 38 | # step 2: use either route-dao list or find the active stops 39 | routes = stop.routes 40 | if routes is None or len(routes) == 0: 41 | routes = RouteStop.active_unique_routes_at_stop(session, stop_id=stop.stop_id) 42 | routes.sort(key=lambda x: x.route_sort_order, reverse=False) 43 | 44 | # step 3: build the short names list 45 | for r in routes: 46 | if filter_active and r.is_active() is False: 47 | continue 48 | sn = {'route_id': r.route_id, 'type': r.type, 'route_type': r.type.route_type, 'otp_type': r.type.otp_type, 'route_short_name': r.make_route_short_name(r)} 49 | short_names.append(sn) 50 | 51 | return short_names 52 | 53 | @classmethod 54 | def to_route_short_names_as_string(cls, short_names, sep=", "): 55 | """ 56 | :return a string representing all short names (e.g., good for a tooltip on a stop popup) 57 | """ 58 | ret_val = None 59 | for s in short_names: 60 | rsn = s.get('route_short_name') 61 | if rsn: 62 | if ret_val is None: 63 | ret_val = rsn 64 | else: 65 | ret_val = "{}{}{}".format(ret_val, sep, rsn) 66 | return ret_val 67 | 68 | @classmethod 69 | def query_by_stop(cls, session, stop_id, agency_id=None, date=None, count=None, sort=False): 70 | """ 71 | get all route stop records by looking for a given stop_id. 
72 | further filtering can be had by providing an active date and agency id 73 | """ 74 | from .route_stop import RouteStop 75 | 76 | # step 1: query all route stops by stop id (and maybe agency) 77 | q = session.query(RouteStop).filter(RouteStop.stop_id == stop_id) 78 | if agency_id is not None: 79 | q = q.filter(RouteStop.agency_id == agency_id) 80 | 81 | # step 2: filter based on date 82 | if date: 83 | date = util.check_date(date) 84 | q = q.filter(RouteStop.start_date <= date).filter(date <= RouteStop.end_date) 85 | 86 | # step 3: sort the results based on order column 87 | if sort: 88 | q = q.order_by(RouteStop.order) 89 | 90 | # step 4: limit the number of objects returned by query 91 | if count: 92 | q = q.limit(count) 93 | 94 | ret_val = q.all() 95 | return ret_val 96 | 97 | @classmethod 98 | def unique_routes_at_stop(cls, session, stop_id, agency_id=None, date=None, route_name_filter=False): 99 | """ 100 | get a unique set of route records by looking for a given stop_id. 101 | further filtering can be had by providing an active date and agency id, and route name 102 | """ 103 | ret_val = [] 104 | 105 | route_ids = [] 106 | route_names = [] 107 | 108 | route_stops = cls.query_by_stop(session, stop_id, agency_id, date, sort=True) 109 | for rs in route_stops: 110 | # step 1: filter(s) check against hashtable 111 | if rs.route_id in route_ids: 112 | continue 113 | if route_name_filter and rs.route.route_name in route_names: 114 | continue 115 | 116 | # step 2: add route attributes to cache hash-tables for later filtering (e.g. 
see filters above) 117 | route_ids.append(rs.route_id) 118 | route_names.append(rs.route.route_name) 119 | 120 | # step 3: this route is unique, so append route object to results 121 | ret_val.append(rs.route) 122 | return ret_val 123 | -------------------------------------------------------------------------------- /gtfsdb/model/shape.py: -------------------------------------------------------------------------------- 1 | from geoalchemy2 import Geometry 2 | from sqlalchemy import Column, Integer, Numeric, String 3 | 4 | from gtfsdb import config, util 5 | from gtfsdb.model.base import Base 6 | 7 | import logging 8 | log = logging.getLogger(__name__) 9 | 10 | 11 | __all__ = ['Shape'] 12 | 13 | 14 | class Shape(Base): 15 | datasource = config.DATASOURCE_GTFS 16 | filename = 'shapes.txt' 17 | 18 | __tablename__ = 'shapes' 19 | 20 | shape_id = Column(String(255), primary_key=True, index=True) 21 | shape_pt_lat = Column(Numeric(12, 9)) 22 | shape_pt_lon = Column(Numeric(12, 9)) 23 | shape_pt_sequence = Column(Integer, primary_key=True, index=True) 24 | shape_dist_traveled = Column(Numeric(20, 10)) 25 | 26 | @classmethod 27 | def get_sequence_from_dist(cls, dist, shapes, find_nearest=True, max_nearest=111.111, def_val=-1): 28 | """ 29 | find the sequence based on 30 | """ 31 | ret_val = None 32 | nearest_seq = def_val 33 | nearest_dist = max_nearest 34 | 35 | # loop thru shape points 36 | for s in shapes: 37 | # exact hit will stop the loop 38 | if dist == s.shape_dist_traveled: 39 | ret_val = s.shape_pt_sequence 40 | break 41 | # a fuzzy nearest is also an option 42 | if find_nearest: 43 | d = abs(dist - s.shape_dist_traveled) 44 | if d < nearest_dist: 45 | nearest_dist = d 46 | nearest_seq = s.shape_pt_sequence 47 | 48 | # assignment rules below kick in when no exact matches happens above 49 | if ret_val is None: 50 | ret_val = def_val 51 | if find_nearest: 52 | ret_val = nearest_seq 53 | 54 | return ret_val 55 | 56 | @classmethod 57 | def 
get_sequence_from_coord(cls, lat, lon, shapes, def_val=-1): 58 | """ 59 | find the sequence based on lat / lon coordinate 60 | """ 61 | ret_val = def_val 62 | for s in shapes: 63 | # exact hit will stop the loop 64 | if lat == float(s.shape_pt_lat) and lon == float(s.shape_pt_lon): 65 | ret_val = s.shape_pt_sequence 66 | break 67 | return ret_val 68 | 69 | @classmethod 70 | def add_geometry_column(cls): 71 | if not hasattr(cls, 'geom'): 72 | cls.geom = Column(Geometry(geometry_type='POINT', srid=config.SRID)) 73 | 74 | @classmethod 75 | def add_geom_to_dict(cls, row): 76 | args = (config.SRID, row['shape_pt_lon'], row['shape_pt_lat']) 77 | row['geom'] = 'SRID={0};POINT({1} {2})'.format(*args) 78 | 79 | @classmethod 80 | def post_process(cls, db, **kwargs): 81 | """ 82 | routines to run after db is loaded 83 | """ 84 | log.debug('{0}.post_process'.format(cls.__name__)) 85 | cls.populate_shape_dist_traveled(db) 86 | 87 | @classmethod 88 | def populate_shape_dist_traveled(cls, db): 89 | """ 90 | populate Shape.shape_pt_sequence where ever it is missing 91 | TODO: assumes feet as the measure ... 
    @classmethod
    def populate_shape_dist_traveled(cls, db):
        """
        populate Shape.shape_dist_traveled where ever it is missing
        (walks all points ordered by shape_id then sequence, accumulating distance
        from the previous point of the same shape via util.distance_ft)
        TODO: assumes feet as the measure ... should make this configurable
        """
        session = db.session()
        try:
            shapes = session.query(Shape).order_by(Shape.shape_id, Shape.shape_pt_sequence).all()
            if shapes:
                shape_id = "-111"  # sentinel that can't match a real shape_id -> forces the 'new shape' branch
                prev_lat = prev_lon = None
                distance = 0.0
                count = 0  # rows updated since the last periodic commit
                for s in shapes:
                    # step 1: on first iteration or shape change, goto loop again (e.g., need 2 coords to calc distance)
                    #         note: first point of each shape has its distance (re)set to 0.0
                    if prev_lat is None or shape_id != s.shape_id:
                        prev_lat = s.shape_pt_lat
                        prev_lon = s.shape_pt_lon
                        shape_id = s.shape_id
                        distance = s.shape_dist_traveled = 0.0
                        continue

                    # step 2: now that we have 2 coords, we can (if missing) calculate the travel distance
                    # import pdb; pdb.set_trace()
                    if s.shape_dist_traveled is None:
                        msg = "calc dist {}: {},{} to {},{}".format(s.shape_pt_sequence, prev_lat, prev_lon, s.shape_pt_lat, s.shape_pt_lon)
                        #log.debug(msg)
                        distance += util.distance_ft(prev_lat, prev_lon, s.shape_pt_lat, s.shape_pt_lon)
                        s.shape_dist_traveled = distance
                        count += 1

                    # step 3 save off these coords (and distance) for next iteration
                    prev_lat = s.shape_pt_lat
                    prev_lon = s.shape_pt_lon
                    distance = s.shape_dist_traveled

                    # step 4 persist every now and then not to build a big buffer
                    if count >= 10000:
                        session.commit()
                        session.flush()
                        count = 0

        except Exception as e:
            log.warning(e)
            session.rollback()
        finally:
            session.commit()
            session.flush()
            session.close()
    @classmethod
    def add_geom_to_dict(cls, row):
        """
        add a 'geom' entry to a raw stops.txt row dict, built by util.Point.make_geo
        from the row's stop_lon / stop_lat in config.SRID
        (presumably invoked during load when geospatial mode is on -- mirrors
        Shape.add_geom_to_dict; confirm against the loader)
        """
        point = util.Point.make_geo(row['stop_lon'], row['stop_lat'], config.SRID)
        row['geom'] = point
'')) 68 | q = q.filter(Route.trips.any(Trip.stop_times.any(f))) 69 | q = q.order_by(Route.route_sort_order) 70 | self._routes = q.all() 71 | return self._routes 72 | 73 | @property 74 | def headsigns(self): 75 | """ 76 | Returns a dictionary of all unique (route_id, headsign) tuples used 77 | at the stop and the number of trips the head sign is used 78 | """ 79 | if not hasattr(self, '_headsigns'): 80 | from gtfsdb.model.stop_time import StopTime 81 | self._headsigns = defaultdict(int) 82 | session = object_session(self) 83 | log.info("QUERY StopTime") 84 | q = session.query(StopTime) 85 | q = q.options(joinedload('trip').joinedload('route')) 86 | q = q.filter_by(stop_id=self.stop_id) 87 | for r in q: 88 | headsign = r.stop_headsign or r.trip.trip_headsign 89 | self._headsigns[(r.trip.route, headsign)] += 1 90 | return self._headsigns 91 | 92 | @property 93 | def agencies(self): 94 | """ 95 | return list of agency ids with routes hitting this stop 96 | @todo: rewrite the cache to use timeout checking in Base.py 97 | """ 98 | try: 99 | self._agencies 100 | except AttributeError: 101 | self._agencies = [] 102 | if self.routes: 103 | for r in self.routes: 104 | if r.agency_id not in self._agencies: 105 | self.agencies.append(r.agency_id) 106 | return self._agencies 107 | 108 | @property 109 | def amenities(self): 110 | """ 111 | return list of strings for the stop amenity (feature) names 112 | """ 113 | try: 114 | self._amenities 115 | except AttributeError: 116 | self._amenities = [] 117 | if self.stop_features and len(self.stop_features) > 0: 118 | for f in self.stop_features: 119 | n = f.feature_name 120 | if n and len(n) > 0: 121 | self._amenities.append(n) 122 | self._amenities = sorted(self._amenities) 123 | return self._amenities 124 | 125 | def is_active(self, date=None): 126 | """ 127 | :return False whenever we see that the stop has zero stop_times on the given input date 128 | (which defaults to 'today') 129 | 130 | @NOTE: use caution with this routine. 
calling this for multiple stops can really slow things down, 131 | since you're querying large trip and stop_time tables, and asking for a schedule of each stop 132 | I used to call this multiple times via route_stop to make sure each stop was active ... that 133 | was really bad performance wise. 134 | """ 135 | from gtfsdb.model.stop_time import StopTime 136 | 137 | # import pdb; pdb.set_trace() 138 | _is_active = False 139 | date = util.check_date(date) 140 | st = StopTime.get_departure_schedule(self.session, self.stop_id, date, limit=1) 141 | if st and len(st) > 0: 142 | _is_active = True 143 | return _is_active 144 | 145 | @classmethod 146 | def query_active_stops(cls, session, limit=None, location_types=[0], active_filter=True, date=None): 147 | """ 148 | check for active stops 149 | """ 150 | # step 1: get stops 151 | q = session.query(Stop) 152 | if limit: 153 | q = q.limit(limit) 154 | if location_types and len(location_types) > 0: 155 | # note: default is to filter location_type=0, which is just stops (not stations) 156 | q.filter(Stop.location_type.in_(location_types)) 157 | stops = q.all() 158 | 159 | # step 2: filter active stops only ??? 
160 | if active_filter: 161 | ret_val = [] 162 | for s in stops: 163 | if s.is_active(date): 164 | ret_val.append(s) 165 | else: 166 | ret_val = stops 167 | return ret_val 168 | 169 | @classmethod 170 | def query_active_stop_ids(cls, session, limit=None, active_filter=True): 171 | """ 172 | return an array of stop_id / agencies pairs 173 | {stop_id:'2112', agencies:['C-TRAN', 'TRIMET']} 174 | """ 175 | ret_val = [] 176 | stops = cls.query_active_stops(session, limit, active_filter) 177 | for s in stops: 178 | ret_val.append({"stop_id": s.stop_id, "agencies": s.agencies}) 179 | return ret_val 180 | 181 | 182 | class CurrentStops(Base, StopBase): 183 | """ 184 | this table is (optionally) used as a view into the currently active routes 185 | it is pre-calculated to list routes that are currently running service 186 | (GTFS can have multiple instances of the same route, with different aspects like name and direction) 187 | """ 188 | datasource = config.DATASOURCE_DERIVED 189 | __tablename__ = 'current_stops' 190 | 191 | route_short_names = Column(String(1023)) 192 | route_type = Column(Integer) 193 | route_type_other = Column(Integer) 194 | route_mode = Column(String(255)) 195 | 196 | stop_id = Column(String(255), primary_key=True, index=True, nullable=False) 197 | location_type = Column(Integer) 198 | stop_lat = Column(Numeric(12, 9), nullable=False) 199 | stop_lon = Column(Numeric(12, 9), nullable=False) 200 | 201 | stop = relationship( 202 | Stop.__name__, 203 | primaryjoin='CurrentStops.stop_id==Stop.stop_id', 204 | foreign_keys='(CurrentStops.stop_id)', 205 | uselist=False, viewonly=True, 206 | lazy="joined", innerjoin=True, 207 | ) 208 | 209 | def __init__(self, stop, session): 210 | """ 211 | create a CurrentStop record from a stop record 212 | :param stop: 213 | :param session: 214 | """ 215 | self.stop_id = stop.stop_id 216 | self.location_type = stop.location_type 217 | self.stop_lon = stop.stop_lon 218 | self.stop_lat = stop.stop_lat 219 | 220 | # copy the 
stop geom to CurrentStops (if we're in is_geospatial mode) 221 | if hasattr(stop, 'geom') and hasattr(self, 'geom'): 222 | self.geom = util.Point.make_geo(stop.stop_lon, stop.stop_lat, config.SRID) 223 | 224 | # convoluted route type assignment ... handle conditon where multiple modes (limited to 2) serve same stop 225 | # import pdb; pdb.set_trace() 226 | from .route_stop import CurrentRouteStops 227 | rs_list = CurrentRouteStops.query_route_short_names(session, stop, filter_active=True) 228 | for rs in rs_list: 229 | type = rs.get('type') 230 | if self.route_mode is None: 231 | self.route_type = type.route_type 232 | self.route_mode = type.otp_type 233 | elif type.is_different_mode(self.route_type): 234 | if type.is_lower_priority(self.route_type): 235 | self.route_type_other = self.route_type 236 | self.route_type = type.route_type 237 | self.route_mode = type.otp_type 238 | else: 239 | self.route_type_other = type.route_type 240 | 241 | # route short names 242 | self.route_short_names = CurrentRouteStops.to_route_short_names_as_string(rs_list) 243 | 244 | @classmethod 245 | def post_process(cls, db, **kwargs): 246 | """ 247 | will update the current 'view' of this data 248 | """ 249 | session = db.session() 250 | try: 251 | session.query(CurrentStops).delete() 252 | 253 | # import pdb; pdb.set_trace() 254 | for s in Stop.query_active_stops(session): 255 | c = CurrentStops(s, session) 256 | session.add(c) 257 | 258 | session.commit() 259 | session.flush() 260 | except Exception as e: 261 | log.warning(e) 262 | session.rollback() 263 | finally: 264 | session.flush() 265 | session.close() 266 | 267 | 268 | __all__ = [Stop.__name__, CurrentStops.__name__] 269 | -------------------------------------------------------------------------------- /gtfsdb/model/stop_base.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, Integer, Numeric, String 2 | from sqlalchemy.orm import joinedload, object_session 3 
from gtfsdb import config
from gtfsdb.util import BBox, Point

import logging
log = logging.getLogger(__name__)


class StopBase(object):
    """
    provides a generic set of stop query routines
    (mixed into both Stop and CurrentStops)
    """

    def active_stops(self, date=None):
        """
        return the routes serving this stop that are active on the given date

        this common method will call route.is_active(), which means it will probably be slow
        note: this method, even when called from ActiveRoutes or ActiveStops, will probably be *slow*
        """
        ret_val = []
        try:
            for r in self.routes:
                if r.is_active(date):
                    ret_val.append(r)
        except Exception as e:
            log.warning(e)
        return ret_val

    @classmethod
    def add_geometry_column(cls):
        """add a geoalchemy2 point column to the model (only called in geospatial mode)"""
        if not hasattr(cls, 'geom'):
            from geoalchemy2 import Geometry
            cls.geom = Column(Geometry(geometry_type='POINT', srid=config.SRID))

    @classmethod
    def query_orm_for_stop(cls, session, stop_id, detailed=False, agency=None):
        """
        simple utility for querying a stop from gtfsdb

        :return: a single stop record, or None when not found
        """
        ret_val = None
        try:
            log.info("query Stop for {}".format(stop_id))
            q = session.query(cls)
            q = q.filter(cls.stop_id == stop_id)
            # TODO q.filter(cls.agency_id == agency_id)
            if detailed:
                try:
                    # 'stop_features' only exists on some models, so guard the joinedload
                    q = q.options(joinedload("stop_features"))
                except Exception:
                    pass
            ret_val = q.one()
        except Exception as e:
            log.info(e)
        return ret_val

    @classmethod
    def generic_query_stops(cls, session, **kwargs):
        """
        query for a list of this data (honors an optional 'limit' kwarg)
        """
        ret_val = []
        try:
            clist = session.query(cls)
            limit = kwargs.get('limit')
            if limit:
                clist = clist.limit(limit)
            ret_val = clist.all()
        except Exception as e:
            log.warning(e)
        return ret_val

    @classmethod
    def query_stops_via_bbox(cls, session, bbox, limit=2000):
        """query stops falling within a bounding box (requires geospatial mode)"""
        ret_val = []
        try:
            log.info("query gtfsdb Stop table")
            q = session.query(cls)
            q = q.filter(cls.location_type == 0)  # just stops (not stations or entrances to stations)
            q = q.filter(cls.geom.ST_Within(bbox.to_geojson()))
            q = q.limit(limit + 10)
            ret_val = q.all()
        except Exception as e:
            log.warning(e)
        return ret_val

    @classmethod
    def query_stops_via_point(cls, session, point, limit=10):
        """query stops nearest to a point, closest first (requires geospatial mode)"""
        ret_val = []
        try:
            log.info("query Stop table")
            q = session.query(cls)
            q = q.filter(cls.location_type == 0)
            q = q.order_by(cls.geom.distance_centroid(point.to_geojson()))
            q = q.limit(limit)
            ret_val = q.all()
        except Exception as e:
            log.warning(e)
        return ret_val

    @classmethod
    def query_stops(cls, session, **kwargs):
        """
        will query the db for a stop, either via bbox, point & distance or just an id
        :return list of stops
        """
        ret_val = []
        if kwargs.get('lat') or kwargs.get('min_lat'):
            bbox = BBox(**kwargs)
            if bbox.is_valid:
                ret_val = cls.query_stops_via_bbox(session, bbox)
            else:
                point = Point(**kwargs)
                if point.is_valid:
                    # bug fix: this used to call the non-existent query_stops_via_point_radius(),
                    # which raised AttributeError whenever the point path was taken
                    ret_val = cls.query_stops_via_point(session, point)
        else:
            ret_val = cls.generic_query_stops(session, **kwargs)
        return ret_val


# ---- gtfsdb/model/stop_feature.py ----
from sqlalchemy import Column, Sequence
from sqlalchemy.types import Integer, String

from gtfsdb import config
from gtfsdb.model.base import Base


__all__ = ['StopFeature']


class StopFeature(Base):
    """model for the (non-standard) stop_features.txt extension file"""
    datasource = config.DATASOURCE_GTFS
    filename = 'stop_features.txt'

    __tablename__ = 'stop_features'

    id = Column(Integer, Sequence(None, optional=True), primary_key=True)
primary_key=True) 18 | stop_id = Column(String(255), index=True, nullable=False) 19 | feature_type = Column(String(50)) 20 | feature_name = Column(String(255)) 21 | -------------------------------------------------------------------------------- /gtfsdb/model/stop_time.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | from gtfsdb import config, util 5 | from gtfsdb.model.base import Base 6 | from sqlalchemy import Column 7 | from sqlalchemy.orm import joinedload, relationship 8 | from sqlalchemy.sql.expression import func 9 | from sqlalchemy.types import SmallInteger, Integer, Numeric, String 10 | 11 | log = logging.getLogger(__name__) 12 | 13 | 14 | class StopTime(Base): 15 | datasource = config.DATASOURCE_GTFS 16 | filename = 'stop_times.txt' 17 | 18 | __tablename__ = 'stop_times' 19 | 20 | trip_id = Column(String(255), primary_key=True, index=True, nullable=False) 21 | stop_id = Column(String(255), index=True, nullable=False) 22 | stop_sequence = Column(Integer, primary_key=True, nullable=False) 23 | arrival_time = Column(String(9)) 24 | departure_time = Column(String(9), index=True) 25 | stop_headsign = Column(String(255)) 26 | pickup_type = Column(Integer, default=0) 27 | drop_off_type = Column(Integer, default=0) 28 | shape_dist_traveled = Column(Numeric(20, 10)) 29 | timepoint = Column(SmallInteger, index=True, default=0) 30 | 31 | stop = relationship( 32 | 'Stop', 33 | primaryjoin='Stop.stop_id==StopTime.stop_id', 34 | foreign_keys='(StopTime.stop_id)', 35 | uselist=False, viewonly=True) 36 | 37 | trip = relationship( 38 | 'Trip', 39 | primaryjoin='Trip.trip_id==StopTime.trip_id', 40 | foreign_keys='(StopTime.trip_id)', 41 | uselist=False, viewonly=True) 42 | 43 | def __init__(self, *args, **kwargs): 44 | super(StopTime, self).__init__(*args, **kwargs) 45 | if 'timepoint' not in kwargs: # this logic is the int() equal of what was after changing this to SmallInt, 46 | if 
'arrival_time' in kwargs: # but I'm wondering if this is now going to set all stop times to timepoint=1=True ??? 47 | self.timepoint = 1 48 | 49 | @classmethod 50 | def post_make_record(cls, row): 51 | # import pdb; pdb.set_trace() 52 | 53 | # step 1: check that times are HH:MM:SS (append zero if just H:MM:SS) 54 | if 'arrival_time' in row: row['arrival_time'] = util.fix_time_string(row['arrival_time']) 55 | if 'departure_time' in row: row['departure_time'] = util.fix_time_string(row['departure_time']) 56 | 57 | return row 58 | 59 | def get_headsign(self): 60 | """ 61 | get the headsign at this stop ... rule is that if stop is empty, use trip headsign 62 | """ 63 | ret_val = self.stop_headsign 64 | if not ret_val: 65 | ret_val = self.trip.trip_headsign 66 | return ret_val 67 | 68 | def get_direction_name(self, def_val="", banned=['Shuttle', 'MAX Shuttle', 'Garage', 'Center Garage', 'Merlo Garage', 'Powell Garage']): 69 | """ 70 | returns either the headsign (priority) or the route direction name (when banned) 71 | (as long as one of these names are not banned and not the same name as the route name) 72 | """ 73 | ret_val = def_val 74 | try: 75 | # step 0: create a banned list with the addition of our route_long_name 76 | banned = banned + [self.trip.route.route_long_name] 77 | 78 | headsign = self.get_headsign() 79 | if headsign and not any([headsign in s for s in banned]): 80 | # step 1: use the headsign as the direction name, just as long as the headsign is 81 | # not null and not the same as the route name 82 | ret_val = headsign 83 | else: 84 | # step 2: lets use the direction name, if available 85 | d = self.trip.route.directions[self.trip.direction_id] 86 | if d.direction_name and not any([d.direction_name in s for s in banned]): 87 | ret_val = d.direction_name.lstrip('to ').lstrip('To ') 88 | except Exception as e: 89 | log.debug(e) 90 | pass 91 | return ret_val 92 | 93 | def is_boarding_stop(self): 94 | """ 95 | return whether the vehicle that is stopping 
at this stop, and at this time, is an 96 | in-revenue vehicle that a customer can actually board... 97 | 98 | pickup_type = 1 - No pickup available 99 | 100 | departure_time = None 101 | 102 | NOTE: in gtfsdb, we NULL out the departure times when the vehicle doesn't 103 | pick up anyone (e.g., at route end points, there are no departures...) 104 | 105 | @see: https://developers.google.com/transit/gtfs/reference#stop_times_fields 106 | """ 107 | ret_val = True 108 | if self.pickup_type == 1 or self.departure_time is None: 109 | ret_val = False 110 | return ret_val 111 | 112 | @classmethod 113 | def post_process(cls, db, **kwargs): 114 | log.debug('{0}.post_process'.format(cls.__name__)) 115 | cls.populate_shape_dist_traveled(db) 116 | # cls.null_out_last_stop_departures(db) ## commented out due to other processes 117 | 118 | @classmethod 119 | def populate_shape_dist_traveled(cls, db): 120 | """ 121 | populate StopTime.shape_dist_travelled where ever it is missing 122 | TODO: assumes feet as the measure ... 
should make this configurable 123 | """ 124 | session = db.session() 125 | try: 126 | stop_times = session.query(StopTime).order_by(StopTime.trip_id, StopTime.stop_sequence).all() 127 | if stop_times: 128 | trip_id = "-111" 129 | prev_lat = prev_lon = None 130 | distance = 0.0 131 | count = 0 132 | for s in stop_times: 133 | # step 1: on first iteration or shape change, goto loop again (e.g., need 2 coords to calc distance) 134 | if prev_lat is None or trip_id != s.trip_id: 135 | prev_lat = s.stop.stop_lat 136 | prev_lon = s.stop.stop_lon 137 | trip_id = s.trip_id 138 | distance = s.shape_dist_traveled = 0.0 139 | continue 140 | 141 | # step 2: now that we have 2 coords, we can (if missing) calculate the travel distannce 142 | # import pdb; pdb.set_trace() 143 | if s.shape_dist_traveled is None: 144 | #msg = "calc dist {}: {},{} to {},{}".format(s.shape_pt_sequence, prev_lat, prev_lon, s.shape_pt_lat, s.shape_pt_lon) 145 | #log.debug(msg) 146 | distance += util.distance_ft(prev_lat, prev_lon, s.stop.stop_lat, s.stop.stop_lon) 147 | s.shape_dist_traveled = distance 148 | count += 0 149 | 150 | # step 3 save off these coords (and distance) for next iteration 151 | prev_lat = s.stop.stop_lat 152 | prev_lon = s.stop.stop_lon 153 | distance = s.shape_dist_traveled 154 | 155 | # step 4 persist every now and then not to build a big buffer 156 | if count >= 10000: 157 | session.commit() 158 | session.flush() 159 | count = 0 160 | 161 | except Exception as e: 162 | log.warning(e) 163 | session.rollback() 164 | finally: 165 | session.commit() 166 | session.flush() 167 | session.close() 168 | 169 | 170 | @classmethod 171 | def null_out_last_stop_departures(cls, db): 172 | """ 173 | delete all 'depature_time' values that appear for the last stop 174 | time of a given trip (e.g., the trip ends there, so there isn't a 175 | further vehicle departure / customer pickup for that stop time / trip pair)... 
176 | 177 | -- query below shows null'd out stop times 178 | select * from ott.stop_times 179 | where COALESCE(arrival_time,'')='' or COALESCE(departure_time,'')='' 180 | 181 | NOTE: we know this breaks the current GTFS spec, which states that departure & 182 | arrival times must both exist for every stop time. Sadly, GTFS is kinda wrong... 183 | """ 184 | # step 1: remove the departure times at the end of a trip 185 | log.info("QUERY StopTime for all trip end times") 186 | sq = db.session.query(StopTime.trip_id, func.max(StopTime.stop_sequence).label('end_sequence')) 187 | sq = sq.group_by(StopTime.trip_id).subquery() 188 | q = db.session.query(StopTime) 189 | q = q.filter_by(trip_id=sq.c.trip_id, stop_sequence=sq.c.end_sequence) 190 | for st in q: 191 | if st.pickup_type == 1: 192 | st.departure_time = None 193 | 194 | # remove the arrival times at the start of a trip 195 | log.info("QUERY StopTime for all trip start times") 196 | sq = db.session.query(StopTime.trip_id, func.min(StopTime.stop_sequence).label('start_sequence')) 197 | sq = sq.group_by(StopTime.trip_id).subquery() 198 | q = db.session.query(StopTime) 199 | q = q.filter_by(trip_id=sq.c.trip_id, stop_sequence=sq.c.start_sequence) 200 | for st in q: 201 | if st.drop_off_type == 1: 202 | st.arrival_time = None 203 | 204 | db.session.flush() 205 | db.session.commit() 206 | db.session.close() 207 | 208 | @classmethod 209 | def get_service_keys_from_list(cls, stop_times): 210 | ret_val = [] 211 | for s in stop_times: 212 | k = s.trip.service_id 213 | if k not in ret_val: 214 | ret_val.append(k) 215 | return ret_val 216 | 217 | @classmethod 218 | def get_departure_schedule(cls, session, stop_id, date=None, route_id=None, limit=None): 219 | """ 220 | helper routine which returns the stop schedule for a give date 221 | """ 222 | from gtfsdb.model.trip import Trip 223 | 224 | # step 0: make sure we have a valid date 225 | if date is None: 226 | date = datetime.date.today() 227 | 228 | # step 1: get stop times 
based on date 229 | log.debug("QUERY StopTime") 230 | q = session.query(StopTime) 231 | q = q.filter_by(stop_id=stop_id) 232 | q = q.filter(StopTime.departure_time is not None) 233 | q = q.filter(StopTime.trip.has(Trip.universal_calendar.any(date=date))) 234 | 235 | # step 2: apply an optional route filter 236 | if route_id: 237 | q = q.filter(StopTime.trip.has(Trip.route_id == route_id)) 238 | 239 | # step 3: options to speed up /q 240 | q = q.options(joinedload('trip')) 241 | 242 | # step 4: order the stop times 243 | if limit is None or limit > 1: 244 | q = q.order_by(StopTime.departure_time) 245 | 246 | # step 5: limit results 247 | if limit: 248 | q = q.limit(limit) 249 | 250 | stop_times = q.all() 251 | ret_val = cls.block_filter(session, stop_id, stop_times) 252 | 253 | return ret_val 254 | 255 | @classmethod 256 | def block_filter(cls, session, stop_id, stop_times): 257 | """ 258 | we don't want to show stop times that are arrivals, so we look at the blocks and figure out whether 259 | the input stop is the ending stop, and that there's a next trip starting at this same stop. 
260 | """ 261 | ret_val = stop_times 262 | if stop_times and len(stop_times) > 1: 263 | from gtfsdb.model.block import Block 264 | keys = cls.get_service_keys_from_list(stop_times) 265 | blocks = Block.blocks_by_end_stop_id(session, stop_id, service_keys=keys) 266 | if blocks: 267 | ret_val = [] 268 | for s in stop_times: 269 | block = None 270 | for b in blocks: 271 | if s.trip_id == b.trip_id and s.trip.block_id == b.block_id: 272 | block = b 273 | blocks.remove(b) 274 | break 275 | 276 | if block is None: 277 | ret_val.append(s) 278 | elif not block.is_arrival(stop_id): 279 | ret_val.append(s) 280 | # @todo maybe monkey patch stop_time with block, so we know about last trip 281 | 282 | # this is an arrival trip, and the next trip 283 | # (don't return the stop_time as a departure) 284 | # this is the last trip of the day (so return it) 285 | return ret_val 286 | -------------------------------------------------------------------------------- /gtfsdb/model/transfer.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, Sequence 2 | from sqlalchemy.types import Integer, String 3 | 4 | from gtfsdb import config 5 | from gtfsdb.model.base import Base 6 | 7 | 8 | class Transfer(Base): 9 | datasource = config.DATASOURCE_GTFS 10 | filename = 'transfers.txt' 11 | 12 | __tablename__ = 'transfers' 13 | 14 | id = Column(Integer, Sequence(None, optional=True), primary_key=True) 15 | from_stop_id = Column(String(255)) 16 | to_stop_id = Column(String(255)) 17 | transfer_type = Column(Integer, index=True, default=0) 18 | min_transfer_time = Column(Integer) 19 | -------------------------------------------------------------------------------- /gtfsdb/model/translation.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import Column, Integer, Sequence 2 | from sqlalchemy.types import String 3 | 4 | from gtfsdb import config 5 | from gtfsdb.model.base import Base 
6 | 7 | 8 | class Translation(Base): 9 | datasource = config.DATASOURCE_GTFS 10 | filename = 'translations.txt' 11 | 12 | __tablename__ = 'translations' 13 | 14 | id = Column(Integer, Sequence(None, optional=True), primary_key=True) 15 | table_name = Column(String(255), nullable=False) 16 | field_name = Column(String(255), nullable=False) 17 | language = Column(String(255), nullable=False) 18 | translation = Column(String(255), nullable=False) 19 | record_id = Column(String(255)) 20 | record_sub_id = Column(String(255)) 21 | field_value = Column(String(255)) 22 | -------------------------------------------------------------------------------- /gtfsdb/model/trip.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from gtfsdb import config 4 | from gtfsdb.model.base import Base 5 | from sqlalchemy import Column 6 | from sqlalchemy.orm import relationship 7 | from sqlalchemy.types import Integer, String 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | class Trip(Base): 13 | datasource = config.DATASOURCE_GTFS 14 | filename = 'trips.txt' 15 | 16 | __tablename__ = 'trips' 17 | 18 | trip_id = Column(String(255), primary_key=True, index=True, nullable=False) 19 | route_id = Column(String(255), index=True, nullable=False) 20 | service_id = Column(String(255), index=True, nullable=False) 21 | direction_id = Column(Integer, index=True) 22 | block_id = Column(String(255), index=True) 23 | shape_id = Column(String(255), index=True, nullable=True) 24 | trip_type = Column(String(255)) 25 | 26 | trip_headsign = Column(String(255)) 27 | trip_short_name = Column(String(255)) 28 | bikes_allowed = Column(Integer, default=0) 29 | wheelchair_accessible = Column(Integer, default=0) 30 | 31 | pattern = relationship( 32 | 'Pattern', 33 | primaryjoin='Trip.shape_id==Pattern.shape_id', 34 | foreign_keys='(Trip.shape_id)', 35 | uselist=False, viewonly=True) 36 | 37 | shapes = relationship( 38 | 'Shape', 39 | 
primaryjoin='Trip.shape_id==Shape.shape_id', 40 | foreign_keys='(Trip.shape_id)', 41 | uselist=True, viewonly=True) 42 | 43 | route = relationship( 44 | 'Route', 45 | primaryjoin='Trip.route_id==Route.route_id', 46 | foreign_keys='(Trip.route_id)', 47 | uselist=False, viewonly=True) 48 | 49 | stop_times = relationship( 50 | 'StopTime', 51 | primaryjoin='Trip.trip_id==StopTime.trip_id', 52 | foreign_keys='(Trip.trip_id)', 53 | order_by='StopTime.stop_sequence', 54 | uselist=True, viewonly=True) 55 | 56 | universal_calendar = relationship( 57 | 'UniversalCalendar', 58 | primaryjoin='Trip.service_id==UniversalCalendar.service_id', 59 | foreign_keys='(Trip.service_id)', 60 | uselist=True, viewonly=True) 61 | 62 | @classmethod 63 | def post_process(cls, db, **kwargs): 64 | trips = db.session.query(Trip).all() 65 | for t in trips: 66 | if not t.is_valid: 67 | log.warning("invalid trip: {0} only has {1} stop_time record (i.e., maybe the stops are coded as " 68 | "non-public, and thus their stop time records didn't make it into the gtfs)".format(t.trip_id, t.trip_len)) 69 | 70 | @classmethod 71 | def query_trip(cls, session, trip_id, schema=None): 72 | """ return a trip via trip_id """ 73 | if schema: 74 | Trip.set_schema(schema) 75 | ret_val = session.query(Trip).filter(Trip.trip_id==trip_id).one() 76 | return ret_val 77 | 78 | @property 79 | def start_stop(self): 80 | return self.stop_times[0].stop 81 | 82 | @property 83 | def end_stop(self): 84 | return self.stop_times[-1].stop 85 | 86 | @property 87 | def start_time(self): 88 | return self.stop_times[0].departure_time 89 | 90 | @property 91 | def end_time(self): 92 | return self.stop_times[-1].arrival_time 93 | 94 | @property 95 | def trip_len(self): 96 | ret_val = 0 97 | if self.stop_times: 98 | ret_val = len(self.stop_times) 99 | return ret_val 100 | 101 | @property 102 | def is_valid(self): 103 | # trip has to have multiple stop times to be valid, else it's not a trip... 
104 | return self.trip_len >= 2 105 | -------------------------------------------------------------------------------- /gtfsdb/scripts.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | 5 | from gtfsdb import config 6 | from gtfsdb.model.base import Base 7 | from gtfsdb.api import database_load 8 | 9 | 10 | def make_kwargs(args): 11 | # see below... 12 | kwargs = dict( 13 | # common cmd line items 14 | url=args.database_url, 15 | schema=args.schema, 16 | is_geospatial=args.is_geospatial, 17 | current_tables=args.current_tables, 18 | 19 | # less used params 20 | do_postprocess=not args.ignore_postprocess, 21 | ignore_blocks=args.ignore_blocks, 22 | tables=args.tables, 23 | batch_size=args.batch_size 24 | ) 25 | return kwargs 26 | 27 | 28 | def get_args(prog_name='gtfsdb-load', do_parse=True): 29 | """ 30 | database load command-line arg parser and help util... 31 | """ 32 | tables = sorted([t.name for t in Base.metadata.sorted_tables]) 33 | parser = argparse.ArgumentParser( 34 | prog=prog_name, 35 | formatter_class=argparse.ArgumentDefaultsHelpFormatter 36 | ) 37 | parser.add_argument('file', help='URL or local path to GTFS zip FILE') 38 | parser.add_argument('--batch_size', '-b', type=int, default=config.DEFAULT_BATCH_SIZE, 39 | help='BATCH SIZE to use for memory management') 40 | parser.add_argument('--database_url', '-d', default=config.DEFAULT_DATABASE_URL, 41 | help='DATABASE URL with appropriate privileges') 42 | parser.add_argument('--is_geospatial', '-g', action='store_true', 43 | default=config.DEFAULT_IS_GEOSPATIAL, 44 | help='Database supports GEOSPATIAL functions') 45 | parser.add_argument('--schema', '-s', default=config.DEFAULT_SCHEMA, 46 | help='Database SCHEMA name') 47 | parser.add_argument('--tables', choices=tables, default=None, nargs='*', 48 | help='Limited list of TABLES to load, if blank, load all tables') 49 | 
parser.add_argument('--create', '-c', action="store_true", 50 | help='create new db tables (note: not currently used in gtfsdb, which always creates tables)') 51 | parser.add_argument('--current_tables', '-ct', default=False, action='store_true', 52 | help="create tables that represent 'current' service (e.g., views)") 53 | parser.add_argument('--ignore_postprocess', '-np', default=False, action='store_true', 54 | help="don't run any postprocess model routines (will leave some tables empty ... but will load raw gtfs data)") 55 | parser.add_argument('--ignore_blocks', '-nb', default=False, action='store_true', 56 | help="don't bother populating the derrived block table") 57 | if do_parse: 58 | args = parser.parse_args() 59 | kwargs = make_kwargs(args) 60 | else: 61 | args = parser 62 | kwargs = None 63 | 64 | return args, kwargs 65 | 66 | 67 | def gtfsdb_load(): 68 | args, kwargs = get_args() 69 | database_load(args.file, **kwargs) 70 | 71 | 72 | def route_stop_load(): 73 | """ 74 | written as a test / debug method for RS table loader 75 | """ 76 | from gtfsdb import Database, RouteStop 77 | kwargs = get_args()[1] 78 | db = Database(**kwargs) 79 | RouteStop.load(db, **kwargs) 80 | 81 | 82 | def current_tables_load(**kwargs): 83 | """ 84 | current table loader 85 | """ 86 | from gtfsdb import Database, CurrentRoutes, CurrentStops, CurrentRouteStops 87 | db = Database(**kwargs) 88 | for cls in [CurrentRoutes, CurrentRouteStops, CurrentStops]: 89 | db.create_table(cls) 90 | cls.post_process(db, **kwargs) 91 | 92 | 93 | def current_tables_cmdline(): 94 | kwargs = get_args('gtfsdb-current-load')[1] 95 | current_tables_load(**kwargs) 96 | 97 | 98 | def db_connect_tester(): 99 | """ 100 | simple routine to connect to an existing database and list a few stops 101 | bin/connect-tester --database_url sqlite:///gtfs.db _no_gtfs_zip_needed_ 102 | """ 103 | from gtfsdb import Database, Stop, Route, StopTime 104 | args, kwargs = get_args('connect-tester') 105 | db = 
Database(**kwargs) 106 | for s in db.session.query(Stop).limit(2): 107 | print(s.stop_name) 108 | for r in db.session.query(Route).limit(2): 109 | print(r.route_name) 110 | stop_times = StopTime.get_departure_schedule(db.session, stop_id='11411') 111 | for st in stop_times: 112 | print(st.get_direction_name()) 113 | break 114 | -------------------------------------------------------------------------------- /gtfsdb/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTransitTools/gtfsdb/38c20e1e83577a2d0efb7b20ad3e262d27c88168/gtfsdb/tests/__init__.py -------------------------------------------------------------------------------- /gtfsdb/tests/base.py: -------------------------------------------------------------------------------- 1 | from gtfsdb import util 2 | from gtfsdb.model.db import Database 3 | from gtfsdb.api import database_load 4 | 5 | from pkg_resources import resource_filename 6 | import os 7 | import logging 8 | log = logging.getLogger(__name__) 9 | 10 | 11 | def get_test_directory_path(): 12 | """ will return current path ... 
tries to handle c:\\ windows junk """ 13 | path = resource_filename('gtfsdb', 'tests') 14 | path = path.replace('c:\\', '/').replace('\\', '/') 15 | return path 16 | 17 | 18 | def get_gtfs_file_uri(gtfs_file): 19 | """ will send back proper file:////blah/test_file.zip """ 20 | dir_path = get_test_directory_path() 21 | file_uri = "file://{0}".format(os.path.join(dir_path, gtfs_file)) 22 | file_uri = file_uri.replace('\\', '/') 23 | return file_uri 24 | 25 | 26 | def load_sqlite(db_name=None, gtfs_name='multi-date-feed.zip'): 27 | # import pdb; pdb.set_trace() 28 | gtfs_uri = get_gtfs_file_uri(gtfs_name) 29 | url = util.make_temp_sqlite_db_uri(db_name) 30 | db = database_load(gtfs_uri, url=url, current_tables=True) 31 | return db 32 | 33 | 34 | def load_pgsql(url, schema="current_test"): 35 | """ To run this test, do the following: 36 | x) bin/test gtfsdb.tests.test_current 37 | 38 | You might also have to do the following: 39 | a) emacs setup.py - uncomment install_requires='psycopg2' 40 | b) buildout # need psychopg2 in bin/test script 41 | c) comment out "#SKIP_TESTS = True" below 42 | d) psql -d postgres -c "CREATE DATABASE test WITH OWNER ott;" 43 | e) bin/test gtfsdb.tests.test_current 44 | """ 45 | #import pdb; pdb.set_trace() 46 | gtfs_uri = get_gtfs_file_uri('multi-date-feed.zip') 47 | db = database_load(gtfs_uri, url=url, schema=schema, is_geospatial=True, current_tables=True) 48 | return db 49 | 50 | 51 | def get_pg_db(url, schema='trimet'): 52 | db = Database(url=url, schema=schema, is_geospatial=True, current_tables=True) 53 | return db 54 | 55 | 56 | def print_list(list): 57 | for i in list: 58 | print(i.__dict__) 59 | 60 | 61 | def check_counts(list1, list2, id='stop_id'): 62 | """ check first that lists both have content; then chekc that either the lists are diff in size or content """ 63 | ret_val = False 64 | #print_list(list1) 65 | #print_list(list2) 66 | if len(list1) > 0 and len(list2) > 0: 67 | if len(list1) != len(list2): 68 | ret_val = True 69 
| else: 70 | for i, e1 in enumerate(list1): 71 | v1 = getattr(e1, id) 72 | v2 = getattr(list2[i], id) 73 | if v1 != v2: 74 | ret_val = True 75 | #print("{} VS. {}".format(v1, v2)) 76 | #print("{} VS. {}".format(e1.stop.stop_name, list2[i].stop_name)) 77 | break 78 | return ret_val 79 | -------------------------------------------------------------------------------- /gtfsdb/tests/large-sample-feed.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTransitTools/gtfsdb/38c20e1e83577a2d0efb7b20ad3e262d27c88168/gtfsdb/tests/large-sample-feed.zip -------------------------------------------------------------------------------- /gtfsdb/tests/multi-date-feed.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTransitTools/gtfsdb/38c20e1e83577a2d0efb7b20ad3e262d27c88168/gtfsdb/tests/multi-date-feed.zip -------------------------------------------------------------------------------- /gtfsdb/tests/sample-feed.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenTransitTools/gtfsdb/38c20e1e83577a2d0efb7b20ad3e262d27c88168/gtfsdb/tests/sample-feed.zip -------------------------------------------------------------------------------- /gtfsdb/tests/test_current.py: -------------------------------------------------------------------------------- 1 | try: 2 | import unittest2 as unittest 3 | except ImportError: 4 | import unittest 5 | 6 | from .base import * 7 | from gtfsdb import * 8 | from gtfsdb import util 9 | 10 | 11 | import logging 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | class TestCurrent(unittest.TestCase): 16 | db = None 17 | DO_PG = False 18 | PG_URL = "postgresql://ott@localhost:5432/ott" 19 | PG_SCHEMA = "current_test" 20 | 21 | def setUp(self): 22 | if TestCurrent.db is None: 23 | TestCurrent.db = load_pgsql(self.PG_URL, self.PG_SCHEMA) if 
self.DO_PG else load_sqlite('curr') 24 | self.db = TestCurrent.db 25 | 26 | def check_query_counts(self, clz1, clz2): 27 | n1 = self.db.session.query(clz1).all() 28 | n2 = self.db.session.query(clz2).all() 29 | return check_counts(n1, n2) 30 | 31 | def test_load(self): 32 | self.assertTrue(self.check_query_counts(Stop, CurrentStops)) 33 | self.assertTrue(self.check_query_counts(Route, CurrentRoutes)) 34 | self.assertTrue(self.check_query_counts(RouteStop, CurrentRouteStops)) 35 | """"" 36 | cr_list = self.db.session.query(CurrentRoutes).all() 37 | for cr in cr_list: 38 | self.assertTrue(cr.route is not None) 39 | """"" 40 | 41 | def test_routes(self): 42 | routes = CurrentRoutes.query_active_routes(self.db.session()) 43 | self.assertTrue(len(routes) > 0) 44 | 45 | def test_stops(self): 46 | stops = CurrentStops.query_stops(self.db.session(), limit=1) 47 | self.assertTrue(len(stops) == 1) 48 | 49 | def test_route_stops(self): 50 | #import pdb; pdb.set_trace() 51 | 52 | # DADAN has 2 routes active now 53 | routes = CurrentRouteStops.unique_routes_at_stop(self.db.session(), stop_id="DADAN") 54 | self.assertTrue(len(routes) == 2) 55 | self.assertTrue(routes[0].route_id in ('NEW', 'ALWAYS')) 56 | self.assertTrue(routes[1].route_id in ('NEW', 'ALWAYS')) 57 | 58 | # DADAN has 3 routes total ... 
RouteStop isn't filtering current stops, so will show older inactive route 59 | routes = RouteStop.unique_routes_at_stop(self.db.session(), stop_id="DADAN") 60 | self.assertTrue(len(routes) == 3) 61 | 62 | # OLD is not active, so CurrentStops should not have OLD as stop in the current route stop table 63 | routes = CurrentRouteStops.unique_routes_at_stop(self.db.session(), stop_id="OLD") 64 | self.assertTrue(len(routes) == 0) 65 | 66 | # although OLD is not active, RouteStop should show this route stop, since it's not filtering for is_active 67 | routes = RouteStop.unique_routes_at_stop(self.db.session(), stop_id="OLD") 68 | self.assertTrue(len(routes) == 1) 69 | self.assertTrue(routes[0].route_id == 'ALWAYS') 70 | 71 | def test_stops_point(self): 72 | if self.DO_PG: 73 | point = util.Point(lat=36.915, lon=-116.762, srid="4326") 74 | curr_stops = CurrentStops.query_stops_via_point(self.db.session(), point) 75 | stops = Stop.query_stops_via_point(self.db.session(), point) 76 | self.assertTrue(check_counts(curr_stops, stops)) 77 | 78 | def test_stops_bbox(self): 79 | if self.DO_PG: 80 | bbox = util.BBox(min_lat=36.0, max_lat=37.0, min_lon=-117.5, max_lon=-116.0, srid="4326") 81 | curr_stops = CurrentStops.query_stops_via_bbox(self.db.session, bbox) 82 | stops = Stop.query_stops_via_bbox(self.db.session, bbox) 83 | self.assertTrue(check_counts(curr_stops, stops)) 84 | -------------------------------------------------------------------------------- /gtfsdb/tests/test_dates.py: -------------------------------------------------------------------------------- 1 | try: 2 | import unittest2 as unittest 3 | except ImportError: 4 | import unittest 5 | 6 | import sys 7 | import shutil 8 | import datetime 9 | 10 | from gtfsdb import * 11 | from .base import load_sqlite 12 | 13 | import logging 14 | log = logging.getLogger(__name__) 15 | 16 | 17 | class TestRouteStop(unittest.TestCase): 18 | model = RouteStop 19 | db = None 20 | 21 | def setUp(self): 22 | if TestRouteStop.db is 
None: 23 | TestRouteStop.db = load_sqlite() 24 | self.db = TestRouteStop.db 25 | 26 | def test_old_routes(self): 27 | date = datetime.date(2018, 12, 25) 28 | rs = RouteStop.query_active_stops(self.db.session, route_id="OLD", direction_id="1", date=date) 29 | self.assertTrue(len(rs) > 2) 30 | 31 | rs = RouteStop.query_active_stops(self.db.session, route_id="OLD", direction_id="0", date=date) 32 | self.assertTrue(len(rs) > 2) 33 | 34 | rs = RouteStop.query_active_stops(self.db.session, route_id="NEW", direction_id="1", date=date) 35 | self.assertTrue(len(rs) == 0) 36 | 37 | rs = RouteStop.query_active_stops(self.db.session, route_id="NEW", direction_id="0", date=date) 38 | self.assertTrue(len(rs) == 0) 39 | 40 | def test_via_stops(self): 41 | # date = datetime.date(2015, 6, 6) 42 | rs = RouteStop.query_by_stop(self.db.session, stop_id="OLD") 43 | self.assertTrue(len(rs) >= 2) 44 | 45 | routes = RouteStop.unique_routes_at_stop(self.db.session, stop_id="OLD") 46 | self.assertTrue(len(routes) == 1) 47 | 48 | def test_active_stop_list(self): 49 | rs = RouteStop.query_active_stops(self.db.session, route_id="ALWAYS", date=datetime.date(2015, 12, 25)) 50 | self.assertTrue(len(rs) == 0) 51 | 52 | rs = RouteStop.query_active_stops(self.db.session, route_id="ALWAYS", date=datetime.date(2018, 12, 25)) 53 | see_old_stop = False 54 | for r in rs: 55 | self.assertTrue(r.stop_id != "NEW") 56 | if r.stop_id == "OLD": 57 | see_old_stop = True 58 | self.assertTrue(see_old_stop) 59 | 60 | rs = RouteStop.query_active_stops(self.db.session, route_id="ALWAYS", date=datetime.date(2019, 1, 5)) 61 | see_new_stop = False 62 | for r in rs: 63 | self.assertTrue(r.stop_id != "OLD") 64 | if r.stop_id == "NEW": 65 | see_new_stop = True 66 | self.assertTrue(see_new_stop) 67 | 68 | def test_stop_dates(self): 69 | active = RouteStop.is_stop_active(self.db.session, stop_id="OLD", date=datetime.date(2018, 12, 25)) 70 | self.assertTrue(active) 71 | 72 | active = RouteStop.is_stop_active(self.db.session, 
stop_id="OLD", date=datetime.date(2019, 1, 3)) 73 | self.assertFalse(active) 74 | 75 | active = RouteStop.is_stop_active(self.db.session, stop_id="OLD", date=datetime.date(2019, 6, 6)) 76 | self.assertFalse(active) 77 | 78 | active = RouteStop.is_stop_active(self.db.session, stop_id="NEW", date=datetime.date(2018, 12, 25)) 79 | self.assertFalse(active) 80 | 81 | active = RouteStop.is_stop_active(self.db.session, stop_id="NEW", date=datetime.date(2019, 6, 6)) 82 | self.assertTrue(active) 83 | 84 | def test_new_routes(self): 85 | date = datetime.date(2019, 1, 1) 86 | rs = RouteStop.query_active_stops(self.db.session, route_id="NEW", direction_id="1", date=date) 87 | self.assertTrue(len(rs) > 2) 88 | 89 | rs = RouteStop.query_active_stops(self.db.session, route_id="NEW", direction_id="0", date=date) 90 | self.assertTrue(len(rs) > 2) 91 | 92 | rs = RouteStop.query_active_stops(self.db.session, route_id="OLD", direction_id="1", date=date) 93 | self.assertTrue(len(rs) == 0) 94 | 95 | rs = RouteStop.query_active_stops(self.db.session, route_id="OLD", direction_id="0", date=date) 96 | self.assertTrue(len(rs) == 0) 97 | 98 | def test_effective_dates(self): 99 | date = datetime.date(2019, 1, 1) 100 | rs = RouteStop.query_active_stops(self.db.session, route_id="NEW", direction_id="1", date=date) 101 | self.assertTrue(len(rs) > 2) 102 | 103 | def test_active_list(self): 104 | rs = RouteStop.query_active_stops(self.db.session, route_id="OLD", direction_id="1", date=datetime.date(2018, 12, 25)) 105 | self.assertTrue(len(rs) > 1) 106 | for s in rs: 107 | self.assertTrue("good, I see active stop id: {0}".format(s.stop_id)) 108 | 109 | 110 | def main(argv): 111 | shutil.copyfile(TestRouteStop.db_file, "gtfs.db") 112 | t = TestRouteStop() 113 | t.test_active_list() 114 | 115 | if __name__ == "__main__": 116 | main(sys.argv) 117 | -------------------------------------------------------------------------------- /gtfsdb/tests/test_geom_queries.py: 
--------------------------------------------------------------------------------
try:
    import unittest2 as unittest
except ImportError:
    import unittest

from .base import check_counts
from gtfsdb import *

import logging
log = logging.getLogger(__name__)


class TestGeomQueries(unittest.TestCase):
    """
    Geospatial (PostGIS) query tests; every test body is guarded by DO_PG and
    is a no-op against sqlite.

    load current tables:
    bin/gtfsdb-current-load -g -s trimet -d postgresql://ott@localhost:5432/ott x
    """
    db = None
    DO_PG = False

    def setUp(self):
        # piggy-back on TestCurrent's switch and connection url so there is a
        # single place to turn PostGIS testing on
        from .test_current import TestCurrent
        if TestCurrent.DO_PG and TestGeomQueries.db is None:
            self.DO_PG = True
            # NOTE(review): only the *instance* attribute is assigned here, so
            # the TestGeomQueries.db class-level cache is never populated and a
            # fresh Database is built per test — confirm whether that's intended
            self.db = Database(url=TestCurrent.PG_URL, schema=TestCurrent.PG_SCHEMA, is_geospatial=True, current_tables=True)

    def test_nearest(self):
        # point query: Current* mirror and base table should agree
        if self.DO_PG:
            point = util.Point(lat=45.53, lon=-122.6664, srid="4326")
            curr_stops = CurrentStops.query_stops_via_point(self.db.session(), point)
            stops = Stop.query_stops_via_point(self.db.session(), point)
            self.assertTrue(check_counts(curr_stops, stops))

    def test_bbox(self):
        # NOTE(review): 'and False' deliberately disables this test body;
        # TODO re-enable (or mark with unittest.skip) once the bbox query works
        if self.DO_PG and False:
            #import pdb; pdb.set_trace()
            bbox = util.BBox(min_lat=45.530, max_lat=45.535, min_lon=-122.665, max_lon=-122.667, srid="4326")
            curr_stops = CurrentStops.query_stops_via_bbox(self.db.session, bbox)
            stops = Stop.query_stops_via_bbox(self.db.session, bbox)
            self.assertTrue(check_counts(curr_stops, stops))
--------------------------------------------------------------------------------
/gtfsdb/tests/test_load.py:
--------------------------------------------------------------------------------
try:
    import unittest2 as unittest
except ImportError:
    import unittest

from gtfsdb import *
from .base import load_sqlite


class TestLoad(unittest.TestCase):
    """Smoke test: load the small sample feed into sqlite and query it."""
    db = None  # class-level cache so the feed is loaded only once per run

    def setUp(self):
        if TestLoad.db is None:
            TestLoad.db = load_sqlite(gtfs_name='sample-feed.zip')
        self.db = TestLoad.db

    def test_database_load(self):
        # a successful load yields at least one stop and one route
        self.assertTrue(len(self.db.session.query(Stop).all()) > 0)
        self.assertTrue(len(self.db.session.query(Route).all()) > 0)
--------------------------------------------------------------------------------
/gtfsdb/tests/test_model.py:
--------------------------------------------------------------------------------
from gtfsdb import *
from .base import load_sqlite

try:
    import unittest2 as unittest
except ImportError:
    import unittest

import datetime
import logging
log = logging.getLogger(__name__)


class BasicModelTests(object):
    """
    Mixin shared by the per-model TestCase classes below: provides the loaded
    db and a couple of generic checks driven by each subclass's 'model' attr.
    NOTE: load_sqlite() runs at import time, so importing this module loads the
    large sample feed once for the whole module.
    """
    db = load_sqlite(gtfs_name='large-sample-feed.zip')

    def get_first(self):
        # memoize the first row of this test's model in self._first
        # NOTE(review): if a subclass lacks a 'model' attribute, the final
        # 'return self._first' raises AttributeError rather than returning None
        try:
            self._first
        except AttributeError:
            if hasattr(self, 'model'):
                self._first = self.db.session.query(self.model).first()
        return self._first

    def test_entity(self):
        # every queried row should be an instance of the declared model
        if hasattr(self, 'model'):
            for r in self.db.session.query(self.model).limit(5):
                self.assertTrue(isinstance(r, self.model))


class TestRoute(unittest.TestCase, BasicModelTests):
    model = Route

    def test_dates(self):
        m = self.get_first()
        self.assertTrue(isinstance(m.start_date, datetime.date))
        self.assertTrue(isinstance(m.end_date, datetime.date))

    def test_active_date(self):
        # import pdb; pdb.set_trace()
        routes = Route.query_active_routes(self.db.session, datetime.date(2014, 6, 6))
        self.assertTrue(len(routes) > 1)
        for r in routes:
            # NOTE(review): asserting a non-empty string is always True — debug leftover
            self.assertTrue("good, I see active route id: {0}".format(r.route_id))

    def test_active_today(self):
        # the sample feed's service windows are in the past, so 'today' is empty
        routes = Route.query_active_routes(self.db.session)
        for r in routes:
            self.assertFalse("we should not have any routes, but I see route id: {0}".format(r.route_id))


class TestRouteStop(unittest.TestCase, BasicModelTests):
    model = RouteStop

    def test_active_list(self):
        stops = RouteStop.query_active_stops(self.db.session, route_id="194", direction_id="1", date=datetime.date(2014, 6, 6))
        self.assertTrue(len(stops) > 1)
        for s in stops:
            # NOTE(review): asserting a non-empty string is always True — debug leftover
            self.assertTrue("good, I see active stop id: {0}".format(s.stop_id))

    def test_by_stop(self):
        stops = RouteStop.query_by_stop(self.db.session, stop_id="12883")
        self.assertTrue(len(stops) >= 1)

    def test_routes_serving_stop(self):
        routes = RouteStop.query_by_stop(self.db.session, stop_id="10767")
        self.assertTrue(len(routes) == 2)

    def test_route_stops(self):
        # both directions of routes 193 and 194 should be active on this date
        date = datetime.date(2014, 6, 6)
        stops = RouteStop.query_active_stops(self.db.session, route_id="193", direction_id="1", date=date)
        self.assertTrue(len(stops) > 5)

        stops = RouteStop.query_active_stops(self.db.session, route_id="193", direction_id="0", date=date)
        self.assertTrue(len(stops) > 5)

        stops = RouteStop.query_active_stops(self.db.session, route_id="194", direction_id="1", date=date)
        self.assertTrue(len(stops) > 5)

        stops = RouteStop.query_active_stops(self.db.session, route_id="194", direction_id="0", date=date)
        self.assertTrue(len(stops) > 5)

    def test_active_date(self):
        m = self.get_first()
        self.assertTrue(m.is_active(datetime.date(2014, 6, 6)))

    def test_active_today(self):
        # feed's service window is in the past, so nothing is active today
        m = self.get_first()
        self.assertFalse(m.is_active())


class TestRouteDirection(unittest.TestCase, BasicModelTests):
    model = RouteDirection


class TestTrip(unittest.TestCase, BasicModelTests):
    model = Trip

    def test_end_stop(self):
        m = self.get_first()
        self.assertTrue(isinstance(m.end_stop, Stop))

    def test_start_stop(self):
        m = self.get_first()
        self.assertTrue(isinstance(m.start_stop, Stop))

    def test_stop_times(self):
        m = self.get_first()
        for stop_time in m.stop_times:
            self.assertTrue(isinstance(stop_time, StopTime))

    def test_times(self):
        m = self.get_first()
        self.assertTrue(m.start_time)
        self.assertTrue(m.end_time)


class TestStop(unittest.TestCase, BasicModelTests):
    model = Stop

    def test_headsigns(self):
        m = self.get_first()
        self.assertTrue(isinstance(m.headsigns, dict))

    def test_routes(self):
        m = self.get_first()
        for r in m.routes:
            self.assertTrue(isinstance(r, Route))


class TestStopTimes(unittest.TestCase, BasicModelTests):
    model = StopTime

    def test_shape_pt_dist(self):
        """ the large-sample-feed.zip lacks the optional 'shape_dist_traveled' attribute, so provide it post-process """
        # distances must be strictly increasing by at least ~0 per stop
        num = -0.1
        for s in self.db.session.query(StopTime).filter(StopTime.trip_id == '4383758').all():
            self.assertTrue(s.shape_dist_traveled > num)
            num += 1.0


class TestAgency(unittest.TestCase, BasicModelTests):
    model = Agency


class TestCalendar(unittest.TestCase, BasicModelTests):
    model = Calendar


class TestCalendarDate(unittest.TestCase, BasicModelTests):
    model = CalendarDate


class TestFareAttribute(unittest.TestCase, BasicModelTests):
    model = FareAttribute


class TestFareRule(unittest.TestCase, BasicModelTests):
    model = FareRule


class TestShape(unittest.TestCase, BasicModelTests):
    model = Shape

    def test_shape_pt_dist(self):
        """ the large-sample-feed.zip lacks the optional 'shape_dist_traveled' attribute, so provide it post-process """
        num = -0.1
        for s in self.db.session.query(self.model).limit(5):
            self.assertTrue(s.shape_dist_traveled > num)
            num += 1.0


class TestTransfer(unittest.TestCase, BasicModelTests):
    model = Transfer

-------------------------------------------------------------------------------- /gtfsdb/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import datetime 5 | import tempfile 6 | 7 | from gtfsdb import config 8 | 9 | import logging 10 | log = logging.getLogger(__name__) 11 | 12 | 13 | # python2 & python3 compat - 'long' is a py2 thing, and undefined in py3 .. so long=int 14 | try: 15 | long = long 16 | except: 17 | long = int 18 | 19 | 20 | def get_all_subclasses(cls): 21 | """ 22 | :see https://stackoverflow.com/questions/3862310/how-to-find-all-the-subclasses-of-a-class-given-its-name 23 | """ 24 | ret_val = set(cls.__subclasses__()).union( 25 | [s for c in cls.__subclasses__() for s in get_all_subclasses(c)] 26 | ) 27 | return ret_val 28 | 29 | 30 | def make_temp_sqlite_db_uri(name=None): 31 | """ 32 | will return a FILE URI to a temp file, ala /tmp/bLaHh111 for the path of a new sqlite file db 33 | NOTE: name is optional ... 
if provided, the file will be named as such (good for testing and refreshing sqlite db) 34 | """ 35 | if name: 36 | db_file = os.path.join(tempfile.gettempdir(), name) 37 | else: 38 | db_file = tempfile.mkstemp()[1] 39 | url = 'sqlite:///{0}'.format(db_file) 40 | log.debug("DATABASE TMP FILE: {0}".format(db_file)) 41 | return url 42 | 43 | 44 | def safe_get(obj, key, def_val=None): 45 | """ 46 | try to return the key'd value from either a class or a dict 47 | (or return the raw value if we were handed a native type) 48 | """ 49 | ret_val = def_val 50 | try: 51 | ret_val = getattr(obj, key) 52 | except: 53 | try: 54 | ret_val = obj[key] 55 | except: 56 | if isinstance(obj, (int, long, str)): 57 | ret_val = obj 58 | return ret_val 59 | 60 | 61 | def safe_get_any(obj, keys, def_val=None): 62 | """ 63 | :return object element value matching the first key to have an associated value 64 | """ 65 | ret_val = def_val 66 | for k in keys: 67 | v = safe_get(obj, k) 68 | if v and len(v) > 0: 69 | ret_val = v 70 | break 71 | return ret_val 72 | 73 | 74 | def check_date(in_date, fmt_list=['%Y-%m-%d', '%m/%d/%Y', '%m-%d-%Y'], def_val=None): 75 | """ 76 | utility function to parse a request object for something that looks like a date object... 
77 | """ 78 | if def_val is None: 79 | def_val = datetime.date.today() 80 | 81 | if in_date is None: 82 | ret_val = def_val 83 | elif isinstance(in_date, datetime.date) or isinstance(in_date, datetime.datetime): 84 | ret_val = in_date 85 | else: 86 | ret_val = def_val 87 | for fmt in fmt_list: 88 | try: 89 | d = datetime.datetime.strptime(in_date, fmt).date() 90 | if d is not None: 91 | ret_val = d 92 | break 93 | except Exception as e: 94 | log.debug(e) 95 | return ret_val 96 | 97 | 98 | def fix_time_string(ts): 99 | """ check that string time is HH:MM:SS (append zero if just H:MM:SS) """ 100 | ret_val = ts 101 | if ts and type(ts) == str and ts[1] == ":": 102 | ret_val = "0{0}".format(ts) 103 | return ret_val 104 | 105 | 106 | class UTF8Recoder(object): 107 | """Iterator that reads an encoded stream and encodes the input to UTF-8""" 108 | def __init__(self, f, encoding): 109 | import codecs 110 | self.reader = codecs.getreader(encoding)(f) 111 | 112 | def __iter__(self): 113 | return self 114 | 115 | def next(self): 116 | if sys.version_info >= (3, 0): 117 | return next(self.reader) 118 | else: 119 | return self.reader.next().encode('utf-8') 120 | 121 | def __next__(self): 122 | return self.next() 123 | 124 | 125 | class Point(object): 126 | is_valid = False 127 | 128 | def __init__(self, **kwargs): 129 | self.srid = kwargs.get('srid', None) 130 | try: 131 | self.lat = float(kwargs.get('lat')) 132 | self.lon = float(kwargs.get('lon')) 133 | self.is_valid = True 134 | except: 135 | self.lat = self.lon = None 136 | 137 | def get_point(self): 138 | return self.lon, self.lat 139 | 140 | def to_geojson(self): 141 | point = self.make_geo(self.lon, self.lat, self.srid) 142 | return point 143 | 144 | @classmethod 145 | def make_geo(cls, lon, lat, srid=None): 146 | geo = 'POINT({0} {1})'.format(lon, lat) 147 | if geo: 148 | geo = 'SRID={0};{1}'.format(srid, geo) 149 | return geo 150 | 151 | 152 | class BBox(object): 153 | is_valid = False 154 | 155 | def __init__(self, 
**kwargs): 156 | self.srid = kwargs.get('srid', None) 157 | try: 158 | self.min_lat = float(kwargs.get('min_lat')) 159 | self.min_lon = float(kwargs.get('min_lon')) 160 | self.max_lat = float(kwargs.get('max_lat')) 161 | self.max_lon = float(kwargs.get('max_lon')) 162 | self.is_valid = True 163 | except: 164 | self.min_lat = self.min_lon = self.max_lat = self.max_lon = None 165 | 166 | def get_bbox(self): 167 | return self.min_lon, self.min_lat, self.max_lon, self.max_lat 168 | 169 | def to_geojson(self): 170 | poly = self.make_geo(self.min_lon, self.max_lon, self.min_lat, self.max_lat, self.srid) 171 | return poly 172 | 173 | @classmethod 174 | def make_geo(cls, left_lon, right_lon, bot_lat, top_lat, srid=None): 175 | """ 176 | see: https://gis.stackexchange.com/questions/25797/select-bounding-box-using-postgis 177 | note: 5-pt POLY top-left, top-right, bot-right, bot-left, ulx uly 178 | llon/tlat, rlon/tlat, rlon/blat, min-lon/max-lat, min-lon/max-lat 179 | """ 180 | geo = 'POLYGON(({0} {3}, {1} {3}, {1} {2}, {0} {2}, {0} {3}))'.format(left_lon, right_lon, bot_lat, top_lat) 181 | if geo: 182 | geo = 'SRID={0};{1}'.format(srid, geo) 183 | return geo 184 | 185 | 186 | def distance_km(lat1, lon1, lat2, lon2): 187 | """ 188 | return distance between two points in km using haversine 189 | http://en.wikipedia.org/wiki/Haversine_formula 190 | http://www.platoscave.net/blog/2009/oct/5/calculate-distance-latitude-longitude-python/ 191 | Author: Wayne Dyck 192 | """ 193 | ret_val = 0 194 | radius = 6371 # km 195 | lat1 = float(lat1) 196 | lon1 = float(lon1) 197 | lat2 = float(lat2) 198 | lon2 = float(lon2) 199 | 200 | dlat = math.radians(lat2-lat1) 201 | dlon = math.radians(lon2-lon1) 202 | 203 | a = math.sin(dlat/2) * math.sin(dlat/2) + math.cos(math.radians(lat1)) \ 204 | * math.cos(math.radians(lat2)) * math.sin(dlon/2) * math.sin(dlon/2) 205 | c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) 206 | ret_val = radius * c 207 | 208 | return ret_val 209 | 210 | 211 | def 
distance_mi(lat1, lon1, lat2, lon2): 212 | """ 213 | return distance between two points in miles 214 | """ 215 | km = distance_km(lat1, lon1, lat2, lon2) 216 | return km * 0.621371192 217 | 218 | 219 | def distance_ft(lat1, lon1, lat2, lon2): 220 | """ 221 | return distance between two points in feet 222 | """ 223 | mi = distance_mi(lat1, lon1, lat2, lon2) 224 | return mi * 5280 225 | 226 | 227 | def make_coord_from_point(lon, lat): 228 | return '{0} {1}'.format(lon, lat) 229 | 230 | 231 | def make_linestring_from_point_array(coords, srid=config.SRID): 232 | return 'SRID={0};LINESTRING({1})'.format(srid, ','.join(coords)) 233 | 234 | 235 | def make_linestring_from_two_points(lon1, lat1, lon2, lat2, srid=config.SRID): 236 | coords = [] 237 | coords.append(make_coord_from_point(lon1, lat1)) 238 | coords.append(make_coord_from_point(lon2, lat2)) 239 | ls = make_linestring_from_point_array(coords, srid) 240 | return ls 241 | 242 | 243 | def make_linestring_from_two_stops(stop1, stop2, srid=config.SRID): 244 | ls = make_linestring_from_two_points(stop1.stop_lon, stop1.stop_lat, stop2.stop_lon, stop2.stop_lat, srid) 245 | return ls 246 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import sys 3 | 4 | 5 | oracle_extras = ['cx_oracle>=5.1'] 6 | postgresql_extras = ['psycopg2-binary'] 7 | # dev_extras = oracle_extras + postgresql_extras 8 | dev_extras = [] 9 | 10 | extras_require = dict( 11 | dev=dev_extras, 12 | oracle=oracle_extras, 13 | postgresql=postgresql_extras, 14 | ) 15 | 16 | install_requires = [ 17 | 'geoalchemy2', 18 | 'sqlalchemy', 19 | ] 20 | 21 | setup( 22 | name='gtfsdb', 23 | version='0.6.0', 24 | description='GTFS Database', 25 | long_description=open('README.rst').read(), 26 | keywords='GTFS', 27 | author="Open Transit Tools", 28 | author_email="info@opentransittools.org", 29 | 
    packages=find_packages(),
    include_package_data=True,
    zip_safe=False,
    install_requires=install_requires,
    extras_require=extras_require,
    entry_points={
        'console_scripts': [
            'gtfsdb-load = gtfsdb.scripts:gtfsdb_load',
            'gtfsdb-current-load = gtfsdb.scripts:current_tables_cmdline',
            'rs-test = gtfsdb.scripts:route_stop_load',
            'connect-tester = gtfsdb.scripts:db_connect_tester',
        ]
    },
    # NOTE(review): setuptools docs use a list for classifiers; a tuple works too
    classifiers=(
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
        'Natural Language :: English',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.7',
    ),
)
--------------------------------------------------------------------------------
/versions.cfg:
--------------------------------------------------------------------------------
[versions]
# version pins consumed by zc.buildout (see buildout.cfg)


# Added by buildout at 2021-01-20 16:43:53.979741
GeoAlchemy2 = 0.8.4
SQLAlchemy = 1.3.22
zc.recipe.egg = 2.0.7
zc.recipe.testrunner = 2.2

# Required by:
# zope.testrunner==5.2
six = 1.15.0

# Required by:
# zope.testrunner==5.2
zope.exceptions = 4.4

# Required by:
# zope.testrunner==5.2
zope.interface = 5.2.0

# Required by:
# zc.recipe.testrunner==2.2
zope.testrunner = 5.2
--------------------------------------------------------------------------------