├── .npmignore ├── test ├── sample-feed-test │ ├── shapes.txt │ ├── fare_rules.txt │ ├── agency.txt │ ├── fare_attributes.txt │ ├── calendar_dates.txt │ ├── calendar.txt │ ├── frequencies.txt │ ├── routes.txt │ ├── trips.txt │ ├── stops.txt │ └── stop_times.txt ├── dataConversion.test.js ├── resultStream.test.js ├── splittingAndJoiningTrips.test.js └── URIStrategy.test.js ├── .gitignore ├── lib ├── gtfs2lc.js ├── stores │ ├── Store.js │ └── StoreManager.js ├── Connections2CSV.js ├── Connections2Mongo.js ├── Connections2JSONLD.js ├── services │ └── CalendarExpander.js ├── stoptimes │ ├── StopTimes2Cxs.js │ └── st2c.js ├── ConnectionsBuilder.js ├── Connections2Triples.js ├── URIStrategy.js └── gtfs2connections.js ├── baseUris-example.json ├── LICENSE ├── bin ├── linkedconnections-sort.sh ├── gtfs2lc.js ├── gtfs2lc-clean.sh └── linkedconnections-sortandjoin.js ├── .github └── workflows │ └── build-test.yml ├── package.json └── README.md /.npmignore: -------------------------------------------------------------------------------- 1 | sample_feed 2 | node_modules 3 | .services 4 | .trips 5 | *.jsonstream 6 | test 7 | -------------------------------------------------------------------------------- /test/sample-feed-test/shapes.txt: -------------------------------------------------------------------------------- 1 | shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence,shape_dist_traveled -------------------------------------------------------------------------------- /test/sample-feed-test/fare_rules.txt: -------------------------------------------------------------------------------- 1 | fare_id,route_id,origin_id,destination_id,contains_id 2 | p,AB,,, 3 | p,STBA,,, 4 | p,BFC,,, 5 | a,AAMV,,, -------------------------------------------------------------------------------- /test/sample-feed-test/agency.txt: -------------------------------------------------------------------------------- 1 | agency_id,agency_name,agency_url,agency_timezone 2 | DTA,Demo Transit 
Authority,http://google.com,America/Los_Angeles -------------------------------------------------------------------------------- /test/sample-feed-test/fare_attributes.txt: -------------------------------------------------------------------------------- 1 | fare_id,price,currency_type,payment_method,transfers,transfer_duration 2 | p,1.25,USD,0,0, 3 | a,5.25,USD,0,0, -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | *.log 3 | node_modules 4 | .services 5 | .trips 6 | /.project 7 | /.idea/* 8 | test/sample-feed/ 9 | .coveralls.yml 10 | coverage 11 | -------------------------------------------------------------------------------- /test/sample-feed-test/calendar_dates.txt: -------------------------------------------------------------------------------- 1 | service_id,date,exception_type 2 | FULLW,20070604,2 3 | FULLW,20070605,1 4 | FULLW,20070606,1 5 | FULLW,20070607,1 6 | -------------------------------------------------------------------------------- /test/sample-feed-test/calendar.txt: -------------------------------------------------------------------------------- 1 | service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date 2 | FULLW,1,1,1,1,1,1,1,20070101,20070630 3 | WE,0,0,0,0,0,1,1,20070101,20070630 4 | W,1,1,1,1,1,0,0,20070101,20070630 5 | -------------------------------------------------------------------------------- /lib/gtfs2lc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | Connections : require('./gtfs2connections.js'), 3 | Connections2Triples : require('./Connections2Triples.js'), 4 | Connections2JSONLD: require('./Connections2JSONLD.js'), 5 | Connections2Mongo: require('./Connections2Mongo.js'), 6 | }; 7 | -------------------------------------------------------------------------------- /lib/stores/Store.js: 
-------------------------------------------------------------------------------- 1 | const { Level } = require('level'); 2 | 3 | module.exports = function ({ fileName, encoding }, type) { 4 | let store; 5 | if (type === 'MemStore') { 6 | store = new Map(); 7 | } else { 8 | store = new Level(fileName, { valueEncoding: encoding }); 9 | } 10 | return store; 11 | }; 12 | -------------------------------------------------------------------------------- /test/sample-feed-test/frequencies.txt: -------------------------------------------------------------------------------- 1 | trip_id,start_time,end_time,headway_secs 2 | STBA,6:00:00,22:00:00,1800 3 | CITY1,6:00:00,7:59:59,1800 4 | CITY2,6:00:00,7:59:59,1800 5 | CITY1,8:00:00,9:59:59,600 6 | CITY2,8:00:00,9:59:59,600 7 | CITY1,10:00:00,15:59:59,1800 8 | CITY2,10:00:00,15:59:59,1800 9 | CITY1,16:00:00,18:59:59,600 10 | CITY2,16:00:00,18:59:59,600 11 | CITY1,19:00:00,22:00:00,1800 12 | CITY2,19:00:00,22:00:00,1800 -------------------------------------------------------------------------------- /baseUris-example.json: -------------------------------------------------------------------------------- 1 | { 2 | "stop": "http://example.org/stations/{stops.stop_id}", 3 | "route": "http://example.org/routes/{routes.route_id}", 4 | "trip": "http://example.org/trips/{trips.trip_id}", 5 | "connection": "http://example.org/connections/{routes.route_id}/{trips.trip_id}/{connection.departureStop}/{tripStartTime}/", 6 | "resolve": { 7 | "tripStartTime": "format(trips.startTime, 'yyyyMMdd\\'T\\'HHmm');" 8 | } 9 | } -------------------------------------------------------------------------------- /test/sample-feed-test/routes.txt: -------------------------------------------------------------------------------- 1 | route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_url,route_color,route_text_color 2 | AB,DTA,10,Airport - Bullfrog,,3,,, 3 | BFC,DTA,20,Bullfrog - Furnace Creek Resort,,3,,, 4 | STBA,DTA,30,Stagecoach 
- Airport Shuttle,,3,,, 5 | CITY,DTA,40,City,,3,,, 6 | AAMV,DTA,50,Airport - Amargosa Valley,,3,,, 7 | joining_route,DTA,60,Joining Route,,3,,, 8 | splitting_route,DTA,60,Splitting Route,,3,,, 9 | -------------------------------------------------------------------------------- /test/sample-feed-test/trips.txt: -------------------------------------------------------------------------------- 1 | route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape_id 2 | AAMV,WE,AAMV1,to Amargosa Valley,0,, 3 | AAMV,WE,AAMV2,to Airport,1,, 4 | AAMV,WE,AAMV3,to Amargosa Valley,0,, 5 | AAMV,WE,AAMV4,to Airport,1,, 6 | AB,FULLW,AB1,to Bullfrog,0,1, 7 | AB,FULLW,AB2,to Airport,1,2, 8 | BFC,FULLW,BFC1,to Furnace Creek Resort,0,1, 9 | BFC,FULLW,BFC2,to Bullfrog,1,2, 10 | CITY,FULLW,CITY1,,0,, 11 | CITY,FULLW,CITY2,,1,, 12 | joining_route,WE,joining_trip_1,Joining Trip 1,, 13 | joining_route,WE,joining_trip_2,Joining Trip 2,, 14 | splitting_route,W,non_joining_splitting_trip_3,Non Joining Yet Splitting Trip 3,, 15 | splitting_route,W,non_joining_splitting_trip_4,Non Joining Yet Splitting Trip 4,, 16 | STBA,FULLW,STBA,Shuttle,,, 17 | -------------------------------------------------------------------------------- /test/sample-feed-test/stops.txt: -------------------------------------------------------------------------------- 1 | stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url 2 | FUR_CREEK_RES,Furnace Creek Resort (Demo),,36.425288,-117.133162,, 3 | BEATTY_AIRPORT,Nye County Airport (Demo),,36.868446,-116.784582,, 4 | BULLFROG,Bullfrog (Demo),,36.88108,-116.81797,, 5 | STAGECOACH,Stagecoach Hotel & Casino (Demo),,36.915682,-116.751677,, 6 | NADAV,North Ave / D Ave N (Demo),,36.914893,-116.76821,, 7 | NANAA,North Ave / N A Ave (Demo),,36.914944,-116.761472,, 8 | DADAN,Doing Ave / D Ave N (Demo),,36.909489,-116.768242,, 9 | EMSI,E Main St / S Irving St (Demo),,36.905697,-116.76218,, 10 | AMV,Amargosa Valley (Demo),,36.641496,-116.40094,, 11 | A,Amargosa Valley 
(Demo),,36.641496,-116.40094,, 12 | B,Amargosa Valley (Demo),,36.641496,-116.40094,, 13 | C,Amargosa Valley (Demo),,36.641496,-116.40094,, 14 | D,Amargosa Valley (Demo),,36.641496,-116.40094,, 15 | E,Amargosa Valley (Demo),,36.641496,-116.40094,, 16 | X,Amargosa Valley (Demo),,36.641496,-116.40094,, 17 | Y,Amargosa Valley (Demo),,36.641496,-116.40094,, 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Linked Connections contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
const { Transform } = require('stream');

/**
 * Transform stream (object mode in, text out) that serializes Connection
 * objects to CSV rows:
 * departureStop,departureTime,arrivalStop,arrivalTime,trip,route,"headsign"
 */
class Connections2CSV extends Transform {
  /**
   * @param {boolean} header - when truthy, the first incoming chunk is
   *   consumed and replaced by the CSV header row (presumably the upstream
   *   pipeline emits a JSON-LD @context as its first chunk, which is what
   *   gets swallowed here — confirm against the producing stream).
   */
  constructor(header) {
    super({ objectMode: true });
    // When no header is requested, mark it as already streamed so every
    // incoming chunk is serialized as a data row.
    this.headerStreamed = !header;
  }

  _transform(connection, encoding, done) {
    if (!this.headerStreamed) {
      this.headerStreamed = true;
      // Note: the chunk that triggers the header is intentionally dropped.
      done(null, '"departureStop","departureTime","arrivalStop","arrivalTime","trip","route","headsign"\n');
    } else {
      // The headsign is free text; escape embedded double quotes (RFC 4180)
      // so a quote inside the headsign cannot corrupt the CSV row.
      const headsign = String(connection.headsign).replace(/"/g, '""');
      const csv = connection["departureStop"] + ',' + connection["departureTime"].toISOString() + ','
        + connection["arrivalStop"] + ',' + connection["arrivalTime"].toISOString() + ',' + connection["trip"]["trip_id"] + ','
        + connection.route.route_id + ',"' + headsign + '"' + '\n';
      done(null, csv);
    }
  }

  get headerStreamed() {
    return this._headerStreamed;
  }

  set headerStreamed(value) {
    this._headerStreamed = value;
  }
}

module.exports = Connections2CSV;
(connection['@context']) { 18 | //if there's a context involved, just send it through 19 | done(null, connection['@context']); 20 | } else { 21 | if (typeof connection['departureTime'] !== 'string') { 22 | connection['departureTime'] = connection['departureTime'].toISOString(); 23 | connection['arrivalTime'] = connection['arrivalTime'].toISOString(); 24 | } 25 | connection['departureTime'] = {'$date' : connection['departureTime'] }; 26 | connection['arrivalTime'] = {'$date' : connection['arrivalTime'] }; 27 | // If @id is set, change it to _id: JSON-LD specific 28 | if (connection['@id']) { 29 | connection['_id'] = connection['@id']; 30 | delete connection['@id']; 31 | } 32 | done(null, connection); 33 | } 34 | }; 35 | 36 | module.exports = Connections2Mongo; 37 | -------------------------------------------------------------------------------- /bin/linkedconnections-sort.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## 4 | ## This file sorts and joins trips that should be one correctly from a newline delimited jsonld file of connections (the output of `gtfs2lc -f jsonld`) 5 | ## 6 | 7 | CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 8 | 9 | ## should be 1 argument: the connections.nldjsonld file, and this file should exist 10 | [[ $# == 1 ]] && [[ -f $1 ]] && { 11 | ## Skip first line that corresponds to JSON-LD @context 12 | ## Order it by departureTime, as well as 13 | DEPARTURETIME=$(( `sed 1,1d $1 | head -n1 | tr "," "\n" | grep -n "departureTime"| cut -d: -f1` )); 14 | DEPARTURESTOP=$(( `sed 1,1d $1 | head -n1 | tr "," "\n" | grep -n "departureStop"| cut -d: -f1` )); 15 | ARRIVALTIME=$(( `sed 1,1d $1 | head -n1 | tr "," "\n" | grep -n "arrivalTime"| cut -d: -f1` )); 16 | ARRIVALSTOP=$(( `sed 1,1d $1 | head -n1 | tr "," "\n" | grep -n "arrivalStop"| cut -d: -f1` )); 17 | ROUTE=$(( `sed 1,1d $1 | head -n1 | tr "," "\n" | grep -n "gtfs:route"| cut -d: -f1` )); 18 | 19 | ## And 
const c2csv = require('../lib/Connections2CSV');
const fs = require('fs');
const util = require('util');
const del = require('del');
const cp = require('child_process');
const { Readable } = require('stream');

const readFile = util.promisify(fs.readFile);
const exec = util.promisify(cp.exec);

// Generate the connections fixture once for the whole suite.
beforeAll(async () => {
  await exec(`./bin/gtfs2lc.js -s --fresh test/sample-feed > test/sample-feed/formats.json`);
});

afterAll(async () => {
  await del(['test/sample-feed/linkedConnections.json']);
});

test('Convert connections to csv', async () => {
  const csvCxs = await stream2Array(new c2csv());
  expect(csvCxs.length).toBeGreaterThan(0);
  // A connection row has exactly 7 comma-separated columns.
  expect(csvCxs[0].split(',').length).toBe(7);
});

/**
 * Async generator yielding parsed connections from the fixture file, with
 * the time fields revived into Date objects (Connections2CSV calls
 * toISOString() on them).
 */
async function* connGenerator() {
  const conns = (await readFile('test/sample-feed/formats.json', 'utf8')).split('\n');
  for (const c of conns) {
    if (c === '') continue;
    const jcx = JSON.parse(c);
    jcx['departureTime'] = new Date(jcx['departureTime']);
    jcx['arrivalTime'] = new Date(jcx['arrivalTime']);
    yield jcx;
  }
}

/**
 * Pipes the generated connections through the given transform stream and
 * collects the emitted chunks into an array.
 * Fix: stream errors now reject the promise instead of leaving the test
 * hanging until the jest timeout (`reject` was previously declared unused).
 */
function stream2Array(stream) {
  return new Promise((resolve, reject) => {
    const array = [];
    Readable.from(connGenerator()).pipe(stream)
      .on('data', data => {
        array.push(data);
      })
      .on('error', reject)
      .on('end', () => {
        resolve(array);
      });
  });
}
3 | "version": "2.3.1", 4 | "description": "Mapping script from gtfs to (linked) connections", 5 | "main": "lib/gtfs2lc.js", 6 | "bin": { 7 | "gtfs2lc": "./bin/gtfs2lc.js", 8 | "gtfs2lc-clean": "./bin/gtfs2lc-clean.sh", 9 | "linkedconnections-joinandsort": "./bin/linkedconnections-sort.sh" 10 | }, 11 | "scripts": { 12 | "test": "rm -rf test/sample-feed/ ; cp -r test/sample-feed-test/ test/sample-feed/ ; ./bin/gtfs2lc.js -s -f jsonld test/sample-feed > test/sample-feed/connections-notjoined.nldjsonld ; ./bin/linkedconnections-sort.sh test/sample-feed/connections-notjoined.nldjsonld > test/sample-feed/connections.nldjsonld ; rm test/sample-feed/linkedConnections.json ; jest --runInBand", 13 | "test-ci": "rm -rf test/sample-feed/ ; cp -r test/sample-feed-test/ test/sample-feed/ ; ./bin/gtfs2lc.js -s -f jsonld test/sample-feed > test/sample-feed/connections-notjoined.nldjsonld ; ./bin/linkedconnections-sort.sh test/sample-feed/connections-notjoined.nldjsonld > test/sample-feed/connections.nldjsonld ; rm test/sample-feed/linkedConnections.json ; jest --ci --runInBand --coverage", 14 | "coveralls": "jest --coverage && coveralls < coverage/lcov.info" 15 | }, 16 | "repository": { 17 | "type": "git", 18 | "url": "git://github.com/LinkedConnections/gtfs2lc.git" 19 | }, 20 | "keywords": [ 21 | "GTFS", 22 | "Linked Connections", 23 | "RDF" 24 | ], 25 | "author": "Pieter Colpaert", 26 | "license": "MIT", 27 | "bugs": { 28 | "url": "https://github.com/LinkedConnections/gtfs2lc/issues" 29 | }, 30 | "dependencies": { 31 | "commander": "^4.1.1", 32 | "date-fns": "^2.30.0", 33 | "del": "^5.1.0", 34 | "fast-csv": "^4.3.6", 35 | "JSONStream": "^1.3.5", 36 | "level": "^8.0.0", 37 | "n3": "^1.17.0", 38 | "stream-json": "^1.8.0", 39 | "uri-templates": "^0.2.0" 40 | }, 41 | "devDependencies": { 42 | "coveralls": "^3.1.1", 43 | "jest": "^29.6.1" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /test/sample-feed-test/stop_times.txt: 
-------------------------------------------------------------------------------- 1 | trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled 2 | AAMV1,8:00:00,8:00:00,BEATTY_AIRPORT,1,,,, 3 | AAMV1,9:00:00,9:00:00,AMV,2,,,, 4 | AAMV2,10:00:00,10:00:00,AMV,1,,,, 5 | AAMV2,11:00:00,11:00:00,BEATTY_AIRPORT,2,,,, 6 | AAMV3,13:00:00,13:00:00,BEATTY_AIRPORT,1,,,, 7 | AAMV3,14:00:00,14:00:00,AMV,2,,,, 8 | AAMV4,15:00:00,15:00:00,AMV,1,,,, 9 | AAMV4,16:00:00,16:00:00,BEATTY_AIRPORT,2,,,, 10 | AB1,8:00:00,8:00:00,BEATTY_AIRPORT,1,,,, 11 | AB1,8:10:00,8:15:00,BULLFROG,2,,,, 12 | AB2,12:05:00,12:05:00,BULLFROG,1,,,, 13 | AB2,12:15:00,12:15:00,BEATTY_AIRPORT,2,,,, 14 | BFC1,8:20:00,8:20:00,BULLFROG,1,,,, 15 | BFC1,9:20:00,9:20:00,FUR_CREEK_RES,2,,,, 16 | BFC2,11:00:00,11:00:00,FUR_CREEK_RES,1,,,, 17 | BFC2,12:00:00,12:00:00,BULLFROG,2,,,, 18 | CITY1,6:00:00,6:00:00,STAGECOACH,1,,,, 19 | CITY1,6:05:00,6:07:00,NANAA,2,,,, 20 | CITY1,6:12:00,6:14:00,NADAV,3,,,, 21 | CITY1,6:19:00,6:21:00,DADAN,4,,,, 22 | CITY1,6:26:00,6:28:00,EMSI,5,,,, 23 | CITY2,6:28:00,6:30:00,EMSI,1,,,, 24 | CITY2,6:35:00,6:37:00,DADAN,2,,,, 25 | CITY2,6:42:00,6:44:00,NADAV,3,,,, 26 | CITY2,6:49:00,6:51:00,NANAA,4,,,, 27 | CITY2,6:56:00,6:58:00,STAGECOACH,5,,,, 28 | joining_trip_1,12:00,12:00,A,1,E,0,0, 29 | joining_trip_1,12:05,12:05,B,2,E,0,0, 30 | joining_trip_1,12:10,12:10,C,3,E,0,0, 31 | joining_trip_1,12:15,12:15,D,4,E,0,0, 32 | joining_trip_1,12:20,12:20,E,5,E,0,0, 33 | joining_trip_2,12:00,12:00,X,1,E,0,0, 34 | joining_trip_2,12:05,12:05,Y,2,E,0,0, 35 | joining_trip_2,12:10,12:10,C,3,E,1,0, 36 | non_joining_splitting_trip_3,12:05,12:05,Y,1,E,0,0, 37 | non_joining_splitting_trip_3,12:10,12:10,C,2,E,1,0, 38 | non_joining_splitting_trip_3,12:15,12:15,D,3,E,1,0, 39 | non_joining_splitting_trip_4,12:05,12:05,Y,1,E,0,1, 40 | non_joining_splitting_trip_4,12:10,12:10,C,2,E,1,1, 41 | non_joining_splitting_trip_4,12:20,12:20,BULLFROG,3,F,1,1, 42 | 
joining_trip_2,12:15,12:15,D,4,E,1,0, 43 | joining_trip_2,12:20,12:20,E,5,E,1,0, 44 | STBA,6:00:00,6:00:00,STAGECOACH,1,,1,1, 45 | STBA,6:20:00,6:20:00,BEATTY_AIRPORT,2,,1,1, 46 | -------------------------------------------------------------------------------- /test/resultStream.test.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert'); 2 | const cp = require('child_process'); 3 | const fs = require('fs'); 4 | const util = require('util'); 5 | const del = require('del'); 6 | 7 | const readFile = util.promisify(fs.readFile); 8 | const exec = util.promisify(cp.exec); 9 | 10 | jest.setTimeout(60000); 11 | 12 | afterEach(async () => { 13 | await del(['test/sample-feed/linkedConnections*']); 14 | }); 15 | 16 | describe('Testing whether result contains certain objects (regression tests)', () => { 17 | 18 | const lcstreamToArray = async (options, file) => { 19 | await exec(`./bin/gtfs2lc.js -s -f ${options['format'] || 'json'} -S ${options['store'] || 'MemStore'} --fresh ./test/sample-feed > ./test/sample-feed/${file}`); 20 | const data = await readFile(`./test/sample-feed/${file}`, 'utf8'); 21 | return data.split('\n'); 22 | }; 23 | 24 | //This will be the first element when sorted correctly 25 | it('Stream should contain a first connection with arrivalStop AMV', async () => { 26 | const connections = await lcstreamToArray({}, 'result.json'); 27 | expect(JSON.parse(connections[0])['arrivalStop']['stop_id']).toBe('AMV'); 28 | }); 29 | 30 | it('JSON-LD Stream should contain Connections and use LevelStore for data storage', async () => { 31 | const triples = await lcstreamToArray({ 32 | format: 'jsonld', 33 | store: 'LevelStore' 34 | }, 'result.jsonld'); 35 | expect(JSON.parse(triples[1])['@type']).toBe('Connection'); 36 | }); 37 | 38 | it('RDF Stream should contain Connections in turtle format', async () => { 39 | const triples = await lcstreamToArray({ 40 | format: 'turtle', 41 | store: 'MemStore' 42 | 
/**
 * Pieter Colpaert © Ghent University - iMinds
 * Transforms Connection objects into their JSON-LD representation,
 * minting URIs for the connection, stops, trip and route via URIStrategy.
 */
const { Transform } = require('stream');
const URIStrategy = require('./URIStrategy.js');

// gtfs pickup/drop-off type terms, indexed by their GTFS code ('0'..'3').
const TYPES = ['gtfs:Regular', 'gtfs:NotAvailable', 'gtfs:MustPhone', 'gtfs:MustCoordinateWithDriver'];

class Connections2JSONLD extends Transform {
  /**
   * @param {Object} [baseUris] - URI templates forwarded to URIStrategy
   * @param {Object} [context] - JSON-LD @context, pushed once before the data
   */
  constructor(baseUris, context) {
    super({ objectMode: true });
    this._context = context;
    // Skip context emission when none was provided.
    this._contextStreamed = !context;
    this._uris = new URIStrategy(baseUris);
    this._count = 0; // kept for backward compatibility with external readers
  }

  _transform(connection, encoding, done) {
    try {
      if (!this._contextStreamed) {
        this._contextStreamed = true;
        this.push(this._context);
      }

      const lc = {
        "@id": this._uris.getId(connection),
        "@type": "Connection",
        "departureStop": this._uris.getStopId(connection.departureStop),
        "arrivalStop": this._uris.getStopId(connection.arrivalStop),
        "departureTime": connection.departureTime,
        "arrivalTime": connection.arrivalTime,
        "gtfs:trip": this._uris.getTripId(connection),
        "gtfs:route": this._uris.getRouteId(connection)
      };

      // The headsign is already the result of earlier fallback logic (trip
      // headsign, then route headsign, when the connection headsign was not
      // set), so it can be used reliably here.
      if (connection.headsign) {
        lc["direction"] = connection.headsign;
      }

      // GTFS codes index directly into TYPES. A falsy code ('0', 0, absent)
      // means gtfs:Regular, the implicit default, and is not serialized —
      // this mirrors the previous truthiness check.
      if (connection['pickup_type']) {
        lc["gtfs:pickupType"] = TYPES[connection['pickup_type']];
      }
      if (connection['drop_off_type']) {
        lc["gtfs:dropOffType"] = TYPES[connection['drop_off_type']];
      }

      this.push(lc);
      done();
    } catch (err) {
      // Previously errors were silently swallowed AND an empty object was
      // pushed downstream, corrupting the output. Log and skip instead.
      console.error('Connections2JSONLD: skipping connection due to error:', err);
      done();
    }
  }
}

module.exports = Connections2JSONLD;
streamToArray(fs.createReadStream('test/sample-feed/connections.nldjsonld', { encoding: 'utf8', objectMode: true }).pipe(JSONStream.parse())); 23 | var connections = []; 24 | it ('Joining trips should give no less or more connections than expected', async () => { 25 | connections = await streamPromise; 26 | //Retrieve the joiningtrip from the connections array for one specific day 27 | let joiningtrip = connections.filter(connection => { 28 | return connection['gtfs:route'] === 'http://example.org/routes/joining_route' && connection.departureTime.substr(0,10) === '2007-02-17'; 29 | }); 30 | 31 | //The joining train should only show 1 connection for the joined part of the trip, which has a departure a D. Let’s check this 32 | assert.equal(joiningtrip.filter(connection => connection.departureStop === 'http://example.org/stops/D').length, 1); 33 | 34 | /*let numberOfJoinedConnections = joiningtrip.filter((connection) => { 35 | return connection.joined_with.length > 0; 36 | }).length; 37 | assert.equal(numberOfJoinedConnections, 2);*/ 38 | }); 39 | 40 | it('A non joining trip that was flagged potentially joining should show up separately', () => { 41 | //There is a tricky non joining trip in the tests. This one should however show up in the data... 42 | let nonjoiningtrip = connections.filter(connection => { 43 | return connection['@id'] === 'http://example.org/connections/20070216/C/non_joining_splitting_trip_3'; 44 | }); 45 | assert.notEqual(nonjoiningtrip.length,0); 46 | }); 47 | 48 | it('A splitting trip that was flagged potentially joining should show up as a splitting trip', () => { 49 | //There is a tricky non joining trip in the tests. This one should however show up in the data... And it should also show up as a splitting train. 
#!/usr/bin/env node

/**
 * CLI entry point: converts an extracted (and sorted) GTFS folder into
 * Linked Connections in the requested output format.
 */
const program = require('commander');
const gtfs2lc = require('../lib/gtfs2lc.js');
const fs = require('fs');
const del = require('del');

console.error("GTFS to linked connections converter use --help to discover more functions");

program
  .option('-f, --format <format>', 'Format of the output. Possibilities: csv, n-triples, turtle, json, jsonld, mongo (extended JSON format to be used with mongoimport) or mongold (default: json)')
  .option('-b, --baseUris <baseUris>', 'Path to a file that describes the baseUris in json')
  .option('-o, --output <output>', 'Path to the folder where the result file will be stored')
  .option('-c, --compressed', 'Compress resulting connections file using gzip')
  .option('-s, --stream', 'Get the connections as a stream on the standard output')
  .option('-S, --store <store>', 'Store type: LevelStore (uses your disk to avoid that you run out of RAM) or MemStore (default)')
  .option('--fresh', 'Make sure to convert all Connection and ignore existing Historic records (which will be deleted)')
  .arguments('<path>', 'Path to sorted GTFS files')
  .action(function (path) {
    program.path = path;
  })
  .parse(process.argv);

if (!program.path) {
  console.error('Please provide a path to the extracted (and sorted using gtfs2lc-sort) GTFS folder as the first argument');
  process.exit(1);
}

// Normalize trailing slashes on the input path.
if (program.path.endsWith('/')) {
  program.path = program.path.slice(0, -1);
}

// `let`, not `const`: the value is reassigned below when stripping a
// trailing slash — reassigning a const threw a TypeError here whenever
// the -o argument ended with '/'.
let output = program.output || program.path;
if (output.endsWith('/')) {
  output = output.slice(0, -1);
}

let baseUris = null;
if (program.baseUris) {
  baseUris = JSON.parse(fs.readFileSync(program.baseUris, 'utf-8'));
}

// Remove intermediate artifacts when the conversion is interrupted.
process.on('SIGINT', async () => {
  console.error("\nSIGINT Received, cleaning up...");
  await del(
    [
      output + '/.stops',
      output + '/.routes',
      output + '/.trips',
      output + '/.services',
      output + '/raw_*'
    ],
    { force: true }
  );
  console.error("Cleaned up!");
});

async function run() {
  console.error(`Converting GTFS to Linked Connections...`);
  const mapper = new gtfs2lc.Connections({
    // 'undefined' may arrive as a literal string when invoked via wrapper
    // scripts, hence the explicit string comparison.
    store: !program.store || program.store === 'undefined' ? 'MemStore' : program.store,
    format: !program.format || program.format === 'undefined' ? 'json' : program.format,
    compressed: program.compressed,
    fresh: program.fresh,
    baseUris: baseUris
  });

  const connectionsFile = await mapper.convert(program.path, output);

  if (program.stream) {
    fs.createReadStream(connectionsFile).pipe(process.stdout);
  } else {
    console.error(`Linked Connections successfully created at ${connectionsFile}`);
  }
}

// Surface conversion failures instead of leaving a floating promise with
// an unhandled rejection.
run().catch(err => {
  console.error(err);
  process.exit(1);
});
| // Parse and expand the calendar in memory 16 | // GTFS specification declares a date as yyyyMMdd. No other formats possible. 17 | // Parsing with substr should be safe. Mind that timezones don’t matter here. 18 | const startDate = this.createDate(calendar['start_date']); 19 | const endDate = this.createDate(calendar['end_date']); 20 | const days = eachDayOfInterval({ start: startDate, end: endDate }); 21 | const calDates = this.calendarDates.get(calendar['service_id']); 22 | const expanded = new Set(); 23 | 24 | if (calDates) { 25 | // Add already all added service dates 26 | calDates.added.forEach(d => expanded.add(format(this.createDate(d), 'yyyyMMdd'))); 27 | 28 | for (const d of days) { 29 | // Check this date is an actual service date and it hasn't been removed 30 | if (calendar[format(d, 'iiii').toLowerCase()] === '1' 31 | && !calDates.removed.has(d)) { 32 | expanded.add(format(d, 'yyyyMMdd')); 33 | } 34 | } 35 | // Delete calendar_dates rule since is no longer needed 36 | this.calendarDates.delete(calendar['service_id']); 37 | } else { 38 | // There are not additional service date rules for this calendar 39 | for (const d of days) { 40 | if (calendar[format(d, 'iiii').toLowerCase()] === '1') { 41 | expanded.add(format(d, 'yyyyMMdd')); 42 | } 43 | } 44 | } 45 | 46 | this.push({ 'service_id': calendar['service_id'], dates: Array.from(expanded) }); 47 | done(); 48 | } 49 | 50 | _flush(done) { 51 | // Deal with all the calendar_dates that didn't have a corresponding calendar rule 52 | for(const [service_id, obj] of this.calendarDates) { 53 | const dates = [] 54 | obj.added.forEach(d => dates.push(format(this.createDate(d), 'yyyyMMdd'))); 55 | 56 | this.push({ service_id, dates }); 57 | } 58 | done(); 59 | } 60 | 61 | createDate(dateString) { 62 | return new Date(dateString.substr(0, 4), parseInt(dateString.substr(4, 2)) - 1, dateString.substr(6, 2)); 63 | } 64 | 65 | get calendarDates() { 66 | return this._calendarDates; 67 | } 68 | } 69 | 70 | module.exports 
= CalendarExpander; -------------------------------------------------------------------------------- /lib/stoptimes/StopTimes2Cxs.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const del = require('del'); 3 | const csv = require('fast-csv'); 4 | const Store = require('../stores/Store'); 5 | const St2C = require('./st2c'); 6 | const numCPUs = require('os').cpus().length; 7 | 8 | module.exports = function (sourcePath, outPath, stores, fresh) { 9 | return new Promise(async resolve => { 10 | const t0 = new Date(); 11 | 12 | if (fresh) { 13 | console.error('Performing a fresh data transformation...'); 14 | // Delete existing historic store if fresh conversion is being requested 15 | await del([`${outPath}/history.db`], { force: true }); 16 | } 17 | // Load or create historic connections LevelDB store 18 | const historyDB = Store({ fileName: `${outPath}/history.db`, encoding: 'json' }, 'LevelStore'); 19 | await historyDB.open(); 20 | 21 | // Fragment stop_times.txt according to the number of available CPU cores 22 | const stopTimes = fs.createReadStream(`${sourcePath}/stop_times.txt`, { encoding: 'utf8', objectMode: true }) 23 | .pipe(csv.parse({ objectMode: true, headers: true, quote: '"' })) 24 | .on('error', function (e) { 25 | console.error(e); 26 | }); 27 | 28 | const connectionsPool = createWriteStreams('connections', outPath); 29 | let connIndex = -1; 30 | let currentTrip = null; 31 | let printedRows = 0; 32 | 33 | const connectionRules = stopTimes.pipe(new St2C( 34 | stores.stopsDB, 35 | stores.tripsDB, 36 | stores.routesDB, 37 | stores.servicesDB, 38 | historyDB 39 | )); 40 | 41 | connectionRules.on('error', err => { 42 | console.error(err); 43 | process.exit(-1); 44 | }) 45 | 46 | connectionRules.on('data', row => { 47 | if (row.trip['trip_id'] !== currentTrip) { 48 | currentTrip = row.trip['trip_id']; 49 | connIndex = connIndex < numCPUs - 1 ? 
connIndex + 1 : 0; 50 | } 51 | 52 | connectionsPool[connIndex].write(JSON.stringify(row) + '\n'); 53 | printedRows++; 54 | }); 55 | 56 | connectionRules.on('end', async () => { 57 | for (let i in connectionsPool) { 58 | connectionsPool[i].end(); 59 | } 60 | 61 | // Close all LevelDB stores as they will not be used any further on this process 62 | if (!(stores.stopsDB instanceof Map)) { 63 | await Promise.all([ 64 | stores.stopsDB.close(), 65 | stores.tripsDB.close(), 66 | stores.routesDB.close(), 67 | stores.servicesDB.close(), 68 | historyDB.close() 69 | ]); 70 | } 71 | 72 | console.error(`Created ${printedRows} Connection rules in ${new Date() - t0} ms`); 73 | resolve(); 74 | }); 75 | }); 76 | } 77 | 78 | function createWriteStreams(name, path) { 79 | const writers = []; 80 | for (let i = 0; i < numCPUs; i++) { 81 | const stream = fs.createWriteStream(`${path}/${name}_${i}.txt`, { encoding: 'utf8' }); 82 | writers.push(stream); 83 | } 84 | 85 | return writers; 86 | } -------------------------------------------------------------------------------- /lib/ConnectionsBuilder.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Pieter Colpaert © Ghent University -- IDLab -- imec 3 | * Combines connection rules, trips and services to an unsorted stream of connections 4 | */ 5 | const Transform = require('stream').Transform; 6 | const { addHours, addMinutes, addSeconds } = require('date-fns'); 7 | 8 | class ConnectionsBuilder extends Transform { 9 | constructor() { 10 | super({ objectMode: true }); 11 | } 12 | 13 | _transform(connectionRule, encoding, done) { 14 | this.processConnectionRule(connectionRule, done); 15 | } 16 | 17 | processConnectionRule(connectionRule, done) { 18 | try { 19 | const connRule = connectionRule.value; 20 | const departureDFM = parseGTFSDuration(connRule['departure_dfm']); 21 | const arrivalDFM = parseGTFSDuration(connRule['arrival_dfm']); 22 | const tripStartDFM = 
parseGTFSDuration(connRule.trip['startTime_dfm']) 23 | const service = connRule.serviceDates; 24 | 25 | for (var i in service) { 26 | // GTFS defined a date as a strict string: yyyyMMdd, just parse it with substrings 27 | // TODO: what if the timezone is different than the local timezone? For now, you will have to change you local system’s time... 28 | const serviceDay = new Date(service[i].substr(0, 4), parseInt(service[i].substr(4, 2)) - 1, service[i].substr(6, 2)); 29 | //add the duration to the date 30 | const departureTime = addDuration(serviceDay, departureDFM); 31 | const arrivalTime = addDuration(serviceDay, arrivalDFM); 32 | const startTime = addDuration(serviceDay, tripStartDFM); 33 | // Set startTime of the trip 34 | const trip = Object.assign({}, connRule.trip); 35 | trip.startTime = startTime; 36 | 37 | // Add complete Stop objects for more specific URI resolving 38 | const connection = { 39 | departureTime, 40 | departureStop: connRule['departure_stop'], 41 | arrivalTime, 42 | arrivalStop: connRule['arrival_stop'], 43 | trip, 44 | route: connRule.route, 45 | headsign: connRule.headsign, 46 | pickup_type: connRule['pickup_type'], 47 | drop_off_type: connRule['drop_off_type'] 48 | }; 49 | 50 | this.push(connection); 51 | } 52 | done(); 53 | } catch (err) { 54 | done(err); 55 | } 56 | } 57 | } 58 | 59 | const parseGTFSDuration = function (durationString) { 60 | let [hours, minutes, seconds] = durationString.split(':').map((val) => { return parseInt(val); }); 61 | //Be forgiving to durations that do not follow the spec e.g., (12:00 instead of 12:00:00) 62 | return { hours, minutes, seconds: seconds ? 
seconds : 0 }; 63 | } 64 | 65 | const addDuration = function (date, duration) { 66 | return addSeconds(addMinutes(addHours(date, duration.hours), duration.minutes), duration.seconds); 67 | } 68 | 69 | module.exports = ConnectionsBuilder; 70 | -------------------------------------------------------------------------------- /lib/Connections2Triples.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Pieter Colpaert © Ghent University - iMinds 3 | * Combines connection rules, trips and services to an unsorted stream of connections 4 | */ 5 | var Transform = require('stream').Transform, 6 | util = require('util'), 7 | URIStrategy = require('./URIStrategy.js'), 8 | N3 = require('n3'), 9 | { DataFactory } = N3, 10 | { namedNode, literal, quad } = DataFactory;; 11 | 12 | var Connections2Triples = function (baseUris) { 13 | Transform.call(this, { objectMode: true }); 14 | this._uris = new URIStrategy(baseUris); 15 | this._count = 0; 16 | }; 17 | 18 | util.inherits(Connections2Triples, Transform); 19 | 20 | Connections2Triples.prototype._transform = function (connection, encoding, done) { 21 | var id = this._uris.getId(connection); 22 | this.push( 23 | quad( 24 | namedNode(id), 25 | namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 26 | namedNode('http://semweb.mmlab.be/ns/linkedconnections#Connection') 27 | )); 28 | this.push( 29 | quad( 30 | namedNode(id), 31 | namedNode('http://semweb.mmlab.be/ns/linkedconnections#departureStop'), 32 | namedNode(this._uris.getStopId(connection.departureStop)) 33 | )); 34 | this.push( 35 | quad( 36 | namedNode(id), 37 | namedNode('http://semweb.mmlab.be/ns/linkedconnections#arrivalStop'), 38 | namedNode(this._uris.getStopId(connection.arrivalStop)) 39 | )); 40 | this.push( 41 | quad( 42 | namedNode(id), 43 | namedNode('http://semweb.mmlab.be/ns/linkedconnections#departureTime'), 44 | literal(connection.departureTime.toISOString(), 
namedNode('http://www.w3.org/2001/XMLSchema#dateTime')) 45 | )); 46 | this.push( 47 | quad( 48 | namedNode(id), 49 | namedNode('http://semweb.mmlab.be/ns/linkedconnections#arrivalTime'), 50 | literal(connection.arrivalTime.toISOString(), namedNode('http://www.w3.org/2001/XMLSchema#dateTime')) 51 | )); 52 | this.push( 53 | quad( 54 | namedNode(id), 55 | namedNode('http://vocab.gtfs.org/terms#trip'), 56 | namedNode(this._uris.getTripId(connection)) 57 | )); 58 | this.push( 59 | quad( 60 | namedNode(id), 61 | namedNode('http://vocab.gtfs.org/terms#route'), 62 | namedNode(this._uris.getRouteId(connection)) 63 | )); 64 | 65 | var headsign = connection.headsign || connection.trip.trip_headsign; 66 | 67 | if (headsign) { 68 | this.push( 69 | quad( 70 | namedNode(id), 71 | namedNode('http://vocab.gtfs.org/terms#headsign'), 72 | literal(headsign, namedNode('http://www.w3.org/2001/XMLSchema#string')) 73 | )); 74 | } 75 | 76 | const types = ['http://vocab.gtfs.org/terms#Regular', 'http://vocab.gtfs.org/terms#NotAvailable', 'http://vocab.gtfs.org/terms#MustPhone', 'http://vocab.gtfs.org/terms#MustCoordinateWithDriver'] 77 | 78 | if (connection['drop_off_type'] && connection['drop_off_type'] !== null) { 79 | this.push( 80 | quad( 81 | namedNode(id), 82 | namedNode('http://vocab.gtfs.org/terms#dropOffType'), 83 | namedNode(types[connection['drop_off_type']]) 84 | )); 85 | } 86 | 87 | if (connection['pickup_type'] && connection['pickup_type'] !== null) { 88 | this.push( 89 | quad( 90 | namedNode(id), 91 | namedNode('http://vocab.gtfs.org/terms#pickupType'), 92 | namedNode(types[connection['pickup_type']]) 93 | )); 94 | } 95 | 96 | done(); 97 | }; 98 | 99 | module.exports = Connections2Triples; 100 | -------------------------------------------------------------------------------- /bin/gtfs2lc-clean.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | ##Retrieve directory of this bash script 6 | 
CURDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" 7 | 8 | ##Go to the dir of the GTFS 9 | [[ $# == 1 ]] && { 10 | cd $1 && { 11 | echo Converting newlines dos2unix; 12 | { 13 | sed 's/\r//' stop_times.txt > stop_times_unix.txt; mv stop_times_unix.txt stop_times.txt & 14 | sed 's/\r//' trips.txt > trips_unix.txt; mv trips_unix.txt trips.txt & 15 | sed 's/\r//' calendar.txt > calendar_unix.txt; mv calendar_unix.txt calendar.txt & 16 | sed 's/\r//' calendar_dates.txt > calendar_dates_unix.txt; mv calendar_dates_unix.txt calendar_dates.txt ; 17 | sed 's/\r//' routes.txt > routes_unix.txt; mv routes_unix.txt routes.txt ; 18 | sed 's/\r//' stops.txt > stops_unix.txt; mv stops_unix.txt stops.txt ; 19 | } ; 20 | echo Removing UTF-8 artifacts in directory $1; 21 | { 22 | sed '1s/^\xEF\xBB\xBF//' stop_times.txt > stop_times_unix.txt; mv stop_times_unix.txt stop_times.txt & 23 | sed '1s/^\xEF\xBB\xBF//' trips.txt > trips_unix.txt; mv trips_unix.txt trips.txt & 24 | sed '1s/^\xEF\xBB\xBF//' calendar.txt > calendar_unix.txt; mv calendar_unix.txt calendar.txt & 25 | sed '1s/^\xEF\xBB\xBF//' calendar_dates.txt > calendar_dates_unix.txt; mv calendar_dates_unix.txt calendar_dates.txt ; 26 | sed '1s/^\xEF\xBB\xBF//' routes.txt > routes_unix.txt; mv routes_unix.txt routes.txt ; 27 | sed '1s/^\xEF\xBB\xBF//' stops.txt > stops_unix.txt; mv stops_unix.txt stops.txt ; 28 | } ; 29 | 30 | echo Trimming EOLs and removing continuous double quotes 31 | { 32 | sed 's/[[:blank:]]*$//' stop_times.txt | sed 's/""//g' > stop_times_unix.txt; mv stop_times_unix.txt stop_times.txt & 33 | sed 's/[[:blank:]]*$//' trips.txt | sed 's/""//g' > trips_unix.txt; mv trips_unix.txt trips.txt & 34 | sed 's/[[:blank:]]*$//' calendar.txt | sed 's/""//g' > calendar_unix.txt; mv calendar_unix.txt calendar.txt & 35 | sed 's/[[:blank:]]*$//' calendar_dates.txt | sed 's/""//g' > calendar_dates_unix.txt; mv calendar_dates_unix.txt calendar_dates.txt ; 36 | sed 's/[[:blank:]]*$//' routes.txt | sed 
's/""//g' > routes_unix.txt; mv routes_unix.txt routes.txt ; 37 | sed 's/[[:blank:]]*$//' stops.txt | sed 's/""//g' > stops_unix.txt; mv stops_unix.txt stops.txt ; 38 | } 39 | ## Find the right numbers of the column keys needed 40 | TRIPID_TRIPS=`head -n1 trips.txt | tr "," "\n" | grep -wn "trip_id"| cut -d: -f1` 41 | TRIPID_STOPTIMES=`head -n1 stop_times.txt | tr "," "\n" | grep -wn "trip_id"| cut -d: -f1` 42 | STOPSEQUENCE_STOPTIMES=`head -n1 stop_times.txt | tr "," "\n" | grep -wn "stop_sequence"| cut -d: -f1` 43 | ## Sort stop_times.txt by trip id and stop sequence 44 | { head -n 1 stop_times.txt ; tail -n +2 stop_times.txt | sort -t , -k ${TRIPID_STOPTIMES}d,${TRIPID_STOPTIMES} -k${STOPSEQUENCE_STOPTIMES}n,${STOPSEQUENCE_STOPTIMES}; } > stop_times2.txt ; mv stop_times2.txt stop_times.txt ; 45 | ## Sort trips.txt by trip_id and have the same ordering as stop_times.txt 46 | { head -n 1 trips.txt ; tail -n +2 trips.txt | sort -t , -k ${TRIPID_TRIPS}d,${TRIPID_TRIPS} ; } > trips2.txt ; mv trips2.txt trips.txt & 47 | ## Finally sort calendar.txt and calendar_dates.txt files in order to be processed for gtfs2lc 48 | echo Sorting files in directory $1; 49 | { head -n 1 calendar.txt ; tail -n +2 calendar.txt | sort -t , -k 1d,1; } > calendar2.txt ; mv calendar2.txt calendar.txt & 50 | { head -n 1 calendar_dates.txt ; tail -n +2 calendar_dates.txt | sort -t , -k 1d,1; } > calendar_dates2.txt ; mv calendar_dates2.txt calendar_dates.txt & 51 | } ; 52 | } || { 53 | 1>&2 echo Give a path to the gtfs dir and as Connections DB path as the only arguments; 54 | exit 1 55 | } 56 | -------------------------------------------------------------------------------- /test/URIStrategy.test.js: -------------------------------------------------------------------------------- 1 | const assert = require('assert'); 2 | const URIStrategy = require('../lib/URIStrategy'); 3 | 4 | describe('URIStrategy', () => { 5 | describe('getRouteId', () => { 6 | it('should replace {routes.route_id} by 
connection.trip.route.route_id', () => { 7 | let strategy = new URIStrategy(); 8 | strategy = new URIStrategy({ 9 | route: 'http://example.org/routes/{routes.route_id}', 10 | }); 11 | 12 | const connection = { 13 | route: { 14 | route_id: 'B1234-56789', 15 | } 16 | }; 17 | 18 | assert.equal( 19 | strategy.getRouteId(connection), 20 | 'http://example.org/routes/B1234-56789' 21 | ); 22 | }); 23 | 24 | it('should replace spaces by %20', () => { 25 | const strategy = new URIStrategy({ 26 | route: 'http://example.org/routes/{routes.route_id}', 27 | }); 28 | 29 | const connection = { 30 | route: { 31 | route_id: 'a b c', 32 | } 33 | }; 34 | 35 | assert.equal( 36 | strategy.getRouteId(connection), 37 | 'http://example.org/routes/a%20b%20c' 38 | ); 39 | }); 40 | 41 | it('should resolve expression by evaluating matching key in resolve object', () => { 42 | const strategy = new URIStrategy({ 43 | route: 'http://example.org/routes/{route_short_id}', 44 | resolve: { 45 | route_short_id: 'connection.route.route_id.substring(0,5)', 46 | }, 47 | }); 48 | 49 | const connection = { 50 | route: { 51 | route_id: 'B1234-56789', 52 | } 53 | }; 54 | 55 | assert.equal( 56 | strategy.getRouteId(connection), 57 | 'http://example.org/routes/B1234' 58 | ); 59 | }); 60 | 61 | it('Should resolve stop URI', async () => { 62 | const strategy = new URIStrategy({ 63 | stop: 'http://example.org/stops/{stops.stop_id}' 64 | }); 65 | assert.equal(await strategy.getStopId({ stop_id: 'stop1' }), 'http://example.org/stops/stop1'); 66 | }); 67 | 68 | it('Should resolve trip URI', () => { 69 | const strategy = new URIStrategy({ 70 | trip: 'http://example.org/trips/{trips.trip_id}/{trips.startTime(yyyyMMdd)}/{connection.departureTime(yy)}{connection.arrivalTime(yy)}', 71 | }); 72 | const connection = { 73 | departureTime: new Date('2020-02-15T09:23:00.000Z'), 74 | arrivalTime: new Date('2020-02-15T09:42:00.000Z'), 75 | trip: { 76 | trip_id: 'trip1', 77 | startTime: new Date('2020-02-15T08:00:00.000Z') 78 | 
} 79 | }; 80 | assert.equal(strategy.getTripId(connection), 'http://example.org/trips/trip1/20200215/2020'); 81 | }); 82 | }); 83 | 84 | describe('getId', () => { 85 | it('should resolve expression using date-fns.format function', () => { 86 | const strategy = new URIStrategy({ 87 | connection: 88 | 'http://example.org/connections/{trip_startTime}/{departureStop}/{connection.departureStop}/{connection.arrivalStop}/{trip_id}{connection.something}', 89 | resolve: { 90 | trip_id: 'connection.trip.trip_id', 91 | trip_startTime: 'format(connection.trip.startTime, "yyyyMMdd\'T\'HHmm");', 92 | departureStop: 'connection.stopId', 93 | }, 94 | }); 95 | 96 | const connection = { 97 | something: 'some', 98 | stopId: '1234', 99 | departureStop: { stop_id: '1234' }, 100 | arrivalStop: { stop_id: '4321' }, 101 | trip: { 102 | trip_id: '5678', 103 | startTime: new Date('2018-09-21T10:25:12'), 104 | }, 105 | }; 106 | 107 | assert.equal( 108 | strategy.getId(connection), 109 | 'http://example.org/connections/20180921T1025/1234/1234/4321/5678some' 110 | ); 111 | }); 112 | }); 113 | }); 114 | -------------------------------------------------------------------------------- /lib/stores/StoreManager.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const csv = require('fast-csv'); 3 | const Store = require('./Store'); 4 | const CalendarExpander = require('../services/CalendarExpander'); 5 | 6 | module.exports = async function (sourcePath, outPath, storeType) { 7 | // Step 2: Read all the required GTFS files in a stream-fashion 8 | const stops = fs.createReadStream(`${sourcePath}/stops.txt`, { encoding: 'utf8', objectMode: true }) 9 | .pipe(csv.parse({ objectMode: true, headers: true })) 10 | .on('error', function (e) { 11 | console.error(e); 12 | }); 13 | 14 | const routes = fs.createReadStream(`${sourcePath}/routes.txt`, { encoding: 'utf8', objectMode: true }) 15 | .pipe(csv.parse({ objectMode: true, headers: true })) 
16 | .on('error', function (e) { 17 | console.error(e); 18 | }); 19 | 20 | const trips = fs.createReadStream(`${sourcePath}/trips.txt`, { encoding: 'utf8', objectMode: true }) 21 | .pipe(csv.parse({ objectMode: true, headers: true })) 22 | .on('error', function (e) { 23 | console.error(e); 24 | }); 25 | 26 | // Store in LevelDB or in memory Map depending on the options 27 | const [ 28 | stopsDB, routesDB, tripsDB, servicesDB 29 | ] = await Promise.all([ 30 | loadIndexData({ 31 | stream: stops, 32 | type: storeType, 33 | fileName: `${outPath}/stops.db`, 34 | encoding: 'json', 35 | key: 'stop_id', 36 | }), 37 | loadIndexData({ 38 | stream: routes, 39 | type: storeType, 40 | fileName: `${outPath}/routes.db`, 41 | encoding: 'json', 42 | key: 'route_id', 43 | }), 44 | loadIndexData({ 45 | stream: trips, 46 | type: storeType, 47 | fileName: `${outPath}/trips.db`, 48 | encoding: 'json', 49 | key: 'trip_id', 50 | }), 51 | loadServiceDates(sourcePath, outPath, storeType) 52 | ]); 53 | 54 | return { stopsDB, routesDB, tripsDB, servicesDB }; 55 | } 56 | 57 | async function loadServiceDates(sourcePath, outPath, storeType) { 58 | // Load all calendar_dates in memory store 59 | const calendarDates = new Map(); 60 | 61 | const calendarDatesStream = fs.createReadStream(`${sourcePath}/calendar_dates.txt`, { encoding: 'utf8', objectMode: true }) 62 | .pipe(csv.parse({ objectMode: true, headers: true })) 63 | .on('error', function (e) { 64 | console.error(e); 65 | }); 66 | 67 | for await (const cd of calendarDatesStream) { 68 | // Initialize calendar_date rule object 69 | if(!calendarDates.has(cd['service_id'])) { 70 | calendarDates.set(cd['service_id'], { added: new Set(), removed: new Set() }); 71 | } 72 | // Set date addition/removal accordingly 73 | if(cd['exception_type'] === '1') { 74 | calendarDates.get(cd['service_id']).added.add(cd['date']); 75 | } else if(cd['exception_type'] === '2') { 76 | calendarDates.get(cd['service_id']).removed.add(cd['date']); 77 | } 78 | } 79 | 80 
| // Load and merge all expanded service dates in data store. 81 | const calendar = fs.createReadStream(`${sourcePath}/calendar.txt`, { encoding: 'utf8', objectMode: true }) 82 | .pipe(csv.parse({ objectMode: true, headers: true })) 83 | .pipe(new CalendarExpander(calendarDates)) 84 | .on('error', function (e) { 85 | console.error(e); 86 | }); 87 | 88 | return loadIndexData({ 89 | stream: calendar, 90 | type: storeType, 91 | fileName: `${outPath}/services.db`, 92 | encoding: 'json', 93 | key: 'service_id', 94 | value: 'dates' 95 | }); 96 | } 97 | 98 | async function loadIndexData({ stream, type, fileName, encoding, key, value }) { 99 | try { 100 | const store = Store({ fileName, encoding }, type); 101 | for await (const data of stream) { 102 | if (data[key]) { 103 | if (store instanceof Map) { 104 | store.set(data[key], value ? data[value] : data); 105 | } else { 106 | await store.put(data[key], value ? data[value] : data); 107 | } 108 | } 109 | } 110 | 111 | console.error(`Created and loaded store in ${fileName}`); 112 | return store; 113 | } catch (err) { 114 | console.error(err); 115 | } 116 | } -------------------------------------------------------------------------------- /lib/URIStrategy.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Pieter Colpaert © Ghent University - iMinds 3 | * Combines connection rules, trips and services to an unsorted stream of connections 4 | */ 5 | 6 | const { format } = require('date-fns'); 7 | const uri_templates = require('uri-templates'); 8 | 9 | var URIStrategy = function (baseUris) { 10 | var defaultBaseUris = { 11 | stop: 'http://example.org/stops/{stops.stop_id}', 12 | route: 'http://example.org/routes/{routes.route_id}', 13 | trip: 14 | 'http://example.org/trips/{trips.trip_id}/{trips.startTime(yyyyMMdd)}', 15 | connection: 16 | 'http://example.org/connections/{trips.startTime(yyyyMMdd)}/{depStop}/{trips.trip_id}', 17 | resolve: { 18 | depStop: 
'connection.departureStop.stop_id' 19 | } 20 | }; 21 | if (!baseUris) { 22 | baseUris = defaultBaseUris; 23 | } else { 24 | if (typeof baseUris.stop !== 'string') { 25 | baseUris.stop = defaultBaseUris.stop; 26 | } 27 | if (typeof baseUris.trip !== 'string') { 28 | baseUris.trip = defaultBaseUris.trip; 29 | } 30 | if (typeof baseUris.route !== 'string') { 31 | baseUris.route = defaultBaseUris.route; 32 | } 33 | if (typeof baseUris.connection !== 'string') { 34 | baseUris.connection = defaultBaseUris.connection; 35 | } 36 | } 37 | 38 | this._stopTemplate = uri_templates(baseUris.stop); 39 | this._routeTemplate = uri_templates(baseUris.route); 40 | this._tripTemplate = uri_templates(baseUris.trip); 41 | this._connectionTemplate = uri_templates(baseUris.connection); 42 | this._resolve = baseUris.resolve || {}; 43 | }; 44 | 45 | /** 46 | * Returns a persistent identifier for a connection 47 | */ 48 | URIStrategy.prototype.getId = function (connection) { 49 | return resolveURI(this._connectionTemplate, connection, this._resolve); 50 | }; 51 | 52 | URIStrategy.prototype.getStopId = function (stop) { 53 | return resolveURI(this._stopTemplate, stop, this._resolve); 54 | }; 55 | 56 | URIStrategy.prototype.getTripId = function (connection) { 57 | return resolveURI(this._tripTemplate, connection, this._resolve); 58 | }; 59 | 60 | URIStrategy.prototype.getRouteId = function (connection) { 61 | return resolveURI(this._routeTemplate, connection, this._resolve); 62 | }; 63 | 64 | function resolveURI(template, object, resolve) { 65 | let varNames = template.varNames; 66 | let fillerObj = {}; 67 | 68 | for (let i in varNames) { 69 | fillerObj[varNames[i]] = resolveValue(varNames[i], object, resolve); 70 | } 71 | 72 | return template.fill(fillerObj); 73 | } 74 | 75 | function resolveValue(param, object, resolve) { 76 | // Entity objects to be resolved as needed 77 | const trips = object.trip ? object.trip : null; 78 | const routes = object.route ? 
object.route : null; 79 | const stops = object['stop_id'] ? object : null; 80 | 81 | // Try first to resolve using keys in 'resolve' object 82 | if (resolve[param]) { 83 | const connection = object; 84 | return eval(resolve[param]); 85 | } 86 | 87 | // GTFS source file and attribute name 88 | const source = param.split('.')[0]; 89 | const attr = param.split('.')[1]; 90 | let value = null; 91 | 92 | switch (source) { 93 | case 'trips': 94 | if (attr.indexOf('startTime') >= 0) { 95 | const dateformat = attr.match(/\((.*?)\)/)[1]; 96 | value = format(trips.startTime, dateformat); 97 | } else { 98 | value = trips[attr]; 99 | } 100 | break; 101 | case 'routes': 102 | value = routes[attr]; 103 | break; 104 | case 'stops': 105 | value = stops[attr]; 106 | break; 107 | case 'connection': 108 | if (attr.indexOf('departureTime') >= 0) { 109 | const dateformat = attr.match(/\((.*?)\)/)[1]; 110 | value = format(object.departureTime, dateformat); 111 | } else if (attr.indexOf('arrivalTime') >= 0) { 112 | const dateformat = attr.match(/\((.*?)\)/)[1]; 113 | value = format(object.arrivalTime, dateformat); 114 | } else if (attr.indexOf('departureStop') >= 0) { 115 | value = object.departureStop['stop_id']; 116 | } else if (attr.indexOf('arrivalStop') >= 0) { 117 | value = object.arrivalStop['stop_id']; 118 | } else { 119 | value = object[attr]; 120 | } 121 | break; 122 | } 123 | 124 | return value; 125 | } 126 | 127 | module.exports = URIStrategy; 128 | -------------------------------------------------------------------------------- /bin/linkedconnections-sortandjoin.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | //Read from standard input until endline or until end stream, process chunk. 
4 | const JSONStream = require('JSONStream'); 5 | 6 | var printConnection = function (connection) { 7 | console.log(JSON.stringify(connection)); 8 | } 9 | 10 | var previous = null; 11 | var tripsLastConnection = {}; 12 | 13 | let jsonstream = process.stdin.pipe(JSONStream.parse()); 14 | 15 | jsonstream.on("data", (connection) => { 16 | if (!previous) { 17 | previous = connection; 18 | } else { 19 | if (connection.arrivalTime === previous.arrivalTime && connection.departureTime === previous.departureTime && connection.departureStop === previous.departureStop && connection.arrivalStop === previous.arrivalStop && connection['gtfs:route'] === previous['gtfs:route']) { 20 | mergedConnection = null; 21 | //See the documentation: https://support.google.com/transitpartners/answer/7084064?hl=en 22 | 23 | //### JOINING TRAINS 24 | //If the pickup type is 1 while the other isn’t merge it with the connection where the pickupType was 0 25 | if (connection["gtfs:pickupType"] === "gtfs:NotAvailable" && previous["gtfs:pickupType"] !== "gtfs:NotAvailable") { 26 | mergedConnection = previous; 27 | if (!mergedConnection.joinedWithTrip) 28 | mergedConnection.joinedWithTrip = [] 29 | mergedConnection.joinedWithTrip.push(connection["gtfs:trip"]); 30 | } else if (previous["gtfs:pickupType"] === "gtfs:NotAvailable" && connection["gtfs:pickupType"] !== "gtfs:NotAvailable") { 31 | mergedConnection = connection; 32 | if (!mergedConnection.joinedWithTrip) 33 | mergedConnection.joinedWithTrip = []; 34 | mergedConnection.joinedWithTrip.push(previous["gtfs:trip"]); 35 | } 36 | //### SPLITTING TRAINS 37 | //If the drop-off type is different, then merge it with the one where dropOffType is 0 38 | else if (connection["gtfs:dropOffType"] === "gtfs:NotAvailable" && previous["gtfs:dropOffType"] !== "gtfs:NotAvailable") { 39 | mergedConnection = previous; 40 | if (!mergedConnection.willSplitInto) 41 | mergedConnection.willSplitInto = []; 42 | 
mergedConnection.willSplitInto.push(connection["gtfs:trip"]); 43 | } else if (previous["gtfs:dropOffType"] === "gtfs:NotAvailable" && connection["gtfs:dropOffType"] !== "gtfs:NotAvailable") { 44 | mergedConnection = connection; 45 | if (!mergedConnection.willSplitInto) 46 | mergedConnection.willSplitInto = []; 47 | mergedConnection.willSplitInto.push(previous["gtfs:trip"]); 48 | } 49 | 50 | if (mergedConnection) 51 | previous = mergedConnection; 52 | else { 53 | processConnection(previous); 54 | previous = connection; 55 | } 56 | } else { 57 | processConnection(previous); 58 | previous = connection; 59 | } 60 | } 61 | }).on('end', () => { 62 | }); 63 | 64 | var joinedTrips = {}; 65 | 66 | var processConnection = function (connection) { 67 | if (connection.joinedWithTrip) { 68 | for (let joinedTrip of connection.joinedWithTrip) { 69 | joinedTrips[joinedTrip] = connection['gtfs:trip']; 70 | } 71 | } 72 | if (tripsLastConnection[connection['gtfs:trip']]) { 73 | connection.nextConnection = [ tripsLastConnection[connection['gtfs:trip']]["@id"] ]; 74 | //TODO: in order to support multiple splitting, check that if the element exist, that it is lower 75 | if (connection.willSplitInto && !tripsLastConnection[connection['gtfs:trip']].willSplitInto ) { // || connection.willSplitInto.length < tripsLastConnection[connection['gtfs:trip']].willSplitInto.length)) { 76 | //This is our queue: apparently this connection will split its vehicles in 2, as the next connection from this very trip is not indicated to split any more 77 | for (let splitTrip of connection.willSplitInto) { 78 | if (tripsLastConnection[splitTrip]) { 79 | connection.nextConnection.push(tripsLastConnection[splitTrip]["@id"]); 80 | } //else { 81 | //Half of this train stops at this place and does not continue 82 | //} 83 | } 84 | } 85 | } else if (joinedTrips[connection['gtfs:trip']]) { 86 | //This indicates the last connection of a to be joined trip 87 | connection.nextConnection = [ 
tripsLastConnection[joinedTrips[connection['gtfs:trip']]]['@id'] ]; 88 | } 89 | //only store the essentials in memory 90 | tripsLastConnection[connection['gtfs:trip']] = { "@id": connection['@id'], "willSplitInto": connection['willSplitInto'] }; 91 | //remove willSplitInto and joinedWithTrip 92 | if (connection.willSplitInto) 93 | delete connection.willSplitInto; 94 | if (connection.joinedWithTrip) 95 | delete connection.joinedWithTrip; 96 | printConnection(connection); 97 | }; 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GTFS to Linked Connections 2 | 3 | [![Node.js CI](https://github.com/linkedconnections/gtfs2lc/actions/workflows/build-test.yml/badge.svg)](https://github.com/linkedconnections/gtfs2lc/actions/workflows/build-test.yml) [![npm](https://img.shields.io/npm/v/gtfs2lc.svg?style=popout)](https://npmjs.com/package/gtfs2lc) [![Coverage Status](https://coveralls.io/repos/github/linkedconnections/gtfs2lc/badge.svg?branch=master)](https://coveralls.io/github/linkedconnections/gtfs2lc?branch=master) 4 | 5 | Transforms a GTFS file into a directed acyclic graph of actual _connections_. 6 | 7 | A _connection_ is the combination of a departure and its successive arrival of the same trip. 8 | Our goal is to retrieve a list of connections that is sorted by departure time, better known as a Directed Acyclic Graph. This way, route planning algorithms can be performed. 9 | 10 | More information and live demo at https://linkedconnections.org 11 | 12 | ## Converting your GTFS to (linked) connections 13 | 14 | ### Step 0: Installation 15 | 16 | Install it using the [Node Package Manager (npm)](https://www.npmjs.com/get-npm). 17 | 18 | ```bash 19 | npm install -g gtfs2lc 20 | ``` 21 | 22 | ### Step 1: discover a GTFS file 23 | 24 | If you haven’t yet picked a GTFS file you want to work with, different repositories exist. 
Our favorite ones: 25 | 26 | * [Transit.land’s feed registry](http://transit.land/feed-registry/) 27 | * [Mobility Database](https://mobilitydatabase.org) 28 | 29 | Yet, you may also directly ask your local public transport authority for a copy. 30 | 31 | Mind that we have not tested our code with all GTFS files yet, and there are [known limitations](#not-yet-implemented). 32 | 33 | ### Step 2: unzip your GTFS 34 | 35 | You can use your favorite unzipper. E.g., `unzip gtfs.zip` should work fine. 36 | 37 | ### Step 3: Order and clean your CSV files 38 | 39 | This process is now run automatically so you can skip to Step 4. But you can still use it independently using the enclosed bash script `gtfs2lc-clean`. Next to cleaning and sorting, it also unifies newlines and removes UTF-8 artifacts. 40 | 41 | If _step 4_ does not give the desired result, you might want to tweak the script manually. In order for our script to work: 42 | 43 | * __stop_times.txt__ must be ordered by `trip_id` and `stop_sequence`. 44 | * __calendar.txt__ must be ordered by `service_id`. 45 | * __calendar_dates.txt__ must be ordered by `service_id`. 46 | 47 | ### Step 4: Generate connections! 48 | 49 | Successfully finished the previous steps? Then you can now generate actual departure and arrival pairs (connections) as follows: 50 | 51 | ```bash 52 | gtfs2lc /path/to/extracted/gtfs -f json 53 | ``` 54 | 55 | We support other formats such as `csv` as well. 56 | 57 | For _big_ GTFS files, your memory may not be sufficient. Luckily, we’ve implemented a way to use your hard disk instead of your RAM. You can enable this with an option: `gtfs2lc /path/to/extracted/gtfs -f json --store LevelStore`. 58 | 59 | It may also be the case that your disk has limited storage space. In that case you may want to use the `--compressed` option. 60 | 61 | ### Step 5: Generate *Linked* Connections!
62 | 63 | When you download a new GTFS file, all identifiers in there might change and conflict with your previous export. Therefore, we need to think about a way to create global identifiers for the connections, trips, routes and stops in our system. As we are publishing our data on the Web, we will also use Web addresses for these global identifiers. 64 | 65 | See `baseUris-example.json` for an example on URI templates of what a stable identifier strategy could look like. Copy it and edit it to your likings. For a more detailed explanation of how to use the URI templates see the description at our [`GTFS-RT2LC`](https://github.com/linkedconnections/gtfsrt2lc#uri-templates) tool, which uses the same strategy. 66 | 67 | Now you can generate Linked Data in JSON-LD as follows: 68 | 69 | ```bash 70 | gtfs2lc /path/to/extracted/gtfs -f jsonld -b baseUris.json 71 | ``` 72 | 73 | That’s it! Want to serve your Linked Connections over HTTP? Take a look at our work over here: [The Linked Connection’s server](https://github.com/julianrojas87/linked-connections-server) (WIP) 74 | 75 | ### More options 76 | 77 | #### Post-processing joining connections, and adding nextConnection properties 78 | 79 | In GTFS, joining and splitting trains are fixed in a horrible way. See https://support.google.com/transitpartners/answer/7084064?hl=en for more details. 80 | 81 | In Linked Connections, we can solve this gracefully by adding a nextConnection array to every connection. A splitting train then indicates, on the last connection before it splits, 2 nextConnection items. 82 | 83 | On your newline delimited jsonld file, you can perform this script in order to make that work: `linkedconnections-sortandjoin yourconnectionsfile.nldjsonld` 84 | 85 | #### MongoDB 86 | Next to the jsonld format, we’ve also implemented the “`mongold`” format.
It can be directly used by the command `mongoimport` as follows: 87 | 88 | ```bash 89 | gtfs2lc /path/to/extracted/gtfs -f mongold -b baseUris.json | mongoimport -c myconnections 90 | ``` 91 | 92 | Mind that only MongoDB starting version 2.6 is supported and mind that, at this moment, it doesn’t work well together with the post-processing step of joining trips. 93 | 94 | #### Even more options 95 | 96 | For more options, check `gtfs2lc --help` 97 | 98 | ## How it works (for contributors) 99 | 100 | We first convert `stop_times.txt` to connection rules called `connections.txt`. 101 | 102 | Service dates are processed through `calendar_dates.txt` and `calendar.txt`, which are processed at the same time. 103 | 104 | In the final step, the connection rules are expanded into connections by joining the days, service ids and connectionRules. 105 | 106 | Post-processing steps work directly on the output stream, and can map the output stream to Linked Data. Connections2JSONLD is the main class to look at. 107 | 108 | Another post-processing step is introduced to fix joining and splitting trips. 109 | 110 | ## Not yet implemented 111 | 112 | At this moment we've only implemented a conversion from the Stop Times to connections. However, in future work we will also implement a system for describing trips and routes, a system for transit stops and a system for transfers in Linked Data. 113 | 114 | Furthermore, `frequencies.txt` is also not supported at this time. We hope to support this in the future though.
115 | 116 | ## Authors 117 | 118 | * Pieter Colpaert 119 | 120 | * Julián Rojas 121 | -------------------------------------------------------------------------------- /lib/stoptimes/st2c.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Pieter Colpaert and Julián Rojas © Ghent University - imec 3 | * Make sure that the stop_times.txt is ordered by trip_id and stop_sequence before piping it to this library 4 | */ 5 | const Transform = require('stream').Transform; 6 | 7 | class StopTimesToConnections extends Transform { 8 | constructor(stopsDB, tripsDB, routesDB, servicesDB, historyDB) { 9 | super({ objectMode: true }); 10 | this._stopsDB = stopsDB; 11 | this._tripsDB = tripsDB; 12 | this._routesDB = routesDB; 13 | this._servicesDB = servicesDB; 14 | this._historyDB = historyDB; 15 | this._previousStopTime = null; 16 | this._currentTripStartTime = null; 17 | } 18 | 19 | /** 20 | * When ordered, we can just take 2 gtfs:StopTimes and bring them together in 1 "connection rule", 21 | * which is an intermediate data structure we define here 22 | */ 23 | async _transform(stopTime, encoding, done) { 24 | if (this.previousStopTime && this.previousStopTime['trip_id'] === stopTime['trip_id']) { 25 | if (stopTime['arrival_time'] === '' && stopTime['departure_time'] === '') { 26 | // Both arrival and departure time for this stop are empty, so Connection rule cannot be created. 27 | // This is valid GTFS but requires interpolation to set estimated stop times. 28 | console.error(`WARNING: Empty arrival and departure times found for trip ${stopTime['trip_id']} on stop ${stopTime['stop_id']}. Interpolation is required in a previous step to handle these cases (not supported yet). 
This stop time will be skipped.`) 29 | done(); 30 | } 31 | 32 | // Get related trip, route and service dates 33 | const { 34 | departureStop, 35 | arrivalStop, 36 | trip, 37 | route, 38 | serviceDates 39 | } = await this.getRelatedData(this.previousStopTime['stop_id'], stopTime['stop_id']); 40 | 41 | // Add trip start time as a resource for URI building. 42 | trip.startTime_dfm = this.currentTripStartTime; 43 | // Add stop headsigns (if any) 44 | let headsign = null; 45 | if (arrivalStop['stop_headsign'] && arrivalStop['stop_headsign'] !== '') { 46 | headsign = arrivalStop['stop_headsign']; 47 | } else if (trip['trip_headsign'] && trip['trip_headsign'] !== '') { 48 | headsign = trip['trip_headsign']; 49 | } else if (route['route_long_name'] && route['route_long_name'] !== '') { 50 | headsign = route['route_long_name']; 51 | } 52 | let previous_headsign = null; 53 | if (departureStop['stop_headsign']) { 54 | previous_headsign = departureStop['stop_headsign']; 55 | } 56 | 57 | // Use stop_code as stop ID if available. Fallback to stop_id 58 | const depStopId = departureStop['stop_code'] && departureStop['stop_code'] !== '' ? 59 | departureStop['stop_code'] : departureStop['stop_id']; 60 | const arrStopId = arrivalStop['stop_code'] && arrivalStop['stop_code'] !== '' ? 
61 | arrivalStop['stop_code'] : arrivalStop['stop_id']; 62 | 63 | // Get a unique identifier for this connection rule 64 | const uniqueId = [ 65 | route['route_long_name'].replace(/\s/g, ''), 66 | trip['trip_short_name'], 67 | depStopId, 68 | arrStopId, 69 | this.currentTripStartTime, 70 | this.previousStopTime['departure_time'], 71 | stopTime['arrival_time'], 72 | this.previousStopTime['pickup_type'], 73 | stopTime['drop_off_type'] 74 | ].join('/'); 75 | 76 | // Check if this connection rule exists in historyDB and if there are any updates 77 | const newServices = await this.differentialUpdate(uniqueId, serviceDates); 78 | if (newServices) { 79 | // Create a connection rule 80 | // dfm is "duration from midnight" (see GTFS reference) 81 | const connection = { 82 | route, 83 | trip, 84 | departure_dfm: this.previousStopTime['departure_time'], 85 | arrival_dfm: stopTime['arrival_time'], 86 | departure_stop: departureStop, 87 | arrival_stop: arrivalStop, 88 | pickup_type: this.previousStopTime['pickup_type'] || '', 89 | drop_off_type: stopTime['drop_off_type'] || '', 90 | headsign, 91 | previous_headsign, 92 | stop_sequence: this.previousStopTime['stop_sequence'] 93 | }; 94 | // There are updates! 
push this Connection rule 95 | connection.serviceDates = newServices; 96 | this.push(connection); 97 | } 98 | 99 | } else { 100 | this.currentTripStartTime = stopTime['departure_time']; 101 | } 102 | this.previousStopTime = stopTime; 103 | done(); 104 | } 105 | 106 | async getRelatedData(departure, arrival) { 107 | const departureStop = await this.stopsDB.get(departure); 108 | const arrivalStop = await this.stopsDB.get(arrival); 109 | const trip = await this.tripsDB.get(this.previousStopTime['trip_id']); 110 | const route = await this.routesDB.get(trip['route_id']); 111 | const serviceDates = await this.servicesDB.get(trip['service_id']) || []; 112 | return { 113 | departureStop, 114 | arrivalStop, 115 | trip, 116 | route, 117 | serviceDates 118 | }; 119 | } 120 | 121 | async differentialUpdate(id, serviceDates) { 122 | try { 123 | const old = await this.historyDB.get(id); 124 | // This Connection rule has been processed in the past. 125 | // Check if there are new service dates 126 | const oldServiceDates = Object.keys(old); 127 | const newServices = serviceDates.filter(s => !oldServiceDates.includes(s)); 128 | if (newServices.length > 0) { 129 | // Update history with new found service dates 130 | const update = Object.assign({}, old); 131 | newServices.forEach(nsd => { 132 | update[nsd] = { type: 'Connection' }; 133 | }); 134 | 135 | await this.historyDB.put(id, update); 136 | return newServices; 137 | } else { 138 | // Nothing to update 139 | return null; 140 | } 141 | } catch (err) { 142 | if (err.code === 'LEVEL_NOT_FOUND') { 143 | // Is a completely new Connection rule, create history structure 144 | // for this connection rule. 
145 | const history = {}; 146 | serviceDates.forEach(sd => { 147 | history[sd] = { type: 'Connection' }; 148 | }); 149 | await this.historyDB.put(id, history); 150 | return serviceDates; 151 | } else { 152 | // Something went wrong 153 | throw err; 154 | } 155 | } 156 | } 157 | 158 | get stopsDB() { 159 | return this._stopsDB; 160 | } 161 | 162 | get tripsDB() { 163 | return this._tripsDB; 164 | } 165 | 166 | get routesDB() { 167 | return this._routesDB; 168 | } 169 | 170 | get servicesDB() { 171 | return this._servicesDB; 172 | } 173 | 174 | get historyDB() { 175 | return this._historyDB; 176 | } 177 | 178 | get previousStopTime() { 179 | return this._previousStopTime; 180 | } 181 | 182 | set previousStopTime(st) { 183 | this._previousStopTime = st; 184 | } 185 | 186 | get currentTripStartTime() { 187 | return this._currentTripStartTime; 188 | } 189 | 190 | set currentTripStartTime(st) { 191 | this._currentTripStartTime = st; 192 | } 193 | } 194 | 195 | module.exports = StopTimesToConnections; 196 | -------------------------------------------------------------------------------- /lib/gtfs2connections.js: -------------------------------------------------------------------------------- 1 | const { Worker, isMainThread, parentPort, workerData } = require('worker_threads'); 2 | const os = require('os'); 3 | const fs = require('fs'); 4 | const zlib = require('zlib'); 5 | const path = require('path'); 6 | const ChildProcess = require('child_process'); 7 | const del = require('del'); 8 | const util = require('util'); 9 | const N3 = require('n3'); 10 | const StoreManager = require('./stores/StoreManager'); 11 | const StopTimes2Cxs = require('./stoptimes/StopTimes2Cxs'); 12 | const { parser: JSONLParser } = require('stream-json/jsonl/Parser'); 13 | const JSONLStringer = require('stream-json/jsonl/Stringer'); 14 | const ConnectionsBuilder = require('./ConnectionsBuilder'); 15 | const Connections2JSONLD = require('./Connections2JSONLD'); 16 | const Connections2CSV = 
require('./Connections2CSV'); 17 | const Connections2Mongo = require('./Connections2Mongo'); 18 | const Connections2Triples = require('./Connections2Triples'); 19 | 20 | 21 | const readdir = util.promisify(fs.readdir); 22 | const exec = util.promisify(ChildProcess.exec); 23 | 24 | class GTFSMapper { 25 | constructor(options) { 26 | this._options = options; 27 | if (!this.options.store) { 28 | this.options.store = 'MemStore'; 29 | } 30 | } 31 | 32 | /** 33 | * Returns a resultStream for connections 34 | * Step 1: Clean up and sort source files by calling bin/gtfs2lc-sort.sh 35 | * Step 2: Create index of stops.txt, routes.txt, trips.txt and, 36 | * convert calendar_dates.txt and calendar.txt to service ids mapped to a long list of dates. 37 | * Step 3: Produce (diff) connection rules based on available CPU cores 38 | * Step 4: Use Node.js worker threads to process the connection rules in parallel. 39 | * Step 5: Merge the files created in parallel and return the file path. 40 | */ 41 | convert(path, output) { 42 | const numCPUs = os.cpus().length; 43 | 44 | return new Promise(async (resolve, reject) => { 45 | const t0 = new Date(); 46 | // Step 1: Clean up and sort source files by calling bin/gtfs2lc-sort.sh 47 | console.error('Cleaning up and sorting source files'); 48 | await cleanUpSources(path); 49 | 50 | // Step 2: Read all the required GTFS files and create reusable indexes 51 | console.error('Creating index stores...'); 52 | const stores = await StoreManager(path, output, this.options.store); 53 | 54 | // Step 3: Produce (diff) connection rules based on available CPU cores 55 | console.error('Creating Connection rules...'); 56 | await StopTimes2Cxs(path, output, stores, this.options.fresh); 57 | 58 | // Step 4: Materialize connections in parallel using worker threads 59 | let w = 0; 60 | const raws = []; 61 | // Create as many worker threads as there are available CPUs 62 | for (let i = 0; i < numCPUs; i++) { 63 | const worker = new Worker(__filename, { 64 | 
workerData: { 65 | instance: i, 66 | output, 67 | options: this.options 68 | } 69 | }); 70 | 71 | console.error(`Materializing Connections in worker thread (PID ${worker.threadId})`); 72 | 73 | worker.on('message', async () => { 74 | raws.push(`raw_${w}`); 75 | w++; 76 | if (w === numCPUs) { 77 | // Step 5: Merge all the created files into one 78 | const format = this.options.format; 79 | let ext = null; 80 | let gz = ''; 81 | let mergeCommand = 'zcat'; 82 | 83 | if (!format || ['json', 'mongo', 'jsonld', 'mongold'].indexOf(format) >= 0) { 84 | await appendLineBreaks(output); 85 | ext = 'json'; 86 | } else if (format === 'csv') { 87 | ext = 'csv'; 88 | } else if (format === 'turtle') { 89 | await removePrefixes(output); 90 | ext = 'ttl'; 91 | } else if (format === 'ntriples') { 92 | ext = 'nt'; 93 | } 94 | 95 | try { 96 | console.error('Merging final Linked Connections file...'); 97 | if (this.options.compressed) { 98 | mergeCommand = 'cat'; 99 | gz = '.gz'; 100 | } 101 | 102 | // Join all resulting files into one 103 | const raws_joined = raws.map(r => { return `${r}.${ext}.gz` }).join(" "); 104 | await exec(`${mergeCommand} ${raws_joined} > linkedConnections.${ext}${gz} && rm ${raws_joined}`, { cwd: output }); 105 | let t1 = new Date(); 106 | console.error('linkedConnections.' 
+ ext + ' File created in ' + (t1.getTime() - t0.getTime()) + ' ms'); 107 | await del( 108 | [ 109 | output + '/connections_*', 110 | output + '/stops.db', 111 | output + '/routes.db', 112 | output + '/trips.db', 113 | output + '/services.db' 114 | ], 115 | { force: true } 116 | ); 117 | resolve(`${output}/linkedConnections.${ext}`); 118 | } catch (err) { 119 | throw err; 120 | } 121 | } 122 | }).on('error', err => { 123 | console.error(err); 124 | reject(err); 125 | }).on('exit', (code) => { 126 | if (code !== 0) { 127 | console.error(new Error(`Worker stopped with exit code ${code}`)); 128 | reject(err); 129 | } 130 | }); 131 | } 132 | }); 133 | } 134 | 135 | get options() { 136 | return this._options; 137 | } 138 | } 139 | 140 | async function cleanUpSources(sources) { 141 | try { 142 | await exec(`${path.resolve(`${__dirname}/../bin/gtfs2lc-clean.sh`)} ${sources}`); 143 | } catch (err) { 144 | console.error(err); 145 | throw new Error('Process gtfs2lc-clean.sh exit with code: ' + code); 146 | } 147 | } 148 | 149 | async function appendLineBreaks(output) { 150 | const files = (await readdir(output)).filter(raw => raw.startsWith('raw_')); 151 | 152 | for (const [i, f] of files.entries()) { 153 | // Make sure the file ends with a newline. For some reason sometimes it fails to append. 
154 | while (!(await exec(`zcat ${f} | tail -1`, { cwd: output }))["stdout"].endsWith("\n")) { 155 | await exec(`echo "" | gzip >> ${f}`, { cwd: output }); 156 | } 157 | } 158 | } 159 | 160 | async function removePrefixes(output) { 161 | const files = (await readdir(output)).filter(raw => raw.startsWith('raw_') && raw.endsWith('.ttl.gz')); 162 | for (const [i, f] of files.entries()) { 163 | if (i > 0) { 164 | // TODO: find a not hard-coded way to remove prefixes 165 | await exec(`zcat ${f} | tail -n +4 | gzip > ${f}.temp && mv ${f}.temp ${f}`, { cwd: output }); 166 | } 167 | } 168 | } 169 | 170 | // Code executed only on a Worker Thread 171 | if (!isMainThread) { 172 | let fmt = 'json'; 173 | // Read the connection rules file created in the master thread and build the Connection objects! 174 | // Use a low highWaterMark to prevent backpressure memory leaks given that this is an inflating pipeline. 175 | let connectionStream = fs.createReadStream( 176 | `${workerData['output']}/connections_${workerData['instance']}.txt`, 177 | { encoding: 'utf8', objectMode: true, highWaterMark: 4 * 1024 } 178 | ).pipe(JSONLParser()) 179 | .pipe(new ConnectionsBuilder()) 180 | .on('error', function (e) { 181 | console.error(e); 182 | }); 183 | 184 | // Now, proceed to parse the connections according to the requested format 185 | const format = workerData['options']['format']; 186 | if (!format || ['json', 'mongo'].includes(format)) { 187 | if (format === 'mongo') { 188 | connectionStream = connectionStream.pipe(new Connections2Mongo()); 189 | } 190 | connectionStream = connectionStream.pipe(new JSONLStringer()); 191 | } else if (['jsonld', 'mongold'].includes(format)) { 192 | let context = undefined; 193 | // Only include the context for the first instance 194 | if (workerData['instance'] === 0) { 195 | context = { 196 | '@context': { 197 | lc: 'http://semweb.mmlab.be/ns/linkedconnections#', 198 | gtfs: 'http://vocab.gtfs.org/terms#', 199 | xsd: 'http://www.w3.org/2001/XMLSchema#', 
200 | trip: { '@type': '@id', '@id': 'gtfs:trip' }, 201 | Connection: 'lc:Connection', 202 | CancelledConnection: 'lc:CancelledConnection', 203 | departureTime: { '@type': 'xsd:dateTime', '@id': 'lc:departureTime' }, 204 | departureStop: { '@type': '@id', '@id': 'lc:departureStop' }, 205 | arrivalStop: { '@type': '@id', '@id': 'lc:arrivalStop' }, 206 | arrivalTime: { '@type': 'xsd:dateTime', '@id': 'lc:arrivalTime' }, 207 | } 208 | }; 209 | } 210 | // Convert json object stream to jsonld stream 211 | connectionStream = connectionStream.pipe(new Connections2JSONLD(workerData['options']['baseUris'], context)); 212 | 213 | if (format === 'mongold') { 214 | connectionStream = connectionStream.pipe(new Connections2Mongo()); 215 | } 216 | // Pipe the objects to a file 217 | connectionStream = connectionStream.pipe(new JSONLStringer()); 218 | } else if (format === 'csv') { 219 | fmt = 'csv'; 220 | // Only include the header on the first file 221 | let header = false; 222 | if (workerData['instance'] === 0) { 223 | header = true; 224 | } 225 | connectionStream = connectionStream.pipe(new Connections2CSV(header)); 226 | } else if (format === 'turtle') { 227 | fmt = 'ttl'; 228 | let prefixes = { 229 | lc: 'http://semweb.mmlab.be/ns/linkedconnections#', 230 | gtfs: 'http://vocab.gtfs.org/terms#', 231 | xsd: 'http://www.w3.org/2001/XMLSchema#' 232 | }; 233 | connectionStream = connectionStream.pipe(new Connections2Triples(workerData['options']['baseUris'])) 234 | .pipe(new N3.StreamWriter({ prefixes: prefixes })); 235 | } else if (format === 'ntriples') { 236 | fmt = 'nt'; 237 | connectionStream = connectionStream.pipe(new Connections2Triples(workerData['options']['baseUris'])) 238 | .pipe(new N3.StreamWriter({ format: 'N-Triples' })); 239 | } 240 | 241 | connectionStream.pipe(zlib.createGzip()) 242 | .pipe(fs.createWriteStream(`${workerData['output']}/raw_${workerData['instance']}.${fmt}.gz`)) 243 | .on('finish', () => { 244 | parentPort.postMessage('done'); 245 | });; 246 | 
247 | } 248 | 249 | module.exports = GTFSMapper; 250 | --------------------------------------------------------------------------------