├── .editorconfig ├── .github └── workflows │ └── phpunit.yml ├── .gitignore ├── .php_cs ├── LICENSE ├── README.md ├── bin ├── convert.php └── validator.php ├── composer.json ├── examples ├── parseAndWrite.php └── write.php ├── perf └── parser-streaming-perf.php ├── phpstan.neon ├── phpunit.xml ├── src ├── N3Lexer.php ├── N3Parser.php ├── TriGParser.php ├── TriGParserIterator.php ├── TriGWriter.php └── Util.php └── test ├── TriGParserIteratorTest.php ├── TriGParserTest.php ├── TriGWriterTest.php └── UtilTest.php /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | -------------------------------------------------------------------------------- /.github/workflows/phpunit.yml: -------------------------------------------------------------------------------- 1 | name: PHPUnit 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | matrix: 16 | php-versions: ['7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3'] 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - name: Validate composer.json and composer.lock 24 | run: composer validate --strict 25 | 26 | - name: Cache Composer packages 27 | id: composer-cache 28 | uses: actions/cache@v3 29 | with: 30 | path: vendor 31 | key: ${{ runner.os }}-php-${{ hashFiles('**/composer.lock') }} 32 | restore-keys: | 33 | ${{ runner.os }}-php- 34 | 35 | - name: Install dependencies 36 | run: composer install --prefer-dist --no-progress 37 | 38 | # Add a test script to composer.json, for instance: "test": "vendor/bin/phpunit" 39 | # Docs: https://getcomposer.org/doc/articles/scripts.md 40 | 41 | - name: Run test suite 42 | run: composer 
run phpunit 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /vendor/ 2 | /composer.lock 3 | /.phpunit.result.cache 4 | /.php_cs.cache 5 | -------------------------------------------------------------------------------- /.php_cs: -------------------------------------------------------------------------------- 1 | setRules([ 5 | '@Symfony' => true, 6 | '@Symfony:risky' => true, 7 | 'array_syntax' => ['syntax' => 'short'], 8 | 'fopen_flags' => false, 9 | 'no_empty_phpdoc' => true, 10 | 'no_unused_imports' => true, 11 | 'no_superfluous_phpdoc_tags' => true, 12 | 'ordered_imports' => true, 13 | 'phpdoc_summary' => false, 14 | 'protected_to_private' => false, 15 | 'combine_nested_dirname' => true, 16 | ]) 17 | ->setRiskyAllowed(true) 18 | ->setFinder( 19 | PhpCsFixer\Finder::create() 20 | ->in(__DIR__.'/bin') 21 | ->in(__DIR__.'/perf') 22 | ->in(__DIR__.'/src') 23 | ->in(__DIR__.'/test') 24 | ->name('*.php') 25 | ->append([ 26 | __FILE__, 27 | ]) 28 | ); 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Pieter Colpaert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The hardf turtle, n-triples, n-quads, TriG and N3 parser for PHP 2 | 3 | **hardf** is a PHP 7.1+ library that lets you handle Linked Data (RDF). It offers: 4 | - [**Parsing**](#parsing) triples/quads from [Turtle](http://www.w3.org/TR/turtle/), [TriG](http://www.w3.org/TR/trig/), [N-Triples](http://www.w3.org/TR/n-triples/), [N-Quads](http://www.w3.org/TR/n-quads/), and [Notation3 (N3)](https://www.w3.org/TeamSubmission/n3/) 5 | - [**Writing**](#writing) triples/quads to [Turtle](http://www.w3.org/TR/turtle/), [TriG](http://www.w3.org/TR/trig/), [N-Triples](http://www.w3.org/TR/n-triples/), and [N-Quads](http://www.w3.org/TR/n-quads/) 6 | 7 | Both the parser as the serializer have _streaming_ support. 8 | 9 | _This library is a port of [N3.js](https://github.com/rdfjs/N3.js/tree/v0.10.0) to PHP_ 10 | 11 | ## Triple Representation 12 | 13 | We use the triple representation in PHP ported from NodeJS N3.js library. Check https://github.com/rdfjs/N3.js/tree/v0.10.0#triple-representation for more information 14 | 15 | On purpose, we focused on performance, and not on developer friendliness. 16 | We have thus implemented this triple representation using associative arrays rather than PHP object. Thus, the same that holds for N3.js, is now an array. 
E.g.: 17 | 18 | ```php 19 | 'http://example.org/cartoons#Tom', 22 | 'predicate' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 23 | 'object' => 'http://example.org/cartoons#Cat', 24 | 'graph' => 'http://example.org/mycartoon', #optional 25 | ]; 26 | ``` 27 | 28 | Encode literals as follows (similar to N3.js) 29 | 30 | ```php 31 | '"Tom"@en-gb' // lowercase language 32 | '"1"^^http://www.w3.org/2001/XMLSchema#integer' // no angular brackets <> 33 | ``` 34 | 35 | ## Library functions 36 | 37 | Install this library using [composer](http://getcomposer.org): 38 | 39 | ```bash 40 | composer require pietercolpaert/hardf 41 | ``` 42 | 43 | ### Writing 44 | ```php 45 | use pietercolpaert\hardf\TriGWriter; 46 | ``` 47 | 48 | A class that should be instantiated and can write TriG or Turtle 49 | 50 | Example use: 51 | ```php 52 | $writer = new TriGWriter([ 53 | "prefixes" => [ 54 | "schema" =>"http://schema.org/", 55 | "dct" =>"http://purl.org/dc/terms/", 56 | "geo" =>"http://www.w3.org/2003/01/geo/wgs84_pos#", 57 | "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 58 | "rdfs"=> "http://www.w3.org/2000/01/rdf-schema#" 59 | ], 60 | "format" => "n-quads" //Other possible values: n-quads, trig or turtle 61 | ]); 62 | 63 | $writer->addPrefix("ex","http://example.org/"); 64 | $writer->addTriple("schema:Person","dct:title","\"Person\"@en","http://example.org/#test"); 65 | $writer->addTriple("schema:Person","schema:label","\"Person\"@en","http://example.org/#test"); 66 | $writer->addTriple("ex:1","dct:title","\"Person1\"@en","http://example.org/#test"); 67 | $writer->addTriple("ex:1","http://www.w3.org/1999/02/22-rdf-syntax-ns#type","schema:Person","http://example.org/#test"); 68 | $writer->addTriple("ex:2","dct:title","\"Person2\"@en","http://example.org/#test"); 69 | $writer->addTriple("schema:Person","dct:title","\"Person\"@en","http://example.org/#test2"); 70 | echo $writer->end(); 71 | ``` 72 | 73 | #### All methods 74 | ```php 75 | //The method names should speak 
for themselves: 76 | $writer = new TriGWriter(["prefixes" => [ /* ... */]]); 77 | $writer->addTriple($subject, $predicate, $object, $graph); 78 | $writer->addTriples($triples); 79 | $writer->addPrefix($prefix, $iri); 80 | $writer->addPrefixes($prefixes); 81 | //Creates blank node($predicate and/or $object are optional) 82 | $writer->blank($predicate, $object); 83 | //Creates rdf:list with $elements 84 | $list = $writer->addList($elements); 85 | 86 | //Returns the current output it is already able to create and clears the internal memory use (useful for streaming) 87 | $out .= $writer->read(); 88 | //Alternatively, you can listen for new chunks through a callback: 89 | $writer->setReadCallback(function ($output) { echo $output; }); 90 | 91 | //Call this at the end. The return value will be the full triple output, or the rest of the output such as closing dots and brackets, unless a callback was set. 92 | $out .= $writer->end(); 93 | //OR 94 | $writer->end(); 95 | ``` 96 | 97 | ### Parsing 98 | 99 | Next to [TriG](https://www.w3.org/TR/trig/), the TriGParser class also parses [Turtle](https://www.w3.org/TR/turtle/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and the [W3C Team Submission N3](https://www.w3.org/TeamSubmission/n3/) 100 | 101 | #### All methods 102 | 103 | ```php 104 | $parser = new TriGParser($options, $tripleCallback, $prefixCallback); 105 | $parser->setTripleCallback($function); 106 | $parser->setPrefixCallback($function); 107 | $parser->parse($input, $tripleCallback, $prefixCallback); 108 | $parser->parseChunk($input); 109 | $parser->end(); 110 | ``` 111 | 112 | #### Basic examples for small files 113 | 114 | Using return values and passing these to a writer: 115 | ```php 116 | use pietercolpaert\hardf\TriGParser; 117 | use pietercolpaert\hardf\TriGWriter; 118 | $parser = new TriGParser(["format" => "n-quads"]); //also parses n-triples, n3, turtle and trig.
Format is optional 119 | $writer = new TriGWriter(); 120 | $triples = $parser->parse(" ."); 121 | $writer->addTriples($triples); 122 | echo $writer->end(); 123 | ``` 124 | 125 | Using callbacks and passing these to a writer: 126 | ```php 127 | $parser = new TriGParser(); 128 | $writer = new TriGWriter(["format"=>"trig"]); 129 | $parser->parse(" . .", function ($e, $triple) use ($writer) { 130 | if (!isset($e) && isset($triple)) { 131 | $writer->addTriple($triple); 132 | echo $writer->read(); //write out what we have so far 133 | } else if (!isset($triple)) // flags the end of the file 134 | echo $writer->end(); //write the end 135 | else 136 | echo "Error occurred: " . $e; 137 | }); 138 | ``` 139 | 140 | #### Example using chunks and keeping prefixes 141 | 142 | When you need to parse a large file, you will need to parse it chunk by chunk and process each chunk as it arrives. You can do that as follows: 143 | 144 | ```php 145 | $writer = new TriGWriter(["format"=>"n-quads"]); 146 | $tripleCallback = function ($error, $triple) use ($writer) { 147 | if (isset($error)) 148 | throw $error; 149 | else if (isset($triple)) { 150 | $writer->addTriple($triple); 151 | echo $writer->read(); 152 | } else if (isset($error)) { 153 | throw $error; 154 | } else { 155 | echo $writer->end(); 156 | } 157 | }; 158 | $prefixCallback = function ($prefix, $iri) use (&$writer) { 159 | $writer->addPrefix($prefix, $iri); 160 | }; 161 | $parser = new TriGParser(["format" => "trig"], $tripleCallback, $prefixCallback); 162 | $parser->parseChunk($chunk); 163 | $parser->parseChunk($chunk); 164 | $parser->parseChunk($chunk); 165 | $parser->end(); //Needs to be called 166 | ``` 167 | 168 | #### Parser options 169 | 170 | * `format` input format (case-insensitive) 171 | * if not provided or not matching any options below, then any [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/) or [N-Quads](https://www.w3.org/TR/n-quads/) input can be parsed (but NOT the
[N3](https://www.w3.org/TeamSubmission/n3/)) 172 | * `turtle` - [Turtle](https://www.w3.org/TR/turtle/) 173 | * `trig` - [TriG](https://www.w3.org/TR/trig/) 174 | * contains `triple`, e.g. `triple`, `ntriples`, `N-Triples` - [N-Triples](https://www.w3.org/TR/n-triples/) 175 | * contains `quad`, e.g. `quad`, `nquads`, `N-Quads` - [N-Quads](https://www.w3.org/TR/n-quads/) 176 | * contains `n3`, e.g. `n3` - [N3](https://www.w3.org/TeamSubmission/n3/) 177 | * `blankNodePrefix` (defaults to `b0_`) prefix forced on blank node names, e.g. `TriGParser(["blankNodePrefix" => 'foo'])` will parse `_:bar` as `_:foobar`. 178 | * `documentIRI` sets the base URI used to resolve relative URIs (not applicable if `format` indicates n-triples or n-quads) 179 | * `lexer` allows the use of a custom lexer class. A lexer must provide the following public methods: 180 | * `tokenize(string $input, bool $finalize = true): array` 181 | * `tokenizeChunk(string $input): array` 182 | * `end(): array` 183 | * `explicitQuantifiers` - [...] 184 | 185 | #### Empty document base IRI 186 | 187 | Some Turtle and N3 documents may use relative-to-the-base-IRI IRI syntax (see [here](https://www.w3.org/TR/turtle/#sec-iri) and [here](https://www.w3.org/TR/turtle/#sec-iri-references)), e.g. 188 | 189 | ``` 190 | <> "some value" . 191 | ``` 192 | 193 | To properly parse such documents the document base IRI must be known. 194 | Otherwise we might end up with empty IRIs (e.g. for the subject in the example above). 195 | 196 | Sometimes the base IRI is encoded in the document, e.g. 197 | 198 | ``` 199 | @base . 200 | <> "some value" . 201 | ``` 202 | 203 | but sometimes it is missing. 204 | In such a case the [Turtle specification](https://www.w3.org/TR/turtle/#in-html-parsing) requires us to follow section 5.1.1 of the [RFC3986](http://www.ietf.org/rfc/rfc3986.txt) which says that if the base IRI is not encapsulated in the document, it should be assumed to be the document retrieval URI (e.g.
the URL you downloaded the document from or a file path converted to a URL). Unfortunately this can not be guessed by the hardf parser and has to be provided by you using the `documentIRI` parser creation option, e.g. 205 | 206 | ```php 207 | $parser = new TriGParser(["documentIRI" => "http://some.base/iri/"]); 208 | ``` 209 | 210 | Long story short, if you run into the `subject/predicate/object on line X can not be parsed without knowing the the document base IRI.(...)` error, please initialize the parser with the `documentIRI` option. 211 | 212 | ### Utility 213 | ```php 214 | use pietercolpaert\hardf\Util; 215 | ``` 216 | 217 | A static class with a couple of helpful functions for handling our specific triple representation. It will help you to create and evaluate literals, IRIs, and expand prefixes. 218 | 219 | ```php 220 | $bool = isIRI($term); 221 | $bool = isLiteral($term); 222 | $bool = isBlank($term); 223 | $bool = isDefaultGraph($term); 224 | $bool = inDefaultGraph($triple); 225 | $value = getLiteralValue($literal); 226 | $literalType = getLiteralType($literal); 227 | $lang = getLiteralLanguage($literal); 228 | $bool = isPrefixedName($term); 229 | $expanded = expandPrefixedName($prefixedName, $prefixes); 230 | $iri = createIRI($iri); 231 | $literalObject = createLiteral($value, $modifier = null); 232 | ``` 233 | 234 | See the documentation at https://github.com/RubenVerborgh/N3.js#utility for more information. 235 | 236 | ## Two executables 237 | 238 | We also offer 2 simple tools in `bin/` as an example implementation: one validator and one translator.
Try for example: 239 | ```bash 240 | curl -H "accept: application/trig" http://fragments.dbpedia.org/2015/en | php bin/validator.php trig 241 | curl -H "accept: application/trig" http://fragments.dbpedia.org/2015/en | php bin/convert.php trig n-triples 242 | ``` 243 | 244 | ## Performance 245 | 246 | We compared the performance on two turtle files, and parsed it with the EasyRDF library in PHP, the N3.js library for NodeJS and with Hardf. These were the results: 247 | 248 | | #triples | framework | time (ms) | memory (MB) | 249 | |----------:|-------------------------|------:|--------:| 250 | |1,866 | __Hardf__ without opcache | 27.6 | 0.722 | 251 | |1,866 | __Hardf__ with opcache | 24.5 | 0.380 | 252 | |1,866 | [EasyRDF](https://github.com/njh/easyrdf) without opcache | 5,166.5 | 2.772 | 253 | |1,866 | [EasyRDF](https://github.com/njh/easyrdf) with opcache | 5,176.2 | 2.421 | 254 | |1,866 | [ARC2](https://github.com/semsol/arc2) with opcache | 71.9 | 1.966 | 255 | | 1,866 | [N3.js](https://github.com/RubenVerborgh/N3.js) | 24.0 | 28.xxx | 256 | | 3,896,560 | __Hardf__ without opcache | 40,017.7 | 0.722 | 257 | | 3,896,560 | __Hardf__ with opcache | 33,155.3 | 0.380 | 258 | | 3,896,560 | [N3.js](https://github.com/RubenVerborgh/N3.js) | 7,004.0 | 59.xxx | 259 | | 3,896,560 | [ARC2](https://github.com/semsol/arc2) with opcache | 203,152.6 | 3,570.808 | 260 | 261 | ## License, status and contributions 262 | The hardf library is copyrighted by [Ruben Verborgh](http://ruben.verborgh.org/) and [Pieter Colpaert](https://pietercolpaert.be) 263 | and released under the [MIT License](https://github.com/pietercolpaert/hardf/blob/master/LICENSE). 264 | 265 | Contributions are welcome, and bug reports or pull requests are always helpful. 266 | If you plan to implement a larger feature, it's best to discuss this first by filing an issue. 
267 | -------------------------------------------------------------------------------- /bin/convert.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/php 2 | $outformat]); 19 | $parser = new TriGParser(['format' => $informat], function ($error, $triple) use (&$writer) { 20 | if (!isset($error) && !isset($triple)) { //flags end 21 | echo $writer->end(); 22 | } elseif (!$error) { 23 | $writer->addTriple($triple); 24 | echo $writer->read(); 25 | } else { 26 | fwrite(STDERR, $error->getMessage()."\n"); 27 | } 28 | }); 29 | 30 | while ($line = fgets(STDIN)) { 31 | $parser->parseChunk($line); 32 | } 33 | $parser->end(); 34 | -------------------------------------------------------------------------------- /bin/validator.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/php 2 | $format]); 12 | $errored = false; 13 | $finished = false; 14 | $tripleCount = 0; 15 | $line = true; 16 | while (!$finished && $line) { 17 | try { 18 | $line = fgets(STDIN); 19 | if ($line) { 20 | $tripleCount += count($parser->parseChunk($line)); 21 | } else { 22 | $tripleCount += count($parser->end()); 23 | $finished = true; 24 | } 25 | } catch (\Exception $e) { 26 | echo $e->getMessage()."\n"; 27 | $errored = true; 28 | } 29 | } 30 | if (!$errored) { 31 | echo 'Parsed '.$tripleCount." 
triples successfully.\n"; 32 | } 33 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pietercolpaert/hardf", 3 | "description": "A fast parser for RDF serializations such as turtle, n-triples, n-quads, trig and N3", 4 | "homepage": "https://github.com/pietercolpaert/hardf", 5 | "keywords": ["turtle","rdf","rdf1.1", "quads", "triples","n-triples","n-quads","trig", "n3","linked data", "open data", "streaming"], 6 | "type": "library", 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "Pieter Colpaert", 11 | "homepage": "https://pietercolpaert.be", 12 | "email": "pieter.colpaert@ugent.be", 13 | "role": "developer" 14 | }, 15 | { 16 | "name": "Ruben Verborgh", 17 | "homepage": "https://ruben.verborgh.org", 18 | "email": "ruben.verborgh@ugent.be", 19 | "role": "developer" 20 | } 21 | ], 22 | "support": { 23 | "source": "https://github.com/pietercolpaert/hardf", 24 | "issues": "https://github.com/pietercolpaert/hardf/issues" 25 | }, 26 | "autoload": { 27 | "psr-4": { 28 | "pietercolpaert\\hardf\\": "src/" 29 | } 30 | }, 31 | "autoload-dev": { 32 | "psr-4": { 33 | "Tests\\hardf\\": "test/" 34 | } 35 | }, 36 | "require": { 37 | "php": "^7.1|^8.0" 38 | }, 39 | "require-dev" : { 40 | "friendsofphp/php-cs-fixer": "*", 41 | "phpstan/phpstan": "^0.12.36", 42 | "phpunit/phpunit": "^7 || ^8 || ^9" 43 | }, 44 | "scripts": { 45 | "cs": "vendor/bin/php-cs-fixer fix", 46 | "cs-dry-run": "vendor/bin/php-cs-fixer fix --dry-run", 47 | "phpstan": "vendor/bin/phpstan analyse -c phpstan.neon -vvv", 48 | "phpunit": "vendor/bin/phpunit", 49 | "phpunit-with-coverage": "vendor/bin/phpunit --coverage-text" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/parseAndWrite.php: -------------------------------------------------------------------------------- 1 | "trig"]); 9 | $triples = 
$parser->parse("() . \"\"\"\n\"\"\"."); 10 | $writer->addTriples($triples); 11 | echo $writer->end(); 12 | 13 | //Or, option 2, the streaming version 14 | echo "--- Second streaming implementation with callbacks ---\n"; 15 | $parser = new TriGParser(); 16 | $writer = new TriGWriter(["format"=>"trig"]); 17 | $error = null; 18 | $parser->parse("@prefix ex: . . . ex:s ex:p ex:o . ", function ($e, $triple) use (&$writer) { 19 | if (!$e && $triple) 20 | $writer->addTriple($triple); 21 | else if (!$triple) 22 | echo $writer->end(); 23 | else 24 | echo "Error occured: " . $e; 25 | }, function ($prefix, $iri) use (&$writer) { 26 | $writer->addPrefix($prefix,$iri); 27 | }); 28 | -------------------------------------------------------------------------------- /examples/write.php: -------------------------------------------------------------------------------- 1 | [ 9 | "schema" =>"http://schema.org/", 10 | "dct" =>"http://purl.org/dc/terms/", 11 | "geo" =>"http://www.w3.org/2003/01/geo/wgs84_pos#", 12 | "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 13 | "rdfs"=> "http://www.w3.org/2000/01/rdf-schema#" 14 | ] 15 | ]); 16 | 17 | $writer->addPrefix("ex","http://example.org/"); 18 | $writer->addTriple("schema:Person","dct:title","\"Person\"@en","http://example.org/#test"); 19 | $writer->addTriple("schema:Person","schema:label","\"Person\"@en","http://example.org/#test"); 20 | $writer->addTriple("ex:1","dct:title","\"Person1\"@en","http://example.org/#test"); 21 | $writer->addTriple("ex:1","http://www.w3.org/1999/02/22-rdf-syntax-ns#type","schema:Person","http://example.org/#test"); 22 | $writer->addTriple("ex:2","dct:title","\"Person2\"@en","http://example.org/#test"); 23 | $writer->addTriple("schema:Person","dct:title","\"Person\"@en","http://example.org/#test2"); 24 | echo $writer->end(); 25 | -------------------------------------------------------------------------------- /perf/parser-streaming-perf.php: 
-------------------------------------------------------------------------------- 1 | $base], function ($error, $triple) use (&$count, $TEST, $filename) { 18 | if ($triple) { 19 | ++$count; 20 | } else { 21 | echo '- Parsing file '.$filename.': '.(microtime(true) - $TEST)."s\n"; 22 | echo '* Triples parsed: '.$count."\n"; 23 | echo '* Memory usage: '.(memory_get_usage() / 1024 / 1024)."MB\n"; 24 | } 25 | }); 26 | 27 | $handle = fopen($filename, 'r'); 28 | if ($handle) { 29 | while (false !== ($line = fgets($handle, 4096))) { 30 | $parser->parseChunk($line); 31 | } 32 | $parser->end(); 33 | fclose($handle); 34 | } else { 35 | // error opening the file. 36 | echo 'File not found '.$filename; 37 | } 38 | -------------------------------------------------------------------------------- /phpstan.neon: -------------------------------------------------------------------------------- 1 | parameters: 2 | bootstrapFiles: 3 | - ./vendor/autoload.php 4 | 5 | fileExtensions: 6 | - php 7 | 8 | level: 5 9 | 10 | paths: 11 | - bin 12 | - examples 13 | - perf 14 | - src 15 | - test 16 | 17 | parallel: 18 | maximumNumberOfProcesses: 5 19 | -------------------------------------------------------------------------------- /phpunit.xml: -------------------------------------------------------------------------------- 1 | 2 | 12 | 13 | 14 | ./test/ 15 | 16 | 17 | 18 | 19 | 20 | src 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/N3Lexer.php: -------------------------------------------------------------------------------- 1 | \\"\{\}\|\^\`]/'; 19 | 20 | private $input; 21 | private $line = 1; 22 | 23 | /** 24 | * @var array|null 25 | */ 26 | private $comments; 27 | private $n3Mode; 28 | private $prevTokenType; 29 | 30 | private $_oldTokenize; 31 | private $_tokenize; 32 | 33 | public function __construct($options = []) 34 | { 35 | $this->initTokenize(); 36 | $this->escapeReplacements = [ 37 | '\\' => '\\', "'" => "'", '"' => '"', 38 | 
'n' => "\n", 'r' => "\r", 't' => "\t", 'f' => "\f", 'b' => \chr(8), 39 | '_' => '_', '~' => '~', '.' => '.', '-' => '-', '!' => '!', '$' => '$', '&' => '&', 40 | '(' => '(', ')' => ')', '*' => '*', '+' => '+', ',' => ',', ';' => ';', '=' => '=', 41 | '/' => '/', '?' => '?', '#' => '#', '@' => '@', '%' => '%', 42 | ]; 43 | // In line mode (N-Triples or N-Quads), only simple features may be parsed 44 | if ($options['lineMode']) { 45 | // Don't tokenize special literals 46 | $this->tripleQuotedString = '/$0^/'; 47 | $this->number = '/$0^/'; 48 | $this->boolean = '/$0^/'; 49 | // Swap the tokenize method for a restricted version 50 | $this->_oldTokenize = $this->_tokenize; 51 | $self = $this; 52 | $this->_tokenize = function ($input, $finalize = true) use ($self) { 53 | $tokens = \call_user_func($this->_oldTokenize, $input, $finalize); 54 | foreach ($tokens as $token) { 55 | if (!preg_match('/^(?:blank|IRI|prefixed|literal|langcode|type|typeIRI|\.|eof)$/', $token['type'])) { 56 | throw $self->syntaxError($token['type'], $token['line']); 57 | } 58 | } 59 | 60 | return $tokens; 61 | }; 62 | } 63 | // Enable N3 functionality by default 64 | $this->n3Mode = false !== $options['n3']; 65 | 66 | // Disable comment tokens by default 67 | $this->comments = isset($options['comments']) ? 
$options['comments'] : null; 68 | } 69 | 70 | // ## Regular expressions 71 | //_iri: /^<((?:[^ <>{}\\]|\\[uU])+)>[ \t]*/, // IRI with escape sequences; needs sanity check after unescaping 72 | private $iri = '/^<((?:[^ <>{}\\\\]|\\\\[uU])+)>[ \\t]*/'; // IRI with escape sequences; needs sanity check after unescaping 73 | // _unescapedIri: /^<([^\x00-\x20<>\\"\{\}\|\^\`]*)>[ \t]*/, // IRI without escape sequences; no unescaping 74 | private $unescapedIri = '/^<([^\\x00-\\x20<>\\\\"\\{\\}\\|\\^\\`]*)>[ \\t]*/'; // IRI without escape sequences; no unescaping 75 | // _unescapedString: /^"[^"\\]+"(?=[^"\\])/, // non-empty string without escape sequences 76 | private $unescapedString = '/^"[^\\\\"]+"(?=[^\\\\"])/'; // non-empty string without escape sequences 77 | // _singleQuotedString: /^"[^"\\]*(?:\\.[^"\\]*)*"(?=[^"\\])|^'[^'\\]*(?:\\.[^'\\]*)*'(?=[^'\\])/, 78 | private $singleQuotedString = '/^"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"(?=[^"\\\\])|^\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'(?=[^\'\\\\])/'; 79 | // _tripleQuotedString: /^""("[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*")""|^''('[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*')''/, 80 | private $tripleQuotedString = '/^""("[^\\\\"]*(?:(?:\\\\.|"(?!""))[^\\\\"]*)*")""|^\'\'(\'[^\\\\\']*(?:(?:\\\\.|\'(?!\'\'))[^\\\\\']*)*\')\'\'/'; 81 | private $langcode = '/^@([a-z]+(?:-[a-z0-9]+)*)(?=[^a-z0-9\\-])/i'; 82 | private $prefix = '/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)?:(?=[#\\s<])/'; 83 | private $prefixed = 
"/^((?:[A-Za-z\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{02ff}\\x{0370}-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}])*)?:((?:(?:[0-:A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{02ff}\\x{0370}-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])(?:(?:[\\.\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~])*(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6\\xf8-\\x{037d}\\x{037f}-\\x{1fff}\\x{200c}\\x{200d}\\x{203f}\\x{2040}\\x{2070}-\\x{218f}\\x{2c00}-\\x{2fef}\\x{3001}-\\x{d7ff}\\x{f900}-\\x{fdcf}\\x{fdf0}-\\x{fffd}]|%[0-9a-fA-F]{2}|\\\\[!#-\\/;=?\\-@_~]))?)?)(?:[ \\t]+|(?=\\.?[,;!\\^\\s#()\\[\\]\\{\\}\"'<]))/u"; 84 | 85 | private $variable = '/^\\?(?:(?:[A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6])(?:[\\-0-:A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)(?=[.,;!\\^\\s#()\\[\\]\\{\\}"\'<])/'; 86 | 87 | private $blank = '/^_:((?:[0-9A-Z_a-z\\xc0-\\xd6\\xd8-\\xf6])(?:\\.?[\\-0-9A-Z_a-z\\xb7\\xc0-\\xd6\\xd8-\\xf6])*)(?:[ \\t]+|(?=\\.?[,;:\\s#()\\[\\]\\{\\}"\'<]))/'; 88 | private $number = "/^[\\-+]?(?:\\d+\\.?\\d*([eE](?:[\\-\\+])?\\d+)|\\d*\\.?\\d+)(?=[.,;:\\s#()\\[\\]\\{\\}\"'<])/"; 89 | private $boolean = '/^(?:true|false)(?=[.,;\\s#()\\[\\]\\{\\}"\'<])/'; 90 | private $keyword = '/^@[a-z]+(?=[\\s#<])/i'; 91 | private $sparqlKeyword = '/^(?:PREFIX|BASE|GRAPH)(?=[\\s#<])/i'; 92 | private $shortPredicates = '/^a(?=\\s+|<)/'; 93 | private $newline = '/^[ 
/**
 * Tokenizes as far as possible, emitting tokens through the callback.
 *
 * The buffered input ($this->input) is consumed token by token. Every token is
 * reported as `$callback(null, $token)`, where `$token` is an array with the keys
 * `line`, `type`, `value` and `prefix`. A syntax error is reported as
 * `$callback($exception, null)`.
 *
 * @param callable $callback      receives `(?\Exception $error, ?array $token)`
 * @param bool     $inputFinished whether no more input will follow; if so an `eof`
 *                                token is emitted and incomplete tokens are errors
 *
 * @return string|null the input that could not be tokenized yet (to be retried when
 *                     more data arrives), or null after EOF or after a syntax error
 */
private function tokenizeToEnd($callback, $inputFinished)
{
    // Continue parsing as far as possible; the loop will return eventually
    $input = $this->input;

    // Signals the syntax error through the callback.
    // The closure returns nothing, so `return $reportSyntaxError(...)` yields null.
    $reportSyntaxError = function ($self) use ($callback, &$input) {
        preg_match("/^\S*/", $input, $match);
        $callback($self->syntaxError($match[0], $self->line), null);
    };

    $outputComments = $this->comments;
    while (true) {
        // Count and skip whitespace lines
        $whiteSpaceMatch = null;
        $comment = null;
        while (preg_match($this->newline, $input, $whiteSpaceMatch)) {
            // Emit a comment token if comments were requested. This mirrors the handling
            // of the final comment at EOF below; the callback is the one passed to us.
            if ($outputComments && preg_match($this->comment, $whiteSpaceMatch[0], $comment)) {
                $callback(null, ['line' => $this->line, 'type' => 'comment', 'value' => $comment[1], 'prefix' => '']);
            }
            // Advance the input
            $input = substr($input, \strlen($whiteSpaceMatch[0]), \strlen($input));
            ++$this->line;
        }
        // Skip whitespace on current line
        if (preg_match($this->whitespace, $input, $whiteSpaceMatch)) {
            $input = substr($input, \strlen($whiteSpaceMatch[0]), \strlen($input));
        }

        // Stop for now if we're at the end
        if (preg_match($this->endOfFile, $input)) {
            // If the $input is finished, emit EOF
            if ($inputFinished) {
                // Try to find a final comment
                if ($outputComments && preg_match($this->comment, $input, $comment)) {
                    $callback(null, ['line' => $this->line, 'type' => 'comment', 'value' => $comment[1], 'prefix' => '']);
                }
                $callback($input = null, ['line' => $this->line, 'type' => 'eof', 'value' => '', 'prefix' => '']);
            }
            $this->input = $input;

            return $input;
        }

        // Look for specific token types based on the first character
        $line = $this->line;
        $type = '';
        $value = '';
        $prefix = '';
        $firstChar = $input[0];
        $match = null;
        $matchLength = 0;
        $unescaped = null;
        $inconclusive = false;

        switch ($firstChar) {
            case '^':
                // We need at least 3 tokens lookahead to distinguish ^^<IRI> and ^^pre:fixed
                if (\strlen($input) < 3) {
                    break;
                }
                // Try to match a type
                elseif ('^' === $input[1]) {
                    $this->prevTokenType = '^^';
                    // Move to type IRI or prefixed name
                    $input = substr($input, 2);
                    if ('<' !== $input[0]) {
                        $inconclusive = true;
                        break;
                    }
                }
                // If no type, it must be a path expression
                else {
                    if ($this->n3Mode) {
                        $matchLength = 1;
                        $type = '^';
                    }
                    break;
                }
                // Fall through in case the type is an IRI
                // no break
            case '<':
                // Try to find a full IRI without escape sequences
                if (preg_match($this->unescapedIri, $input, $match)) {
                    $type = 'IRI';
                    $value = $match[1];
                }

                // Try to find a full IRI with escape sequences
                elseif (preg_match($this->iri, $input, $match)) {
                    $unescaped = $this->unescape($match[1]);
                    if (null === $unescaped || preg_match($this->illegalIriChars, $unescaped)) {
                        return $reportSyntaxError($this);
                    }
                    $type = 'IRI';
                    $value = $unescaped;
                }
                // Try to find a backwards implication arrow
                elseif ($this->n3Mode && \strlen($input) > 1 && '=' === $input[1]) {
                    $type = 'inverse';
                    $matchLength = 2;
                    $value = 'http://www.w3.org/2000/10/swap/log#implies';
                }
                break;
            case '_':
                // Try to find a blank node. Since it can contain (but not end with) a dot,
                // we always need a non-dot character before deciding it is a prefixed name.
                // Therefore, try inserting a space if we're at the end of the $input.
                if ((preg_match($this->blank, $input, $match)) || $inputFinished && (preg_match($this->blank, $input.' ', $match))) {
                    $type = 'blank';
                    $prefix = '_';
                    $value = $match[1];
                }

                break;

            case '"':
            case "'":
                // Try to find a non-empty double-quoted literal without escape sequences
                if (preg_match($this->unescapedString, $input, $match)) {
                    $type = 'literal';
                    $value = $match[0];
                }
                // Try to find any other literal wrapped in a pair of single or double quotes
                elseif (preg_match($this->singleQuotedString, $input, $match)) {
                    $unescaped = $this->unescape($match[0]);
                    if (null === $unescaped) {
                        return $reportSyntaxError($this);
                    }
                    $type = 'literal';
                    $value = preg_replace('/^\'|\'$/', '"', $unescaped);
                }
                // Try to find a literal wrapped in three pairs of single or double quotes
                elseif (preg_match($this->tripleQuotedString, $input, $match)) {
                    // preg_match() fills a non-participating group that precedes a matched
                    // group with '', so isset() alone cannot tell which alternative matched.
                    if (isset($match[1]) && '' !== $match[1]) {
                        $unescaped = $match[1];
                    } else {
                        $unescaped = isset($match[2]) ? $match[2] : '';
                    }
                    // Count the newlines and advance line counter
                    $this->line += \count(preg_split('/\r\n|\r|\n/', $unescaped)) - 1;
                    $unescaped = $this->unescape($unescaped);
                    if (null === $unescaped) {
                        return $reportSyntaxError($this);
                    }
                    $type = 'literal';
                    $value = preg_replace("/^'|'$/", '"', $unescaped);
                }
                break;

            case '?':
                // Try to find a variable
                if ($this->n3Mode && (preg_match($this->variable, $input, $match))) {
                    $type = 'var';
                    $value = $match[0];
                }
                break;

            case '@':
                // Try to find a language code
                if ('literal' === $this->prevTokenType && preg_match($this->langcode, $input, $match)) {
                    $type = 'langcode';
                    $value = $match[1];
                }

                // Try to find a keyword
                elseif (preg_match($this->keyword, $input, $match)) {
                    $type = $match[0];
                }
                break;

            case '.':
                // Try to find a dot as punctuation
                if (1 === \strlen($input) ? $inputFinished : ($input[1] < '0' || $input[1] > '9')) {
                    $type = '.';
                    $matchLength = 1;
                    break;
                }
                // Fall through to numerical case (could be a decimal dot)

                // no break
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '+':
            case '-':
                // Try to find a number
                if (preg_match($this->number, $input, $match)) {
                    $type = 'literal';
                    $value = '"'.$match[0].'"^^http://www.w3.org/2001/XMLSchema#'.(isset($match[1]) ? 'double' : (preg_match("/^[+\-]?\d+$/", $match[0]) ? 'integer' : 'decimal'));
                }
                break;
            case 'B':
            case 'b':
            case 'p':
            case 'P':
            case 'G':
            case 'g':
                // Try to find a SPARQL-style keyword
                if (preg_match($this->sparqlKeyword, $input, $match)) {
                    $type = strtoupper($match[0]);
                } else {
                    $inconclusive = true;
                }
                break;

            case 'f':
            case 't':
                // Try to match a boolean
                if (preg_match($this->boolean, $input, $match)) {
                    $type = 'literal';
                    $value = '"'.$match[0].'"^^http://www.w3.org/2001/XMLSchema#boolean';
                } else {
                    $inconclusive = true;
                }
                break;

            case 'a':
                // Try to find an abbreviated predicate
                if (preg_match($this->shortPredicates, $input, $match)) {
                    $type = 'abbreviation';
                    $value = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
                } else {
                    $inconclusive = true;
                }
                break;
            case '=':
                // Try to find an implication arrow or equals sign
                if ($this->n3Mode && \strlen($input) > 1) {
                    $type = 'abbreviation';
                    if ('>' !== $input[1]) {
                        $matchLength = 1;
                        $value = 'http://www.w3.org/2002/07/owl#sameAs';
                    } else {
                        $matchLength = 2;
                        $value = 'http://www.w3.org/2000/10/swap/log#implies';
                    }
                }
                break;

            case '!':
                if (!$this->n3Mode) {
                    break;
                }
                // no break
            case ',':
            case ';':
            case '[':
            case ']':
            case '(':
            case ')':
            case '{':
            case '}':
                // The next token is punctuation
                $matchLength = 1;
                $type = $firstChar;
                break;
            default:
                $inconclusive = true;
        }

        // Some first characters do not allow an immediate decision, so inspect more
        if ($inconclusive) {
            // Try to find a prefix
            if (('@prefix' === $this->prevTokenType || 'PREFIX' === $this->prevTokenType) && preg_match($this->prefix, $input, $match)) {
                $type = 'prefix';
                $value = isset($match[1]) ? $match[1] : '';
            }
            // Try to find a prefixed name. Since it can contain (but not end with) a dot,
            // we always need a non-dot character before deciding it is a prefixed name.
            // Therefore, try inserting a space if we're at the end of the input.
            elseif (preg_match($this->prefixed, $input, $match) || $inputFinished && (preg_match($this->prefixed, $input.' ', $match))) {
                $type = 'prefixed';
                $prefix = isset($match[1]) ? $match[1] : '';
                $value = $this->unescape($match[2]);
                // Same contract as the other unescape() call sites: null means invalid escape
                if (null === $value) {
                    return $reportSyntaxError($this);
                }
            }
        }

        // A type token is special: it can only be emitted after an IRI or prefixed name is read
        if ('^^' === $this->prevTokenType) {
            switch ($type) {
                case 'prefixed': $type = 'type'; break;
                case 'IRI': $type = 'typeIRI'; break;
                default: $type = '';
            }
        }

        // What if nothing of the above was found?
        if (!$type) {
            // We could be in streaming mode, and then we just wait for more input to arrive.
            // Otherwise, a syntax error has occurred in the input.
            // One exception: error on an unaccounted linebreak (= not inside a triple-quoted literal).
            if ($inputFinished || (!preg_match('/^\'\'\'|^"""/', $input) && preg_match('/\\n|\\r/', $input))) {
                return $reportSyntaxError($this);
            } else {
                $this->input = $input;

                return $input;
            }
        }
        // Emit the parsed token
        $callback(null, ['line' => $line, 'type' => $type, 'value' => $value, 'prefix' => $prefix]);
        $this->prevTokenType = $type;

        // Advance to next part to tokenize
        $input = substr($input, $matchLength > 0 ? $matchLength : \strlen($match[0]), \strlen($input));
    }
}

/**
 * Replaces N3 escape codes by their corresponding characters.
 *
 * @param string $item the raw text, possibly containing escape sequences
 *
 * @return string|null the unescaped text, or null if the text contains an escape
 *                     sequence that has no replacement (callers turn null into a
 *                     syntax error)
 */
private function unescape($item)
{
    try {
        return preg_replace_callback($this->escapeSequence, function ($match) {
            // $match[0] contains sequence
            $unicode4 = isset($match[1]) ? $match[1] : null;
            $unicode8 = isset($match[2]) ? $match[2] : null;
            $escapedChar = isset($match[3]) ? $match[3] : null;
            if ($unicode4) {
                $charCode = \intval($unicode4, 16);

                return mb_convert_encoding('&#'.$charCode.';', 'UTF-8', 'HTML-ENTITIES');
            }
            if ($unicode8) {
                $charCode = \intval($unicode8, 16);

                return mb_convert_encoding('&#'.$charCode.';', 'UTF-8', 'HTML-ENTITIES');
            }
            if (!isset($this->escapeReplacements[$escapedChar])) {
                // Abort the whole replacement; translated to null below
                throw new \UnexpectedValueException('Invalid escape sequence');
            }

            return $this->escapeReplacements[$escapedChar];
        }, $item);
    } catch (\UnexpectedValueException $e) {
        // Invalid escape sequence: signal it the way all callers check for it
        return null;
    }
}

/**
 * Creates a syntax error for the given issue and discards the buffered input.
 *
 * @param string $issue the offending piece of input
 * @param int    $line  the line on which it was found
 *
 * @return \Exception the error to report; it is returned, not thrown
 */
private function syntaxError($issue, $line = 0)
{
    $this->input = null;

    return new \Exception('Unexpected "'.$issue.'" on line '.$line.'.');
}

// When handling tokenize as a variable, we can hotswap its functionality when dealing with various serializations
private function initTokenize()
{
    $this->_tokenize = function ($input, $finalize) {
        // Append the new chunk to whatever could not be tokenized before
        if (!isset($this->input)) {
            $this->input = '';
        }
        $this->input .= $input;
        $tokens = [];
        $error = null;
        // Collect all tokens; keep the unparsed remainder for the next chunk
        $this->input = $this->tokenizeToEnd(function ($e, $t) use (&$tokens, &$error) {
            if (isset($e)) {
                $error = $e;
            }
            $tokens[] = $t;
        }, $finalize);
        if (null !== $error) {
            throw $error;
        }

        return $tokens;
    };
}

// ## Public methods

/**
 * Starts the transformation of an N3 document into an array of tokens.
 *
 * @param string $input    the (next part of the) document
 * @param bool   $finalize whether this is the last part of the document
 *
 * @return array the tokens found, each with the keys `line`, `type`, `value`, `prefix`
 *
 * @throws \Exception on a syntax error
 */
public function tokenize($input, $finalize = true)
{
    return \call_user_func($this->_tokenize, $input, $finalize);
}

// Adds the data chunk to the buffer and parses as far as possible
public function tokenizeChunk($input)
{
    return $this->tokenize($input, false);
}

/**
 * Signals the end of the input and tokenizes whatever is still buffered.
 *
 * @return array the remaining tokens, ending with the `eof` token
 *
 * @throws \Exception on a syntax error
 */
public function end()
{
    // Parses the rest: no new input, but mark the input as finished
    return $this->tokenize('', true);
}
true : false; 117 | $this->n3Mode = $isN3; 118 | $isLineMode = $isNTriples || $isNQuads; 119 | if (!($this->supportsNamedGraphs = !($isTurtle || $isN3))) { 120 | $this->readPredicateOrNamedGraph = $this->readPredicate; 121 | } 122 | $this->supportsQuads = !($isTurtle || $isTriG || $isNTriples || $isN3); 123 | // Disable relative IRIs in N-Triples or N-Quads mode 124 | if ($isLineMode) { 125 | $this->base = ''; 126 | $this->resolveIRI = function ($token) { 127 | \call_user_func($this->error, 'Disallowed relative IRI', $token); 128 | 129 | $this->subject = null; 130 | 131 | return $this->callback = function () {}; 132 | }; 133 | } 134 | $this->blankNodePrefix = null; 135 | if (isset($options['blankNodePrefix'])) { 136 | $this->blankNodePrefix = '_:'.preg_replace('/^_:/', '', $options['blankNodePrefix']); 137 | } 138 | 139 | $this->lexer = isset($options['lexer']) ? $options['lexer'] : new N3Lexer(['lineMode' => $isLineMode, 'n3' => $isN3]); 140 | // Disable explicit quantifiers by default 141 | $this->explicitQuantifiers = isset($options['explicitQuantifiers']) ? $options['explicitQuantifiers'] : null; 142 | 143 | // The read callback is the next function to be executed when a token arrives. 144 | // We start reading in the top context. 145 | $this->readCallback = $this->readInTopContext; 146 | $this->sparqlStyle = false; 147 | $this->prefixes = []; 148 | $this->prefixes['_'] = isset($this->blankNodePrefix) ? 
// ## Private class methods
// ### `_resetBlankNodeIds` restarts blank node identification
public function _resetBlankNodeIds()
{
    // Generated blank node labels are numbered from this counter
    $this->blankNodeCount = 0;
}

// ### `_setBase` sets the base IRI to resolve relative IRIs
private function setBase($baseIRI = null)
{
    // Without a base IRI only the base itself is cleared
    if (!$baseIRI) {
        $this->base = null;

        return;
    }

    // A base IRI never carries a fragment: cut it off at the first '#'
    $hashPosition = strpos($baseIRI, '#');
    if (false !== $hashPosition) {
        $baseIRI = substr($baseIRI, 0, $hashPosition);
    }

    $this->base = $baseIRI;

    // The base path is the IRI up to and including its last slash (query string dropped)
    if (false === strpos($baseIRI, '/')) {
        $this->basePath = $baseIRI;
    } else {
        $this->basePath = preg_replace('/[^\/?]*(?:\?.*)?$/', '', $baseIRI);
    }

    // Whole match becomes the root, first group the scheme
    preg_match($this->schemeAuthority, $baseIRI, $parts);
    $this->baseRoot = $parts[0] ?? '';
    $this->baseScheme = $parts[1] ?? '';
}

// ### `_saveContext` stores the current parsing context
// when entering a new scope (list, blank node, formula)
private function saveContext($type, $graph, $subject, $predicate, $object)
{
    $inN3 = (bool) $this->n3Mode;

    // The N3-specific parts of the context are only recorded in N3 mode
    if ($inN3) {
        $inverse = $this->inversePredicate;
        $blankPrefix = $this->prefixes['_'];
        $quantified = $this->quantified;
    } else {
        $inverse = false;
        $blankPrefix = '';
        $quantified = null;
    }

    $this->contextStack[] = [
        'subject' => $subject,
        'predicate' => $predicate,
        'object' => $object,
        'graph' => $graph,
        'type' => $type,
        'inverse' => $inverse,
        'blankPrefix' => $blankPrefix,
        'quantified' => $quantified,
    ];

    if (!$inN3) {
        return;
    }

    // The settings below only apply to N3 streams.
    // Every new scope resets the predicate direction
    $this->inversePredicate = false;
    // In N3, blank nodes are scoped to a formula
    // (using a dot as separator, as a blank node label cannot start with it)
    $this->prefixes['_'] = $this->graph.'.';
    // Quantifiers are scoped to a formula; the current set is simply kept as is
    // (PHP arrays are copied by value when stored in the context above)
}

// ### `_restoreContext` restores the parent context
// when leaving a scope (list, blank node, formula)
private function restoreContext()
{
    $frame = array_pop($this->contextStack);

    // The triple under construction continues where the parent scope left off
    $this->subject = $frame['subject'];
    $this->predicate = $frame['predicate'];
    $this->object = $frame['object'];
    $this->graph = $frame['graph'];

    // The settings below only apply to N3 streams
    if (!$this->n3Mode) {
        return;
    }
    $this->inversePredicate = $frame['inverse'];
    $this->prefixes['_'] = $frame['blankPrefix'];
    $this->quantified = $frame['quantified'];
}
$this->readPrefix; 246 | // It could be a base declaration 247 | case 'BASE': 248 | $this->sparqlStyle = true; 249 | // no break 250 | case '@base': 251 | return $this->readBaseIRI; 252 | // It could be a graph 253 | case '{': 254 | if ($this->supportsNamedGraphs) { 255 | $this->graph = ''; 256 | $this->subject = null; 257 | 258 | return $this->readSubject; 259 | } 260 | // no break 261 | case 'GRAPH': 262 | if ($this->supportsNamedGraphs) { 263 | return $this->readNamedGraphLabel; 264 | } 265 | // Otherwise, the next token must be a subject 266 | // no break 267 | default: 268 | return \call_user_func($this->readSubject, $token); 269 | } 270 | }; 271 | 272 | /* 273 | * reads an IRI, prefixed name, blank node, or variable 274 | * 275 | * @return null|string|object 276 | */ 277 | $this->readEntity = function ($token, $quantifier = null) { 278 | $value = null; 279 | switch ($token['type']) { 280 | // Read a relative or absolute IRI 281 | case 'IRI': 282 | case 'typeIRI': 283 | if (null === $this->base || preg_match($this->absoluteIRI, $token['value'])) { 284 | $value = $token['value']; 285 | } else { 286 | $value = \call_user_func($this->resolveIRI, $token); 287 | } 288 | break; 289 | // Read a blank node or prefixed name 290 | case 'type': 291 | case 'blank': 292 | case 'prefixed': 293 | if (!isset($this->prefixes[$token['prefix']])) { 294 | return \call_user_func($this->error, 'Undefined prefix "'.$token['prefix'].':"', $token); 295 | } 296 | 297 | $prefix = $this->prefixes[$token['prefix']]; 298 | $value = $prefix.$token['value']; 299 | break; 300 | // Read a variable 301 | case 'var': 302 | return $token['value']; 303 | // Everything else is not an entity 304 | default: 305 | return \call_user_func($this->error, 'Expected entity but got '.$token['type'], $token); 306 | } 307 | // In N3 mode, replace the entity if it is quantified 308 | if (!isset($quantifier) && $this->n3Mode && isset($this->quantified[$value])) { 309 | $value = $this->quantified[$value]; 310 | } 
311 | 312 | return $value; 313 | }; 314 | 315 | // ### `_readSubject` reads a triple's subject 316 | $this->readSubject = function ($token) { 317 | $this->predicate = null; 318 | switch ($token['type']) { 319 | case '[': 320 | // Start a new triple with a new blank node as subject 321 | $this->saveContext('blank', $this->graph, $this->subject = '_:b'.$this->blankNodeCount++, null, null); 322 | 323 | return $this->readBlankNodeHead; 324 | case '(':; 325 | // Start a new list 326 | $this->saveContext('list', $this->graph, self::RDF_NIL, null, null); 327 | $this->subject = null; 328 | 329 | return $this->readListItem; 330 | case '{': 331 | // Start a new formula 332 | if (!$this->n3Mode) { 333 | return \call_user_func($this->error, 'Unexpected graph', $token); 334 | } 335 | $this->saveContext('formula', $this->graph, $this->graph = '_:b'.$this->blankNodeCount++, null, null); 336 | 337 | return $this->readSubject; 338 | case '}': 339 | // No subject; the graph in which we are reading is closed instead 340 | return \call_user_func($this->readPunctuation, $token); 341 | case '@forSome': 342 | $this->subject = null; 343 | $this->predicate = 'http://www.w3.org/2000/10/swap/reify#forSome'; 344 | $this->quantifiedPrefix = '_:b'; 345 | 346 | return $this->readQuantifierList; 347 | case '@forAll': 348 | $this->subject = null; 349 | $this->predicate = 'http://www.w3.org/2000/10/swap/reify#forAll'; 350 | $this->quantifiedPrefix = '?b-'; 351 | 352 | return $this->readQuantifierList; 353 | default: 354 | // Read the subject entity 355 | $this->subject = \call_user_func($this->readEntity, $token); 356 | if (null == $this->subject) { 357 | throw $this->getNoBaseUriException('subject', $token['line']); 358 | } 359 | // In N3 mode, the subject might be a path 360 | if ($this->n3Mode) { 361 | return \call_user_func($this->getPathReader, $this->readPredicateOrNamedGraph); 362 | } 363 | } 364 | 365 | // The next token must be a predicate, 366 | // or, if the subject was actually a graph 
IRI, a named graph 367 | return $this->readPredicateOrNamedGraph; 368 | }; 369 | 370 | // ### `_readPredicate` reads a triple's predicate 371 | $this->readPredicate = function ($token) { 372 | $type = $token['type']; 373 | switch ($type) { 374 | case 'inverse': 375 | $this->inversePredicate = true; 376 | // no break 377 | case 'abbreviation': 378 | $this->predicate = $token['value']; 379 | break; 380 | case '.': 381 | case ']': 382 | case '}': 383 | // Expected predicate didn't come, must have been trailing semicolon 384 | if (null === $this->predicate) { 385 | return \call_user_func($this->error, 'Unexpected '.$type, $token); 386 | } 387 | $this->subject = null; 388 | 389 | return ']' === $type ? \call_user_func($this->readBlankNodeTail, $token) : \call_user_func($this->readPunctuation, $token); 390 | case ';': 391 | // Extra semicolons can be safely ignored 392 | return $this->readPredicate; 393 | case 'blank': 394 | if (!$this->n3Mode) { 395 | return \call_user_func($this->error, 'Disallowed blank node as predicate', $token); 396 | } 397 | // no break 398 | default: 399 | $this->predicate = \call_user_func($this->readEntity, $token); 400 | if (null == $this->predicate) { 401 | throw $this->getNoBaseUriException('predicate', $token['line']); 402 | } 403 | } 404 | // The next token must be an object 405 | return $this->readObject; 406 | }; 407 | 408 | // ### `_readObject` reads a triple's object 409 | $this->readObject = function ($token) { 410 | switch ($token['type']) { 411 | case 'literal': 412 | $this->object = $token['value']; 413 | 414 | return $this->readDataTypeOrLang; 415 | case '[': 416 | // Start a new triple with a new blank node as subject 417 | $this->saveContext('blank', $this->graph, $this->subject, $this->predicate, 418 | $this->subject = '_:b'.$this->blankNodeCount++); 419 | 420 | return $this->readBlankNodeHead; 421 | case '(': 422 | // Start a new list 423 | $this->saveContext('list', $this->graph, $this->subject, $this->predicate, 
self::RDF_NIL); 424 | $this->subject = null; 425 | 426 | return $this->readListItem; 427 | case '{': 428 | // Start a new formula 429 | if (!$this->n3Mode) { 430 | return \call_user_func($this->error, 'Unexpected graph', $token); 431 | } 432 | $this->saveContext('formula', $this->graph, $this->subject, $this->predicate, 433 | $this->graph = '_:b'.$this->blankNodeCount++); 434 | 435 | return $this->readSubject; 436 | default: 437 | // Read the object entity 438 | $this->object = \call_user_func($this->readEntity, $token); 439 | if (null == $this->object) { 440 | throw $this->getNoBaseUriException('object', $token['line']); 441 | } 442 | // In N3 mode, the object might be a path 443 | if ($this->n3Mode) { 444 | return \call_user_func($this->getPathReader, \call_user_func($this->getContextEndReader)); 445 | } 446 | } 447 | 448 | return \call_user_func($this->getContextEndReader); 449 | }; 450 | 451 | // ### `_readPredicateOrNamedGraph` reads a triple's predicate, or a named graph 452 | $this->readPredicateOrNamedGraph = function ($token) { 453 | return '{' === $token['type'] ? 
\call_user_func($this->readGraph, $token) : \call_user_func($this->readPredicate, $token); 454 | }; 455 | 456 | // ### `_readGraph` reads a graph 457 | $this->readGraph = function ($token) { 458 | if ('{' !== $token['type']) { 459 | return \call_user_func($this->error, 'Expected graph but got '.$token['type'], $token); 460 | } 461 | // The "subject" we read is actually the GRAPH's label 462 | $this->graph = $this->subject; 463 | $this->subject = null; 464 | 465 | return $this->readSubject; 466 | }; 467 | 468 | // ### `_readBlankNodeHead` reads the head of a blank node 469 | $this->readBlankNodeHead = function ($token) { 470 | if (']' === $token['type']) { 471 | $this->subject = null; 472 | 473 | return \call_user_func($this->readBlankNodeTail, $token); 474 | } else { 475 | $this->predicate = null; 476 | 477 | return \call_user_func($this->readPredicate, $token); 478 | } 479 | }; 480 | 481 | // ### `_readBlankNodeTail` reads the end of a blank node 482 | $this->readBlankNodeTail = function ($token) { 483 | if (']' !== $token['type']) { 484 | return \call_user_func($this->readBlankNodePunctuation, $token); 485 | } 486 | 487 | // Store blank node triple 488 | if (null !== $this->subject) { 489 | \call_user_func($this->triple, $this->subject, $this->predicate, $this->object, $this->graph); 490 | } 491 | 492 | // Restore the parent context containing this blank node 493 | $empty = null === $this->predicate; 494 | $this->restoreContext(); 495 | // If the blank node was the subject, continue reading the predicate 496 | if (null === $this->object) { 497 | // If the blank node was empty, it could be a named graph label 498 | return $empty ? 
$this->readPredicateOrNamedGraph : $this->readPredicateAfterBlank; 499 | } 500 | // If the blank node was the object, restore previous context and read punctuation 501 | else { 502 | return \call_user_func($this->getContextEndReader); 503 | } 504 | }; 505 | 506 | // ### `_readPredicateAfterBlank` reads a predicate after an anonymous blank node 507 | $this->readPredicateAfterBlank = function ($token) { 508 | // If a dot follows a blank node in top context, there is no predicate 509 | if ('.' === $token['type'] && 0 === \count($this->contextStack)) { 510 | $this->subject = null; // cancel the current triple 511 | 512 | return \call_user_func($this->readPunctuation, $token); 513 | } 514 | 515 | return \call_user_func($this->readPredicate, $token); 516 | }; 517 | 518 | // ### `_readListItem` reads items from a list 519 | $this->readListItem = function ($token) { 520 | $item = null; // The item of the list 521 | $list = null; // The list itself 522 | $prevList = $this->subject; // The previous list that contains this list 523 | $stack = &$this->contextStack; // The stack of parent contexts 524 | $parent = &$stack[\count($stack) - 1]; // The parent containing the current list 525 | $next = $this->readListItem; // The next function to execute 526 | $itemComplete = true; // Whether the item has been read fully 527 | 528 | switch ($token['type']) { 529 | case '[': 530 | // Stack the current list triple and start a new triple with a blank node as subject 531 | $list = '_:b'.$this->blankNodeCount++; 532 | $item = '_:b'.$this->blankNodeCount++; 533 | $this->subject = $item; 534 | $this->saveContext('blank', $this->graph, $list, self::RDF_FIRST, $this->subject); 535 | $next = $this->readBlankNodeHead; 536 | break; 537 | case '(': 538 | // Stack the current list triple and start a new list 539 | $this->saveContext('list', $this->graph, $list = '_:b'.$this->blankNodeCount++, self::RDF_FIRST, self::RDF_NIL); 540 | $this->subject = null; 541 | break; 542 | case ')': 543 | // 
Closing the list; restore the parent context 544 | $this->restoreContext(); 545 | // If this list is contained within a parent list, return the membership triple here. 546 | // This will be ` rdf:first .`. 547 | if (0 !== \count($stack) && 'list' === $stack[\count($stack) - 1]['type']) { 548 | \call_user_func($this->triple, $this->subject, $this->predicate, $this->object, $this->graph); 549 | } 550 | // Was this list the parent's subject? 551 | if (null === $this->predicate) { 552 | // The next token is the predicate 553 | $next = $this->readPredicate; 554 | // No list tail if this was an empty list 555 | if (self::RDF_NIL === $this->subject) { 556 | return $next; 557 | } 558 | } 559 | // The list was in the parent context's object 560 | else { 561 | $next = \call_user_func($this->getContextEndReader); 562 | // No list tail if this was an empty list 563 | if (self::RDF_NIL === $this->object) { 564 | return $next; 565 | } 566 | } 567 | // Close the list by making the head nil 568 | $list = self::RDF_NIL; 569 | break; 570 | case 'literal': 571 | $item = $token['value']; 572 | $itemComplete = false; // Can still have a datatype or language 573 | $next = $this->readListItemDataTypeOrLang; 574 | break; 575 | default: 576 | $item = \call_user_func($this->readEntity, $token); 577 | if (null == $item) { 578 | throw $this->getNoBaseUriException('list item', $token['line']); 579 | } 580 | } 581 | 582 | // Create a new blank node if no item head was assigned yet 583 | if (null === $list) { 584 | $list = '_:b'.$this->blankNodeCount++; 585 | $this->subject = $list; 586 | } 587 | // Is this the first element of the list? 
588 | if (null === $prevList) { 589 | // This list is either the subject or the object of its parent 590 | if (null === $parent['predicate']) { 591 | $parent['subject'] = $list; 592 | } else { 593 | $parent['object'] = $list; 594 | } 595 | } else { 596 | // Continue the previous list with the current list 597 | \call_user_func($this->triple, $prevList, self::RDF_REST, $list, $this->graph); 598 | } 599 | // Add the item's value 600 | if (null !== $item) { 601 | // In N3 mode, the item might be a path 602 | if ($this->n3Mode && ('IRI' === $token['type'] || 'prefixed' === $token['type'])) { 603 | // Create a new context to add the item's path 604 | $this->saveContext('item', $this->graph, $list, self::RDF_FIRST, $item); 605 | $this->subject = $item; 606 | $this->predicate = null; 607 | // _readPath will restore the context and output the item 608 | return \call_user_func($this->getPathReader, $this->readListItem); 609 | } 610 | // Output the item if it is complete 611 | if ($itemComplete) { 612 | \call_user_func($this->triple, $list, self::RDF_FIRST, $item, $this->graph); 613 | } 614 | // Otherwise, save it for completion 615 | else { 616 | $this->object = $item; 617 | } 618 | } 619 | 620 | return $next; 621 | }; 622 | 623 | // ### `_readDataTypeOrLang` reads an _optional_ data type or language 624 | $this->readDataTypeOrLang = function ($token) { 625 | return \call_user_func($this->completeLiteral, $token, false); 626 | }; 627 | 628 | // ### `_readListItemDataTypeOrLang` reads an _optional_ data type or language in a list 629 | $this->readListItemDataTypeOrLang = function ($token) { 630 | return \call_user_func($this->completeLiteral, $token, true); 631 | }; 632 | 633 | // ### `_completeLiteral` completes the object with a data type or language 634 | $this->completeLiteral = function ($token, $listItem) { 635 | $suffix = false; 636 | switch ($token['type']) { 637 | // Add a "^^type" suffix for types (IRIs and blank nodes) 638 | case 'type': 639 | case 'typeIRI': 640 
| $suffix = true; 641 | $this->object .= '^^'.\call_user_func($this->readEntity, $token); 642 | break; 643 | // Add an "@lang" suffix for language tags 644 | case 'langcode': 645 | $suffix = true; 646 | $this->object .= '@'.strtolower($token['value']); 647 | break; 648 | } 649 | // If this literal was part of a list, write the item 650 | // (we could also check the context stack, but passing in a flag is faster) 651 | if ($listItem) { 652 | \call_user_func($this->triple, $this->subject, self::RDF_FIRST, $this->object, $this->graph); 653 | } 654 | // Continue with the rest of the input 655 | if ($suffix) { 656 | return \call_user_func($this->getContextEndReader); 657 | } else { 658 | $this->readCallback = \call_user_func($this->getContextEndReader); 659 | 660 | return \call_user_func($this->readCallback, $token); 661 | } 662 | }; 663 | 664 | // ### `_readFormulaTail` reads the end of a formula 665 | $this->readFormulaTail = function ($token) { 666 | if ('}' !== $token['type']) { 667 | return \call_user_func($this->readPunctuation, $token); 668 | } 669 | 670 | // Store the last triple of the formula 671 | if (isset($this->subject)) { 672 | \call_user_func($this->triple, $this->subject, $this->predicate, $this->object, $this->graph); 673 | } 674 | 675 | // Restore the parent context containing this formula 676 | $this->restoreContext(); 677 | // If the formula was the subject, continue reading the predicate. 678 | // If the formula was the object, read punctuation. 679 | return !isset($this->object) ? $this->readPredicate : \call_user_func($this->getContextEndReader); 680 | }; 681 | 682 | // ### `_readPunctuation` reads punctuation between triples or triple parts 683 | $this->readPunctuation = function ($token) { 684 | $next = null; 685 | $subject = isset($this->subject) ? 
$this->subject : null; 686 | $graph = $this->graph; 687 | $inversePredicate = $this->inversePredicate; 688 | switch ($token['type']) { 689 | // A closing brace ends a graph 690 | case '}': 691 | if (null === $this->graph) { 692 | return \call_user_func($this->error, 'Unexpected graph closing', $token); 693 | } 694 | if ($this->n3Mode) { 695 | return \call_user_func($this->readFormulaTail, $token); 696 | } 697 | $this->graph = null; 698 | // A dot just ends the statement, without sharing anything with the next 699 | // no break 700 | case '.': 701 | $this->subject = null; 702 | $next = \count($this->contextStack) ? $this->readSubject : $this->readInTopContext; 703 | if ($inversePredicate) { 704 | $this->inversePredicate = false; 705 | } //TODO: What’s this? 706 | break; 707 | // Semicolon means the subject is shared; predicate and object are different 708 | case ';': 709 | $next = $this->readPredicate; 710 | break; 711 | // Comma means both the subject and predicate are shared; the object is different 712 | case ',': 713 | $next = $this->readObject; 714 | break; 715 | default: 716 | // An entity means this is a quad (only allowed if not already inside a graph) 717 | $graph = \call_user_func($this->readEntity, $token); 718 | if ($this->supportsQuads && null === $this->graph && $graph) { 719 | $next = $this->readQuadPunctuation; 720 | break; 721 | } 722 | 723 | return \call_user_func($this->error, 'Expected punctuation to follow "'.$this->object.'"', $token); 724 | } 725 | // A triple has been completed now, so return it 726 | if (null !== $subject) { 727 | $predicate = $this->predicate; 728 | $object = $this->object; 729 | if (!$inversePredicate) { 730 | \call_user_func($this->triple, $subject, $predicate, $object, $graph); 731 | } else { 732 | \call_user_func($this->triple, $object, $predicate, $subject, $graph); 733 | } 734 | } 735 | 736 | return $next; 737 | }; 738 | 739 | // ### `_readBlankNodePunctuation` reads punctuation in a blank node 740 | 
$this->readBlankNodePunctuation = function ($token) { 741 | $next = null; 742 | switch ($token['type']) { 743 | // Semicolon means the subject is shared; predicate and object are different 744 | case ';': 745 | $next = $this->readPredicate; 746 | break; 747 | // Comma means both the subject and predicate are shared; the object is different 748 | case ',': 749 | $next = $this->readObject; 750 | break; 751 | default: 752 | return \call_user_func($this->error, 'Expected punctuation to follow "'.$this->object.'"', $token); 753 | } 754 | // A triple has been completed now, so return it 755 | \call_user_func($this->triple, $this->subject, $this->predicate, $this->object, $this->graph); 756 | 757 | return $next; 758 | }; 759 | 760 | // ### `_readQuadPunctuation` reads punctuation after a quad 761 | $this->readQuadPunctuation = function ($token) { 762 | if ('.' !== $token['type']) { 763 | return \call_user_func($this->error, 'Expected dot to follow quad', $token); 764 | } 765 | 766 | return $this->readInTopContext; 767 | }; 768 | 769 | // ### `_readPrefix` reads the prefix of a prefix declaration 770 | $this->readPrefix = function ($token) { 771 | if ('prefix' !== $token['type']) { 772 | return \call_user_func($this->error, 'Expected prefix to follow @prefix', $token); 773 | } 774 | $this->prefix = $token['value']; 775 | 776 | return $this->readPrefixIRI; 777 | }; 778 | 779 | // ### `_readPrefixIRI` reads the IRI of a prefix declaration 780 | $this->readPrefixIRI = function ($token) { 781 | if ('IRI' !== $token['type']) { 782 | return \call_user_func($this->error, 'Expected IRI to follow prefix "'.$this->prefix.':"', $token); 783 | } 784 | $prefixIRI = \call_user_func($this->readEntity, $token); 785 | $this->prefixes[$this->prefix] = $prefixIRI; 786 | \call_user_func($this->prefixCallback, $this->prefix, $prefixIRI); 787 | 788 | return $this->readDeclarationPunctuation; 789 | }; 790 | 791 | // ### `_readBaseIRI` reads the IRI of a base declaration 792 | $this->readBaseIRI 
= function ($token) { 793 | if ('IRI' !== $token['type']) { 794 | return \call_user_func($this->error, 'Expected IRI to follow base declaration', $token); 795 | } 796 | $this->setBase(null === $this->base || preg_match($this->absoluteIRI, $token['value']) ? 797 | $token['value'] : \call_user_func($this->resolveIRI, $token)); 798 | 799 | return $this->readDeclarationPunctuation; 800 | }; 801 | 802 | // ### `_readNamedGraphLabel` reads the label of a named graph 803 | $this->readNamedGraphLabel = function ($token) { 804 | switch ($token['type']) { 805 | case 'IRI': 806 | case 'blank': 807 | case 'prefixed': 808 | \call_user_func($this->readSubject, $token); 809 | 810 | return $this->readGraph; 811 | case '[': 812 | return $this->readNamedGraphBlankLabel; 813 | default: 814 | return \call_user_func($this->error, 'Invalid graph label', $token); 815 | } 816 | }; 817 | 818 | // ### `_readNamedGraphLabel` reads a blank node label of a named graph 819 | $this->readNamedGraphBlankLabel = function ($token) { 820 | if (']' !== $token['type']) { 821 | return \call_user_func($this->error, 'Invalid graph label', $token); 822 | } 823 | $this->subject = '_:b'.$this->blankNodeCount++; 824 | 825 | return $this->readGraph; 826 | }; 827 | 828 | // ### `_readDeclarationPunctuation` reads the punctuation of a declaration 829 | $this->readDeclarationPunctuation = function ($token) { 830 | // SPARQL-style declarations don't have punctuation 831 | if ($this->sparqlStyle) { 832 | $this->sparqlStyle = false; 833 | 834 | return \call_user_func($this->readInTopContext, $token); 835 | } 836 | 837 | if ('.' 
!== $token['type']) { 838 | return \call_user_func($this->error, 'Expected declaration to end with a dot', $token); 839 | } 840 | 841 | return $this->readInTopContext; 842 | }; 843 | 844 | // Reads a list of quantified symbols from a @forSome or @forAll statement 845 | $this->readQuantifierList = function ($token) { 846 | $entity = null; 847 | switch ($token['type']) { 848 | case 'IRI': 849 | case 'prefixed': 850 | $entity = \call_user_func($this->readEntity, $token, true); 851 | break; 852 | default: 853 | return \call_user_func($this->error, 'Unexpected '.$token['type'], $token); 854 | } 855 | // Without explicit quantifiers, map entities to a quantified entity 856 | if (!$this->explicitQuantifiers) { 857 | $this->quantified[$entity] = $this->quantifiedPrefix.$this->blankNodeCount++; 858 | } else { 859 | // With explicit quantifiers, output the reified quantifier 860 | // If this is the first item, start a new quantifier list 861 | if (null === $this->subject) { 862 | $this->subject = '_:b'.$this->blankNodeCount++; 863 | \call_user_func($this->triple, isset($this->graph) ? 
$this->graph : '', $this->predicate, $this->subject, self::QUANTIFIERS_GRAPH); 864 | } 865 | // Otherwise, continue the previous list 866 | else { 867 | \call_user_func($this->triple,$this->subject, self::RDF_REST, 868 | $this->subject = '_:b'.$this->blankNodeCount++, self::QUANTIFIERS_GRAPH); 869 | } 870 | // Output the list item 871 | \call_user_func($this->triple, $this->subject, self::RDF_FIRST, $entity, self::QUANTIFIERS_GRAPH); 872 | } 873 | 874 | return $this->readQuantifierPunctuation; 875 | }; 876 | 877 | // Reads punctuation from a @forSome or @forAll statement 878 | $this->readQuantifierPunctuation = function ($token) { 879 | // Read more quantifiers 880 | if (',' === $token['type']) { 881 | return $this->readQuantifierList; 882 | } 883 | // End of the quantifier list 884 | else { 885 | // With explicit quantifiers, close the quantifier list 886 | if ($this->explicitQuantifiers) { 887 | \call_user_func($this->triple, $this->subject, self::RDF_REST, self::RDF_NIL, self::QUANTIFIERS_GRAPH); 888 | $this->subject = null; 889 | } 890 | // Read a dot 891 | $this->readCallback = \call_user_func($this->getContextEndReader); 892 | 893 | return \call_user_func($this->readCallback, $token); 894 | } 895 | }; 896 | 897 | // ### `_getPathReader` reads a potential path and then resumes with the given function 898 | $this->getPathReader = function ($afterPath): ?callable { 899 | $this->afterPath = $afterPath; 900 | 901 | return $this->readPath; 902 | }; 903 | 904 | // ### `_readPath` reads a potential path 905 | $this->readPath = function ($token): ?callable { 906 | switch ($token['type']) { 907 | case '!': 908 | // Forward path 909 | return $this->readForwardPath; 910 | case '^': 911 | // Backward path 912 | return $this->readBackwardPath; 913 | default: 914 | // Not a path; resume reading where we left off 915 | $stack = $this->contextStack; 916 | $parent = null; 917 | if (\is_array($stack) && \count($stack) - 1 > 0 && isset($stack[\count($stack) - 1])) { 918 | 
$parent = $stack[\count($stack) - 1];
                    }
                    // If we were reading a list item, we still need to output it
                    if ($parent && 'item' === $parent['type']) {
                        // The list item is the remaining subject after reading the path
                        $item = $this->subject;
                        // Switch back to the context of the list
                        $this->restoreContext();
                        // Output the list item
                        \call_user_func($this->triple, $this->subject, self::RDF_FIRST, $item, $this->graph);
                    }

                    return \call_user_func($this->afterPath, $token);
            }
        };

        // ### `_readForwardPath` reads a '!' path
        // A fresh blank node becomes the object of the emitted triple and takes the
        // place of the term (subject or object) the path started from.
        // Returns the path reader, or null when no predicate could be read.
        $this->readForwardPath = function ($token) {
            $subject = null;
            $predicate = null;
            $object = '_:b'.$this->blankNodeCount++;
            // The next token is the predicate
            $predicate = \call_user_func($this->readEntity, $token);
            if (!$predicate) {
                return;
            }
            // If we were reading a subject, replace the subject by the path's object
            if (null === $this->predicate) {
                $subject = $this->subject;
                $this->subject = $object;
            }
            // If we were reading an object, replace the object by the path's object
            else {
                $subject = $this->object;
                $this->object = $object;
            }
            // Emit the path's current triple and read its next section
            \call_user_func($this->triple, $subject, $predicate, $object, $this->graph);

            return $this->readPath;
        };

        // ### `_readBackwardPath` reads a '^' path
        // Mirror image of `_readForwardPath`: a fresh blank node becomes the subject
        // of the emitted triple and takes the place of the term the path started from.
        // Returns the path reader, or null when no predicate could be read.
        $this->readBackwardPath = function ($token) {
            // Allocate the blank node first so numbering matches `_readForwardPath`
            $subject = '_:b'.$this->blankNodeCount++;
            $object = null;
            // The next token is the predicate; stop when it could not be read
            // (the check used to be inverted, which discarded every valid path)
            $predicate = \call_user_func($this->readEntity, $token);
            if (!$predicate) {
                return;
            }
            // If we were reading a subject, replace the subject by the path's subject
            if (null === $this->predicate) {
                $object = $this->subject;
                $this->subject = $subject;
            }
            // If we were reading an object, replace the object by the path's subject
            else {
                $object = $this->object;
                $this->object = $subject;
            }
            // Emit the path's current triple and read its next section
            \call_user_func($this->triple, $subject, $predicate, $object, $this->graph);

            return $this->readPath;
        };

        // ### `_getContextEndReader` gets the next reader function at the end of a context
        // With an empty context stack the punctuation reader is used; otherwise the
        // reader is picked from the type of the innermost context. Context types
        // other than the three listed below yield null (there is no default branch).
        $this->getContextEndReader = function () {
            $contextStack = $this->contextStack;
            if (!\count($contextStack)) {
                return $this->readPunctuation;
            }

            switch ($contextStack[\count($contextStack) - 1]['type']) {
                case 'blank':
                    return $this->readBlankNodeTail;
                case 'list':
                    return $this->readListItem;
                case 'formula':
                    return $this->readFormulaTail;
            }
        };

        // ### `_triple` emits a triple through the callback
        // An unset graph is reported as the empty string.
        $this->triple = function ($subject, $predicate, $object, $graph) {
            \call_user_func($this->callback, null, ['subject' => $subject, 'predicate' => $predicate, 'object' => $object, 'graph' => isset($graph) ? $graph : '']);
        };

        // ### `_error` emits an error message through the callback
        // Without a callback the exception is thrown instead.
        $this->error = function ($message, $token) {
            if ($this->callback) {
                \call_user_func($this->callback, new \Exception($message.' on line '.$token['line'].'.'), null);
            } else {
                throw new \Exception($message.' on line '.$token['line'].'.');
            }
        };

        // ### `_resolveIRI` resolves a relative IRI token against the base path,
        // assuming that a base path has been set and that the IRI is indeed relative
        $this->resolveIRI = function ($token) {
            $iri = $token['value'];

            if (!isset($iri[0])) { // An empty relative IRI indicates the base IRI
                return $this->base;
            }

            switch ($iri[0]) {
                // Resolve relative fragment IRIs against the base IRI
                case '#': return $this->base.$iri;
                // Resolve relative query string IRIs by replacing the query string
                case '?':
                    // Cut the base at its first '?' (if any) and append the new query.
                    // Plain string functions are used on purpose: the IRI may contain
                    // `$n` or `\n` sequences, which preg_replace() would expand as
                    // back-references when the IRI is passed as the replacement.
                    $queryStart = strpos($this->base, '?');

                    return (false === $queryStart ? $this->base : substr($this->base, 0, $queryStart)).$iri;
                // Resolve root-relative IRIs at the root of the base IRI
                case '/':
                    // Resolve scheme-relative IRIs to the scheme
                    // (the isset() guard covers the one-character IRI "/")
                    return (isset($iri[1]) && '/' === $iri[1] ? $this->baseScheme : $this->baseRoot).\call_user_func($this->removeDotSegments, $iri);
                // Resolve all other IRIs at the base IRI's path
                default:
                    return \call_user_func($this->removeDotSegments, $this->basePath.$iri);
            }
        };

        // ### `_removeDotSegments` resolves './' and '../' path segments in an IRI as per RFC3986
        $this->removeDotSegments = function ($iri) {
            // Don't modify the IRI if it does not contain any dot segments
            if (!preg_match($this->dotSegments, $iri)) {
                return $iri;
            }

            // Start with an imaginary slash before the IRI in order to resolve trailing './' and '../'
            $result = '';
            $length = \strlen($iri);
            $i = -1;
            $pathStart = -1;
            $segmentStart = 0;
            $next = '/';

            // Helper to fetch the last occurrence:
            // search backwards for needle in haystack, and return its position
            $rstrpos = function ($haystack, $needle) {
                $size = \strlen($haystack);
                $pos =
strpos(strrev($haystack), $needle); 1062 | if (false === $pos) { 1063 | return false; 1064 | } 1065 | 1066 | return $size - $pos - 1; 1067 | }; 1068 | 1069 | while ($i < $length) { 1070 | switch ($next) { 1071 | // The path starts with the first slash after the authority 1072 | case ':': 1073 | if ($pathStart < 0) { 1074 | // Skip two slashes before the authority 1075 | if ('/' === $iri[++$i] && '/' === $iri[++$i]) { 1076 | // Skip to slash after the authority 1077 | while (($pathStart = $i + 1) < $length && '/' !== $iri[$pathStart]) { 1078 | $i = $pathStart; 1079 | } 1080 | } 1081 | } 1082 | break; 1083 | // Don't modify a query string or fragment 1084 | case '?': 1085 | case '#': 1086 | $i = $length; 1087 | break; 1088 | // Handle '/.' or '/..' path segments 1089 | case '/': 1090 | if (isset($iri[$i + 1]) && '.' === $iri[$i + 1]) { 1091 | if (isset($iri[++$i + 1])) { 1092 | $next = $iri[$i + 1]; 1093 | } else { 1094 | $next = null; 1095 | } 1096 | switch ($next) { 1097 | // Remove a '/.' segment 1098 | case '/': 1099 | if (($i - 1 - $segmentStart) > 0) { 1100 | $result .= substr($iri, $segmentStart, $i - 1 - $segmentStart); 1101 | } 1102 | $segmentStart = $i + 1; 1103 | break; 1104 | // Remove a trailing '/.' segment 1105 | case null: 1106 | case '?': 1107 | case '#': 1108 | return $result.substr($iri, $segmentStart, $i - $segmentStart).substr($iri, $i + 1); 1109 | // Remove a '/..' segment 1110 | case '.': 1111 | if (isset($iri[++$i + 1])) { 1112 | $next = $iri[$i + 1]; 1113 | } else { 1114 | $next = null; 1115 | } 1116 | if (null === $next || '/' === $next || '?' === $next || '#' === $next) { 1117 | if ($i - 2 - $segmentStart > 0) { 1118 | $result .= substr($iri, $segmentStart, $i - 2 - $segmentStart); 1119 | } 1120 | // Try to remove the parent path from result 1121 | if (($segmentStart = $rstrpos($result, '/')) >= $pathStart) { 1122 | $result = substr($result, 0, $segmentStart); 1123 | } 1124 | // Remove a trailing '/..' 
segment 1125 | if ('/' !== $next) { 1126 | return $result.'/'.substr($iri, $i + 1); 1127 | } 1128 | $segmentStart = $i + 1; 1129 | } 1130 | } 1131 | } 1132 | } 1133 | if (++$i < $length) { 1134 | $next = $iri[$i]; 1135 | } 1136 | } 1137 | 1138 | return $result.substr($iri, $segmentStart); 1139 | }; 1140 | } 1141 | 1142 | // ## Public methods 1143 | 1144 | // ### `parse` parses the N3 input and emits each parsed triple through the callback 1145 | public function parse($input, $tripleCallback = null, $prefixCallback = null) 1146 | { 1147 | $this->setTripleCallback($tripleCallback); 1148 | $this->setPrefixCallback($prefixCallback); 1149 | 1150 | return $this->parseChunk($input, true); 1151 | } 1152 | 1153 | // ### New method for streaming possibilities: parse only a chunk 1154 | public function parseChunk($input, $finalize = false) 1155 | { 1156 | if (!isset($this->tripleCallback)) { 1157 | $triples = []; 1158 | $error = null; 1159 | $this->callback = function ($e, $t = null) use (&$triples, &$error) { 1160 | if (!$e && $t) { 1161 | $triples[] = $t; 1162 | } elseif (!$e) { 1163 | //DONE 1164 | } else { 1165 | $error = $e; 1166 | } 1167 | }; 1168 | $tokens = $this->lexer->tokenize($input, $finalize); 1169 | foreach ($tokens as $token) { 1170 | if (isset($this->readCallback)) { 1171 | $this->readCallback = \call_user_func($this->readCallback, $token); 1172 | } 1173 | } 1174 | if ($error) { 1175 | throw $error; 1176 | } 1177 | 1178 | return $triples; 1179 | } else { 1180 | // Parse asynchronously otherwise, executing the read callback when a token arrives 1181 | $this->callback = $this->tripleCallback; 1182 | try { 1183 | $tokens = $this->lexer->tokenize($input, $finalize); 1184 | foreach ($tokens as $token) { 1185 | if (isset($this->readCallback)) { 1186 | $this->readCallback = \call_user_func($this->readCallback, $token); 1187 | } else { 1188 | //error occured in parser 1189 | break; 1190 | } 1191 | } 1192 | } catch (\Exception $e) { 1193 | if ($this->callback) { 1194 
| \call_user_func($this->callback, $e, null); 1195 | } else { 1196 | throw $e; 1197 | } 1198 | $this->callback = function () {}; 1199 | } 1200 | } 1201 | } 1202 | 1203 | public function setTripleCallback($tripleCallback = null) 1204 | { 1205 | $this->tripleCallback = $tripleCallback; 1206 | } 1207 | 1208 | public function setPrefixCallback($prefixCallback = null) 1209 | { 1210 | if (isset($prefixCallback)) { 1211 | $this->prefixCallback = $prefixCallback; 1212 | } else { 1213 | $this->prefixCallback = function () {}; 1214 | } 1215 | } 1216 | 1217 | public function end() 1218 | { 1219 | return $this->parseChunk('', true); 1220 | } 1221 | 1222 | private function getNoBaseUriException($location, $line) 1223 | { 1224 | return new \Exception( 1225 | "$location on line $line can not be parsed without knowing the the document base IRI.\n". 1226 | "Please set the document base IRI using the documentIRI parser configuration option.\n". 1227 | "See https://github.com/pietercolpaert/hardf/#empty-document-base-IRI ." 1228 | ); 1229 | } 1230 | } 1231 | -------------------------------------------------------------------------------- /src/TriGParserIterator.php: -------------------------------------------------------------------------------- 1 | options = $options; 57 | $this->prefixCallback = $prefixCallback; 58 | } 59 | 60 | public function __destruct() 61 | { 62 | $this->closeTmpStream(); 63 | } 64 | 65 | /** 66 | * A thiny wrapper for the parseStream() method turning a string into 67 | * a stream resource. 68 | */ 69 | public function parse(string $input): \Iterator 70 | { 71 | $this->closeTmpStream(); 72 | $this->tmpStream = fopen('php://memory', 'r+'); 73 | fwrite($this->tmpStream, $input); 74 | rewind($this->tmpStream); 75 | 76 | return $this->parseStream($this->tmpStream); 77 | } 78 | 79 | /** 80 | * Parses a given input stream using a given chunk size. 
81 | * 82 | * @param resource $input 83 | * 84 | * @throws \Exception 85 | */ 86 | public function parseStream($input, int $chunkSize = 8192): \Iterator 87 | { 88 | if (!\is_resource($input)) { 89 | throw new \Exception('Input has to be a resource'); 90 | } 91 | 92 | $this->input = $input; 93 | $this->chunkSize = $chunkSize; 94 | $this->n = -1; 95 | $this->triplesBuffer = []; 96 | $this->parser = new TriGParser($this->options, null, $this->prefixCallback); 97 | 98 | return $this; 99 | } 100 | 101 | public function current() 102 | { 103 | return current($this->triplesBuffer); 104 | } 105 | 106 | public function key() 107 | { 108 | return $this->n; 109 | } 110 | 111 | public function next(): void 112 | { 113 | $el = next($this->triplesBuffer); 114 | if (false === $el) { 115 | $this->triplesBuffer = []; 116 | $this->parser->setTripleCallback(function (?\Exception $e, 117 | ?array $quad): void { 118 | if ($e) { 119 | throw $e; 120 | } 121 | if ($quad) { 122 | $this->triplesBuffer[] = $quad; 123 | } 124 | }); 125 | while (!feof($this->input) && 0 === \count($this->triplesBuffer)) { 126 | $this->parser->parseChunk(fgets($this->input, $this->chunkSize)); 127 | } 128 | if (feof($this->input)) { 129 | $this->parser->end(); 130 | } 131 | } 132 | ++$this->n; 133 | } 134 | 135 | /** 136 | * @throws \Exception 137 | */ 138 | public function rewind(): void 139 | { 140 | $ret = rewind($this->input); 141 | if (true !== $ret) { 142 | throw new \Exception("Can't seek in the input stream"); 143 | } 144 | $this->next(); 145 | } 146 | 147 | public function valid(): bool 148 | { 149 | return false !== current($this->triplesBuffer); 150 | } 151 | 152 | private function closeTmpStream(): void 153 | { 154 | if (\is_resource($this->tmpStream)) { 155 | fclose($this->tmpStream); 156 | $this->tmpStream = null; 157 | } 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/TriGWriter.php: 
-------------------------------------------------------------------------------- 1 | escapeReplacements = [ 101 | '\\' => '\\\\', '"' => '\\"', "\t" => '\\t', 102 | "\n" => '\\n', "\r" => '\\r', \chr(8) => '\\b', "\f" => '\\f', 103 | ]; 104 | 105 | $this->setReadCallback($readCallback); 106 | $this->initWriter(); 107 | 108 | /* Initialize writer, depending on the format*/ 109 | $this->subject = null; 110 | if (!isset($options['format']) || !(preg_match('/triple|quad/i', $options['format']))) { 111 | $this->graph = ''; 112 | $this->prefixIRIs = []; 113 | if (isset($options['prefixes'])) { 114 | $this->addPrefixes($options['prefixes']); 115 | } 116 | } else { 117 | $this->writeTriple = $this->writeTripleLine; 118 | } 119 | 120 | /* 121 | * @todo make that a separate function 122 | * 123 | * @param mixed $character 124 | */ 125 | $this->characterReplacer = function ($character): string { 126 | // Replace a single character by its escaped version 127 | $character = $character[0]; 128 | if (\strlen($character) > 0 && isset($this->escapeReplacements[$character[0]])) { 129 | return $this->escapeReplacements[$character[0]]; 130 | } else { 131 | return $character; //no escaping necessary, should not happen, or something is wrong in our regex 132 | } 133 | }; 134 | } 135 | 136 | public function setReadCallback($readCallback) 137 | { 138 | $this->readCallback = $readCallback; 139 | } 140 | 141 | private function initWriter() 142 | { 143 | // ### `_writeTriple` writes the triple to the output stream 144 | $this->writeTriple = function ($subject, $predicate, $object, $graph) { 145 | if (empty($graph)) { 146 | $graph = null; 147 | } 148 | 149 | // Write the graph's label if it has changed 150 | if ($this->graph !== $graph) { 151 | // Close the previous graph and start the new one 152 | $lineToWrite = null === $this->subject ? '' : ($this->graph ? "\n}\n" : '.'.PHP_EOL); 153 | $lineToWrite .= isset($graph) ? $this->encodeIriOrBlankNode($graph).' 
{'.PHP_EOL : ''; 154 | $this->write($lineToWrite); 155 | 156 | $this->subject = null; 157 | 158 | // Don't treat identical blank nodes as repeating graphs 159 | if (null === $graph) { 160 | $this->graph = $graph; 161 | } else { 162 | $this->graph = '[' !== $graph[0] ? $graph : ']'; 163 | } 164 | } 165 | 166 | // Don't repeat the subject if it's the same 167 | if ($this->subject === $subject) { 168 | // Don't repeat the predicate if it's the same 169 | if ($this->predicate === $predicate) { 170 | $this->write(', '.$this->encodeObject($object)); 171 | } 172 | // Same subject, different predicate 173 | else { 174 | $this->predicate = $predicate; 175 | $this->write(";\n ".$this->encodePredicate($predicate).' '.$this->encodeObject($object)); 176 | } 177 | } 178 | // Different subject; write the whole triple 179 | else { 180 | $lineToWrite = (null === $this->subject ? '' : ".\n"); 181 | 182 | $this->subject = $subject; 183 | $lineToWrite .= $this->encodeSubject($subject); 184 | 185 | $this->predicate = $predicate; 186 | $lineToWrite .= ' '.$this->encodePredicate($predicate); 187 | $lineToWrite .= ' '.$this->encodeObject($object); 188 | 189 | $this->write($lineToWrite); 190 | } 191 | }; 192 | 193 | // ### `_writeTripleLine` writes the triple or quad to the output stream as a single line 194 | $this->writeTripleLine = function ($subject, $predicate, $object, $graph) { 195 | if (isset($graph) && '' === $graph) { 196 | $graph = null; 197 | } 198 | // Don't use prefixes 199 | unset($this->prefixMatch); 200 | 201 | // Write the triple 202 | $tripleToWrite = $this->encodeIriOrBlankNode($subject); 203 | $tripleToWrite .= ' '.$this->encodeIriOrBlankNode($predicate); 204 | $tripleToWrite .= ' '.$this->encodeObject($object); 205 | $tripleToWrite .= (isset($graph) ? 
' '.$this->encodeIriOrBlankNode($graph).'.'.PHP_EOL : '.'.PHP_EOL); 206 | 207 | $this->write($tripleToWrite); 208 | }; 209 | } 210 | 211 | /** 212 | * writes the argument to the output stream 213 | */ 214 | private function write(string $string) 215 | { 216 | if ($this->blocked) { 217 | throw new \Exception('Cannot write because the writer has been closed.'); 218 | } else { 219 | if (isset($this->readCallback)) { 220 | \call_user_func($this->readCallback, $string); 221 | } else { 222 | //buffer all 223 | $this->string .= $string; 224 | } 225 | } 226 | } 227 | 228 | // ### Reads a bit of the string 229 | public function read(): string 230 | { 231 | $string = $this->string; 232 | $this->string = ''; 233 | 234 | return $string; 235 | } 236 | 237 | // ### `_encodeIriOrBlankNode` represents an IRI or blank node 238 | private function encodeIriOrBlankNode($entity) 239 | { 240 | // A blank node or list is represented as-is 241 | $firstChar = substr($entity, 0, 1); 242 | if ('[' === $firstChar || '(' === $firstChar || '_' === $firstChar && ':' === substr($entity, 1, 1)) { 243 | return $entity; 244 | } 245 | // Escape special characters 246 | if (preg_match(self::ESCAPE, $entity)) { 247 | $entity = preg_replace_callback(self::ESCAPE, $this->characterReplacer, $entity); 248 | } 249 | 250 | // Try to represent the IRI as prefixed name 251 | preg_match($this->prefixRegex, $entity, $prefixMatch); 252 | if (!isset($prefixMatch[1]) && !isset($prefixMatch[2])) { 253 | if (preg_match('/(.*?:)/', $entity, $match) && isset($this->prefixIRIs) && \in_array($match[1], $this->prefixIRIs)) { 254 | return $entity; 255 | } else { 256 | return '<'.$entity.'>'; 257 | } 258 | } else { 259 | return !isset($prefixMatch[1]) ? 
$entity : $this->prefixIRIs[$prefixMatch[1]].$prefixMatch[2]; 260 | } 261 | } 262 | 263 | // ### `_encodeLiteral` represents a literal 264 | private function encodeLiteral($value, $type = null, $language = null) 265 | { 266 | // Escape special characters 267 | if (preg_match(self::ESCAPE, $value)) { 268 | $value = preg_replace_callback(self::ESCAPE, $this->characterReplacer, $value); 269 | } 270 | $value = $value; 271 | // Write the literal, possibly with type or language 272 | if (isset($language)) { 273 | return '"'.$value.'"@'.$language; 274 | } elseif (isset($type)) { 275 | return '"'.$value.'"^^'.$this->encodeIriOrBlankNode($type); 276 | } else { 277 | return '"'.$value.'"'; 278 | } 279 | } 280 | 281 | // ### `_encodeSubject` represents a subject 282 | private function encodeSubject(string $subject) 283 | { 284 | if ('"' === $subject[0]) { 285 | throw new \Exception('A literal as subject is not allowed: '.$subject); 286 | } 287 | 288 | // Don't treat identical blank nodes as repeating subjects 289 | if ('[' === $subject[0]) { 290 | $this->subject = ']'; 291 | } 292 | 293 | return $this->encodeIriOrBlankNode($subject); 294 | } 295 | 296 | // ### `_encodePredicate` represents a predicate 297 | private function encodePredicate(string $predicate) 298 | { 299 | if ('"' === $predicate[0]) { 300 | throw new \Exception('A literal as predicate is not allowed: '.$predicate); 301 | } 302 | 303 | return self::RDF_TYPE === $predicate ? 'a' : $this->encodeIriOrBlankNode($predicate); 304 | } 305 | 306 | /** 307 | * represents an object 308 | * 309 | * @param array|string $object 310 | */ 311 | private function encodeObject($object) 312 | { 313 | // Represent an IRI or blank node 314 | if ('"' !== $object[0]) { 315 | return $this->encodeIriOrBlankNode($object); 316 | } 317 | // Represent a literal 318 | if (preg_match(self::LITERALMATCHER, $object, $matches)) { 319 | return $this->encodeLiteral($matches[1], isset($matches[2]) ? $matches[2] : null, isset($matches[3]) ? 
$matches[3] : null);
        } else {
            throw new \Exception('Invalid literal: '.$object);
        }
    }

    /**
     * adds the triple to the output stream
     *
     * @param string|array $subject
     * @param string $predicate
     * @param string|array $object
     * @param string|null $graph
     */
    public function addTriple($subject, $predicate = null, $object = null, $graph = null): void
    {
        /*
         * The triple was given as a triple object, so shift parameters
         *
         * TODO deprecate that and remove this in next major version. That is bad style, instead adapt
         * callers to split S, P, O, G as different parameters. This change also allows better
         * static code analysis
         */
        if (\is_array($subject)) {
            $g = isset($subject['graph']) ? $subject['graph'] : null;
            \call_user_func($this->writeTriple, $subject['subject'], $subject['predicate'], $subject['object'], $g, $predicate);
        }

        // The optional `graph` parameter was not provided
        elseif (!\is_string($graph)) {
            \call_user_func($this->writeTriple, $subject, $predicate, $object, '', $graph);
        }
        // The `graph` parameter was provided
        else {
            \call_user_func($this->writeTriple, $subject, $predicate, $object, $graph);
        }
    }

    /**
     * adds the triples to the output stream
     *
     * Each element is passed on its own to addTriple(), i.e. in the array form
     * with 'subject', 'predicate', 'object' and optional 'graph' keys.
     *
     * @param array $triples
     */
    public function addTriples(array $triples): void
    {
        for ($i = 0; $i < \count($triples); ++$i) {
            $this->addTriple($triples[$i]);
        }
    }

    /**
     * adds the prefix to the output stream
     */
    public function addPrefix(string $prefix, string $iri): void
    {
        $prefixes = [];
        $prefixes[$prefix] = $iri;
        $this->addPrefixes($prefixes);
    }

    /**
     * adds the prefixes to the output stream
     *
     * Only IRIs ending in '#' or '/' are accepted as prefixes. A pending triple is
     * closed before a prefix declaration is written, and the matcher used to
     * abbreviate IRIs is rebuilt whenever at least one prefix was added.
     *
     * @param array $prefixes map of prefix name (without the colon) to IRI
     */
    public function addPrefixes(array $prefixes): void
    {
        // Add all useful prefixes
        $hasPrefixes = false;
        foreach ($prefixes as $prefix => $iri) {
            // Verify whether the prefix can be used and does not exist yet
            $check = !isset($this->prefixIRIs[$iri]) || $this->prefixIRIs[$iri] !== ($prefix.':');
            if (preg_match('/[#\/]$/', $iri) && $check) {
                $hasPrefixes = true;
                $this->prefixIRIs[$iri] = $prefix.':';
                // Finish a possible pending triple
                if (null !== $this->subject) {
                    $this->write($this->graph ? "\n}\n" : ".\n");
                    $this->subject = null;
                    $this->graph = '';
                }
                // Write prefix
                $this->write('@prefix '.$prefix.': <'.$iri.">.\n");
            }
        }
        // Recreate the prefix matcher
        if ($hasPrefixes) {
            $IRIlist = '';
            $prefixList = '';
            foreach ($this->prefixIRIs as $prefixIRI => $iri) {
                // Quote both alternatives for use inside the '%'-delimited pattern below.
                // The former escaping step replaced every special character by itself,
                // so '.' matched any character and a '%' in an IRI ended the pattern.
                $IRIlist .= ($IRIlist ? '|' : '').preg_quote($prefixIRI, '%');
                $prefixList .= ($prefixList ? '|' : '').preg_quote($iri, '%');
            }
            $this->prefixRegex = '%^(?:'.$prefixList.')[^/]*$|'.'^('.$IRIlist.')([a-zA-Z][\\-_a-zA-Z0-9]*)$%';
        }
        // End a prefix block with a newline
        $this->write($hasPrefixes ? "\n" : '');
    }

    /**
     * creates a blank node with the given content
     *
     * Accepts no arguments (empty node), a predicate string plus an object, a
     * single ['predicate' => ..., 'object' => ...] array, or a list of such arrays.
     *
     * @param string|array|null $object
     */
    public function blank($predicate = null, $object = null): string
    {
        $children = $predicate;
        $child = '';
        $length = '';
        // Empty blank node
        if (!isset($predicate)) {
            $children = [];
        }
        // Blank node passed as blank("$predicate", "object")
        elseif (\is_string($predicate)) {
            $children = [['predicate' => $predicate, 'object' => $object]];
        }
        // Blank node passed as blank({ predicate: $predicate, object: $object })
        elseif (\is_array($predicate) && isset($predicate['predicate'])) {
            $children = [$predicate];
        }

        switch ($length = \count($children)) {
            case 0:
                // Generate an empty blank node
                return '[]';
            case 1:
                // Generate a non-nested one-triple blank node
                $child = $children[0];
                if ('[' !== $child['object'][0]) {
                    return '[ '.$this->encodePredicate($child['predicate']).' '.
                        $this->encodeObject($child['object']).' ]';
                }
                // no break
            default:
                // Generate a multi-triple or nested blank node
                $contents = '[';
                // Write all triples in order
                for ($i = 0; $i < $length; ++$i) {
                    $child = $children[$i];
                    // Write only the object if the $predicate is the same as the previous
                    if ($child['predicate'] === $predicate) {
                        $contents .= ', '.$this->encodeObject($child['object']);
                    }
                    // Otherwise, write the $predicate and the object
                    else {
                        $contents .= ($i ? ";\n " : "\n ").
                            $this->encodePredicate($child['predicate']).' '.
468 | $this->encodeObject($child['object']); 469 | $predicate = $child['predicate']; 470 | } 471 | } 472 | 473 | return $contents."\n]"; 474 | } 475 | } 476 | 477 | /** 478 | * creates a list node with the given content 479 | * 480 | * @param array $elements 481 | */ 482 | public function addList(array $elements = []): string 483 | { 484 | $length = \count($elements); 485 | $contents = []; 486 | for ($i = 0; $i < $length; ++$i) { 487 | $contents[$i] = $this->encodeObject($elements[$i]); 488 | } 489 | 490 | return '('.implode(' ', $contents).')'; 491 | } 492 | 493 | /** 494 | * Signals the end of the output stream: finishes a pending triple, passes the buffered string to the read callback if one is set (otherwise returns it), and blocks further writing 495 | */ 496 | public function end(): ?string 497 | { 498 | // Finish a possible pending triple 499 | if (null !== $this->subject) { 500 | $this->write($this->graph ? "\n}\n" : ".\n"); 501 | $this->subject = null; 502 | } 503 | if (isset($this->readCallback)) { 504 | \call_user_func($this->readCallback, $this->string); 505 | } 506 | 507 | // Disallow further writing 508 | $this->blocked = true; 509 | if (!isset($this->readCallback)) { 510 | return $this->string; 511 | } 512 | 513 | return null; 514 | } 515 | } 516 | -------------------------------------------------------------------------------- /src/Util.php: -------------------------------------------------------------------------------- 1 | $triple 51 | */ 52 | public static function inDefaultGraph(array $triple): bool 53 | { 54 | return !$triple['graph']; 55 | } 56 | 57 | /** 58 | * Gets the string value of a literal in the N3 library 59 | * 60 | * @return string|int|float|null 61 | */ 62 | public static function getLiteralValue(string $literal) 63 | { 64 | preg_match('/^"(.*)"/s', $literal, $match); //TODO: somehow the copied regex did not work. To be checked. Contained [^] 65 | if (empty($match)) { 66 | throw new \Exception($literal.'
is not a literal'); 67 | } 68 | 69 | return $match[1]; 70 | } 71 | 72 | // Gets the type of a literal in the N3 library 73 | public static function getLiteralType(string $literal): string 74 | { 75 | preg_match('/^".*"(?:\^\^([^"]+)|(@)[^@"]+)?$/s', $literal, $match); //TODO: somehow the copied regex did not work. To be checked. Contained [^] instead of the . 76 | if (empty($match)) { 77 | throw new \Exception($literal.' is not a literal'); 78 | } 79 | if (!empty($match[1])) { 80 | return $match[1]; 81 | } else { 82 | return !empty($match[2]) ? self::RDFLANGSTRING : self::XSDSTRING; 83 | } 84 | } 85 | 86 | // Gets the language of a literal in the N3 library 87 | public static function getLiteralLanguage(string $literal): string 88 | { 89 | preg_match('/^".*"(?:@([^@"]+)|\^\^[^"]+)?$/s', $literal, $match); 90 | if (empty($match)) { 91 | throw new \Exception($literal.' is not a literal'); 92 | } 93 | 94 | return isset($match[1]) ? strtolower($match[1]) : ''; 95 | } 96 | 97 | /** 98 | * Tests whether the given entity ($triple object) represents a prefixed name 99 | */ 100 | public static function isPrefixedName(?string $term): bool 101 | { 102 | return !empty($term) && preg_match("/^[^:\/\"']*:[^:\/\"']+$/", $term); 103 | } 104 | 105 | /** 106 | * Expands the prefixed name to a full IRI (also when it occurs as a literal's type) 107 | * 108 | * @param array|null $prefixes 109 | */ 110 | public static function expandPrefixedName(string $prefixedName, ?array $prefixes = null): string 111 | { 112 | preg_match("/(?:^|\"\^\^)([^:\/#\"'\^_]*):[^\/]*$/", $prefixedName, $match, PREG_OFFSET_CAPTURE); 113 | $prefix = ''; 114 | $base = ''; 115 | $index = ''; 116 | 117 | if (!empty($match)) { 118 | $prefix = $match[1][0]; 119 | $base = ''; 120 | if (isset($prefixes[$prefix])) { 121 | $base = $prefixes[$prefix]; 122 | } else { 123 | $base = null; 124 | } 125 | $index = $match[1][1]; 126 | } 127 | if (!$base) { 128 | return $prefixedName; 129 | } 130 | 131 | // The match index is 
non-zero when expanding a literal's type 132 | if (0 === $index) { 133 | // base + prefixedName.substr(prefix.length + 1) 134 | return $base.substr($prefixedName, \strlen($prefix) + 1); 135 | } else { 136 | // prefixedName.substr(0, index + 3) + base + prefixedName.substr(index + prefix.length + 4); 137 | return substr($prefixedName, 0, $index).$base.substr($prefixedName, $index + \strlen($prefix) + 1); 138 | } 139 | } 140 | 141 | /** 142 | * Creates an IRI 143 | * 144 | * @return float|int|string|null 145 | */ 146 | public static function createIRI(?string $iri) 147 | { 148 | return !empty($iri) && '"' === substr($iri, 0, 1) ? self::getLiteralValue($iri) : $iri; 149 | } 150 | 151 | /** 152 | * Creates a literal 153 | * 154 | * @param string|null $modifier 155 | */ 156 | public static function createLiteral($value, $modifier = null): string 157 | { 158 | if (!$modifier) { 159 | switch (\gettype($value)) { 160 | case 'boolean': 161 | $value = $value ? 'true' : 'false'; 162 | $modifier = self::XSDBOOLEAN; 163 | break; 164 | case 'integer': 165 | $modifier = self::XSDINTEGER; 166 | break; 167 | case 'double': 168 | $modifier = self::XSDDOUBLE; 169 | break; 170 | case 'float': 171 | $modifier = self::XSDFLOAT; 172 | break; 173 | default: 174 | return '"'.$value.'"'; 175 | } 176 | } 177 | 178 | $result = '"'.$value; 179 | 180 | if (preg_match('/^[a-z]+(-[a-z0-9]+)*$/i', $modifier)) { 181 | $result .= '"@'.strtolower($modifier); 182 | } else { 183 | $result .= '"^^'.$modifier; 184 | } 185 | 186 | return $result; 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /test/TriGParserIteratorTest.php: -------------------------------------------------------------------------------- 1 | "foo baz"@en . 15 | "baz foo"@de . 
16 | IN 17 | ); 18 | fseek($input, 0); 19 | $parser = new TriGParserIterator(); 20 | $iterator = $parser->parseStream($input); 21 | $this->assertInstanceOf(\Iterator::class, $iterator); 22 | $values = iterator_to_array($iterator); 23 | $this->assertCount(2, $values); 24 | fclose($input); 25 | } 26 | 27 | public function testString(): void 28 | { 29 | $input = << "foo baz"@en . 31 | "baz foo"@de . 32 | IN; 33 | $parser = new TriGParserIterator(); 34 | $iterator = $parser->parse($input); 35 | $this->assertInstanceOf(\Iterator::class, $iterator); 36 | $values = iterator_to_array($iterator); 37 | $this->assertCount(2, $values); 38 | } 39 | 40 | public function testRepeat(): void 41 | { 42 | $input = << "foo baz"@en . 44 | "baz foo"@de . 45 | IN; 46 | $parser = new TriGParserIterator(); 47 | 48 | $iterator = $parser->parse($input); 49 | $this->assertInstanceOf(\Iterator::class, $iterator); 50 | $values = iterator_to_array($iterator); 51 | $this->assertCount(2, $values); 52 | 53 | $input = << "foo baz"@en . 55 | "baz foo"@de . 56 | _:genid1 . 57 | IN; 58 | $iterator = $parser->parse($input); 59 | $this->assertInstanceOf(\Iterator::class, $iterator); 60 | $values = iterator_to_array($iterator); 61 | $this->assertCount(3, $values); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /test/TriGWriterTest.php: -------------------------------------------------------------------------------- 1 | $prefixes]); 22 | for ($i; $i < $numargs - 1; ++$i) { 23 | /** 24 | * @var array 25 | */ 26 | $item = func_get_arg($i); 27 | 28 | /** 29 | * @var string|null 30 | */ 31 | $g = isset($item[3]) ? 
$item[3] : null; 32 | 33 | $writer->addTriple(['subject' => $item[0], 'predicate' => $item[1], 'object' => $item[2], 'graph' => $g]); 34 | } 35 | $output = $writer->end(); 36 | 37 | $this->assertEquals($expectedResult, $output); 38 | } 39 | 40 | private function shouldNotSerialize(): void 41 | { 42 | $numargs = \func_num_args(); 43 | $errorMessage = func_get_arg($numargs - 1); 44 | 45 | $this->expectException(Exception::class); 46 | $this->expectExceptionMessage($errorMessage); 47 | 48 | $writer = new TrigWriter(); 49 | for ($i = 0; $i < $numargs - 1; ++$i) { 50 | /** 51 | * @var array 52 | */ 53 | $item = func_get_arg($i); 54 | 55 | /** 56 | * @var string|null 57 | */ 58 | $g = isset($item[3]) ? $item[3] : null; 59 | 60 | $writer->addTriple(['subject' => $item[0], 'predicate' => $item[1], 'object' => $item[2], 'graph' => $g]); 61 | } 62 | $output = $writer->end(); 63 | } 64 | 65 | public function testZeroOrMoreTriples(): void 66 | { 67 | //should serialize 0 triples', 68 | $this->shouldSerialize(''); 69 | //should serialize 1 triple', 70 | $this->shouldSerialize(['abc', 'def', 'ghi'], 71 | ' .'."\n"); 72 | 73 | //should serialize 2 triples', 74 | $this->shouldSerialize(['abc', 'def', 'ghi'], 75 | ['jkl', 'mno', 'pqr'], 76 | ' .'."\n". 77 | ' .'."\n"); 78 | 79 | //should serialize 3 triples', 80 | $this->shouldSerialize(['abc', 'def', 'ghi'], 81 | ['jkl', 'mno', 'pqr'], 82 | ['stu', 'vwx', 'yz'], 83 | ' .'."\n". 84 | ' .'."\n". 
85 | ' .'."\n"); 86 | } 87 | 88 | public function testLiterals(): void 89 | { 90 | //should serialize a literal', 91 | $this->shouldSerialize(['a', 'b', '"cde"'], 92 | ' "cde".'."\n"); 93 | 94 | //should serialize a literal with a type', 95 | $this->shouldSerialize(['a', 'b', '"cde"^^fgh'], 96 | ' "cde"^^.'."\n"); 97 | 98 | //should serialize a literal with a language', 99 | $this->shouldSerialize(['a', 'b', '"cde"@en-us'], 100 | ' "cde"@en-us.'."\n"); 101 | 102 | //should serialize a literal containing a single quote', 103 | $this->shouldSerialize(['a', 'b', '"c\'de"'], 104 | ' "c\'de".'."\n"); 105 | 106 | //should serialize a literal containing a double quote', 107 | $this->shouldSerialize(['a', 'b', '"c"de"'], 108 | ' "c\\"de".'."\n"); 109 | 110 | //should serialize a literal containing a backslash' 111 | $this->shouldSerialize(['a', 'b', '"c\\de"'], 112 | ' "c\\\\de".'."\n"); 113 | 114 | //should serialize a literal containing a tab character', 115 | $this->shouldSerialize(['a', 'b', "\"c\tde\""], 116 | " \"c\\tde\".\n"); 117 | 118 | //should serialize a literal containing a newline character', 119 | /* shouldSerialize(['a', 'b', '"c\nde"'], 120 | ' "c\\nde".\n'));*/ 121 | $this->shouldSerialize(['a', 'b', '"c'."\n".'de"'], 122 | ' "c\\nde".'."\n"); 123 | 124 | //should serialize a literal containing a cariage return character', 125 | $this->shouldSerialize(['a', 'b', '"c'."\r".'de"'], 126 | ' "c\\rde".'."\n"); 127 | 128 | //should serialize a literal containing a backspace character', 129 | $this->shouldSerialize(['a', 'b', '"c'.\chr(8).'de"'], 130 | ' "'."c\bde".'".'."\n"); //→ TODO: Doesn’t work properly 131 | 132 | //should serialize a literal containing a form feed character', 133 | $this->shouldSerialize(['a', 'b', '"c'."\f".'de"'], 134 | ' "c\\fde".'."\n"); 135 | 136 | //should serialize a literal containing a line separator 137 | $this->shouldSerialize(['a', 'b', "\"c\u{2028}de\""], ' "c'."\u{2028}".'de".'."\n"); 138 | } 139 | 140 | public function 
testBlankNodes(): void 141 | { 142 | //should serialize blank nodes', 143 | $this->shouldSerialize(['_:a', 'b', '_:c'], 144 | '_:a _:c.'."\n"); 145 | } 146 | 147 | public function testWrongLiterals(): void 148 | { 149 | //should not serialize a literal in the subject', 150 | $this->shouldNotSerialize(['"a"', 'b', '"c"'], 151 | 'A literal as subject is not allowed: "a"'); 152 | 153 | //should not serialize a literal in the predicate', 154 | $this->shouldNotSerialize(['a', '"b"', '"c"'], 155 | 'A literal as predicate is not allowed: "b"'); 156 | 157 | //should not serialize an invalid object literal', 158 | $this->shouldNotSerialize(['a', 'b', '"c'], 159 | 'Invalid literal: "c'); 160 | } 161 | 162 | public function testPrefixes(): void 163 | { 164 | //should not leave leading whitespace if the prefix set is empty', 165 | $this->shouldSerialize(['prefixes' => []], 166 | ['a', 'b', 'c'], 167 | ' .'."\n"); 168 | 169 | //should serialize valid prefixes', 170 | $this->shouldSerialize(['prefixes' => ['a' => 'http://a.org/', 'b' => 'http://a.org/b#', 'c' => 'http://a.org/b']], 171 | '@prefix a: .'."\n". 172 | '@prefix b: .'."\n"."\n"); 173 | 174 | //should use prefixes when possible', 175 | $this->shouldSerialize(['prefixes' => ['a' => 'http://a.org/', 'b' => 'http://a.org/b#', 'c' => 'http://a.org/b']], 176 | ['http://a.org/bc', 'http://a.org/b#ef', 'http://a.org/bhi'], 177 | ['http://a.org/bc/de', 'http://a.org/b#e#f', 'http://a.org/b#x/t'], 178 | ['http://a.org/3a', 'http://a.org/b#3a', 'http://a.org/b#a3'], 179 | '@prefix a: .'."\n". 180 | '@prefix b: .'."\n"."\n". 181 | 'a:bc b:ef a:bhi.'."\n". 182 | ' .'."\n". 183 | ' b:a3.'."\n"); 184 | 185 | //should expand prefixes when possible', 186 | $this->shouldSerialize(['prefixes' => ['a' => 'http://a.org/', 'b' => 'http://a.org/b#']], 187 | ['a:bc', 'b:ef', 'c:bhi'], 188 | '@prefix a: .'."\n". 189 | '@prefix b: .'."\n"."\n". 
190 | 'a:bc b:ef .'."\n"); 191 | } 192 | 193 | public function testRepitition(): void 194 | { 195 | //should not repeat the same subjects', 196 | $this->shouldSerialize(['abc', 'def', 'ghi'], 197 | ['abc', 'mno', 'pqr'], 198 | ['stu', 'vwx', 'yz'], 199 | ' ;'."\n". 200 | ' .'."\n". 201 | ' .'."\n"); 202 | 203 | //should not repeat the same predicates', 204 | $this->shouldSerialize(['abc', 'def', 'ghi'], 205 | ['abc', 'def', 'pqr'], 206 | ['abc', 'bef', 'ghi'], 207 | ['abc', 'bef', 'pqr'], 208 | ['stu', 'bef', 'yz'], 209 | ' , ;'."\n". 210 | ' , .'."\n". 211 | ' .'."\n"); 212 | } 213 | 214 | public function testRdfType(): void 215 | { 216 | //should write rdf:type as "a"', 217 | $this->shouldSerialize(['abc', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'def'], 218 | ' a .'."\n"); 219 | } 220 | 221 | public function testQuads(): void 222 | { 223 | //should serialize a graph with 1 triple', 224 | $this->shouldSerialize(['abc', 'def', 'ghi', 'xyz'], 225 | ' {'."\n". 226 | ' '."\n". 227 | '}'."\n"); 228 | 229 | //should serialize a graph with 3 triples', 230 | $this->shouldSerialize(['abc', 'def', 'ghi', 'xyz'], 231 | ['jkl', 'mno', 'pqr', 'xyz'], 232 | ['stu', 'vwx', 'yz', 'xyz'], 233 | ' {'."\n". 234 | ' .'."\n". 235 | ' .'."\n". 236 | ' '."\n". 237 | '}'."\n"); 238 | 239 | //should serialize three graphs', 240 | $this->shouldSerialize(['abc', 'def', 'ghi', 'xyz'], 241 | ['jkl', 'mno', 'pqr', ''], 242 | ['stu', 'vwx', 'yz', 'abc'], 243 | ' {'."\n".' '."\n".'}'."\n". 244 | ' .'."\n". 245 | ' {'."\n".' 
'."\n".'}'."\n"); 246 | } 247 | 248 | public function testCallbackOnEnd(): void 249 | { 250 | //sends output through end 251 | $writer = new TriGWriter(); 252 | $writer->addTriple(['subject' => 'a', 'predicate' => 'b', 'object' => 'c']); 253 | $output = $writer->end(); 254 | $this->assertEquals(" .\n", $output); 255 | } 256 | 257 | public function testRespectingPrefixes(): void 258 | { 259 | //respects the prefixes argument when no stream argument is given', function (done) { 260 | $writer = new TriGWriter(['prefixes' => ['a' => 'b#']]); 261 | $writer->addTriple(['subject' => 'b#a', 'predicate' => 'b#b', 'object' => 'b#c']); 262 | $output = $writer->end(); 263 | $this->assertEquals("@prefix a: .\n\na:a a:b a:c.\n", $output); 264 | } 265 | 266 | public function testOtherPrefixes(): void 267 | { 268 | //does not repeat identical prefixes', function (done) { 269 | $writer = new TriGWriter(); 270 | $writer->addPrefix('a', 'b#'); 271 | $writer->addPrefix('a', 'b#'); 272 | $writer->addTriple(['subject' => 'b#a', 'predicate' => 'b#b', 'object' => 'b#c']); 273 | $writer->addPrefix('a', 'b#'); 274 | $writer->addPrefix('a', 'b#'); 275 | $writer->addPrefix('b', 'b#'); 276 | $writer->addPrefix('a', 'c#'); 277 | $output = $writer->end(); 278 | $this->assertEquals('@prefix a: .'."\n"."\n".'a:a a:b a:c.'."\n".'@prefix b: .'."\n"."\n".'@prefix a: .'."\n"."\n", $output); 279 | 280 | //serializes triples of a graph with a prefix declaration in between', function (done) { 281 | $writer = new TriGWriter(); 282 | $writer->addPrefix('a', 'b#'); 283 | $writer->addTriple(['subject' => 'b#a', 'predicate' => 'b#b', 'object' => 'b#c', 'graph' => 'b#g']); 284 | $writer->addPrefix('d', 'e#'); 285 | $writer->addTriple(['subject' => 'b#a', 'predicate' => 'b#b', 'object' => 'b#d', 'graph' => 'b#g']); 286 | $output = $writer->end(); 287 | $this->assertEquals('@prefix a: .'."\n"."\n".'a:g {'."\n".'a:a a:b a:c'."\n".'}'."\n".'@prefix d: .'."\n"."\n".'a:g {'."\n".'a:a a:b a:d'."\n".'}'."\n", 
$output); 288 | 289 | //should accept triples with separated components', function (done) { 290 | $writer = new TriGWriter(); 291 | $writer->addTriple('a', 'b', 'c'); 292 | $writer->addTriple('a', 'b', 'd'); 293 | $output = $writer->end(); 294 | $this->assertEquals(' , .'."\n", $output); 295 | 296 | //should accept quads with separated components', function (done) { 297 | $writer = new TriGWriter(); 298 | $writer->addTriple('a', 'b', 'c', 'g'); 299 | $writer->addTriple('a', 'b', 'd', 'g'); 300 | $output = $writer->end(); 301 | $this->assertEquals(' {'."\n".' , '."\n".'}'."\n", $output); 302 | } 303 | 304 | public function testBlankNodes2(): void 305 | { 306 | //should serialize triples with an empty blank node as object', function (done) { 307 | $writer = new TriGWriter(); 308 | $writer->addTriple('a1', 'b', $writer->blank()); 309 | $writer->addTriple('a2', 'b', $writer->blank([])); 310 | $output = $writer->end(); 311 | $this->assertEquals(' [].'."\n".' [].'."\n", $output); 312 | 313 | //should serialize triples with a one-triple blank node as object', function (done) { 314 | $writer = new TriGWriter(); 315 | $writer->addTriple('a1', 'b', $writer->blank('d', 'e')); 316 | $writer->addTriple('a2', 'b', $writer->blank(['predicate' => 'd', 'object' => 'e'])); 317 | $writer->addTriple('a3', 'b', $writer->blank([['predicate' => 'd', 'object' => 'e']])); 318 | $output = $writer->end(); 319 | $this->assertEquals(' [ ].'."\n".' [ ].'."\n".' [ ].'."\n", $output); 320 | 321 | //should serialize triples with a two-triple blank node as object', function (done) { 322 | $writer = new TriGWriter(); 323 | $writer->addTriple('a', 'b', $writer->blank([ 324 | ['predicate' => 'd', 'object' => 'e'], 325 | ['predicate' => 'f', 'object' => '"g"'], 326 | ])); 327 | $output = $writer->end(); 328 | $this->assertEquals(' ['."\n".' ;'."\n".' 
"g"'."\n".'].'."\n", $output); 329 | 330 | //should serialize triples with a three-triple blank node as object', function (done) { 331 | $writer = new TriGWriter(); 332 | $writer->addTriple('a', 'b', $writer->blank([ 333 | ['predicate' => 'd', 'object' => 'e'], 334 | ['predicate' => 'f', 'object' => '"g"'], 335 | ['predicate' => 'h', 'object' => 'i'], 336 | ])); 337 | $output = $writer->end(); 338 | $this->assertEquals(' ['."\n".' ;'."\n".' "g";'."\n".' '."\n".'].'."\n", $output); 339 | 340 | //should serialize triples with predicate-sharing blank node triples as object', function (done) { 341 | $writer = new TriGWriter(); 342 | $writer->addTriple('a', 'b', $writer->blank([ 343 | ['predicate' => 'd', 'object' => 'e'], 344 | ['predicate' => 'd', 'object' => 'f'], 345 | ['predicate' => 'g', 'object' => 'h'], 346 | ['predicate' => 'g', 'object' => 'i'], 347 | ])); 348 | $output = $writer->end(); 349 | $this->assertEquals(' ['."\n".' , ;'."\n".' , '."\n".'].'."\n", $output); 350 | 351 | //should serialize triples with nested blank nodes as object', function (done) { 352 | $writer = new TriGWriter(); 353 | $writer->addTriple('a1', 'b', $writer->blank([ 354 | ['predicate' => 'd', 'object' => $writer->blank()], 355 | ])); 356 | $writer->addTriple('a2', 'b', $writer->blank([ 357 | ['predicate' => 'd', 'object' => $writer->blank('e', 'f')], 358 | ['predicate' => 'g', 'object' => $writer->blank('h', '"i"')], 359 | ])); 360 | $writer->addTriple('a3', 'b', $writer->blank([ 361 | ['predicate' => 'd', 'object' => $writer->blank([ 362 | ['predicate' => 'g', 'object' => $writer->blank('h', 'i')], 363 | ['predicate' => 'j', 'object' => $writer->blank('k', '"l"')], 364 | ])], 365 | ])); 366 | $output = $writer->end(); 367 | $this->assertEquals(' ['."\n".' []'."\n".'].'."\n".' ['."\n".' [ ];'."\n".' [ "i" ]'."\n".'].'."\n".' ['."\n".' ['."\n".' [ ];'."\n".' 
[ "l" ]'."\n".']'."\n".'].'."\n", $output); 368 | 369 | //should serialize triples with an empty blank node as subject', function (done) { 370 | $writer = new TriGWriter(); 371 | $writer->addTriple($writer->blank(), 'b', 'c'); 372 | $writer->addTriple($writer->blank([]), 'b', 'c'); 373 | $output = $writer->end(); 374 | $this->assertEquals('[] .'."\n".'[] .'."\n", $output); 375 | 376 | //should serialize triples with a one-triple blank node as subject', function (done) { 377 | $writer = new TriGWriter(); 378 | $writer->addTriple($writer->blank('a', 'b'), 'c', 'd'); 379 | $writer->addTriple($writer->blank(['predicate' => 'a', 'object' => 'b']), 'c', 'd'); 380 | $writer->addTriple($writer->blank([['predicate' => 'a', 'object' => 'b']]), 'c', 'd'); 381 | $output = $writer->end(); 382 | $this->assertEquals( 383 | '[ ] .'."\n".'[ ] .'."\n".'[ ] .'."\n", 384 | $output 385 | ); 386 | 387 | //should serialize triples with an empty blank node as graph', function (done) { 388 | $writer = new TriGWriter(); 389 | $writer->addTriple('a', 'b', 'c', $writer->blank()); 390 | $writer->addTriple('a', 'b', 'c', $writer->blank([])); 391 | $output = $writer->end(); 392 | $this->assertEquals( 393 | '[] {'."\n".' '."\n".'}'."\n".'[] {'."\n".' '."\n".'}'."\n", 394 | $output 395 | ); 396 | } 397 | 398 | public function testLists(): void 399 | { 400 | //should serialize triples with an empty list as object', function (done) { 401 | $writer = new TriGWriter(); 402 | $writer->addTriple('a1', 'b', $writer->addList()); 403 | $writer->addTriple('a2', 'b', $writer->addList([])); 404 | $output = $writer->end(); 405 | $this->assertEquals(' ().'.PHP_EOL.' ().'.PHP_EOL, $output); 406 | 407 | //should serialize triples with a one-element list as object', function (done) { 408 | $writer = new TriGWriter(); 409 | $writer->addTriple('a1', 'b', $writer->addList(['c'])); 410 | $writer->addTriple('a2', 'b', $writer->addList(['"c"'])); 411 | $output = $writer->end(); 412 | $this->assertEquals(' ().'."\n".' 
("c").'."\n", $output); 413 | 414 | //should serialize triples with a three-element list as object', function (done) { 415 | $writer = new TriGWriter(); 416 | $writer->addTriple('a1', 'b', $writer->addList(['c', 'd', 'e'])); 417 | $writer->addTriple('a2', 'b', $writer->addList(['"c"', '"d"', '"e"'])); 418 | $output = $writer->end(); 419 | $this->assertEquals(' ( ).'."\n".' ("c" "d" "e").'."\n", $output); 420 | 421 | //should serialize triples with an empty list as subject', function (done) { 422 | $writer = new TriGWriter(); 423 | $writer->addTriple($writer->addList(), 'b1', 'c'); 424 | $writer->addTriple($writer->addList([]), 'b2', 'c'); 425 | $output = $writer->end(); 426 | $this->assertEquals('() ;'."\n".' .'."\n", $output); 427 | 428 | //should serialize triples with a one-element list as subject', function (done) { 429 | $writer = new TriGWriter(); 430 | $writer->addTriple($writer->addList(['a']), 'b1', 'c'); 431 | $writer->addTriple($writer->addList(['a']), 'b2', 'c'); 432 | $output = $writer->end(); 433 | $this->assertEquals('() ;'."\n".' 
.'."\n", $output); 434 | 435 | //should serialize triples with a three-element list as subject', function (done) { 436 | $writer = new TriGWriter(); 437 | $writer->addTriple($writer->addList(['a', '"b"', '"c"']), 'd', 'e'); 438 | $output = $writer->end(); 439 | $this->assertEquals('( "b" "c") .'."\n", $output); 440 | } 441 | 442 | public function testPartialRead(): void 443 | { 444 | //should only partially output the already given data and then continue writing until end 445 | $writer = new TriGWriter(); 446 | $writer->addTriple($writer->addList(['a', '"b"', '"c"']), 'd', 'e'); 447 | $output = $writer->read(); 448 | $this->assertEquals('( "b" "c") ', $output); 449 | 450 | $writer->addTriple('a', 'b', 'c'); 451 | $output = $writer->end(); 452 | $this->assertEquals(".\n .\n", $output); 453 | } 454 | 455 | public function testTriplesBulk(): void 456 | { 457 | //should accept triples in bulk', function (done) { 458 | $writer = new TriGWriter(); 459 | $writer->addTriples( 460 | [ 461 | ['subject' => 'a', 'predicate' => 'b', 'object' => 'c'], 462 | ['subject' => 'a', 'predicate' => 'b', 'object' => 'd'], 463 | ] 464 | ); 465 | $output = $writer->end(); 466 | $this->assertEquals(' , .'."\n", $output); 467 | } 468 | 469 | public function testNTriples(): void 470 | { 471 | //should write simple triples in N-Triples mode', function (done) { 472 | $writer = new TriGWriter(['format' => 'N-Triples']); 473 | $writer->addTriple('a', 'b', 'c'); 474 | $writer->addTriple('a', 'b', 'd'); 475 | $output = $writer->end(); 476 | $this->assertEquals(' .'."\n".' 
.'."\n", $output); 477 | } 478 | } 479 | -------------------------------------------------------------------------------- /test/UtilTest.php: -------------------------------------------------------------------------------- 1 | assertIsBool(Util::isIRI('http://test.be')); 13 | $this->assertTrue( 14 | Util::isIRI('http://test.be') 15 | ); 16 | $this->assertFalse( 17 | Util::isIRI('"http://test.be"') 18 | ); 19 | //Does not match a blank node 20 | $this->assertFalse( 21 | Util::isIRI('_:A') 22 | ); 23 | $this->assertFalse(Util::isIRI(null)); 24 | } 25 | 26 | public function testIsLiteral(): void 27 | { 28 | $this->assertTrue(Util::isLiteral('"http://example.org/"')); 29 | $this->assertTrue(Util::isLiteral('"English"@en')); 30 | // it matches a literal with a language that contains a number 31 | $this->assertTrue(Util::isLiteral('"English"@es-419')); 32 | // it matches a literal with a type 33 | $this->assertTrue(Util::isLiteral('"3"^^http://www.w3.org/2001/XMLSchema#integer')); 34 | // it matches a literal with a newline 35 | $this->assertTrue(Util::isLiteral('"a\nb"')); 36 | // it matches a literal with a cariage return 37 | $this->assertTrue(Util::isLiteral('"a\rb"')); 38 | // it does not match an IRI 39 | $this->assertFalse(Util::isLiteral('http://example.org/')); 40 | // it does not match a blank node 41 | $this->assertFalse(Util::isLiteral('_:x')); 42 | // it does not match null 43 | $this->assertFalse(Util::isLiteral(null)); 44 | } 45 | 46 | public function testIsBlank(): void 47 | { 48 | // it matches a blank node 49 | $this->assertTrue(Util::isBlank('_:x')); 50 | // it does not match an IRI 51 | $this->assertFalse(Util::isBlank('http://example.org/')); 52 | // it does not match a literal 53 | $this->assertFalse(Util::isBlank('"http://example.org/"')); 54 | $this->assertFalse(Util::isBlank(null)); 55 | } 56 | 57 | public function testIsDefaultGraph(): void 58 | { 59 | $this->assertFalse(Util::isDefaultGraph('_:x')); 60 | 
$this->assertFalse(Util::isDefaultGraph('http://example.org/')); 61 | $this->assertFalse(Util::isDefaultGraph('"http://example.org/"')); 62 | // it matches null 63 | $this->assertTrue(Util::isDefaultGraph(null)); 64 | // it matches the empty string 65 | $this->assertTrue(Util::isDefaultGraph('')); 66 | } 67 | 68 | public function testinDefaultGraph(): void 69 | { 70 | // it does not match a blank node 71 | $this->assertFalse(Util::inDefaultGraph(['graph' => '_:x'])); 72 | // it does not match an IRI 73 | $this->assertFalse(Util::inDefaultGraph(['graph' => 'http://example.org/'])); 74 | // it does not match a literal 75 | $this->assertFalse(Util::inDefaultGraph(['graph' => '"http://example.org/"'])); 76 | // it matches null 77 | $this->assertTrue(Util::inDefaultGraph(['graph' => null])); 78 | // it matches the empty string 79 | $this->assertTrue(Util::inDefaultGraph(['graph' => ''])); 80 | } 81 | 82 | public function testGetLiteralValue(): void 83 | { 84 | // it gets the value of a literal 85 | $this->assertEquals('Mickey', Util::getLiteralValue('"Mickey"')); 86 | 87 | // it gets the value of a literal with a language 88 | $this->assertEquals('English', Util::getLiteralValue('"English"@en')); 89 | 90 | // it gets the value of a literal with a language that contains a number 91 | $this->assertEquals('English', Util::getLiteralValue('"English"@es-419')); 92 | 93 | // it gets the value of a literal with a type 94 | $this->assertEquals('3', Util::getLiteralValue('"3"^^http://www.w3.org/2001/XMLSchema#integer')); 95 | 96 | // it gets the value of a literal with a newline 97 | $this->assertEquals('Mickey\nMouse', Util::getLiteralValue('"Mickey\nMouse"')); 98 | 99 | // it gets the value of a literal with a carriage return 100 | $this->assertEquals('Mickey\rMouse', Util::getLiteralValue('"Mickey\rMouse"')); 101 | 102 | $this->assertEquals("foo\nbar", Util::getLiteralValue('"' . "foo\nbar" . 
'"')); 103 | 104 | // it does not work with non-literals 105 | //TODO: Util::getLiteralValue.bind(null, 'http://ex.org/').should.throw('http://ex.org/ is not a literal'); 106 | 107 | // it does not work with null 108 | //TODO: Util::getLiteralValue.bind(null, null).should.throw('null is not a literal'); 109 | } 110 | 111 | // tests reaction if no literal was given 112 | public function testGetLiteralValueNoLiteralGiven(): void 113 | { 114 | $this->expectException('\Exception'); 115 | 116 | Util::getLiteralValue('invalid'); 117 | } 118 | 119 | public function testGetLiteralType(): void 120 | { 121 | // it gets the type of a literal 122 | $this->assertEquals('http://www.w3.org/2001/XMLSchema#string', Util::getLiteralType('"Mickey"')); 123 | 124 | // it gets the type of a literal with a language 125 | $this->assertEquals('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString', Util::getLiteralType('"English"@en')); 126 | 127 | // it gets the type of a literal with a language that contains a number 128 | $this->assertEquals('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString', Util::getLiteralType('"English"@es-419')); 129 | 130 | // it gets the type of a literal with a type 131 | $this->assertEquals('http://www.w3.org/2001/XMLSchema#integer', Util::getLiteralType('"3"^^http://www.w3.org/2001/XMLSchema#integer')); 132 | 133 | // it gets the type of a literal with a newline 134 | $this->assertEquals('abc', Util::getLiteralType('"Mickey\nMouse"^^abc')); 135 | 136 | // it gets the type of a literal with a carriage return 137 | $this->assertEquals('abc', Util::getLiteralType('"Mickey\rMouse"^^abc')); 138 | 139 | // it does not work with non-literals 140 | //TODO: Util::getLiteralType.bind(null, 'http://example.org/').should.throw('http://example.org/ is not a literal'); 141 | 142 | // it does not work with null 143 | //TODO: Util::getLiteralType.bind(null, null).should.throw('null is not a literal'); 144 | } 145 | 146 | // tests getLiteralType if multi-line string was 
given (check for adaptation of Util.php, 147 | // adding an s to the regex) 148 | public function testGetLiteralTypeMultilineString(): void 149 | { 150 | $literal = '"This document is published by the Provenance Working Group (http://www.w3.org/2011/prov/wiki/Main_Page). 151 | 152 | If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All feedback is welcome."^^'; 153 | 154 | $this->assertEquals('', Util::getLiteralType($literal)); 155 | } 156 | 157 | public function testGetLiteralLanguage(): void 158 | { 159 | // it gets the language of a literal 160 | $this->assertEquals('', Util::getLiteralLanguage('"Mickey"')); 161 | 162 | // it gets the language of a literal with a language 163 | $this->assertEquals('en', Util::getLiteralLanguage('"English"@en')); 164 | 165 | // it gets the language of a literal with a language that contains a number 166 | $this->assertEquals('es-419', Util::getLiteralLanguage('"English"@es-419')); 167 | 168 | // it normalizes the language to lowercase 169 | $this->assertEquals('en-gb', Util::getLiteralLanguage('"English"@en-GB')); 170 | 171 | // it gets the language of a literal with a type 172 | $this->assertEquals('', Util::getLiteralLanguage('"3"^^http://www.w3.org/2001/XMLSchema#integer')); 173 | 174 | // it gets the language of a literal with a newline 175 | $this->assertEquals('en', Util::getLiteralLanguage('"Mickey\nMouse"@en')); 176 | 177 | // it gets the language of a literal with a carriage return 178 | $this->assertEquals('en', Util::getLiteralLanguage('"Mickey\rMouse"@en')); 179 | } 180 | 181 | // tests getLiteralLanguage if multi-line string was given (check for adaptation of Util.php, 182 | // adding an s to the regex) 183 | public function testGetLiteralLanguageMultilineString(): void 184 | { 185 | $literal = '"This document is published by the Provenance Working Group 
(http://www.w3.org/2011/prov/wiki/Main_Page). 186 | 187 | If you wish to make comments regarding this document, please send them to public-prov-comments@w3.org (subscribe public-prov-comments-request@w3.org, archives http://lists.w3.org/Archives/Public/public-prov-comments/). All feedback is welcome."@en'; 188 | 189 | $this->assertEquals('en', Util::getLiteralLanguage($literal)); 190 | } 191 | 192 | // tests reaction if no literal was given 193 | public function testGetLiteralLanguageNoLiteralGiven(): void 194 | { 195 | $this->expectException('\Exception'); 196 | 197 | Util::getLiteralLanguage('invalid'); 198 | } 199 | 200 | public function testIsPrefixedName(): void 201 | { 202 | // it matches a prefixed name 203 | $this->assertTrue(Util::isPrefixedName('ex:Test')); 204 | 205 | // it does not match an IRI 206 | $this->assertFalse(Util::isPrefixedName('http://example.org/')); 207 | 208 | // it does not match a literal 209 | $this->assertFalse(Util::isPrefixedName('"http://example.org/"')); 210 | 211 | // it does not match a literal with a colon 212 | $this->assertFalse(Util::isPrefixedName('"a:b"')); 213 | 214 | // it does not match null 215 | $this->assertFalse(Util::isPrefixedName(null)); 216 | } 217 | 218 | public function testExpandPrefixedName(): void 219 | { 220 | // it expands a prefixed name 221 | $this->assertEquals('http://ex.org/#Test', Util::expandPrefixedName('ex:Test', ['ex' => 'http://ex.org/#'])); 222 | // it expands a type with a prefixed name 223 | $this->assertEquals('"a"^^http://ex.org/#type', Util::expandPrefixedName('"a"^^ex:type', ['ex' => 'http://ex.org/#'])); 224 | // it expands a prefixed name with the empty prefix 225 | $this->assertEquals('http://ex.org/#Test', Util::expandPrefixedName(':Test', ['' => 'http://ex.org/#'])); 226 | // it does not expand a prefixed name if the prefix is unknown 227 | $this->assertEquals('a:Test', Util::expandPrefixedName('a:Test', ['b' => 'http://ex.org/#'])); 228 | // it returns the input if it is not a 
prefixed name 229 | $this->assertEquals('abc', Util::expandPrefixedName('abc', null)); 230 | } 231 | 232 | public function testCreateIRI(): void 233 | { 234 | // it converts a plain IRI 235 | $this->assertEquals('http://ex.org/foo#bar', Util::createIRI('http://ex.org/foo#bar')); 236 | 237 | // it converts a literal 238 | $this->assertEquals('http://ex.org/foo#bar', Util::createIRI('"http://ex.org/foo#bar"^^uri:type')); 239 | 240 | // it converts null 241 | $this->assertNull(Util::createIRI(null)); 242 | } 243 | 244 | public function testCreateLiteral(): void 245 | { 246 | // it converts the empty string 247 | $this->assertEquals('""', Util::createLiteral('')); 248 | 249 | // it converts the empty string with a language 250 | $this->assertEquals('""@en-gb', Util::createLiteral('', 'en-GB')); 251 | 252 | // it converts the empty string with a type 253 | $this->assertEquals('""^^http://ex.org/type', Util::createLiteral('', 'http://ex.org/type')); 254 | 255 | // it converts a non-empty string 256 | $this->assertEquals('"abc"', Util::createLiteral('abc')); 257 | 258 | // it converts a non-empty string with a language 259 | $this->assertEquals('"abc"@en-gb', Util::createLiteral('abc', 'en-GB')); 260 | 261 | // it converts a non-empty string with a type 262 | $this->assertEquals('"abc"^^http://ex.org/type', Util::createLiteral('abc', 'http://ex.org/type')); 263 | 264 | // it converts an integer 265 | $this->assertEquals('"123"^^http://www.w3.org/2001/XMLSchema#integer', Util::createLiteral(123)); 266 | 267 | // it converts a decimal 268 | $this->assertEquals('"2.3"^^http://www.w3.org/2001/XMLSchema#double', Util::createLiteral(2.3)); 269 | 270 | // it converts infinity 271 | $this->assertEquals('"INF"^^http://www.w3.org/2001/XMLSchema#double', Util::createLiteral(INF)); 272 | 273 | // it converts false 274 | $this->assertEquals('"false"^^http://www.w3.org/2001/XMLSchema#boolean', Util::createLiteral(false)); 275 | 276 | // it converts true 277 | 
$this->assertEquals('"true"^^http://www.w3.org/2001/XMLSchema#boolean', Util::createLiteral(true)); 278 | } 279 | 280 | /* 281 | public function testprefix () { 282 | var baz = Util::prefix('http://ex.org/baz#'); 283 | // it should return a function 284 | $this->assertEquals(an.instanceof(Function), baz); 285 | 286 | } 287 | public function testthe function () { 288 | // it should expand the prefix 289 | expect(baz('bar')).to.equal('http://ex.org/baz#bar'); 290 | 291 | } 292 | */ 293 | /* 294 | public function testprefixes () { 295 | public function testCalled without arguments () { 296 | var prefixes = Util::prefixes(); 297 | // it should return a function 298 | $this->assertEquals(an.instanceof(Function), prefixes); 299 | 300 | 301 | public function testthe function () { 302 | // it should not expand non-registered prefixes 303 | expect(prefixes('baz')('bar')).to.equal('bar'); 304 | 305 | 306 | // it should allow registering prefixes 307 | var p = prefixes('baz', 'http://ex.org/baz#'); 308 | expect(p).to.exist; 309 | expect(p).to.equal(prefixes('baz')); 310 | 311 | 312 | // it should expand the newly registered prefix 313 | expect(prefixes('baz')('bar')).to.equal('http://ex.org/baz#bar'); 314 | 315 | 316 | }*/ 317 | /* 318 | public function testCalled with a hash of prefixes () { 319 | var prefixes = Util::prefixes({ foo: 'http://ex.org/foo#', bar: 'http://ex.org/bar#' 320 | // it should return a function 321 | $this->assertEquals(an.instanceof(Function), prefixes); 322 | 323 | 324 | public function testthe function () { 325 | // it should expand registered prefixes 326 | expect(prefixes('foo')('bar')).to.equal('http://ex.org/foo#bar'); 327 | expect(prefixes('bar')('bar')).to.equal('http://ex.org/bar#bar'); 328 | 329 | 330 | // it should not expand non-registered prefixes 331 | expect(prefixes('baz')('bar')).to.equal('bar'); 332 | 333 | 334 | // it should allow registering prefixes 335 | var p = prefixes('baz', 'http://ex.org/baz#'); 336 | expect(p).to.exist; 337 
| expect(p).to.equal(prefixes('baz')); 338 | 339 | 340 | // it should expand the newly registered prefix 341 | expect(prefixes('baz')('bar')).to.equal('http://ex.org/baz#bar'); 342 | 343 | 344 | } 345 | */ 346 | } 347 | --------------------------------------------------------------------------------