├── .gitignore ├── API ├── COPYING-MIT.txt ├── CREDITS ├── INSTALL ├── Makefile ├── README.md ├── RELEASE ├── bin ├── base-package.xml ├── compactor.php ├── compactor.sh ├── gendocs.sh ├── package.sh ├── package.xml └── package_builder.php ├── build.xml ├── composer.json ├── config.doxy ├── examples ├── The_Beatles.rdf ├── at_a_glance.php ├── database_import.php ├── dbpedia.php ├── dirty_html.php ├── doc.html ├── doc.php ├── docx.php ├── docx_document.xml ├── fetch_rss.php ├── html.php ├── matching_text_content.php ├── musicbrainz.php ├── odt.php ├── openoffice.odt ├── out.svg ├── parse_php.php ├── querypath.xml ├── rss.php ├── simple_example.php ├── sparql.php ├── svg.php ├── techniques.php ├── test.docx ├── testGrid.html └── xml.php ├── package.xml ├── package_compatible.xml ├── patches └── QueryPath-RyanMahoney.php ├── pear-summary.txt ├── phar ├── README.md └── basic_loader.php ├── quickstart-guide.md ├── src ├── QueryPath.php ├── QueryPath │ ├── CSS │ │ ├── DOMTraverser.php │ │ ├── DOMTraverser │ │ │ ├── PseudoClass.php │ │ │ └── Util.php │ │ ├── EventHandler.php │ │ ├── InputStream.php │ │ ├── NotImplementedException.php │ │ ├── ParseException.php │ │ ├── Parser.php │ │ ├── QueryPathEventHandler.php │ │ ├── Scanner.php │ │ ├── Selector.php │ │ ├── SimpleSelector.php │ │ ├── Token.php │ │ └── Traverser.php │ ├── DOMQuery.php │ ├── Entities.php │ ├── Exception.php │ ├── Extension.php │ ├── Extension │ │ ├── QPXML.php │ │ └── QPXSL.php │ ├── ExtensionRegistry.php │ ├── IOException.php │ ├── Options.php │ ├── ParseException.php │ ├── Query.php │ └── QueryPathIterator.php ├── documentation.php ├── qp.php └── qp_functions.php ├── test ├── DOMTraverserTest.xml ├── Tests │ └── QueryPath │ │ ├── CSS │ │ ├── DOMTraverserTest.php │ │ ├── ParserTest.php │ │ ├── PseudoClassTest.php │ │ ├── QueryPathEventHandlerTest.php │ │ ├── SelectorTest.php │ │ ├── TokenTest.php │ │ └── UtilTest.php │ │ ├── DOMQueryTest.php │ │ ├── EntitiesTest.php │ │ ├── ExtensionTest.php │ │ ├── 
Extensions │ │ ├── QPXMLTest.php │ │ └── QPXSLTest.php │ │ ├── OptionsTest.php │ │ ├── QueryPathTest.php │ │ ├── TestCase.php │ │ └── XMLIshTest.php ├── advanced.xml ├── amplify.xml ├── coverage.sh ├── data.html ├── data.xml ├── html.xml ├── no-writing.xml ├── runTests.sh └── test.php └── tutorials ├── Examples └── Examples.pkg ├── QueryPath ├── QueryPath.pkg ├── QueryPath.pkg.ini ├── css.pkg ├── css.pkg.ini ├── cssexamples.pkg └── csspseudo.pkg └── Tests └── Tests.pkg /.gitignore: -------------------------------------------------------------------------------- 1 | bin/build 2 | bin/local 3 | bin/*.tgz 4 | bin/*.zip 5 | *.tmproj 6 | releases 7 | dist 8 | test/coverage 9 | test/reports 10 | test/db 11 | docs/* 12 | doc/* 13 | bin/querypath-200x333.png 14 | test/fakepear 15 | -------------------------------------------------------------------------------- /API: -------------------------------------------------------------------------------- 1 | API Changes in 3.0.0 2 | 3 | This is a major update. See the main changes for details. 4 | 5 | API Changes in 2.1.1 6 | 7 | * NEW METHOD: document() returns the DOMDocument 8 | * BUG FIX: Issue #10 has been re-fixed to correctly collapse certain empty tags. 9 | * BUG FIX: Issue #10 has been re-fixed to correctly escape JavaScript for browsers. 10 | * BUG FIX: Issue #47 has been fixed to only remove XML declaration, but leave DOCTYPE. 11 | * NEW ARGUMENT: xpath() now supports $options, which includes the ability to set a namespace. 12 | 13 | API Changes in 2.1 14 | 15 | All changes are documented in their respective doc blocks. So take a 16 | look at http://api.querypath.org to learn more. 17 | 18 | New Functions 19 | * The `htmlqp()` method has been added for parsing icky HTML. Use 20 | this for web scraping. 
21 | 22 | Altered Functions 23 | * The qp() function now supports the following new options: 24 | - convert_to_encoding 25 | - convert_from_encoding 26 | - strip_low_ascii 27 | - use_parser 28 | 29 | New Methods 30 | * attach()/detach() 31 | * has() 32 | * emptyElement() 33 | * even()/odd() 34 | * first()/last() 35 | * firstChild()/lastChild() 36 | * nextUntil()/prevUntil() 37 | * parentsUntil() 38 | * encodeDataURL() 39 | * dataURL() 40 | * filterPreg() 41 | * textBefore()/textAfter() 42 | 43 | Altered Methods 44 | * css() has been changed to allow subsequent calls 45 | to modify the style attribute (issue #28) 46 | * attr() has been changed. If it is called with no 47 | arguments, it now returns all attributes. 48 | 49 | New CSS Selectors Behavior 50 | 51 | * :contains-exactly() performs as :contains() used to perform. 52 | 53 | Altered CSS Selectors Behavior 54 | 55 | * The star operator (*) is now non-greedy, per spec. Before, the star would match 56 | any descendants. Now it will only match children. 57 | * :contains() now does substring matching instead of exact matching. This conforms 58 | to jQuery's behavior. 59 | * Quotes are now checked carefully before being stripped from pseudo-class values. 60 | * Issue #40 identified a potential infinite looping problem on poorly formed selectors. 61 | This was fixed. 
62 | -------------------------------------------------------------------------------- /COPYING-MIT.txt: -------------------------------------------------------------------------------- 1 | QueryPath: Find your way 2 | Matt Butcher 3 | Copyright (C) 2009-2014 Matt Butcher 4 | Copyright (C) 2015 Google 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /CREDITS: -------------------------------------------------------------------------------- 1 | Matt Butcher [technosophos] (lead) 2 | Emily Brand [eabrand] (developer) 3 | Woody Gilk [shadowhand] (contributor) 4 | Xandar Guzman [theshadow] (contributor) 5 | Bobby Jack [fiveminuteargument] (contributor) 6 | Steven Lischer [TomorrowToday] (contributor) 7 | GDMac [GDMac] (contributor) 8 | Bill Ortell [billortell] (contributor) 9 | hakre [hakre] (contributor) 10 | katzwebservices [katzwebservices] (contributor) 11 | Markus Kalkbrenner [mkalkbrenner] (contributor) 12 | Akihiro Yamanoi [noisan] (contributor) 13 | Sandeep Shetty [sandeepshetty] (contributor) 14 | Zemistr [Zemistr] (contributor) 15 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | INSTALLING QUERYPATH 2 | ============================== 3 | 4 | There are two distributions of QueryPath: 5 | 6 | 1. The full version, which includes source, documentation, unit tests, 7 | and examples. 8 | 9 | 2. The minimal version, which includes only the source code in 10 | compacted form. 11 | 12 | INSTALLING THE MINIMAL VERSION 13 | ============================== 14 | To install QueryPath-minimal: 15 | - Extract the contents of the archive: 16 | 17 | tar -zxvf QueryPath-2.0-minimal.tgz 18 | 19 | - Move the extracted directory to the desired location. Generally, 20 | it is best to rename this folder 'QueryPath'. 21 | 22 | mv QueryPath-2.0-minimal myproject/QueryPath 23 | 24 | (Alternately, you may wish to install QueryPath in a location available on 25 | your PHP library path for use in all applications on your system.) 26 | 27 | - Include QueryPath in your scripts: 28 | 29 | require 'QueryPath/QueryPath.php'; 30 | 31 | - The minimal package does not contain any documentation.
To view 32 | the documentation online, go to http://api.querypath.org. 33 | 34 | INSTALLING THE FULL VERSION 35 | ============================== 36 | To install QueryPath: 37 | 38 | - Extract the contents of the archive. 39 | 40 | tar -zxvf QueryPath-2.0.tgz 41 | 42 | - Copy the src/QueryPath directory to the desired location. 43 | 44 | cp -R QueryPath-2.0/src/QueryPath myproject 45 | 46 | - Include QueryPath/QueryPath.php in your PHP scripts 47 | 48 | require 'QueryPath/QueryPath.php'; 49 | 50 | The structure of the full version: 51 | 52 | / -- Base information about the library 53 | src/ -- The source (PHP) code. Include the contents of this directory in your 54 | applications. 55 | test/ -- Unit tests for QueryPath. You can run these with PHPUnit. 56 | examples/ -- Example scripts written with QueryPath. 57 | docs/ -- Full API documentation in HTML format. You can regenerate this from 58 | the source using PHPDocumentor. 59 | tutorials/ -- DocBook additions to the QueryPath PHPDocumentor data. An HTML 60 | version of this is included in the docs/ directory. The files here are 61 | probably only useful when re-generating the API documentation. 62 | 63 | 64 | INSTALLING FROM GitHub 65 | ============================== 66 | 67 | The complete source code and all utilities for QueryPath are located in a GitHub 68 | repository. If you plan on contributing back to QueryPath, you will be able to 69 | achieve the most by working from the GitHub source. Here's how: 70 | 71 | 1. Obtain the source code from here: http://github.com/technosophos/querypath 72 | 73 | 2. Install PHPDocumentor, PHPUnit, and Phing. XDebug is strongly recommended for 74 | doing coverage analysis. These tools are used to auto-generate 75 | documentation, run unit tests, and manage project builds. 76 | 77 | 3. Run 'phing info' from the base QueryPath checkout. This will tell you how to 78 | perform various tasks with QueryPath. 79 | 80 | For more information, visit http://querypath.org.
-------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROJ := 'QueryPath' 2 | SRCDIR := src 3 | TESTS := test/Tests 4 | VERSION := 'DEV' 5 | DATE := `date "+%Y%m%d"` 6 | 7 | VFILES = src/HPCloud 8 | 9 | docs : 10 | @cat ./config.doxy | sed 's/-UNSTABLE%/$(VERSION)/' | doxygen - 11 | 12 | test : 13 | phpunit --verbose --color --exclude-group=deprecated $(TESTS); 14 | 15 | test-group : 16 | phpunit --verbose --color --group=$(GROUP) $(TESTS); 17 | 18 | fulltest: 19 | phpunit --color $(TESTS) 20 | 21 | lint : $(SRCDIR) 22 | find $(SRCDIR) -iname *.php -exec php -l {} ';' 23 | 24 | dist: tar 25 | 26 | tar: 27 | @echo $(PROJ)-$(VERSION)-$(DATE).tgz 28 | # @tar -zcvf $(PROJ)-$(VERSION)-$(DATE).tgz $(SRCDIR) 29 | 30 | .PHONY: docs test dist tar lint 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Use GravityPDF's QueryPath 2 | > QueryPath is now updated and maintained by the amazing folks at GravityPDF. https://github.com/GravityPDF/querypath. The version of QueryPath in this repository is no longer maintained. You are encouraged to use [GravityPDF's version](https://github.com/GravityPDF/querypath). -- Matt, Dec. 2022 3 | 4 | # QueryPath: Find your way. 
5 | [![Stability: Maintenance](https://masterminds.github.io/stability/maintenance.svg)](https://masterminds.github.io/stability/maintenance.html) 6 | 7 | Authors: Matt Butcher (lead), Emily Brand, and many others 8 | 9 | [Website](http://querypath.org) | 10 | [API Docs](http://api.querypath.org/docs) | 11 | [VCS and Issue Tracking](http://github.com/technosophos/querypath) | 12 | [Support List](http://groups.google.com/group/support-querypath) | 13 | [Developer List](http://groups.google.com/group/devel-querypath) | 14 | [Pear channel](http://pear.querypath.org) | 15 | 16 | This package is licensed under an MIT license (COPYING-MIT.txt). 17 | 18 | ## At A Glance 19 | 20 | QueryPath is a jQuery-like library for working with XML and HTML 21 | documents in PHP. It now contains support for HTML5 via the 22 | [HTML5-PHP project](https://github.com/Masterminds/html5-php). 23 | 24 | ### Getting Started 25 | 26 | Assuming you have successfully installed QueryPath via Composer, you can 27 | parse documents like this: 28 | 29 | ``` 30 | require_once "vendor/autoload.php"; 31 | 32 | // HTML5 (new) 33 | $qp = html5qp("path/to/file.html"); 34 | 35 | // Legacy HTML via libxml 36 | $qp = htmlqp("path/to/file.html"); 37 | 38 | // XML or XHTML 39 | $qp = qp("path/to/file.html"); 40 | 41 | // All of the above can take string markup instead of a file name: 42 | $qp = qp("") 43 | 44 | ``` 45 | 46 | But the real power comes from chaining. Check out the example below. 47 | 48 | ### Example Usage 49 | 50 | Say we have a document like this: 51 | ```xml 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 |
onetwothree
fourfivesix
61 | ``` 62 | 63 | And say that the above is stored in the variable `$xml`. Now 64 | we can use QueryPath like this: 65 | 66 | ```php 67 | attr('foo', 'bar'); 70 | 71 | // Print the contents of the third TD in the second row: 72 | print qp($xml, '#row2>td:nth(3)')->text(); 73 | 74 | // Append another row to the XML and then write the 75 | // result to standard output: 76 | qp($xml, 'tr:last')->after('')->writeXML(); 77 | 78 | ?> 79 | ``` 80 | 81 | (This example is in `examples/at_a_glance.php`.) 82 | 83 | With over 60 functions and robust support for chaining, you can 84 | accomplish sophisticated XML and HTML processing using QueryPath. 85 | 86 | ## QueryPath Installers 87 | 88 | The preferred method of installing QueryPath is via [Composer](http://getcomposer.org). 89 | 90 | You can also download the package from GitHub. 91 | 92 | ### Composer (Preferred) 93 | 94 | To add QueryPath as a library in your project, add this to the 'require' 95 | section of your `composer.json`: 96 | 97 | ```json 98 | { 99 | "require": { 100 | "querypath/QueryPath": ">=3.0.0" 101 | } 102 | } 103 | ``` 104 | 105 | Then run `php composer.phar install` in that directory. 106 | 107 | To stay up to date on stable code, you can use `dev-master` instead of `>=3.0.0`. 108 | 109 | ### Manual Install 110 | 111 | You can either download a stable release from the 112 | [GitHub Tags page](https://github.com/technosophos/querypath/tags) 113 | or you can use `git` to clone 114 | [this repository](http://github.com/technosophos/querypath) and work from 115 | the code.
116 | 117 | ## Including QueryPath 118 | 119 | As of QueryPath 3.x, QueryPath uses the Composer autoloader if you 120 | installed with composer: 121 | ```php 122 | 125 | ``` 126 | 127 | Without Composer, you can include QueryPath like this: 128 | 129 | ```php 130 | 133 | ``` 134 | 135 | QueryPath can also be compiled into a Phar and then included like this: 136 | 137 | ```php 138 | 141 | ``` 142 | 143 | From there, the main functions you will want to use are `qp()` 144 | (alias of `QueryPath::with()`) and `htmlqp()` (alias of 145 | `QueryPath::withHTML()`). Start with the 146 | [API docs](http://api.querypath.org/docs). 147 | -------------------------------------------------------------------------------- /RELEASE: -------------------------------------------------------------------------------- 1 | # RELEASE NOTES 2 | 3 | 3.0.5 (unstable) 4 | 5 | 6 | 3.0.4 7 | - Addition of namespace fetching method ns(). 8 | - Various fixes 9 | - Basic support for HTML5 via Masterminds\HTML5 10 | - Fixed #164 11 | 12 | 3.0.3 13 | 14 | - Bug #141: Fixed :gt and :lt pseudoclasses (thanks to lsv) 15 | - Bug #124: Fixed inheritance issue with late static binding (via noisan) 16 | - Bug #126: text(string) was not updating all matched elements (via noisan) 17 | - Bug #127: wrapInner() was mangling the HTML if matched element set was greater than 1 (via noisan) 18 | - Bug #128: Improved jQuery compatibility with wrap*() *pend(), before(), after(), etc. (via noisan) 19 | 20 | 3.0.2 21 | 22 | - Bug #112: children() was not correctly restricting filters. 23 | - Bug #108: QueryPath\Query interface was too restrictive. (via mkalkbrenner) 24 | - Feature #106: :contains() is now case-insensitive. (via katzwebservices) 25 | 26 | 3.0.1 27 | 28 | - Issue #100: qp.php divided into qp.php and qp_functions.php. Composer now includes qp(). 
(via hakre) 29 | - Issue #102: Added QueryPath.VERSION_MAJOR, changed QueryPath.VERSION 30 | - Issue #97: Rewrite children() and filter() to be a little more efficient. 31 | 32 | 3.0.0 33 | 34 | ** REALLY IMPORTANT CHANGE:** To match jQuery, and to correctly implement 35 | a bottom-up parser, the following will no longer work: 36 | $qp($html, 'li')->find(':root ul'); 37 | You can no longer use find() to ever, ever move UP the document tree 38 | (like back to the root from somewhere in the tree). You MUST use 39 | top() to do this now. This is how jQuery works, and making this 40 | minor change makes things much faster. 41 | 42 | ** REALLY IMPORTANT CHANGE:** Many "destructive" operations now return a 43 | new QueryPath object. This mirrors jQuery's behavior. 44 | 45 | THAT MEANS: find() now works like jQuery find(), branch() is 46 | deprecated. findInPlace() does what find() used to do. 47 | 48 | 1. removeAll() now works as it does in recent jQuery. Thanks to GDMac 49 | (issues #77 #78) for the fix. 50 | 2. Refactored to use namespaces. 51 | 3. Refactored to be PSR-0 compliant. 52 | 4. Now uses Composer. 53 | 5. The traversal mechanism is now bottom-up, which means QueryPath is 54 | faster. 55 | 6. Issue #83: Fixed broken forloop in techniques.php. Thanks to BillOrtell for 56 | the fix. 57 | 7. ID-based searches no longer guarantee that only one element will be returned. 58 | This accommodates XML documents that may use 'id' in a way different than 59 | HTML. 60 | 8. The base CSS Traverser is now optimized for selectors that use ID or 61 | class, but no element. 62 | 9. Pseudo-element behavior has been rewritten to better conform to the 63 | standard. Using a pseudo-element no longer changes the match. Rather, 64 | it checks to see if the condition is met by the present element, and 65 | returns TRUE if it does. This means we do not need special case logic 66 | to deal with text fragments. 67 | 10.
:x-root, :x-reset, and :scope are now implemented using the 68 | same algorithm (they are, in other words, aliases). Since 69 | :scope is part of CSS 4, you should use that. 70 | 11. Support for the following CSS 4 Selectors features has been added: 71 | - :matches() pseudoclass, which behaves the way :has() behaves. 72 | - :any-link 73 | - :local-link (with some restrictions, as it does not know what the real 74 | URL of the document is). 75 | - :scope (see above) 76 | 12. Traversing UP the DOM tree with find() is no longer allowed. Use top(). 77 | 13. :first is an alias of :nth(1) instead of :first-of-type. This follows 78 | jQuery now. 79 | 14. eachLambda() is now deprecated. It WILL be removed in the future. 80 | 15. **Extensions:** QPList, QPTPL, and QPDB have all been moved to the new 81 | project QueryPath-Ext. 82 | 83 | 2.1.3 84 | 85 | 1. QueryPath Templates have gotten an overhaul from TomorrowToday (Issue #59). 86 | Templates now support attributes. 87 | 88 | 2.1.2: 89 | 90 | 1. Fixed the case where remove() caused an error when no items were found 91 | to remove (issue #63). Thanks marktheunissen for the bug report and fix. 92 | 93 | 2. New XML extensions to deal with adding namespaced elements (#64). Thanks to 94 | theshadow for contributing an entire extension, and to farinspace for 95 | detailed experiments with QP and XML namespaces. 96 | 97 | 3. The adjacent CSS selector has been modified to ignore text elements. This 98 | seems to be in line with the spec, but I am not 100% sure. Thanks to 99 | fiveminuteargument for the patch. 100 | 101 | 2.1.1: 102 | 103 | 1. The xhtml() and writeXHTML() methods now correctly escape JS/CSS and also correctly 104 | fold some tags into unaries while keeping other empty tags. See issues #10, #47. 105 | Thanks to Alex Lawrence for his input. 106 | 107 | 2. The method document() has been added. Thanks to Alex Lawrence for suggesting this 108 | addition. 109 | 110 | 3.
The fetch_rss.php example created broken HREFs in some cases. Thanks to yaph for 111 | the patch. 112 | 113 | 4. The xpath() method now supports setting default namespaces. Thanks to Xavier Prud'homme 114 | for a patch. 115 | 116 | 5. The remove() method was fixed (issue #55) to now correctly return a QueryPath with 117 | just the removed nodes, while not altering the base QueryPath object. Thanks to MarcusX 118 | for finding and reporting the problem. 119 | 120 | 6. Added childrenText() convenience method. (Safe -- no changes to existing functions.) 121 | Thanks to Xavatar for suggestion and proofing initial code. 122 | 123 | 7. Fixed bad character stripping in htmlqp() (Issue #58, #52) so that meaningful whitespace 124 | is no longer stripped prior to parsing. Thanks to NorthernRaven for detailed report 125 | and help testing and debugging. 126 | 127 | 8. Fixed broken :nth-of-type pseudo-class (Issue #57). Thanks to NorthernRaven for the 128 | detailed report and help debugging. 129 | 130 | 9. Fixed broken an+b rule handling in the special case '-n+b'. Thanks to NorthernRaven for 131 | reporting and helping out. 132 | 133 | 10. Xinclude support has been added via the xinclude() method on QueryPath. Thanks to Crell 134 | for the suggestion and to sdboyer for help (Issue #50). 135 | 136 | 11. QueryPath now implements Countable, which means you can do `count(qp($xml, 'div'))`. The 137 | size() function has been marked deprecated. 138 | 139 | 12. is() now takes both DOMNodes and Traversables (including QueryPath) as an argument. See 140 | issue #53. 141 | 142 | 13. The dirty_html.php example (contributed by Emily Brand, thanks!) is now fixed. Thanks to 143 | MartyIX for tracking down the issue (#59). 144 | 145 | 2.1.0: 146 | Big Changes: 147 | 148 | 1. There is now an `htmlqp()` function that parses crufty HTML in a far 149 | more reliable way than `qp()`. Use this instead of any variant of 150 | the older `@qp()` setup. 151 | 152 | 2.
The API has been brought into alignment with jQuery 1.4. See 153 | API-2.1.0 for details. 154 | 155 | 3. This release was driven substantially by eabrand's GSOC 2010 156 | contributions. Thanks, Emily! 157 | 158 | 4. There are now Phar and PEAR packages available. Go to 159 | http://pear.querypath.org for PEAR packages. 160 | 161 | 5. The minimal QP distribution is no longer minified, as it reportedly 162 | causes XDebug to crash. 163 | 164 | 7. Data URLs are now supported. QueryPath can now embed images directly 165 | into HTML and XML this way. 166 | 167 | 8. Documentation is now in Doxygen instead of PhpDocumentor. Thanks 168 | to Matt Farina and Kevin O'Brien for their input. 169 | -------------------------------------------------------------------------------- /bin/base-package.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | QueryPath 11 | http://www.example.com/tarballs/Package-1.0.0 12 | An HTML/XML library 13 | 14 | The QueryPath package contains the QueryPath base library, along with all 15 | of the extensions included in the base library. To learn more about 16 | QueryPath, go to http://querypath.org. 17 | 18 | 19 | Matt Butcher 20 | mbutcher 21 | matt@aleph-null.tv 22 | yes 23 | 24 | 2009-06-25 25 | 26 | 2.0 27 | 2.0 28 | 29 | 30 | beta 31 | stable 32 | 33 | LGPL v. 2.1 34 | 35 | QueryPath 2.0 introduces several new (minor) features. With a reworked 36 | internal storage model, it should perform significantly faster than its 37 | precursor. Many bugs, mostly minor, have been fixed since QueryPath 1.2. 38 | 39 | QueryPath 2.0 offers many new features, as well as numerous bug fixes and 40 | overall performance improvement. Major features: 41 | 42 | * New functions, including textImplode(). 43 | * New arguments to existing functions, such as xml() and branch(). 44 | * A new QueryPathOptions configuration object, which allows request-wide 45 | configuration.
46 | * QueryPath now supports many more options, making it more configurable. 47 | * The re-factoring of QueryPath.php to improve performance. 48 | * The removal of the QueryPath interface, and the renaming of QueryPathImpl 49 | to QueryPath. (No public-facing changes.) 50 | * Better handling of broken HTML/XML entities 51 | * Overall improvement on error handling. QueryPath now consistently throws 52 | exceptions. It no longer generates errors. 53 | * Improved performance. Internal data structures have been rewritten for 54 | optimization. Even instances on an opcode cache should see performance 55 | improvements. 56 | * Numerous bugs fixed. 57 | * Unused protected and private methods have been removed. 58 | * Unit testing has been bumped up to provide 100% function coverage and 59 | 90%+ line coverage. 60 | 61 | 62 | 63 | 64 | 65 | 66 | 5.1.0 67 | 68 | 69 | 1.4.12 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /bin/compactor.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bin/compactor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | php ./compactor.php QueryPath.compact.php ../src/QueryPath/QueryPath.php -------------------------------------------------------------------------------- /bin/gendocs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ##################### 3 | # Generate PHP Docs # 4 | ##################### 5 | 6 | phpdoc=/Applications/MAMP/bin/php5/bin/phpdoc 7 | 8 | src='../src/QueryPath,../tutorials,../test,../examples'; 9 | docs='../docs' 10 | title='QueryPath 2 (Quark)' 11 | #format='HTML:frames:phpdoc.de'#',PDF:default:default' 12 | #format='HTML:frames:earthli' 13 | #format='HTML:Smarty:HandS' 14 | format='HTML:Smarty:QueryPath' 15 | 
category='QueryPath' 16 | tut='tutorials' 17 | ed='../examples' 18 | 19 | $phpdoc -s on -d $src -ti "$title" -t $docs -o $format -dc "$category" -dn "$category" -ed $ed 20 | -------------------------------------------------------------------------------- /bin/package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# == 0 ]]; then 4 | echo "Usage: $0 TAG" 5 | exit; 6 | fi 7 | 8 | tag=$1 9 | version="querypath-$1" 10 | master='../../../QueryPath' 11 | 12 | #mkdir -p ./build/$version 13 | cd ./build/ 14 | git clone $master ./$version 15 | cd $version 16 | git checkout $tag 17 | cd ./docs 18 | ./gendocs.sh 19 | cd ../../ 20 | tar --exclude .git -zcf "$version.tgz" $version 21 | zip -r $version.zip $version -x "*.git/*" 22 | mv $version.tgz ../ 23 | mv $version.zip ../ 24 | 25 | echo "Ready to clean up" 26 | #rm -rf $version 27 | -------------------------------------------------------------------------------- /bin/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | QueryPath 11 | http://www.example.com/tarballs/Package-1.0.0 12 | An HTML/XML library 13 | 14 | The QueryPath package contains the QueryPath base library, along with all 15 | of the extensions included in the base library. To learn more about 16 | QueryPath, go to http://querypath.org. 17 | 18 | 19 | Matt Butcher 20 | mbutcher 21 | matt@aleph-null.tv 22 | yes 23 | 24 | 2009-06-25 25 | 26 | 2.0 27 | 2.0 28 | 29 | 30 | beta 31 | stable 32 | 33 | LGPL v. 2.1 34 | 35 | QueryPath 2.0 introduces several new (minor) features. With a reworked 36 | internal storage model, it should perform significantly faster than its 37 | precursor. Many bugs, mostly minor, have been fixed since QueryPath 1.2. 38 | 39 | QueryPath 2.0 offers many new features, as well as numerous bug fixes and 40 | overall performance improvement. Major features: 41 | 42 | * New functions, including textImplode(). 
43 | * New arguments to existing functions, such as xml() and branch(). 44 | * A new QueryPathOptions configuration object, which allows request-wide 45 | configuration. 46 | * QueryPath now supports many more options, making it more configurable. 47 | * The re-factoring of QueryPath.php to improve performance. 48 | * The removal of the QueryPath interface, and the renaming of QueryPathImpl 49 | to QueryPath. (No public-facing changes.) 50 | * Better handling of broken HTML/XML entities 51 | * Overall improvement on error handling. QueryPath now consistently throws 52 | exceptions. It no longer generates errors. 53 | * Improved performance. Internal data structures have been rewritten for 54 | optimization. Even instances on an opcode cache should see performance 55 | improvements. 56 | * Numerous bugs fixed. 57 | * Unused protected and private methods have been removed. 58 | * Unit testing has been bumped up to provide 100% function coverage and 59 | 90%+ line coverage. 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 5.1.0 82 | 83 | 84 | 1.4.12 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /bin/package_builder.php: -------------------------------------------------------------------------------- 1 | $dir) { 20 | $it = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($dir, RecursiveIteratorIterator::SELF_FIRST)); 21 | $oldPath = $dir; 22 | foreach ($it as $o => $item) { 23 | $base->append('' . 
PHP_EOL); 24 | } 25 | } 26 | 27 | $base->writeXML(); -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "querypath/QueryPath", 3 | "type": "library", 4 | "description": "HTML/XML querying (CSS 4 or XPath) and processing (like jQuery)", 5 | "homepage": "https://github.com/technosophos/querypath", 6 | "license": "MIT", 7 | "keywords": ["xml", "html", "css", "jquery", "xslt"], 8 | "require" : { 9 | "php" : ">=5.3.0", 10 | "masterminds/html5": "2.*" 11 | }, 12 | "autoload": { 13 | "psr-0": {"QueryPath": "src/"}, 14 | "files": ["src/qp_functions.php"] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/at_a_glance.php: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
onetwothree
fourfivesix
13 | EOF; 14 | 15 | print "\nExample 1: \n"; 16 | // Get all of the elements in the document and add the 17 | // attribute `foo='bar'`: 18 | qp($xml, 'td')->attr('foo', 'bar')->writeXML(); 19 | 20 | print "\nExample 2: \n"; 21 | 22 | // Or print the contents of the third TD in the second row: 23 | print qp($xml, '#row2>td:nth(3)')->text(); 24 | 25 | print "\nExample 3: \n"; 26 | // Or append another row to the XML and then write the 27 | // result to standard output: 28 | qp($xml, 'tr:last')->after('')->writeXML(); 29 | 30 | ?> 31 | -------------------------------------------------------------------------------- /examples/database_import.php: -------------------------------------------------------------------------------- 1 | 7 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 8 | */ 9 | 10 | require_once '../src/QueryPath/QueryPath.php'; 11 | require_once '../src/QueryPath/Extension/QPDB.php'; 12 | 13 | // Set the default database. 14 | QPDB::baseDB('sqlite:../test/db/qpTest.db'); 15 | 16 | // To begin, let's create a new database. We can do this outside 17 | // of QueryPath: 18 | $db = QPDB::getBaseDB(); 19 | $db->exec('CREATE TABLE IF NOT EXISTS qpdb_article (title, author, body)'); 20 | 21 | // Here's our sample article: 22 | $article = ' 23 |
24 | Use QueryPath for Fun and Profit 25 | 26 | Matt 27 | Butcher 28 | 29 | 30 | QueryPath is a great tool.

32 |

Use it in many ways.

33 | ]]> 34 | 35 |
'; 36 | 37 | // Now let's take this article and insert it into the database: 38 | $qp = qp($article); 39 | 40 | // We are going to store our insert params in here. 41 | $params = array(); 42 | 43 | // First, let's get the title 44 | $params[':title'] = $qp->find('title')->text(); 45 | 46 | // Next, let's get the name: 47 | $params[':name'] = $qp->top()->find('author>last')->text() . ', ' . $qp->prev('first')->text(); 48 | 49 | // Finally, let's get the article content: 50 | $params[':body'] = $qp->top()->find('body')->text(); 51 | 52 | // Here's the query we are going to run: 53 | $sql = 'INSERT INTO qpdb_article (title, author, body) VALUES (:title, :name, :body)'; 54 | 55 | // Now we can insert this: 56 | $qp->query($sql, $params); 57 | 58 | // Finally, we can now read this information back out into an HTML document 59 | qp(QueryPath::HTML_STUB, 'body')->queryInto('SELECT * FROM qpdb_article')->writeHTML(); 60 | 61 | // Finally, we clean up: 62 | $qp->exec('DROP TABLE qpdb_article'); -------------------------------------------------------------------------------- /examples/dbpedia.php: -------------------------------------------------------------------------------- 1 | 21 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 
22 | * @see http://www.w3.org/DesignIssues/LinkedData.html 23 | * @see http://dbpedia.org 24 | * @see sparql.php 25 | * @see musicbrainz.php 26 | */ 27 | 28 | require_once '../src/QueryPath/QueryPath.php'; 29 | 30 | // The URL to look up (any of these works): 31 | $url = 'http://dbpedia.org/data/The_Beatles.rdf'; 32 | //$url = 'http://dbpedia.org/data/Swansea.rdf'; 33 | //$url = 'http://dbpedia.org/data/The_Lord_of_the_Rings.rdf'; 34 | // HTTP headers: 35 | $headers = array( 36 | 'Accept: application/rdf,application/rdf+xml;q=0.9,*/*;q=0.8', 37 | 'Accept-Language: en-us,en', 38 | 'Accept-Charset: ISO-8859-1,utf-8', 39 | 'User-Agent: QueryPath/1.2', 40 | ); 41 | 42 | // The context options: 43 | $options = array( 44 | 'http' => array( 45 | 'method' => 'GET', 46 | 'protocol_version' => 1.1, 47 | 'header' => implode("\r\n", $headers), 48 | ), 49 | ); 50 | 51 | // Create a stream context that will tell QueryPath how to 52 | // load the file. 53 | $cxt = stream_context_create($options); 54 | 55 | // Fetch the URL and select all rdf:Description elements. 56 | // (Note that | is the CSS 3 equiv of colons for namespacing.) 57 | // To add the context, we pass it in as an option to QueryPath. 58 | $qp = qp($url, 'rdf|Description', array('context' => $cxt)); 59 | //$qp = qp('The_Beatles.rdf'); 60 | 61 | printf("There are %d descriptions in this record.\n", $qp->size()); 62 | 63 | // Here, we use rdf|* to select all elements in the RDF namespace. 64 | $qp->top()->find('rdf|*'); 65 | printf("There are %d RDF items in this record.\n", $qp->size()); 66 | 67 | // Standard pseudo-classes that are not HTML specific can be used on 68 | // namespaced elements, too. 69 | print "About: " . $qp->top()->find('rdfs|label:first')->text() . PHP_EOL; 70 | print "About (FOAF): " . $qp->top()->find('foaf|name:first')->text() . PHP_EOL; 71 | 72 | // Namespaced attributes can be retrieved using the same sort of delimiting. 
73 | print "\nComment:\n"; 74 | print $qp->top()->find('rdfs|comment[xml|lang="en"]')->text(); 75 | print PHP_EOL; 76 | 77 | $qp->top(); 78 | 79 | print "\nImages:\n"; 80 | foreach ($qp->branch()->find('foaf|img') as $img) { 81 | // Note that when we use attr() we are using the XML name, NOT 82 | // the CSS 3 name. So it is rdf:resource, not rdf|resource. 83 | // The same goes for the tag() function -- it will return 84 | // the full element name (e.g. rdf:Description). 85 | print $img->attr('rdf:resource') . PHP_EOL; 86 | } 87 | 88 | print "\nImages Galleries:\n"; 89 | foreach ($qp->branch()->find('dbpprop|hasPhotoCollection') as $img) { 90 | print $img->attr('rdf:resource') . PHP_EOL; 91 | } 92 | 93 | print "\nOther Sites:\n"; 94 | foreach ($qp->branch()->find('foaf|page') as $img) { 95 | print $img->attr('rdf:resource') . PHP_EOL; 96 | } 97 | 98 | //$qp->writeXML(); -------------------------------------------------------------------------------- /examples/dirty_html.php: -------------------------------------------------------------------------------- 1 | Urban Dictionary Random Word Generator'; 13 | 14 | $page = rand(0, 288); 15 | $qp = htmlqp('http://www.urbandictionary.com/?page='.$page, '#home'); 16 | 17 | $rand = rand(0, 7); 18 | print $qp->find('.word')->eq($rand)->text().'
'; 19 | print $qp->top()->find('.definition')->eq($rand)->text(); 20 | -------------------------------------------------------------------------------- /examples/doc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Documentation 6 | 94 | 95 | 96 | 97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | 108 | 109 | -------------------------------------------------------------------------------- /examples/doc.php: -------------------------------------------------------------------------------- 1 | $v
"; 22 | } 23 | 24 | // The document skeleton 25 | $qpdoc = htmlqp('doc.html', 'body'); 26 | 27 | $key = $_GET['key']; 28 | 29 | // The jQuery categories that are used in QueryPath 30 | $qparray = array('Tree Traversal', 'Child Filter', 'Attribute', 'Content Filter', 'Basic Filter', 31 | 'Hierarchy', 'Basic', 'Filtering', 'Miscellaneous Traversing', 'DOM Insertion, Outside', 32 | 'DOM Insertion, Inside', 'Attributes', 'Style Properties'); 33 | 34 | $jqnames = array(); 35 | $qpnames = array(); 36 | 37 | // Search through the xml file to find any entries of jQuery entities 38 | foreach(qp('querypath.xml', 'entry') as $entry) { 39 | $qpnames[$entry->attr('name')] = 40 | array('desc' => $entry->find('desc')->innerXML(), 41 | 'jquery' => $entry->parent()->find('jquery')->innerXML(), 42 | 'querypath' => $entry->parent()->find('querypath')->innerXML()); 43 | } 44 | 45 | // Search through the xml file to find all entries of jQuery entities 46 | foreach(htmlqp('http://api.jquery.com/api/', 'entry') as $entry) { 47 | $category = false; 48 | $category = array_search($entry->find('category:first')->attr('name'), $qparray); 49 | while($entry->next('category')->html() != null) { 50 | $category = (array_search($entry->attr('name'), $qparray)) ? 
true : $category; 51 | if($category) break; 52 | } 53 | if($category) { 54 | $jqnames[$entry->parent()->attr('name')] = 55 | array('longdesc' => $entry->find('longdesc')->innerXML(), 56 | 'name' => $entry->parent()->find('category')->attr('name')); 57 | } 58 | } 59 | 60 | // Map the keys & sort them 61 | $jqkeys = array_keys($jqnames); 62 | $jqkeys = array_map("addClasses", $jqkeys); 63 | sort($jqkeys); 64 | 65 | // Add the keys to the nav bar 66 | $qpdoc->find('#leftbody'); 67 | foreach($jqkeys as $k => $v) { 68 | $qpdoc->append($v); 69 | } 70 | 71 | // Add the description to the main window if the key exists 72 | if(array_key_exists($key, $jqnames)) { 73 | if(array_key_exists($key, $qpnames)) { 74 | $qpdoc->top()->find('#rightfunction')->text('Function: '.ucfirst($key)); 75 | $qpdoc->top()->find('#rightdesc')->text($qpnames[$key]['desc']); 76 | $qpdoc->top()->find('#righttitle')->text('How it\'s done in jQuery'); 77 | $qpdoc->top()->find('#righttext')->text($qpnames[$key]['jquery']); 78 | $qpdoc->top()->find('#righttitle2')->text('How it\'s done in QueryPath'); 79 | $qpdoc->top()->find('#righttext2')->text($qpnames[$key]['querypath']); 80 | } 81 | else { 82 | $qpdoc->top()->find('#rightfunction')->text('Function: '.ucfirst($key)); 83 | $qpdoc->top()->find('#rightdesc')->remove(); 84 | $qpdoc->top()->find('#righttitle')->text('jQuery Documentation'); 85 | $qpdoc->top()->find('#righttext')->append($jqnames[$key]['longdesc']); 86 | } 87 | } 88 | 89 | // Write the document 90 | $qpdoc->writeHTML(); 91 | 92 | -------------------------------------------------------------------------------- /examples/docx.php: -------------------------------------------------------------------------------- 1 | branch(); 24 | print format($qr->find('w|r:first'), 'w|r:first').' '; 25 | $qp->find('w|r:first'); 26 | while($qp->next('w|r')->html() != null) { 27 | $qr = $qp->branch(); 28 | print format($qr->find('w|r'), 'w|r').' '; 29 | // print $qp->text(); 30 | } 31 | print '
'; 32 | } 33 | 34 | /** 35 | * 36 | * @param QueryPath $qp 37 | * @param String $findSelector 38 | * @return String 39 | */ 40 | function format($qp, $findSelector = null) { 41 | 42 | // Create a new branch for printing later. 43 | $qr = $qp->branch(); 44 | 45 | $text = ""; 46 | 47 | $text = $qr->find($findSelector)->find('w|t')->text(); 48 | 49 | $text = (checkUnderline($qp->branch())) ? ''.$text.'' : $text; 50 | $text = (checkBold($qp->branch())) ? ''.$text.'' : $text; 51 | 52 | return $text; 53 | } 54 | 55 | /** 56 | * 57 | * @param QueryPath $qp 58 | * @return String 59 | */ 60 | function checkBold($qp) { 61 | $qp->children("w|rPr"); 62 | return ($qp->children('w|b')->html()) ? true : false; 63 | } 64 | 65 | /** 66 | * 67 | * @param QueryPath $qp 68 | * @return String 69 | */ 70 | function checkUnderline($qp) { 71 | $qp->children("w|rPr"); 72 | return ($qp->children('w|u')->html()) ? true : false; 73 | } 74 | 75 | 76 | function docx2text($filename) { 77 | return readZippedXML($filename, "word/document.xml"); 78 | } 79 | 80 | function readZippedXML($archiveFile, $dataFile) { 81 | if (!class_exists('ZipArchive', false)) { 82 | return "ZipArchive Class Doesn't Exist."; 83 | } 84 | // Create new ZIP archive 85 | $zip = new ZipArchive(); 86 | // Open received archive file 87 | if (true === $zip->open($archiveFile)) { 88 | // If done, search for the data file in the archive 89 | if (($index = $zip->locateName($dataFile)) !== false) { 90 | // If found, read it to the string 91 | $data = $zip->getFromIndex($index); 92 | // Close archive file 93 | $zip->close(); 94 | // Load XML from a string 95 | // Skip errors and warnings 96 | return $data; 97 | // $xml = DOMDocument::loadXML($data, LIBXML_NOENT | LIBXML_XINCLUDE | LIBXML_NOERROR | LIBXML_NOWARNING); 98 | // // Return data without XML formatting tags 99 | // return strip_tags($xml->saveXML()); 100 | } 101 | $zip->close(); 102 | } 103 | 104 | // In case of failure return empty string 105 | return 
$zip->getStatusString(); 106 | } 107 | -------------------------------------------------------------------------------- /examples/fetch_rss.php: -------------------------------------------------------------------------------- 1 | 16 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 17 | */ 18 | require_once '../src/QueryPath/QueryPath.php'; 19 | 20 | // The URL of the remote RSS feed. 21 | $remote = 'http://querypath.org/aggregator/rss/2/rss.xml'; 22 | 23 | // We will write the results into this document. 24 | $out = qp(QueryPath::HTML_STUB, 'title') 25 | ->text('RSS Titles') 26 | ->top() 27 | ->find('body') 28 | ->append('
    ') 29 | ->children('ul'); 30 | 31 | // Load the remote document and loop through all of the items. 32 | foreach (qp($remote, 'channel>item') as $item) { 33 | // Get title and link. 34 | $title = $item->find('title')->text(); 35 | $link = $item->next('link')->text(); 36 | 37 | // Do a little string building. 38 | $bullet = '
  • ' . $title . '
  • '; 39 | 40 | // Add it to the output document. 41 | $out->append($bullet); 42 | } 43 | 44 | // Write the results. 45 | $out->writeHTML(); -------------------------------------------------------------------------------- /examples/html.php: -------------------------------------------------------------------------------- 1 | 16 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 17 | */ 18 | 19 | require_once '../src/qp.php'; 20 | 21 | // Begin with an HTML stub document (XHTML, actually), and navigate to the title. 22 | qp(QueryPath::HTML_STUB, 'title') 23 | // Add some text to the title 24 | ->text('Example of QueryPath.') 25 | // Now look for the element 26 | ->top('body') 27 | // Inside the body, add a title and paragraph. 28 | ->append('

    This is a test page

    Test text

    ') 29 | // Now we select the paragraph we just created inside the body 30 | ->children('p') 31 | // Add a 'class="some-class"' attribute to the paragraph 32 | ->attr('class', 'some-class') 33 | // And add a style attribute, too, setting the background color. 34 | ->css('background-color', '#eee') 35 | // Now go back to the paragraph again 36 | ->parent() 37 | // Before the paragraph and the title, add an empty table. 38 | ->prepend('
    ') 39 | // Now let's go to the table... 40 | ->top('#my-table') 41 | // Add a couple of empty rows 42 | ->append('') 43 | // select the rows (both at once) 44 | ->children() 45 | // Add a CSS class to both rows 46 | ->addClass('table-row') 47 | // Now just get the first row (at position 0) 48 | ->eq(0) 49 | // Add a table header in the first row 50 | ->append('This is the header') 51 | // Now go to the next row 52 | ->next() 53 | // Add some data to this row 54 | ->append('This is the data') 55 | // Write it all out as HTML 56 | ->writeHTML(); 57 | ?> 58 | -------------------------------------------------------------------------------- /examples/matching_text_content.php: -------------------------------------------------------------------------------- 1 | 17 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 18 | */ 19 | 20 | /** Include QueryPath. */ 21 | require_once '../src/QueryPath/QueryPath.php'; 22 | 23 | /** 24 | * Check if the string 'Release' is in the text content of any matched nodes. 25 | * 26 | * Returns TRUE if the text is found, FALSE otherwise. Anytime a filter callback 27 | * returns FALSE, QueryPath will remove it from the matches. 28 | * 29 | * Note that $item is a DOMNode (actually, a DOMElement). So if we wanted to do QueryPath 30 | * manipulations on it, we could wrap it in a `qp()`. 31 | */ 32 | function exampleCallback($index, $item) { 33 | $text = qp($item)->text(); 34 | return strpos($text, 'Release') !== FALSE; 35 | } 36 | 37 | /* 38 | * This is the QueryPath call. 39 | * 40 | * First we fetch the remote page, parse it, and grab just the `a` tags inside of the summary. 41 | * Then we filter the results through our callback. 42 | * Finally, we fetch all of the matching text and print it. 43 | * 44 | * NOTE: If you are using PHP 5.3, you can define the callback inline instead of separating it 45 | * into a stand-alone function. 
46 | */ 47 | print htmlqp('http://php.net/', 'h1.summary a') 48 | ->filterCallback('exampleCallback') 49 | ->textImplode(PHP_EOL); 50 | ?> -------------------------------------------------------------------------------- /examples/musicbrainz.php: -------------------------------------------------------------------------------- 1 | 15 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 16 | * @see http://musicbrainz.org 17 | */ 18 | require_once '../src/QueryPath/QueryPath.php'; 19 | 20 | $artist_url = 'http://musicbrainz.org/ws/1/artist/?type=xml&name=u2'; 21 | $album_url = 'http://musicbrainz.org/ws/1/release/?type=xml&artistid='; 22 | try { 23 | $artist = qp($artist_url, 'artist:first'); 24 | if ($artist->size() > 0) { 25 | $id = $artist->attr('id'); 26 | print '

    The best match we found was for ' . $artist->children('name')->text() . PHP_EOL; 27 | print '

    Artist ID: ' . $id . PHP_EOL; 28 | print '

    Albums for this artist' . PHP_EOL; 29 | print '

    '.$album_url.'

    '; 30 | $albums = qp($album_url . urlencode($id))->writeXML(); 31 | 32 | foreach ($albums as $album) { 33 | print $album->find('title')->text() . PHP_EOL; 34 | // Fixme: Label is broken. See Drupal QueryPath module. 35 | print '(' . $album->next('label')->text() . ')' .PHP_EOL; 36 | } 37 | } 38 | } 39 | catch (Exception $e) { 40 | print $e->getMessage(); 41 | } 42 | -------------------------------------------------------------------------------- /examples/odt.php: -------------------------------------------------------------------------------- 1 | 17 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 18 | */ 19 | 20 | /** Include main QP library. */ 21 | require_once '../src/QueryPath/QueryPath.php'; 22 | 23 | // If you have the Zip lib compiled in: 24 | //$file = 'zip://openoffice.odt#content'; 25 | // Example for systems w/o zip lib: 26 | $file = 'tmp/content.xml'; 27 | $doc = qp($file); 28 | print 'Contents:' . PHP_EOL; 29 | 30 | // Show the "outline": all of the heading items: 31 | foreach ($doc->find('text|h') as $header) { 32 | $style = $header->attr('text:style-name'); 33 | $attr_parts = explode('_', $style); 34 | $level = array_pop($attr_parts); 35 | $out = str_repeat(' ', $level) . '- ' . $header->text(); 36 | print $out . PHP_EOL; 37 | } 38 | 39 | // This is a fairly sophisticated selector. It gets the first 40 | // text:list (style-name "L1") match and then gets the 41 | // text:p (style-name "P1") elements. That is the syntax for 42 | // ODT lists. Not elegant.... 43 | $selector = 'text|list[text|style-name="L1"]:first text|p[text|style-name="P1"]'; 44 | 45 | print PHP_EOL . "Bullet List" . PHP_EOL; 46 | foreach ($doc->top()->find($selector) as $bullet) { 47 | print ' * ' . $bullet->text() . PHP_EOL; 48 | } 49 | 50 | print PHP_EOL . "Ordered List" . PHP_EOL; 51 | $i = 0; 52 | foreach ($doc->top()->find('text|list[text|style-name="L2"]:first text|p[text|style-name="P2"]') as $bullet) { 53 | print ' ' . (++$i) . '. '. $bullet->text() . 
PHP_EOL; 54 | } -------------------------------------------------------------------------------- /examples/openoffice.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/technosophos/querypath/b0279de7837d199e10b779ed98e2201d8a10e61f/examples/openoffice.odt -------------------------------------------------------------------------------- /examples/out.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | Created by QueryPath. 4 | 5 | -------------------------------------------------------------------------------- /examples/parse_php.php: -------------------------------------------------------------------------------- 1 | 15 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 16 | */ 17 | ?> 18 | 19 | 20 | Parse PHP from QueryPath 21 | 22 | 23 | text(); 28 | ?> 29 | 30 | -------------------------------------------------------------------------------- /examples/querypath.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Add constructs a new element at the specified location in the document. 6 | 7 | 8 | 9 |
      10 |
    • list item 1
    • 11 |
    • list item 2
    • 12 |
    • list item 3
    • 13 |
    14 |

    a paragraph

    15 | 16 | $('li').add('

    new paragraph

    ') 17 | .css('background-color', 'red'); 18 |
    19 |
    20 | 21 | test.html 22 | 23 | 24 | 25 | 26 |

    Hello

    27 | Hello Again 28 | 29 | 30 |
    31 | 32 | test.php 33 | 34 | require_once 'QueryPath.php'; 35 | 36 | htmlqp('test.html') 37 | $qp->append('

    new paragraph

    ') 38 | ->find('p') 39 | ->css('background-color', 'red') 40 | ->top() 41 | ->writeXHTML(); 42 |
    43 |
    44 |
    45 | 46 | 47 | It's important to note that this method does not replace a class. It simply adds the class, appending it to any which may already be assigned to the elements. 48 | 49 | More than one class may be added at a time, separated by a space, to the set of matched elements. 50 | 51 | 52 | This method is often used with .removeClass() to switch elements' classes from one to another. 53 | 54 | $('p').addClass('myClass yourClass'); 55 | 56 | $('p').removeClass('myClass noClass').addClass('yourClass'); 57 | 58 | $('ul li:last').addClass(function() { 59 | return 'item-' + $(this).index(); 60 | }); 61 | 62 | Given an unordered list with five li elements, this example adds the class "item-4" to the last li. 63 | 64 | 65 | test.html 66 | 67 | 68 | 69 | 70 |

    Hello

    71 | Hello Again 72 | 73 | 74 |
    75 | 76 | test.php 77 | 78 | require_once 'QueryPath.php'; 79 | 80 | htmlqp('test.html') 81 | $qp->append('
    ') 82 | ->find('div') 83 | ->addClass('testing one two three') 84 | ->top() 85 | ->writeXHTML(); 86 |
    87 |
    88 |
    89 | 90 | 91 | The selector expression preceding the method is the container after which the content is inserted. 92 | 93 | 94 | This method is often used with .removeClass() to switch elements' classes from one to another. 95 | 96 |
    97 |

    Greetings

    98 |
    Hello
    99 |
    Goodbye
    100 |
    101 | We can create content and insert it after several elements at once: 102 | $('.inner').after('

    Test

    '); 103 | We can also select an element on the page and insert it after another: 104 | $('.container').after($('h2')); 105 | .after() will also work on disconnected DOM nodes. For example, given the following code: 106 | $('
    ').after('

    '); 107 |
    108 | 109 | 110 | test.html 111 | 112 | 113 | 114 | 115 |

    Hello

    116 | Hello Again 117 | 118 | 119 |
    120 | 121 | test.php 122 | 123 | require_once 'QueryPath.php'; 124 | 125 | htmlqp('test.html') 126 | $qp->append('

    First P

    Second P

    ') 127 | ->children('p') 128 | ->after('

    new paragraph

    ') 129 | ->top() 130 | ->writeHTML(); 131 |
    132 |
    133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 |
    141 |

    Greetings

    142 |
    Hello
    143 |
    Goodbye
    144 |
    145 |
    146 |
    147 | 148 | test.html 149 | 150 | 151 | 152 | 153 |

    Hello

    154 | Hello Again 155 | 156 | 157 |
    158 | 159 | test.php 160 | 161 | require_once 'QueryPath.php'; 162 | 163 | htmlqp('test.html') 164 | $qp->append('

    First P

    Second P

    ') 165 | ->children('p') 166 | ->after('

    new paragraph

    ') 167 | ->top() 168 | ->writeHTML(); 169 |
    170 |
    171 |
    172 | 173 | 174 | Add a yellow background to a span and to all of the children of an element. 175 | 176 | 177 | 178 | 179 | 180 | 181 | 187 | 188 | 189 |

    Hello

    190 | Hello Again 191 |
      192 |
    • I
    • 193 |
    • II 194 |
        195 |
      • A
      • 196 |
      • B 197 |
          198 |
        • 1
        • 199 |
        • 2
        • 200 |
        • 3
        • 201 |
        202 |
      • 203 |
      • C
      • 204 |
      205 |
    • 206 |
    • III
    • 207 |
    208 | 209 | 210 |
    211 |
    212 | 213 | test.html 214 | 215 | 216 | 217 | 218 |

    Hello

    219 | Hello Again 220 | 221 | 222 |
    223 | 224 | test.php 225 | 226 | require_once 'QueryPath.php'; 227 | 228 | htmlqp('test.html') 229 | ->find('span') // Point querypath to the span 230 | ->css('background', 'yellow') // Add the CSS 231 | ->top() // Point QueryPath to the top of the page 232 | ->writeXHTML(); // Write HTML, notice it does not use print 233 | 234 |
    235 |
    236 | -------------------------------------------------------------------------------- /examples/rss.php: -------------------------------------------------------------------------------- 1 | 18 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 19 | */ 20 | 21 | require_once '../src/QueryPath/QueryPath.php'; 22 | 23 | // This is the stub RSS document. 24 | $rss_stub =' 25 | 27 | 28 | 29 | 30 | 31 | en 32 | QueryPath 33 | 34 | 35 | '; 36 | 37 | // This is the stub RSS element. 38 | $rss_item_stub = ' 39 | 40 | Untitled 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | '; 49 | 50 | // Here are some dummy items. For the sake of 51 | // simplicity, we are just using a nested array. Of 52 | // course, this could be a database lookup or whatever. 53 | $items = array( 54 | array( 55 | 'title' => 'Item 1', 56 | 'link' => 'http://example.com/item1', 57 | 'description' => 'This has embedded HTML', 58 | 'comments' => 'http://example.com/item1/comments', 59 | 'category' => 'Some Term', 60 | 'pubDate' => date('r'), 61 | 'guid' => '123456-789', 62 | ), 63 | array( 64 | 'title' => 'Item 2', 65 | 'link' => 'http://example.com/item2', 66 | 'description' => 'This has embedded HTML', 67 | 'comments' => 'http://example.com/item2/comments', 68 | 'category' => 'Some Other Term', 69 | 'pubDate' => date('r'), 70 | 'guid' => '123456-790', 71 | ), 72 | ); 73 | 74 | // The main QueryPath, which holds the channel. 75 | $qp = qp($rss_stub, 'title') 76 | ->text('A QueryPath RSS Feed') 77 | ->next('link')->text('http://example.com') 78 | ->next('description')->text('QueryPath: Find your way.') 79 | ->parent(); 80 | 81 | // For each element in the array above, we create a new 82 | // QueryPath and then populate the XML fragment with data. 83 | foreach ($items as $item) { 84 | 85 | // Begin with the stub RSS item, with title currently selected. 86 | $qpi = qp($rss_item_stub, 'title') 87 | // Add a title. 88 | ->text($item['title']) 89 | // Add a link. 
Note that we are giving no args to next() for the 90 | // sake of simplicity. 91 | ->next()->text($item['link']) 92 | // Go to next element and add a description. Note that the text() 93 | // call will automatically encode HTML. < will become &lt; and so on. 94 | ->next()->text($item['description']) 95 | // Go on down the list... 96 | ->next()->text($item['comments']) 97 | ->next()->text($item['category']) 98 | ->next()->text($item['pubDate']) 99 | ->next()->text($item['guid']); 100 | 101 | // Now we append it. 102 | $qp->append($qpi->top()); 103 | } 104 | 105 | // If we were running this on a server, we would need to set the content 106 | // type: 107 | // header('Content-Type: application/rss+xml'); 108 | 109 | // Write the output as XML. 110 | $qp->writeXML(); 111 | ?> -------------------------------------------------------------------------------- /examples/simple_example.php: -------------------------------------------------------------------------------- 1 | 18 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 19 | * @see qp() 20 | * @see QueryPath::find() 21 | * @see QueryPath::writeHTML() 22 | * @see html.php 23 | * @see https://fedorahosted.org/querypath/wiki/QueryPathTutorial The Official Tutorial 24 | */ 25 | require_once '../src/QueryPath/QueryPath.php'; 26 | qp(QueryPath::HTML_STUB)->find('body')->text('Hello World')->writeHTML(); 27 | 28 | $qp = htmlqp(QueryPath::HTML_STUB, 'body'); 29 | 30 | 31 | $qp->append('

    Hello

    Goodbye

    ') 32 | ->children('p') 33 | ->after('

    new paragraph

    '); 34 | 35 | echo ($qp->find('p')->children('p')->html()) ? 'print' : 'dont print';; 36 | 37 | // ->writeHTML(); 38 | -------------------------------------------------------------------------------- /examples/sparql.php: -------------------------------------------------------------------------------- 1 | 15 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 16 | * @see http://www.w3.org/2009/sparql/wiki/Main_Page 17 | * @see http://dbpedia.org 18 | * @see dbpedia.php 19 | * @see musicbrainz.php 20 | * @see http://drupal.org/project/querypath 21 | */ 22 | 23 | require '../src/QueryPath/QueryPath.php'; 24 | 25 | // We are using the dbpedia database to execute a SPARQL query. 26 | 27 | // URL to DB Pedia's SPARQL endpoint. 28 | $url = 'http://dbpedia.org/sparql'; 29 | 30 | // The SPARQL query to run. 31 | $sparql = ' 32 | PREFIX foaf: 33 | PREFIX rdfs: 34 | SELECT ?uri ?name ?label 35 | WHERE { 36 | ?uri foaf:name ?name . 37 | ?uri rdfs:label ?label 38 | FILTER (?name = "The Beatles") 39 | FILTER (lang(?label) = "en") 40 | } 41 | '; 42 | 43 | // We first set up the parameters that will be sent. 44 | $params = array( 45 | 'query' => $sparql, 46 | 'format' => 'application/sparql-results+xml', 47 | ); 48 | 49 | // DB Pedia wants a GET query, so we create one. 50 | $data = http_build_query($params); 51 | $url .= '?' . $data; 52 | 53 | // Next, we simply retrieve, parse, and output the contents. 54 | $qp = qp($url, 'head'); 55 | 56 | // Get the headers from the resulting XML. 57 | $headers = array(); 58 | foreach ($qp->children('variable') as $col) { 59 | $headers[] = $col->attr('name'); 60 | } 61 | 62 | // Get rows of data from result. 
63 | $rows = array(); 64 | $col_count = count($headers); 65 | foreach ($qp->top()->find('results>result') as $row) { 66 | $cols = array(); 67 | $row->children(); 68 | for ($i = 0; $i < $col_count; ++$i) { 69 | $cols[$i] = $row->branch()->eq($i)->text(); 70 | } 71 | $rows[] = $cols; 72 | } 73 | 74 | // Turn data into table. 75 | $table = ''; 76 | foreach ($rows as $row) { 77 | $table .= ''; 80 | } 81 | $table .= '
    ' . implode('', $headers) . '
    '; 78 | $table .= implode('', $row); 79 | $table .= '
    '; 82 | 83 | // Add table to HTML document. 84 | qp(QueryPath::HTML_STUB, 'body')->append($table)->writeHTML(); -------------------------------------------------------------------------------- /examples/svg.php: -------------------------------------------------------------------------------- 1 | 16 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 17 | */ 18 | 19 | require_once '../src/QueryPath/QueryPath.php'; 20 | 21 | // Let's stub out a basic SVG document. 22 | $svg_stub = ' 23 | 31 | Created by QueryPath. 32 | '; 33 | 34 | qp($svg_stub) 35 | ->attr(array('width' => 800, 'height' => 600)) 36 | ->append('') 37 | ->find('#second') 38 | ->attr(array('x' => 15, 'y' => 4, 'width' => 40, 'height' => 60, 'fill' => 'red')) 39 | ->prev() 40 | ->attr(array('x' => 2, 'y' => 2, 'width' => 40, 'height' => 60, 'fill' => 'navy')) 41 | ->writeXML(); 42 | 43 | -------------------------------------------------------------------------------- /examples/techniques.php: -------------------------------------------------------------------------------- 1 | 19 | * @license LGPL (The GNU Lesser GPL) or an MIT-like license. 20 | */ 21 | 22 | require '../src/QueryPath/QueryPath.php'; 23 | 24 | $demo = ' 25 | 26 |
  • Foo
  • 27 |
  • Foo
  • 28 |
  • Foo
  • 29 |
  • Foo
  • 30 |
  • Foo
  • 31 |
    32 | '; 33 | 34 | $qp = qp($demo, 'data'); 35 | 36 | // Iterate over elements as DOMNodes: 37 | foreach ($qp->get() as $li_ele) { 38 | print $li_ele->tagName . PHP_EOL; // Prints 'li' five times. 39 | } 40 | 41 | // Iterate over elements as QueryPath objects 42 | foreach ($qp as $li_qp) { 43 | print $li_qp->tag() . PHP_EOL; // Prints 'li' five times 44 | } 45 | 46 | function callbackFunction($index, $element) { 47 | print $element->tagName . PHP_EOL; 48 | } 49 | 50 | // Iterate using a callback function 51 | $qp->each('callbackFunction'); 52 | 53 | // Iterate using a Lambda-style function 54 | $qp->eachLambda('return $item->tagName . PHP_EOL;'); 55 | 56 | // Loop through by index/count 57 | for ($i = 0; $i < $qp->size(); ++$i) { 58 | $domElement = $qp->get($i); 59 | print $domElement->tagName . PHP_EOL; 60 | } -------------------------------------------------------------------------------- /examples/test.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/technosophos/querypath/b0279de7837d199e10b779ed98e2201d8a10e61f/examples/test.docx -------------------------------------------------------------------------------- /examples/testGrid.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Example of 960 Grid Layout 6 | 7 | 8 | 9 | 10 | 11 | 12 |
    13 |
    QueryPath
    14 |
    15 |
    16 |
    Learn More
    17 |

    Visit the Wiki to learn more.

    18 |
    19 |
    20 |
    Download Now
    21 |

    Get QueryPath 1.0.

    22 |
    23 |
    24 |
    25 |
    Tutorial
    26 |

    Learn the basics of QueryPath from the tutorial.

    27 |
    28 |
    QueryPath is a library for working with XML and HTML documents.
    29 |
    30 |
    Examples
    31 |

    Along with a complete set of API documents and unit tests, QueryPath includes examples showing how QueryPath can create HTML, XML, RSS, and even SVG.

    32 |
    33 |
    34 |
    35 |
    Using the Database
    36 |

    A database wrapper lets you integrate database queries right into the DOM. View the tutorial.

    37 |
    38 |
    39 |
    Using Templates
    40 |

    The template extension allows you to use pure HTML as a template language. View the tutorial.

    41 |
    42 |
    43 |
    Copyright (c) 2009, Matt Butcher.
    44 |
    45 |
    46 | 47 | 48 | -------------------------------------------------------------------------------- /examples/xml.php: -------------------------------------------------------------------------------- 1 | 13 | * 14 | * 15 | * (A space was inserted above to prevent the documentation renderer from 16 | * misinterpreting it.) 17 | * 18 | * @author M Butcher 19 | * @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license. 20 | */ 21 | 22 | require_once '../src/QueryPath/QueryPath.php'; 23 | 24 | 25 | // Create a new XML document wrapped in a QueryPath. 26 | // By default, it will point to the root element, 27 | // 28 | $record = qp('') 29 | // Add a new last name inside of author. 30 | ->append('Dostoyevsky') 31 | // Select all of the children of . In this case, 32 | // that is 33 | ->children() 34 | // Oh, wait... we wanted last name to be inside of a 35 | // element. Use wrap to wrap the current element in something: 36 | ->wrap('') 37 | // And before last name, we want to add first name. 38 | ->before('') 39 | // Select first name 40 | ->prev() 41 | // Set the text of first name 42 | ->text('Fyodor') 43 | // And then after first name, add the patronymic 44 | ->after('Fyodorovich') 45 | // Now go back to the root element, the top of the document. 46 | ->top() 47 | // Add another tag -- origin. 48 | ->append('Russia') 49 | // turn the QueryPath contents back into a string. Since we are 50 | // at the top of the document, the whole document will be converted 51 | // to a string. 52 | ->xml(); 53 | 54 | // Print our results. 55 | print $record; -------------------------------------------------------------------------------- /package_compatible.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | QueryPath 4 | pear.querypath.org 5 | QueryPath: A library for working with XML, HTML, and web services. 
6 | 7 | 8 | QueryPath implements much of the jQuery API in PHP, and supports CSS 9 | selector queries, jQuery's traversal and manipulation APIs, and 10 | adds many other useful methods. Learn more at http://querypath.org. 11 | 12 | Matt Butcher 13 | technosophos 14 | matt@aleph-null.tv 15 | yes 16 | 17 | 18 | Emily Brand 19 | eabrand 20 | emily@example.com 21 | yes 22 | 23 | 24 | Woody Gilk 25 | shadowhand 26 | woody@wingsc.com 27 | yes 28 | 29 | 2010-10-20 30 | 31 | 32 | 2.1.0beta3 33 | 2.1.0beta3 34 | 35 | 36 | beta 37 | beta 38 | 39 | LGPL or MIT (your choice) 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 5.2.0 48 | 49 | 50 | 1.4.8 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /pear-summary.txt: -------------------------------------------------------------------------------- 1 | QueryPath: A library for working with XML, HTML, and web services. 2 | 3 | QueryPath implements much of the jQuery API in PHP, and supports CSS 4 | selector queries, jQuery's traversal and manipulation APIs, and 5 | adds many other useful methods. Learn more at http://querypath.org. -------------------------------------------------------------------------------- /phar/README.md: -------------------------------------------------------------------------------- 1 | # Phar Support 2 | 3 | PHP 5.3 and onward supports the Phar format for archiving multiple PHP applications into a single file. 4 | 5 | QueryPath, from version 2.1 onward, can be distributed as a Phar file that can be used like this: 6 | 7 | 13 | 14 | The files in this folder are those required *specifically* by the phar system. The build script only includes these in the Phar distribution. They are ignored in other distributions. 15 | 16 | ## Building a Phar Package 17 | 18 | Currently, phar packages are built by the master `build` target. They can also be built independently using the `pharBuild` target. 
    -------------------------------------------------------------------------------- /phar/basic_loader.php: -------------------------------------------------------------------------------- 1 | getMessage(); 9 | die('Cannot initialize Phar'); 10 | } 11 | __HALT_COMPILER(); 12 | ?> -------------------------------------------------------------------------------- /quickstart-guide.md: -------------------------------------------------------------------------------- 1 | # QueryPath QuickStart 2 | 3 | This short guide is intended to help you get started with QueryPath 3. 4 | 5 | ## Using QueryPath in Your Project 6 | 7 | To use QueryPath inside of your own application, you will need to make sure that PHP can find the QueryPath library. There are a few possible ways of doing this. The first is to use an autoloader. The second is to include QueryPath manually. We'll look briefly at each. 8 | 9 | ### Autoloaders and QueryPath 10 | 11 | In recent years, PHP has standardized a method of automatically importing classes by name. This is often called [PSR-0 autoloading](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md). Symfony, Composer, and many other PHP projects use PSR-0 autoloaders, and QueryPath should work with those. In addition, QueryPath has its own autoloader in `qp.php`. 12 | 13 | To use QueryPath's autoloader, all you need to do is include `qp.php`. This will detect if another autoloader is already in place, and if not, it will configure its own autoloader: 14 | 15 | ```{php} 16 | text(); 20 | 21 | print htmlqp('http://technosophos.com', 'title')->text(); 22 | ?> 23 | ``` 24 | 25 | The above illustrates the requiring of QueryPath's autoloader. Note that in that case we don't need to do anything else to get the `QueryPath` class or the `htmlqp()` functions. 26 | 27 | QueryPath also ships with [Composer](http://getcomposer.org) support. Composer provides PSR-0 autoloading.
    To use Composer's autoloader, you can do this: 28 | 29 | ```{php} 30 | text(); 35 | 36 | // THIS DOESN'T WORK! 37 | // print htmlqp('http://technosophos.com', 'title')->text(); 38 | ?> 39 | ``` 40 | 41 | Notice, though, that the `qp()` and `htmlqp()` functions *will not work* with this method. Why? Because PHP's autoloader does not know about functions. It operates on classes only. So you can use QueryPath's Object-Oriented API (`QueryPath::with()`, `QueryPath::withHTML()`, `QueryPath::withXML()`), but not the `qp()` and `htmlqp()` functions. If you want to use those, too, simply include `qp.php`: 42 | 43 | ```{php} 44 | text(); 50 | 51 | // This works because qp.php was imported 52 | print htmlqp('http://technosophos.com', 'title')->text(); 53 | ?> 54 | ``` 55 | 56 | ## A Simple Example 57 | 58 | So far, we have seen a few variations of the same program. Let's learn what it does. Here's the program: 59 | 60 | ```{php} 61 | text(); 65 | 66 | print htmlqp('http://technosophos.com', 'title')->text(); 67 | ?> 68 | ``` 69 | 70 | This does the same thing two different ways. Let's look at line 3: 71 | 72 | ```{php} 73 | text(); 75 | ?> 76 | ``` 77 | 78 | This line does three things: 79 | 80 | 1. It loads and parses the HTML document it finds at `http://technosophos.com`. QueryPath can load documents locally and remotely. It can also load strings of HTML or XML, as well as `SimpleXML` objects and `DOMDocument` objects. It should be easy to get your HTML or XML loaded into QueryPath. 81 | 2. It performs a search for the tag named `title`. QueryPath uses CSS 4 Selectors (as the current draft stands) as a query language -- just like jQuery and CSS. (If you prefer XPath, check out the `xpath()` method on QueryPath). Of course, `title` is a very basic selector.
    You can do more advanced selectors like `#bar-one table>tr:odd td>a:first-of-type()`, which looks for the element with ID `bar-one` and then fetches every odd row from its table, then from each cell in the row, it finds the first hyperlink. 82 | 3. Finally, the example calls `text()`, which will fetch the text content of the first element it's found (in this case, the `title` tag in the HTML head). If no title is found, this will return an empty string. Otherwise it will return the text of that tag. 83 | 84 | QueryPath has well over 60 methods like `text()`. Some are for navigating, like `top()`, `children()`, `next()`, and `prev()`. Some are for manipulating the parts of an HTML or XML element, like `attr()`. Others are for doing sophisticated finding and filtering operations (`find()`, `filter()`, `filterCallback()`, `map()`, and so on). And, of course, there are methods for modifying the document (`append()`, `before()`, `after()`, `attr()`, `text()`, and many more). 85 | 86 | The goal of QueryPath is to make it easy for you to process XML and HTML documents. There may be a lot of methods to learn (just like jQuery), but those methods are there to make your life simpler. 87 | 88 | ## HTML vs XML 89 | 90 | When QueryPath was first introduced, it did not distinguish between XML and HTML documents. At that time, momentum was behind XHTML, and it looked like the future was XML. But over time, it has become abundantly clear that HTML documents cannot be treated as XML during parsing and processing, or during output. 91 | 92 | So there are now separate parser functions for HTML and XML -- as well as a generic parser function that inspects the document and attempts to determine whether it is XML or HTML: 93 | 94 | * `QueryPath::withXML()`: This *only* handles XML documents. If you give it an HTML document, it will attempt to force XML parsing on that document. 95 | * `htmlqp()`, `QueryPath::withHTML()`: This will force QueryPath to use the HTML parser.
it will also make a number of adjustments to QueryPath to accommodate common HTML breakages. 96 | * `qp()`, `QueryPath::with()`: This will attempt to guess whether the document is XML or HTML. In general, it favors XML slightly. Guessing may be done by… 97 | - File extension 98 | - XML declaration 99 | - The suggestions made by any options passed into the document 100 | 101 | ###… And Character Encoding 102 | 103 | XML suggests that all documents be encoded as UTF-8. Most HTML documents are encoded using one of the ISO specifications (typically ISO-8859-1). And web servers are often misconfigured to report that documents are using one character set when they are actually using another. 104 | 105 | To work around all of these issues, QueryPath attempts to convert documents automatically. It does this using PHP's internal character detection libraries. But sometimes it guesses wrong. You can adjust this feature manually by passing in language settings in the `$options` array. See the documentation on `qp()` for details. 106 | 107 | 108 | ## Where to go from here 109 | 110 | * [QueryPath.org](http://querypath.org) has pointers to other resources. 111 | * [The API docs](http://api.querypath.org) have detailed explanations of every single part of QueryPath. 112 | 113 | -------------------------------------------------------------------------------- /src/QueryPath/CSS/DOMTraverser/Util.php: -------------------------------------------------------------------------------- 1 | hasAttribute($name)) { 21 | return FALSE; 22 | } 23 | 24 | if (is_null($value)) { 25 | return TRUE; 26 | } 27 | 28 | return self::matchesAttributeValue($value, $node->getAttribute($name), $operation); 29 | } 30 | /** 31 | * Check whether the given DOMElement has the given namespaced attribute. 
32 | */ 33 | public static function matchesAttributeNS($node, $name, $nsuri, $value = NULL, $operation = EventHandler::isExactly) { 34 | if (!$node->hasAttributeNS($nsuri, $name)) { 35 | return FALSE; 36 | } 37 | 38 | if (is_null($value)) { 39 | return TRUE; 40 | } 41 | 42 | return self::matchesAttributeValue($value, $node->getAttributeNS($nsuri, $name), $operation); 43 | } 44 | 45 | /** 46 | * Check for attr value matches based on an operation. 47 | */ 48 | public static function matchesAttributeValue($needle, $haystack, $operation) { 49 | 50 | if (strlen($haystack) < strlen($needle)) return FALSE; 51 | 52 | // According to the spec: 53 | // "The case-sensitivity of attribute names in selectors depends on the document language." 54 | // (6.3.2) 55 | // To which I say, "huh?". We assume case sensitivity. 56 | switch ($operation) { 57 | case EventHandler::isExactly: 58 | return $needle == $haystack; 59 | case EventHandler::containsWithSpace: 60 | // XXX: This needs testing! 61 | return preg_match('/\b/', $haystack) == 1; 62 | //return in_array($needle, explode(' ', $haystack)); 63 | case EventHandler::containsWithHyphen: 64 | return in_array($needle, explode('-', $haystack)); 65 | case EventHandler::containsInString: 66 | return strpos($haystack, $needle) !== FALSE; 67 | case EventHandler::beginsWith: 68 | return strpos($haystack, $needle) === 0; 69 | case EventHandler::endsWith: 70 | //return strrpos($haystack, $needle) === strlen($needle) - 1; 71 | return preg_match('/' . $needle . '$/', $haystack) == 1; 72 | } 73 | return FALSE; // Shouldn't be able to get here. 74 | } 75 | 76 | /** 77 | * Remove leading and trailing quotes. 78 | */ 79 | public static function removeQuotes($str) { 80 | $f = substr($str, 0, 1); 81 | $l = substr($str, -1); 82 | if ($f === $l && ($f == '"' || $f == "'")) { 83 | $str = substr($str, 1, -1); 84 | } 85 | return $str; 86 | } 87 | 88 | /** 89 | * Parse an an+b rule for CSS pseudo-classes. 90 | * 91 | * Invalid rules return `array(0, 0)`. 
This is per the spec. 92 | * 93 | * @param $rule 94 | * Some rule in the an+b format. 95 | * @retval array 96 | * `array($aVal, $bVal)` of the two values. 97 | */ 98 | public static function parseAnB($rule) { 99 | if ($rule == 'even') { 100 | return array(2, 0); 101 | } 102 | elseif ($rule == 'odd') { 103 | return array(2, 1); 104 | } 105 | elseif ($rule == 'n') { 106 | return array(1, 0); 107 | } 108 | elseif (is_numeric($rule)) { 109 | return array(0, (int)$rule); 110 | } 111 | 112 | $regex = '/^\s*([+\-]?[0-9]*)n\s*([+\-]?)\s*([0-9]*)\s*$/'; 113 | $matches = array(); 114 | $res = preg_match($regex, $rule, $matches); 115 | 116 | // If it doesn't parse, return 0, 0. 117 | if (!$res) { 118 | return array(0, 0); 119 | } 120 | 121 | $aVal = isset($matches[1]) ? $matches[1] : 1; 122 | if ($aVal == '-') { 123 | $aVal = -1; 124 | } 125 | else { 126 | $aVal = (int) $aVal; 127 | } 128 | 129 | $bVal = 0; 130 | if (isset($matches[3])) { 131 | $bVal = (int) $matches[3]; 132 | if (isset($matches[2]) && $matches[2] == '-') { 133 | $bVal *= -1; 134 | } 135 | } 136 | return array($aVal, $bVal); 137 | } 138 | 139 | } 140 | -------------------------------------------------------------------------------- /src/QueryPath/CSS/EventHandler.php: -------------------------------------------------------------------------------- 1 | 13 | * @license MIT 14 | */ 15 | namespace QueryPath\CSS; 16 | 17 | /** @addtogroup querypath_css CSS Parsing 18 | * QueryPath includes a CSS 3 Selector parser. 19 | * 20 | * 21 | * Typically the parser is not accessed directly. Most developers will use it indirectly from 22 | * qp(), htmlqp(), or one of the methods on a QueryPath object. 23 | * 24 | * This parser is modular and is not tied to QueryPath, so you can use it in your 25 | * own (non-QueryPath) projects if you wish. To dive in, start with EventHandler, the 26 | * event interface that works like a SAX API for CSS selectors. 
    If you want to check out 27 | * the details, check out the parser (QueryPath::CSS::Parser), scanner 28 | * (QueryPath::CSS::Scanner), and token list (QueryPath::CSS::Token). 29 | */ 30 | 31 | /** 32 | * An event handler for handling CSS 3 Selector parsing. 33 | * 34 | * This provides a standard interface for CSS 3 Selector event handling. As the 35 | * parser parses a selector, it will fire events. Implementations of EventHandler 36 | * can then handle the events. 37 | * 38 | * This library is inspired by the SAX2 API for parsing XML. Each component of a 39 | * selector fires an event, passing the necessary data on to the event handler. 40 | * 41 | * @ingroup querypath_css 42 | */ 43 | interface EventHandler { 44 | /** The is-exactly (=) operator. */ 45 | const isExactly = 0; // = 46 | /** The contains-with-space operator (~=). */ 47 | const containsWithSpace = 1; // ~= 48 | /** The contains-with-hyphen operator (|=). */ 49 | const containsWithHyphen = 2; // |= 50 | /** The contains-in-string operator (*=). */ 51 | const containsInString = 3; // *= 52 | /** The begins-with operator (^=). */ 53 | const beginsWith = 4; // ^= 54 | /** The ends-with operator ($=). */ 55 | const endsWith = 5; // $= 56 | /** The any-element operator (*). */ 57 | const anyElement = '*'; 58 | 59 | /** 60 | * This event is fired when a CSS ID is encountered. 61 | * An ID begins with an octothorp: #name. 62 | * 63 | * @param string $id 64 | * The ID passed in. 65 | */ 66 | public function elementID($id); // #name 67 | /** 68 | * Handle an element name. 69 | * Example: name 70 | * @param string $name 71 | * The name of the element. 72 | */ 73 | public function element($name); // name 74 | /** 75 | * Handle a namespaced element name. 76 | * example: namespace|name 77 | * @param string $name 78 | * The tag name.
79 | * @param string $namespace 80 | * The namespace identifier (Not the URI) 81 | */ 82 | public function elementNS($name, $namespace = NULL); 83 | /** 84 | * Handle an any-element (*) operator. 85 | * Example: * 86 | */ 87 | public function anyElement(); // * 88 | /** 89 | * Handle an any-element operator that is constrained to a namespace. 90 | * Example: ns|* 91 | * @param string $ns 92 | * The namespace identifier (not the URI). 93 | */ 94 | public function anyElementInNS($ns); // ns|* 95 | /** 96 | * Handle a CSS class selector. 97 | * Example: .name 98 | * @param string $name 99 | * The name of the class. 100 | */ 101 | public function elementClass($name); // .name 102 | /** 103 | * Handle an attribute selector. 104 | * Example: [name=attr] 105 | * Example: [name~=attr] 106 | * @param string $name 107 | * The attribute name. 108 | * @param string $value 109 | * The value of the attribute, if given. 110 | * @param int $operation 111 | * The operation to be used for matching. See {@link EventHandler} 112 | * constants for a list of supported operations. 113 | */ 114 | public function attribute($name, $value = NULL, $operation = EventHandler::isExactly); // [name=attr] 115 | /** 116 | * Handle an attribute selector bound to a specific namespace. 117 | * Example: [ns|name=attr] 118 | * Example: [ns|name~=attr] 119 | * @param string $name 120 | * The attribute name. 121 | * @param string $ns 122 | * The namespace identifier (not the URI). 123 | * @param string $value 124 | * The value of the attribute, if given. 125 | * @param int $operation 126 | * The operation to be used for matching. See {@link EventHandler} 127 | * constants for a list of supported operations. 128 | */ 129 | public function attributeNS($name, $ns, $value = NULL, $operation = EventHandler::isExactly); 130 | /** 131 | * Handle a pseudo-class. 132 | * Example: :name(value) 133 | * @param string $name 134 | * The pseudo-class name. 135 | * @param string $value 136 | * The value, if one is found. 
137 | */ 138 | public function pseudoClass($name, $value = NULL); //:name(value) 139 | /** 140 | * Handle a pseudo-element. 141 | * Example: ::name 142 | * @param string $name 143 | * The pseudo-element name. 144 | */ 145 | public function pseudoElement($name); // ::name 146 | /** 147 | * Handle a direct descendant combinator. 148 | * Example: > 149 | */ 150 | public function directDescendant(); // > 151 | /** 152 | * Handle a adjacent combinator. 153 | * Example: + 154 | */ 155 | public function adjacent(); // + 156 | /** 157 | * Handle an another-selector combinator. 158 | * Example: , 159 | */ 160 | public function anotherSelector(); // , 161 | /** 162 | * Handle a sibling combinator. 163 | * Example: ~ 164 | */ 165 | public function sibling(); // ~ combinator 166 | /** 167 | * Handle an any-descendant combinator. 168 | * Example: ' ' 169 | */ 170 | public function anyDescendant(); // ' ' (space) operator. 171 | } 172 | -------------------------------------------------------------------------------- /src/QueryPath/CSS/InputStream.php: -------------------------------------------------------------------------------- 1 | stream = str_split($string); 26 | } 27 | /** 28 | * Look ahead one character. 29 | * 30 | * @return char 31 | * Returns the next character, but does not remove it from 32 | * the stream. 33 | */ 34 | function peek() { 35 | return $this->stream[0]; 36 | } 37 | /** 38 | * Get the next unconsumed character in the stream. 39 | * This will remove that character from the front of the 40 | * stream and return it. 41 | */ 42 | function consume() { 43 | $ret = array_shift($this->stream); 44 | if (!empty($ret)) { 45 | $this->position++; 46 | } 47 | return $ret; 48 | } 49 | /** 50 | * Check if the stream is empty. 51 | * @return boolean 52 | * Returns TRUE when the stream is empty, FALSE otherwise. 
53 | */ 54 | function isEmpty() { 55 | return count($this->stream) == 0; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/QueryPath/CSS/NotImplementedException.php: -------------------------------------------------------------------------------- 1 | is = $in; 29 | } 30 | 31 | /** 32 | * Return the position of the reader in the string. 33 | */ 34 | public function position() { 35 | return $this->is->position; 36 | } 37 | 38 | /** 39 | * See the next char without removing it from the stack. 40 | * 41 | * @return char 42 | * Returns the next character on the stack. 43 | */ 44 | public function peek() { 45 | return $this->is->peek(); 46 | } 47 | 48 | /** 49 | * Get the next token in the input stream. 50 | * 51 | * This sets the current token to the value of the next token in 52 | * the stream. 53 | * 54 | * @return int 55 | * Returns an int value corresponding to one of the Token constants, 56 | * or FALSE if the end of the string is reached. (Remember to use 57 | * strong equality checking on FALSE, since 0 is a valid token id.) 58 | */ 59 | public function nextToken() { 60 | $tok = -1; 61 | ++$this->it; 62 | if ($this->is->isEmpty()) { 63 | if ($this->recurse) { 64 | throw new \QueryPath\Exception("Recursion error detected at iteration " . $this->it . '.'); 65 | exit(); 66 | } 67 | //print "{$this->it}: All done\n"; 68 | $this->recurse = TRUE; 69 | $this->token = FALSE; 70 | return FALSE; 71 | } 72 | $ch = $this->is->consume(); 73 | //print __FUNCTION__ . " Testing $ch.\n"; 74 | if (ctype_space($ch)) { 75 | $this->value = ' '; // Collapse all WS to a space. 
76 | $this->token = $tok = Token::white; 77 | //$ch = $this->is->consume(); 78 | return $tok; 79 | } 80 | 81 | if (ctype_alnum($ch) || $ch == '-' || $ch == '_') { 82 | // It's a character 83 | $this->value = $ch; //strtolower($ch); 84 | $this->token = $tok = Token::char; 85 | return $tok; 86 | } 87 | 88 | $this->value = $ch; 89 | 90 | switch($ch) { 91 | case '*': 92 | $tok = Token::star; 93 | break; 94 | case chr(ord('>')): 95 | $tok = Token::rangle; 96 | break; 97 | case '.': 98 | $tok = Token::dot; 99 | break; 100 | case '#': 101 | $tok = Token::octo; 102 | break; 103 | case '[': 104 | $tok = Token::lsquare; 105 | break; 106 | case ']': 107 | $tok = Token::rsquare; 108 | break; 109 | case ':': 110 | $tok = Token::colon; 111 | break; 112 | case '(': 113 | $tok = Token::lparen; 114 | break; 115 | case ')': 116 | $tok = Token::rparen; 117 | break; 118 | case '+': 119 | $tok = Token::plus; 120 | break; 121 | case '~': 122 | $tok = Token::tilde; 123 | break; 124 | case '=': 125 | $tok = Token::eq; 126 | break; 127 | case '|': 128 | $tok = Token::pipe; 129 | break; 130 | case ',': 131 | $tok = Token::comma; 132 | break; 133 | case chr(34): 134 | $tok = Token::quote; 135 | break; 136 | case "'": 137 | $tok = Token::squote; 138 | break; 139 | case '\\': 140 | $tok = Token::bslash; 141 | break; 142 | case '^': 143 | $tok = Token::carat; 144 | break; 145 | case '$': 146 | $tok = Token::dollar; 147 | break; 148 | case '@': 149 | $tok = Token::at; 150 | break; 151 | } 152 | 153 | 154 | // Catch all characters that are legal within strings. 155 | if ($tok == -1) { 156 | // TODO: This should be UTF-8 compatible, but PHP doesn't 157 | // have a native UTF-8 string. Should we use external 158 | // mbstring library? 159 | 160 | $ord = ord($ch); 161 | // Characters in this pool are legal for use inside of 162 | // certain strings. Extended ASCII is used here, though I 163 | // Don't know if these are really legal. 
164 | if (($ord >= 32 && $ord <= 126) || ($ord >= 128 && $ord <= 255)) { 165 | $tok = Token::stringLegal; 166 | } 167 | else { 168 | throw new ParseException('Illegal character found in stream: ' . $ord); 169 | } 170 | } 171 | 172 | $this->token = $tok; 173 | return $tok; 174 | } 175 | 176 | /** 177 | * Get a name string from the input stream. 178 | * A name string must be composed of 179 | * only characters defined in Token:char: -_a-zA-Z0-9 180 | */ 181 | public function getNameString() { 182 | $buf = ''; 183 | while ($this->token === Token::char) { 184 | $buf .= $this->value; 185 | $this->nextToken(); 186 | //print '_'; 187 | } 188 | return $buf; 189 | } 190 | 191 | /** 192 | * This gets a string with any legal 'string' characters. 193 | * See CSS Selectors specification, section 11, for the 194 | * definition of string. 195 | * 196 | * This will check for string1, string2, and the case where a 197 | * string is unquoted (Oddly absent from the "official" grammar, 198 | * though such strings are present as examples in the spec.) 199 | * 200 | * Note: 201 | * Though the grammar supplied by CSS 3 Selectors section 11 does not 202 | * address the contents of a pseudo-class value, the spec itself indicates 203 | * that a pseudo-class value is a "value between parenthesis" [6.6]. The 204 | * examples given use URLs among other things, making them closer to the 205 | * definition of 'string' than to 'name'. So we handle them here as strings. 206 | */ 207 | public function getQuotedString() { 208 | if ($this->token == Token::quote || $this->token == Token::squote || $this->token == Token::lparen) { 209 | $end = ($this->token == Token::lparen) ? Token::rparen : $this->token; 210 | $buf = ''; 211 | $escape = FALSE; 212 | 213 | $this->nextToken(); // Skip the opening quote/paren 214 | 215 | // The second conjunct is probably not necessary. 
216 | while ($this->token !== FALSE && $this->token > -1) { 217 | //print "Char: $this->value \n"; 218 | if ($this->token == Token::bslash && !$escape) { 219 | // XXX: The backslash (\) is removed here. 220 | // Turn on escaping. 221 | //$buf .= $this->value; 222 | $escape = TRUE; 223 | } 224 | elseif ($escape) { 225 | // Turn off escaping 226 | $buf .= $this->value; 227 | $escape = FALSE; 228 | } 229 | elseif ($this->token === $end) { 230 | // At end of string; skip token and break. 231 | $this->nextToken(); 232 | break; 233 | } 234 | else { 235 | // Append char. 236 | $buf .= $this->value; 237 | } 238 | $this->nextToken(); 239 | } 240 | return $buf; 241 | } 242 | } 243 | 244 | // Get the contents inside of a pseudoClass(). 245 | public function getPseudoClassString() { 246 | if ($this->token == Token::quote || $this->token == Token::squote || $this->token == Token::lparen) { 247 | $end = ($this->token == Token::lparen) ? Token::rparen : $this->token; 248 | $buf = ''; 249 | $escape = FALSE; 250 | 251 | $this->nextToken(); // Skip the opening quote/paren 252 | 253 | // The second conjunct is probably not necessary. 254 | while ($this->token !== FALSE && $this->token > -1) { 255 | //print "Char: $this->value \n"; 256 | if ($this->token == Token::bslash && !$escape) { 257 | // XXX: The backslash (\) is removed here. 258 | // Turn on escaping. 259 | //$buf .= $this->value; 260 | $escape = TRUE; 261 | } 262 | elseif ($escape) { 263 | // Turn off escaping 264 | $buf .= $this->value; 265 | $escape = FALSE; 266 | } 267 | // Allow nested pseudoclasses. 268 | elseif ($this->token == Token::lparen) { 269 | $buf .= "("; 270 | $buf .= $this->getPseudoClassString(); 271 | $buf .= ")"; 272 | } 273 | elseif ($this->token === $end) { 274 | // At end of string; skip token and break. 275 | $this->nextToken(); 276 | break; 277 | } 278 | else { 279 | // Append char. 
280 | $buf .= $this->value; 281 | } 282 | $this->nextToken(); 283 | } 284 | return $buf; 285 | } 286 | } 287 | 288 | /** 289 | * Get a string from the input stream. 290 | * This is a convenience function for getting a string of 291 | * characters that are either alphanumber or whitespace. See 292 | * the Token::white and Token::char definitions. 293 | * 294 | * @deprecated This is not used anywhere in QueryPath. 295 | *//* 296 | public function getStringPlusWhitespace() { 297 | $buf = ''; 298 | if($this->token === FALSE) {return '';} 299 | while ($this->token === Token::char || $this->token == Token::white) { 300 | $buf .= $this->value; 301 | $this->nextToken(); 302 | } 303 | return $buf; 304 | }*/ 305 | 306 | } 307 | -------------------------------------------------------------------------------- /src/QueryPath/CSS/Selector.php: -------------------------------------------------------------------------------- 1 | b>c', the iterator will produce: 18 | * - c 19 | * - b 20 | * - a 21 | * It is assumed, therefore, that any suitable querying engine will 22 | * traverse from the bottom (`c`) back up. 23 | * 24 | * @b Usage 25 | * 26 | * This class is an event handler. It can be plugged into an Parser and 27 | * receive the events the Parser generates. 28 | * 29 | * This class is also an iterator. Once the parser has completed, the 30 | * captured selectors can be iterated over. 31 | * 32 | * @code 33 | * parse(); 38 | * 39 | * foreach ($selectorList as $simpleSelector) { 40 | * // Do something with the SimpleSelector. 
    41 | * print_r($simpleSelector); 42 | * } 43 | * ?> 44 | * @endcode 45 | * 46 | * 47 | * @since QueryPath 3.0.0 48 | */ 49 | class Selector implements EventHandler, \IteratorAggregate, \Countable { 50 | protected $selectors = array(); 51 | protected $currSelector; 52 | protected $selectorGroups = array(); 53 | protected $groupIndex = 0; 54 | 55 | public function __construct() { 56 | $this->currSelector = new SimpleSelector(); 57 | 58 | $this->selectors[$this->groupIndex][] = $this->currSelector; 59 | } 60 | 61 | public function getIterator() { 62 | return new \ArrayIterator($this->selectors); 63 | } 64 | 65 | /** 66 | * Get the array of SimpleSelector objects. 67 | * 68 | * Normally, one iterates over a Selector. However, if it is 69 | * necessary to get the selector array and manipulate it, this 70 | * method can be used. 71 | */ 72 | public function toArray() { 73 | return $this->selectors; 74 | } 75 | 76 | public function count() { 77 | return count($this->selectors); 78 | } 79 | 80 | public function elementID($id) { 81 | $this->currSelector->id = $id; 82 | } 83 | public function element($name) { 84 | $this->currSelector->element = $name; 85 | } 86 | public function elementNS($name, $namespace = NULL) { 87 | $this->currSelector->ns = $namespace; 88 | $this->currSelector->element = $name; 89 | } 90 | public function anyElement() { 91 | $this->currSelector->element = '*'; 92 | } 93 | public function anyElementInNS($ns) { 94 | $this->currSelector->ns = $ns; 95 | $this->currSelector->element = '*'; 96 | } 97 | public function elementClass($name) { 98 | $this->currSelector->classes[] = $name; 99 | } 100 | public function attribute($name, $value = NULL, $operation = EventHandler::isExactly) { 101 | $this->currSelector->attributes[] = array( 102 | 'name' => $name, 103 | 'value' => $value, 104 | 'op' => $operation, 105 | ); 106 | } 107 | public function attributeNS($name, $ns, $value = NULL, $operation = EventHandler::isExactly) { 108 | $this->currSelector->attributes[]
= array( 109 | 'name' => $name, 110 | 'value' => $value, 111 | 'op' => $operation, 112 | 'ns' => $ns, 113 | ); 114 | } 115 | public function pseudoClass($name, $value = NULL) { 116 | $this->currSelector->pseudoClasses[] = array('name' => $name, 'value' => $value); 117 | } 118 | public function pseudoElement($name) { 119 | $this->currSelector->pseudoElements[] = $name; 120 | } 121 | public function combinator($combinatorName) { 122 | $this->currSelector->combinator = $combinatorName; 123 | $this->currSelector = new SimpleSelector(); 124 | array_unshift($this->selectors[$this->groupIndex], $this->currSelector); 125 | //$this->selectors[]= $this->currSelector; 126 | } 127 | public function directDescendant() { 128 | $this->combinator(SimpleSelector::directDescendant); 129 | } 130 | public function adjacent() { 131 | $this->combinator(SimpleSelector::adjacent); 132 | } 133 | public function anotherSelector() { 134 | $this->groupIndex++; 135 | $this->currSelector = new SimpleSelector(); 136 | $this->selectors[$this->groupIndex] = array($this->currSelector); 137 | } 138 | public function sibling() { 139 | $this->combinator(SimpleSelector::sibling); 140 | } 141 | public function anyDescendant() { 142 | $this->combinator(SimpleSelector::anyDescendant); 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/QueryPath/CSS/SimpleSelector.php: -------------------------------------------------------------------------------- 1 | '; 70 | case self::sibling: 71 | return '~'; 72 | case self::anotherSelector: 73 | return ', '; 74 | case self::anyDescendant: 75 | return ' '; 76 | } 77 | } 78 | 79 | public function __construct() { 80 | } 81 | 82 | public function notEmpty() { 83 | return !empty($element) 84 | && !empty($id) 85 | && !empty($classes) 86 | && !empty($combinator) 87 | && !empty($attributes) 88 | && !empty($pseudoClasses) 89 | && !empty($pseudoElements) 90 | ; 91 | } 92 | 93 | public function __tostring() { 94 | $buffer 
= array(); 95 | try { 96 | 97 | if (!empty($this->ns)) { 98 | $buffer[] = $this->ns; $buffer[] = '|'; 99 | } 100 | if (!empty($this->element)) $buffer[] = $this->element; 101 | if (!empty($this->id)) $buffer[] = '#' . $this->id; 102 | if (!empty($this->attributes)) { 103 | foreach ($this->attributes as $attr) { 104 | $buffer[] = '['; 105 | if(!empty($attr['ns'])) $buffer[] = $attr['ns'] . '|'; 106 | $buffer[] = $attr['name']; 107 | if (!empty($attr['value'])) { 108 | $buffer[] = self::attributeOperator($attr['op']); 109 | $buffer[] = $attr['value']; 110 | } 111 | $buffer[] = ']'; 112 | } 113 | } 114 | if (!empty($this->pseudoClasses)) { 115 | foreach ($this->pseudoClasses as $ps) { 116 | $buffer[] = ':' . $ps['name']; 117 | if (isset($ps['value'])) { 118 | $buffer[] = '(' . $ps['value'] . ')'; 119 | } 120 | } 121 | } 122 | foreach ($this->pseudoElements as $pe) { 123 | $buffer[] = '::' . $pe; 124 | } 125 | 126 | if (!empty($this->combinator)) { 127 | $buffer[] = self::combinatorOperator($this->combinator); 128 | } 129 | 130 | } 131 | catch (\Exception $e) { 132 | return $e->getMessage(); 133 | } 134 | 135 | return implode('', $buffer); 136 | } 137 | 138 | } 139 | -------------------------------------------------------------------------------- /src/QueryPath/CSS/Token.php: -------------------------------------------------------------------------------- 1 | 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163, 99 | 'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167, 100 | 'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171, 101 | 'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175, 'deg' => 176, 102 | 'plusmn' => 177, 'sup2' => 178, 'sup3' => 179, 'acute' => 180, 103 | 'micro' => 181, 'para' => 182, 'middot' => 183, 'cedil' => 184, 104 | 'sup1' => 185, 'ordm' => 186, 'raquo' => 187, 'frac14' => 188, 105 | 'frac12' => 189, 'frac34' => 190, 'iquest' => 191, 'Agrave' => 192, 106 | 'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195, 'Auml' => 196, 107 | 
'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199, 'Egrave' => 200, 108 | 'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203, 'Igrave' => 204, 109 | 'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207, 'ETH' => 208, 110 | 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211, 'Ocirc' => 212, 111 | 'Otilde' => 213, 'Ouml' => 214, 'times' => 215, 'Oslash' => 216, 112 | 'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219, 'Uuml' => 220, 113 | 'Yacute' => 221, 'THORN' => 222, 'szlig' => 223, 'agrave' => 224, 114 | 'aacute' => 225, 'acirc' => 226, 'atilde' => 227, 'auml' => 228, 115 | 'aring' => 229, 'aelig' => 230, 'ccedil' => 231, 'egrave' => 232, 116 | 'eacute' => 233, 'ecirc' => 234, 'euml' => 235, 'igrave' => 236, 117 | 'iacute' => 237, 'icirc' => 238, 'iuml' => 239, 'eth' => 240, 118 | 'ntilde' => 241, 'ograve' => 242, 'oacute' => 243, 'ocirc' => 244, 119 | 'otilde' => 245, 'ouml' => 246, 'divide' => 247, 'oslash' => 248, 120 | 'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251, 'uuml' => 252, 121 | 'yacute' => 253, 'thorn' => 254, 'yuml' => 255, 'quot' => 34, 122 | 'amp' => 38, 'lt' => 60, 'gt' => 62, 'apos' => 39, 'OElig' => 338, 123 | 'oelig' => 339, 'Scaron' => 352, 'scaron' => 353, 'Yuml' => 376, 124 | 'circ' => 710, 'tilde' => 732, 'ensp' => 8194, 'emsp' => 8195, 125 | 'thinsp' => 8201, 'zwnj' => 8204, 'zwj' => 8205, 'lrm' => 8206, 126 | 'rlm' => 8207, 'ndash' => 8211, 'mdash' => 8212, 'lsquo' => 8216, 127 | 'rsquo' => 8217, 'sbquo' => 8218, 'ldquo' => 8220, 'rdquo' => 8221, 128 | 'bdquo' => 8222, 'dagger' => 8224, 'Dagger' => 8225, 'permil' => 8240, 129 | 'lsaquo' => 8249, 'rsaquo' => 8250, 'euro' => 8364, 'fnof' => 402, 130 | 'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916, 131 | 'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920, 132 | 'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924, 'Nu' => 925, 133 | 'Xi' => 926, 'Omicron' => 927, 'Pi' => 928, 'Rho' => 929, 'Sigma' => 931, 134 | 'Tau' => 932, 'Upsilon' => 933, 'Phi' => 934, 'Chi' => 
935, 'Psi' => 936, 135 | 'Omega' => 937, 'alpha' => 945, 'beta' => 946, 'gamma' => 947, 136 | 'delta' => 948, 'epsilon' => 949, 'zeta' => 950, 'eta' => 951, 137 | 'theta' => 952, 'iota' => 953, 'kappa' => 954, 'lambda' => 955, 138 | 'mu' => 956, 'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960, 139 | 'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964, 140 | 'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968, 141 | 'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982, 142 | 'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243, 143 | 'oline' => 8254, 'frasl' => 8260, 'weierp' => 8472, 'image' => 8465, 144 | 'real' => 8476, 'trade' => 8482, 'alefsym' => 8501, 'larr' => 8592, 145 | 'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595, 'harr' => 8596, 146 | 'crarr' => 8629, 'lArr' => 8656, 'uArr' => 8657, 'rArr' => 8658, 147 | 'dArr' => 8659, 'hArr' => 8660, 'forall' => 8704, 'part' => 8706, 148 | 'exist' => 8707, 'empty' => 8709, 'nabla' => 8711, 'isin' => 8712, 149 | 'notin' => 8713, 'ni' => 8715, 'prod' => 8719, 'sum' => 8721, 150 | 'minus' => 8722, 'lowast' => 8727, 'radic' => 8730, 'prop' => 8733, 151 | 'infin' => 8734, 'ang' => 8736, 'and' => 8743, 'or' => 8744, 'cap' => 8745, 152 | 'cup' => 8746, 'int' => 8747, 'there4' => 8756, 'sim' => 8764, 153 | 'cong' => 8773, 'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 154 | 'le' => 8804, 'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836, 155 | 'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855, 156 | 'perp' => 8869, 'sdot' => 8901, 'lceil' => 8968, 'rceil' => 8969, 157 | 'lfloor' => 8970, 'rfloor' => 8971, 'lang' => 9001, 'rang' => 9002, 158 | 'loz' => 9674, 'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829, 159 | 'diams' => 9830 160 | ); 161 | } 162 | 163 | -------------------------------------------------------------------------------- /src/QueryPath/Exception.php: -------------------------------------------------------------------------------- 1 | 
/**
 * @license MIT
 * @see Extension
 * @see ExtensionRegistry::extend()
 */
namespace QueryPath;

/** @addtogroup querypath_extensions Extensions
 * The QueryPath extension system and bundled extensions.
 *
 * Much like jQuery, QueryPath provides a simple extension mechanism that allows
 * extensions to auto-register themselves upon being loaded. For a simple example, see
 * QPXML. For the internals, see QueryPath::Extension and QueryPath::with().
 */

/**
 * An Extension is a tool that extends the capabilities of a Query object.
 *
 * Extensions to QueryPath should implement the Extension interface. The
 * only requirement is that the extension provide a constructor that takes a
 * Query object as a parameter.
 *
 * Here is an example QueryPath extension:
 * @code
 * <?php
 * class StubExtensionOne implements \QueryPath\Extension {
 *   private $qp = NULL;
 *   public function __construct(\QueryPath\Query $qp) {
 *     $this->qp = $qp;
 *   }
 *
 *   public function stubToe() {
 *     $this->qp->find(':root')->append('<toe/>')->end();
 *     return $this->qp;
 *   }
 * }
 * ?>
 * @endcode
 * In this example, the StubExtensionOne class implements Extension.
 * The constructor stores a local copy of the Query object. This is important
 * if you are planning on fully integrating with QueryPath's Fluent Interface.
 *
 * Finally, the stubToe() function illustrates how the extension makes use of
 * QueryPath's Query object internally, and remains part of the fluent interface by returning
 * the $qp object.
 *
 * Enabling an Extension
 *
 * To enable an extension, call the QueryPath::enable() method.
 *
 * @code
 * <?php
 * QueryPath::enable('StubExtensionOne');
 * ?>
 * @endcode
 *
 * More complex management of extensions can be accomplished with the
 * QueryPath::ExtensionRegistry class.
 *
 * How is a QueryPath extension called?
 *
 * QueryPath extensions are called like regular QueryPath functions. For
 * example, the extension above can be called like this:
 *
 *   qp('some.xml')->stubToe();
 *   // or
 *   QueryPath::with('some.xml')->stubToe();
 *
 * Since it returns the Query ($qp) object, chaining is supported:
 *
 *   print qp('some.xml')->stubToe()->xml();
 *
 * When you write your own extensions, anything that does not need to return a
 * specific value should return the Query object. Between that and the
 * extension registry, this will provide the best developer experience.
 *
 * @ingroup querypath_extensions
 */
interface Extension {
  public function __construct(\QueryPath\Query $qp);
}
/**
 * XML helpers for QueryPath: schema validation, CDATA sections, comments,
 * processing instructions and element creation.
 *
 * NOTE(review): the top of this file (any namespace/use lines) was not
 * visible when this class was revised. References to the global QueryPath
 * class are therefore written fully qualified (\QueryPath), matching QPXSL.
 *
 * @author Xander Guzman
 * @license MIT
 * @see QueryPath::Extension
 * @see QueryPath::ExtensionRegistry::extend()
 * @see QPXML
 * @ingroup querypath_extensions
 */
class QPXML implements \QueryPath\Extension {

  protected $qp;

  public function __construct(\QueryPath\Query $qp) {
    $this->qp = $qp;
  }

  /**
   * Validate the current document against an XML schema.
   *
   * @param string $file
   *   The schema file, passed to DOMDocument::schemaValidate().
   * @throws \QueryPath\Exception
   *   If the document does not validate.
   */
  public function schema($file) {
    $doc = $this->qp->branch()->top()->get(0)->ownerDocument;

    if (!$doc->schemaValidate($file)) {
      throw new \QueryPath\Exception('Document did not validate against the schema.');
    }
  }

  /**
   * Get or set a CDATA section.
   *
   * If this is given text, it will create a CDATA section in each matched element,
   * setting that item's value to $text.
   *
   * If no parameter is passed in, this will return the first CDATA section that it
   * finds in the matched elements.
   *
   * @param string $text
   *   The text data to insert into the current matches. If this is NULL, then the first
   *   CDATA will be returned.
   *
   * @return mixed
   *   If $text is not NULL, this will return a {@link QueryPath}. Otherwise, it will
   *   return a string. If no CDATA is found, this will return NULL.
   * @see comment()
   * @see QueryPath::text()
   * @see QueryPath::html()
   */
  public function cdata($text = NULL) {
    if (isset($text)) {
      // Add this text as CDATA in the current elements.
      foreach ($this->qp->get() as $element) {
        $cdata = $element->ownerDocument->createCDATASection($text);
        $element->appendChild($cdata);
      }
      return $this->qp;
    }

    // Look for CDATA sections.
    foreach ($this->qp->get() as $ele) {
      foreach ($ele->childNodes as $node) {
        if ($node->nodeType == XML_CDATA_SECTION_NODE) {
          // Return first match.
          return $node->textContent;
        }
      }
    }
    // Nothing found.
    return NULL;
  }

  /**
   * Get or set a comment.
   *
   * This function is used to get or set comments in an XML or HTML document.
   * If a $text value is passed in (and is not NULL), then this will add a comment
   * (with the value $text) to every match in the set.
   *
   * If no text is passed in, this will return the first comment in the set of matches.
   * If no comments are found, NULL will be returned.
   *
   * @param string $text
   *   The text of the comment. If set, a new comment will be created in every item
   *   wrapped by the current {@link QueryPath}.
   * @return mixed
   *   If $text is set, this will return a {@link QueryPath}. If no text is set, this
   *   will search for a comment and attempt to return the string value of the first
   *   comment it finds. If no comment is found, NULL will be returned.
   * @see cdata()
   */
  public function comment($text = NULL) {
    if (isset($text)) {
      foreach ($this->qp->get() as $element) {
        $comment = $element->ownerDocument->createComment($text);
        $element->appendChild($comment);
      }
      return $this->qp;
    }
    foreach ($this->qp->get() as $ele) {
      foreach ($ele->childNodes as $node) {
        if ($node->nodeType == XML_COMMENT_NODE) {
          // Return first match.
          return $node->textContent;
        }
      }
    }
    // Nothing found.
    return NULL;
  }

  /**
   * Get or set a processing instruction.
   *
   * @param string $prefix
   *   The processing instruction target. When setting, this is the target of
   *   the new instruction. When getting, only instructions with this target
   *   are considered; if NULL, the first instruction found is used.
   * @param string $text
   *   If set, a processing instruction with this content is appended to
   *   every matched element.
   * @return mixed
   *   The wrapped query object when $text is set. Otherwise the text content
   *   of the first matching processing instruction, or NULL if none is found.
   */
  public function pi($prefix = NULL, $text = NULL) {
    if (isset($text)) {
      foreach ($this->qp->get() as $element) {
        $instruction = $element->ownerDocument->createProcessingInstruction($prefix, $text);
        $element->appendChild($instruction);
      }
      return $this->qp;
    }
    foreach ($this->qp->get() as $ele) {
      foreach ($ele->childNodes as $node) {
        if ($node->nodeType != XML_PI_NODE) {
          continue;
        }
        // Processing-instruction nodes expose their name as "target";
        // they have no tagName, so the old comparison could never match.
        if (!isset($prefix) || $node->target == $prefix) {
          return $node->textContent;
        }
      }
    }
    // Nothing found.
    return NULL;
  }

  /**
   * Serialize the whole document as XML.
   *
   * @return string
   */
  public function toXml() {
    return $this->qp->document()->saveXml();
  }

  /**
   * Create a NIL element.
   *
   * @param string $text
   *   The element name, handed to createElement().
   * @param string $value
   *   Any truthy value sets xsi:nil="true"; anything else sets "false".
   * @return object $element
   *   The object returned by createElement(), with the xsi:nil attribute set.
   */
  public function createNilElement($text, $value) {
    $value = ($value)? 'true':'false';
    $element = $this->qp->createElement($text);
    $element->attr('xsi:nil', $value);
    return $element;
  }

  /**
   * Create an element with the given namespace.
   *
   * The element is created in the owner document of the first matched
   * element; it is not inserted anywhere.
   *
   * @param string $text
   *   The (optionally prefixed) element name.
   * @param string $nsUri
   *   The namespace URI for the given element. If omitted and $text carries a
   *   prefix, the URI is looked up from the document.
   * @return \QueryPath\DOMQuery
   *   A query object wrapping the new element, or NULL if $text is NULL or
   *   there are no matched elements.
   * @throws \QueryPath\Exception
   *   If $text has a prefix for which no namespace is declared.
   */
  public function createElement($text, $nsUri = null) {
    if (!isset($text)) {
      return NULL;
    }
    foreach ($this->qp->get() as $element) {
      $doc = $element->ownerDocument;
      if ($nsUri === null && strpos($text, ':') !== false) {
        // list() avoids handing a temporary to array_shift(), which takes
        // its argument by reference.
        list($prefix) = explode(':', $text, 2);
        $nsUri = $doc->lookupNamespaceURI($prefix);

        if ($nsUri === null) {
          throw new \QueryPath\Exception("Undefined namespace for: " . $text);
        }
      }

      if ($nsUri !== null) {
        $node = $doc->createElementNS($nsUri, $text);
      }
      else {
        $node = $doc->createElement($text);
      }
      // Only the first match is used.
      return \QueryPath::with($node);
    }
    return NULL;
  }

  /**
   * Append an element.
   *
   * A new element named $text is created and appended to every matched
   * element.
   *
   * @param string $text
   * @return \QueryPath\DOMQuery
   *   The wrapped query object.
   */
  public function appendElement($text) {
    if (isset ($text)) {
      foreach ($this->qp->get() as $element) {
        $node = $this->qp->createElement($text);
        \QueryPath::with($element)->append($node);
      }
    }
    return $this->qp;
  }
}
namespace QueryPath\Extension;

/**
 * Provide tools for running XSL Transformation (XSLT) on a document.
 *
 * This extension provides the {@link QPXSL::xslt()} function, which transforms
 * a source XML document into another XML document according to the rules in
 * an XSLT document.
 *
 * This QueryPath extension can be used as follows:
 * <code>
 * <?php
 * qp('src.xml')->xslt('stylesheet.xml')->writeXML();
 * ?>
 * </code>
 * This will transform src.xml according to the XSLT rules in
 * stylesheet.xml. The results are returned as a QueryPath object, which
 * is written to XML using {@link QueryPath::writeXML()}.
 *
 *
 * @ingroup querypath_extensions
 */
class QPXSL implements \QueryPath\Extension {

  protected $src = NULL;

  public function __construct(\QueryPath\Query $qp) {
    $this->src = $qp;
  }

  /**
   * Given an XSLT stylesheet, run a transformation.
   *
   * This will attempt to read the provided stylesheet and then
   * execute it on the current source document.
   *
   * @param mixed $style
   *  This takes a QueryPath object or any of the types that the
   *  {@link qp()} function can take.
   * @return QueryPath
   *  A QueryPath object wrapping the transformed document. Note that this is a
   *  different document than the original. As such, it has no history.
   *  You cannot call {@link QueryPath::end()} to undo a transformation. (However,
   *  the original source document will remain unchanged.)
   */
  public function xslt($style) {
    // Inside this namespace a bare "QueryPath" names
    // \QueryPath\Extension\QueryPath, so the old instanceof test was never
    // true. \QueryPath\Query is the type this class's constructor accepts.
    if (!($style instanceof \QueryPath\Query)) {
      $style = \QueryPath::with($style);
    }
    $sourceDoc = $this->src->top()->get(0)->ownerDocument;
    $styleDoc = $style->get(0)->ownerDocument;
    $processor = new \XSLTProcessor();
    $processor->importStylesheet($styleDoc);
    return \QueryPath::with($processor->transformToDoc($sourceDoc));
  }
}
65 | */ 66 | public function xslt($style) { 67 | if (!($style instanceof QueryPath)) { 68 | $style = \QueryPath::with($style); 69 | } 70 | $sourceDoc = $this->src->top()->get(0)->ownerDocument; 71 | $styleDoc = $style->get(0)->ownerDocument; 72 | $processor = new \XSLTProcessor(); 73 | $processor->importStylesheet($styleDoc); 74 | return \QueryPath::with($processor->transformToDoc($sourceDoc)); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/QueryPath/ExtensionRegistry.php: -------------------------------------------------------------------------------- 1 | getMethods(); 44 | foreach ($methods as $method) { 45 | self::$extensionMethodRegistry[$method->getName()] = $classname; 46 | } 47 | } 48 | 49 | /** 50 | * Check to see if a method is known. 51 | * This checks to see if the given method name belongs to one of the 52 | * registered extensions. If it does, then this will return TRUE. 53 | * 54 | * @param string $name 55 | * The name of the method to search for. 56 | * @return boolean 57 | * TRUE if the method exists, false otherwise. 58 | */ 59 | public static function hasMethod($name) { 60 | return isset(self::$extensionMethodRegistry[$name]); 61 | } 62 | 63 | /** 64 | * Check to see if the given extension class is registered. 65 | * Given a class name for a QueryPath::Extension class, this 66 | * will check to see if that class is registered. If so, it will return 67 | * TRUE. 68 | * 69 | * @param string $name 70 | * The name of the class. 71 | * @return boolean 72 | * TRUE if the class is registered, FALSE otherwise. 73 | */ 74 | public static function hasExtension($name) { 75 | return in_array($name, self::$extensionRegistry); 76 | } 77 | 78 | /** 79 | * Get the class that a given method belongs to. 80 | * Given a method name, this will check all registered extension classes 81 | * to see if any of them has the named method. If so, this will return 82 | * the classname. 
83 | * 84 | * Note that if two extensions are registered that contain the same 85 | * method name, the last one registred will be the only one recognized. 86 | * 87 | * @param string $name 88 | * The name of the method. 89 | * @return string 90 | * The name of the class. 91 | */ 92 | public static function getMethodClass($name) { 93 | return self::$extensionMethodRegistry[$name]; 94 | } 95 | 96 | /** 97 | * Get extensions for the given Query object. 98 | * 99 | * Given a Query object, this will return 100 | * an associative array of extension names to (new) instances. 101 | * Generally, this is intended to be used internally. 102 | * 103 | * @param Query $qp 104 | * The Query into which the extensions should be registered. 105 | * @return array 106 | * An associative array of classnames to instances. 107 | */ 108 | public static function getExtensions(Query $qp) { 109 | $extInstances = array(); 110 | foreach (self::$extensionRegistry as $ext) { 111 | $extInstances[$ext] = new $ext($qp); 112 | } 113 | return $extInstances; 114 | } 115 | 116 | public static function extensionNames() { 117 | return self::$extensionRegistry; 118 | } 119 | 120 | /** 121 | * Enable or disable automatic extension loading. 122 | * 123 | * If extension autoloading is disabled, then QueryPath will not 124 | * automatically load all registred extensions when a new Query 125 | * object is created using qp(). 126 | */ 127 | public static function autoloadExtensions($boolean = TRUE) { 128 | self::$useRegistry = $boolean; 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/QueryPath/IOException.php: -------------------------------------------------------------------------------- 1 | Details 18 | * This class defines no options of its own. Instead, it provides a 19 | * central tool for developers to override options set by QueryPath. 
/**
 * Process-wide default options for QueryPath.
 *
 * This class defines no options of its own. Instead, it provides a
 * central tool for developers to override options set by QueryPath.
 * When a QueryPath object is created, it will evaluate options in the
 * following order:
 *
 * - Options passed into qp() have highest priority.
 * - Options in QueryPath::Options (this class) have the next highest priority.
 * - If the option is not specified elsewhere, QueryPath will use its own defaults.
 *
 * @see qp()
 * @see QueryPath::Options::set()
 * @ingroup querypath_util
 */
class Options {
  /**
   * The static options array.
   *
   * Modify it through {@link set()}, {@link get()} and {@link merge()}.
   */
  public static $options = array();

  /**
   * Replace the default options.
   *
   * Whatever was stored before is discarded.
   *
   * @param array $array
   *  An associative array of options.
   */
  public static function set($array) {
    self::$options = $array;
  }

  /**
   * Get the default options.
   *
   * @return array
   *  Only the options that were explicitly stored here. Defaults that
   *  {@link QueryPath} defines itself are not part of this array.
   */
  public static function get() {
    return self::$options;
  }

  /**
   * Merge the provided array into the existing options.
   *
   * On duplicate keys, the value in $array wins.
   *
   * @param array $array
   *  Associative array of options to merge into the existing options.
   */
  public static function merge($array) {
    // Array union keeps the left operand's value for duplicate keys.
    $merged = $array + self::$options;
    self::$options = $merged;
  }

  /**
   * Returns true if the specified key is already overridden in this object.
   *
   * A key stored with a NULL value still counts as present.
   *
   * @param string $key
   *  The key to search for.
   * @return boolean
   */
  public static function has($key) {
    $stored = self::$options;
    return array_key_exists($key, $stored);
  }
}
80 | */ 81 | static function has($key) { 82 | return array_key_exists($key, self::$options); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/QueryPath/ParseException.php: -------------------------------------------------------------------------------- 1 | level == LIBXML_ERR_WARNING ? self::WARN_MSG_FORMAT : self::ERR_MSG_FORMAT; 26 | $msgs[] = sprintf($format, $err->file, $err->line, $err->column, $err->message, $err->code); 27 | } 28 | $msg .= implode("\n", $msgs); 29 | 30 | if (isset($file)) { 31 | $msg .= ' (' . $file; 32 | if (isset($line)) $msg .= ': ' . $line; 33 | $msg .= ')'; 34 | } 35 | 36 | parent::__construct($msg, $code); 37 | } 38 | 39 | public static function initializeFromError($code, $str, $file, $line, $cxt) { 40 | //printf("\n\nCODE: %s %s\n\n", $code, $str); 41 | $class = __CLASS__; 42 | throw new $class($str, $code, $file, $line); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/QueryPath/Query.php: -------------------------------------------------------------------------------- 1 | qp)) { 24 | $this->qp = \QueryPath::with(parent::current(), NULL, $this->options); 25 | } 26 | else { 27 | $splos = new \SplObjectStorage(); 28 | $splos->attach(parent::current()); 29 | $this->qp->setMatches($splos); 30 | } 31 | return $this->qp; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/qp.php: -------------------------------------------------------------------------------- 1 | find('foo')->count(); 16 | * ?> 17 | * @endcode 18 | * 19 | * If no autoloader is currently operating, this will use 20 | * QueryPath's default autoloader **unless** 21 | * QP_NO_AUTOLOADER is defined, in which case all of the 22 | * files will be statically required in. 23 | */ 24 | 25 | // This is sort of a last ditch attempt to load QueryPath if no 26 | // autoloader is used. 
// This is sort of a last ditch attempt to load QueryPath if no
// autoloader is used.
//
// class_exists() is called with its default arguments, so an autoloader that
// is already registered gets the first chance to provide \QueryPath.
if (!class_exists('\QueryPath')) {

  // If classloaders are explicitly disabled, load everything.
  if (defined('QP_NO_AUTOLOADER')) {
    // This is all (and only) the required classes for QueryPath.
    // Extensions are not loaded automatically.
    // NOTE(review): the order looks dependency-driven (exceptions and
    // interfaces before the classes that use them); keep it when editing.
    require __DIR__ . '/QueryPath/Exception.php';
    require __DIR__ . '/QueryPath/ParseException.php';
    require __DIR__ . '/QueryPath/IOException.php';
    require __DIR__ . '/QueryPath/CSS/ParseException.php';
    require __DIR__ . '/QueryPath/CSS/NotImplementedException.php';
    require __DIR__ . '/QueryPath/CSS/EventHandler.php';
    require __DIR__ . '/QueryPath/CSS/SimpleSelector.php';
    require __DIR__ . '/QueryPath/CSS/Selector.php';
    require __DIR__ . '/QueryPath/CSS/Traverser.php';
    require __DIR__ . '/QueryPath/CSS/DOMTraverser/PseudoClass.php';
    // require __DIR__ . '/QueryPath/CSS/DOMTraverser/PseudoElement.php';
    require __DIR__ . '/QueryPath/CSS/DOMTraverser/Util.php';
    require __DIR__ . '/QueryPath/CSS/DOMTraverser.php';
    require __DIR__ . '/QueryPath/CSS/Token.php';
    require __DIR__ . '/QueryPath/CSS/InputStream.php';
    require __DIR__ . '/QueryPath/CSS/Scanner.php';
    require __DIR__ . '/QueryPath/CSS/Parser.php';
    require __DIR__ . '/QueryPath/CSS/QueryPathEventHandler.php';
    require __DIR__ . '/QueryPath/Query.php';
    require __DIR__ . '/QueryPath/Entities.php';
    require __DIR__ . '/QueryPath/Extension.php';
    require __DIR__ . '/QueryPath/ExtensionRegistry.php';
    require __DIR__ . '/QueryPath/Options.php';
    require __DIR__ . '/QueryPath/QueryPathIterator.php';
    require __DIR__ . '/QueryPath/DOMQuery.php';
    require __DIR__ . '/QueryPath.php';
  }
  else {
    // Minimal autoloader: maps a class under the QueryPath namespace (or the
    // QueryPath class itself) to a file below this directory.
    spl_autoload_register(function ($klass) {
      $parts = explode('\\', $klass);

      // Issue #164
      // A leading backslash produces an empty first segment; drop it.
      if ($parts[0] == '') {
        array_shift($parts);
      }

      // Only names whose first segment is "QueryPath" are handled here.
      if ($parts[0] == 'QueryPath') {
        $path = __DIR__ . '/' . implode('/', $parts) . '.php';
        // Stay silent when the file is missing so that other autoloaders
        // can still try.
        if (file_exists($path)) {
          require $path;
        }
      }
    });
  }
}

// Define the qp() and htmlqp() helper functions, unless qp() already exists.
if (!function_exists('qp')) {
  require __DIR__ . '/qp_functions.php';
}
      13 |
    • one
    • 14 |
    • two
    • 15 |
    • three
    • 16 |
    • four
    • 17 |
    • five
    • 18 |
    • six
    • 19 |
    20 |
    21 | 22 | bottom 23 | 24 | 25 | 26 | 27 |

    this is the way the world ends. Not with a bang, but a whimper.

    28 | ==FOO== 29 | target 30 | 31 | text 32 | More text 33 | 34 | 35 |
    36 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/CSS/SelectorTest.php: -------------------------------------------------------------------------------- 1 | parse(); 15 | return $handler; 16 | } 17 | 18 | public function testElement() { 19 | $selector = $this->parse('test')->toArray(); 20 | 21 | $this->assertEquals(1, count($selector)); 22 | $this->assertEquals('test', $selector[0]['0']->element); 23 | } 24 | 25 | public function testElementNS() { 26 | $selector = $this->parse('foo|test')->toArray(); 27 | 28 | $this->assertEquals(1, count($selector)); 29 | $this->assertEquals('test', $selector[0]['0']->element); 30 | $this->assertEquals('foo', $selector[0]['0']->ns); 31 | } 32 | 33 | public function testId() { 34 | $selector = $this->parse('#test')->toArray(); 35 | 36 | $this->assertEquals(1, count($selector)); 37 | $this->assertEquals('test', $selector[0][0]->id); 38 | } 39 | 40 | public function testClasses() { 41 | $selector = $this->parse('.test')->toArray(); 42 | 43 | $this->assertEquals(1, count($selector)); 44 | $this->assertEquals('test', $selector[0][0]->classes[0]); 45 | 46 | $selector = $this->parse('.test.foo.bar')->toArray(); 47 | $this->assertEquals('test', $selector[0][0]->classes[0]); 48 | $this->assertEquals('foo', $selector[0][0]->classes[1]); 49 | $this->assertEquals('bar', $selector[0][0]->classes[2]); 50 | 51 | } 52 | 53 | public function testAttributes() { 54 | $selector = $this->parse('foo[bar=baz]')->toArray(); 55 | $this->assertEquals(1, count($selector)); 56 | $attrs = $selector[0][0]->attributes; 57 | 58 | $this->assertEquals(1, count($attrs)); 59 | 60 | $attr = $attrs[0]; 61 | $this->assertEquals('bar', $attr['name']); 62 | $this->assertEquals(EventHandler::isExactly, $attr['op']); 63 | $this->assertEquals('baz', $attr['value']); 64 | 65 | $selector = $this->parse('foo[bar=baz][size=one]')->toArray(); 66 | $attrs = $selector[0][0]->attributes; 67 | 68 | 
$this->assertEquals('one', $attrs[1]['value']); 69 | } 70 | 71 | public function testAttributesNS() { 72 | $selector = $this->parse('[myns|foo=bar]')->toArray(); 73 | 74 | $attr = $selector[0][0]->attributes[0]; 75 | 76 | $this->assertEquals('myns', $attr['ns']); 77 | $this->assertEquals('foo', $attr['name']); 78 | } 79 | 80 | public function testPseudoClasses() { 81 | $selector = $this->parse('foo:first')->toArray(); 82 | $pseudo = $selector[0][0]->pseudoClasses; 83 | 84 | $this->assertEquals(1, count($pseudo)); 85 | 86 | $this->assertEquals('first', $pseudo[0]['name']); 87 | } 88 | 89 | public function testPseudoElements() { 90 | $selector = $this->parse('foo::bar')->toArray(); 91 | $pseudo = $selector[0][0]->pseudoElements; 92 | 93 | $this->assertEquals(1, count($pseudo)); 94 | 95 | $this->assertEquals('bar', $pseudo[0]); 96 | } 97 | 98 | public function testCombinators() { 99 | // This implies *>foo 100 | $selector = $this->parse('>foo')->toArray(); 101 | 102 | $this->assertEquals(SimpleSelector::directDescendant, $selector[0][1]->combinator); 103 | 104 | // This will be a selector with three simples: 105 | // 'bar' 106 | // 'foo ' 107 | // '*>' 108 | $selector = $this->parse('>foo bar')->toArray(); 109 | $this->assertNull($selector[0][0]->combinator); 110 | $this->assertEquals(SimpleSelector::anyDescendant, $selector[0][1]->combinator); 111 | $this->assertEquals(SimpleSelector::directDescendant, $selector[0][2]->combinator); 112 | } 113 | 114 | public function testIterator() { 115 | $selector = $this->parse('foo::bar'); 116 | 117 | $iterator = $selector->getIterator(); 118 | $this->assertInstanceOf('\Iterator', $iterator); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/CSS/TokenTest.php: -------------------------------------------------------------------------------- 1 | assertEquals('character', (Token::name(0))); 19 | $this->assertEquals('a legal non-alphanumeric character', 
(Token::name(99))); 20 | $this->assertEquals('end of file', (Token::name(FALSE))); 21 | $this->assertEquals(0, strpos(Token::name(22),'illegal character')); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/CSS/UtilTest.php: -------------------------------------------------------------------------------- 1 | assertEquals('foo', Util::removeQuotes('"foo"')); 18 | $this->assertEquals('foo', Util::removeQuotes("'foo'")); 19 | $this->assertEquals('"foo\'', Util::removeQuotes("\"foo'")); 20 | $this->assertEquals('f"o"o', Util::removeQuotes('f"o"o')); 21 | 22 | } 23 | public function testParseAnB() { 24 | // even 25 | $this->assertEquals(array(2, 0), Util::parseAnB('even')); 26 | // odd 27 | $this->assertEquals(array(2, 1), Util::parseAnB('odd')); 28 | // 5 29 | $this->assertEquals(array(0, 5), Util::parseAnB('5')); 30 | // +5 31 | $this->assertEquals(array(0, 5), Util::parseAnB('+5')); 32 | // n 33 | $this->assertEquals(array(1, 0), Util::parseAnB('n')); 34 | // 2n 35 | $this->assertEquals(array(2, 0), Util::parseAnB('2n')); 36 | // -234n 37 | $this->assertEquals(array(-234, 0), Util::parseAnB('-234n')); 38 | // -2n+1 39 | $this->assertEquals(array(-2, 1), Util::parseAnB('-2n+1')); 40 | // -2n + 1 41 | $this->assertEquals(array(-2, 1), Util::parseAnB(' -2n + 1 ')); 42 | // +2n-1 43 | $this->assertEquals(array(2, -1), Util::parseAnB('2n-1')); 44 | $this->assertEquals(array(2, -1), Util::parseAnB('2n - 1')); 45 | // -n + 3 46 | $this->assertEquals(array(-1, 3), Util::parseAnB('-n+3')); 47 | 48 | // Test invalid values 49 | $this->assertEquals(array(0, 0), Util::parseAnB('obviously + invalid')); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/EntitiesTest.php: -------------------------------------------------------------------------------- 1 | 5 | * @license The GNU Lesser GPL (LGPL) or an MIT-like license. 
6 | */ 7 | namespace QueryPath\Tests; 8 | require_once __DIR__ . '/TestCase.php'; 9 | 10 | /** 11 | * @ingroup querypath_tests 12 | */ 13 | class EntitiesTest extends TestCase { 14 | public function testReplaceEntity() { 15 | $entity = 'amp'; 16 | $this->assertEquals('38', \QueryPath\Entities::replaceEntity($entity)); 17 | 18 | $entity = 'lceil'; 19 | $this->assertEquals('8968', \QueryPath\Entities::replaceEntity($entity)); 20 | } 21 | 22 | public function testReplaceAllEntities() { 23 | $test = '&©&& nothing.'; 24 | $expect = '&©&& nothing.'; 25 | $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); 26 | 27 | $test = '&&& '; 28 | $expect = '&&& '; 29 | $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); 30 | 31 | $test = "é\n"; 32 | $expect = "é\n"; 33 | $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); 34 | } 35 | 36 | public function testReplaceHexEntities() { 37 | $test = '©'; 38 | $expect = '©'; 39 | $this->assertEquals($expect, \QueryPath\Entities::replaceAllEntities($test)); 40 | } 41 | 42 | public function testQPEntityReplacement() { 43 | $test = '&©&& nothing.'; 44 | /*$expect = '&©&& nothing.';*/ 45 | // We get this because the DOM serializer re-converts entities. 46 | $expect = ' 47 | &©&& nothing.'; 48 | 49 | $qp = qp($test, NULL, array('replace_entities' => TRUE)); 50 | // Interestingly, the XML serializer converts decimal to hex and ampersands 51 | // to &. 52 | $this->assertEquals($expect, trim($qp->xml())); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/ExtensionTest.php: -------------------------------------------------------------------------------- 1 | 5 | * @license The GNU Lesser GPL (LGPL) or an MIT-like license. 6 | */ 7 | namespace QueryPath\Tests; 8 | //require_once 'PHPUnit/Autoload.php'; 9 | require_once __DIR__ . '/TestCase.php'; 10 | require_once __DIR__ . 
'/../../../src/QueryPath/Extension.php'; 11 | //require_once __DIR__ . '/../../../src/QueryPath.php'; 12 | //require_once 'QueryPathTest.php'; 13 | 14 | use \QueryPath\Extension; 15 | use \QueryPath\ExtensionRegistry; 16 | 17 | /** 18 | * 19 | */ 20 | //define('self::DATA_FILE', 'test/data.xml'); 21 | 22 | /** 23 | * Run all of the usual tests, plus some extras, with some extensions loaded. 24 | * @ingroup querypath_tests 25 | * @group extension 26 | */ 27 | class QueryPathExtensionTest extends TestCase { 28 | 29 | public static function setUpBeforeClass() { 30 | ExtensionRegistry::extend('\QueryPath\Tests\StubExtensionOne'); 31 | ExtensionRegistry::extend('\QueryPath\Tests\StubExtensionTwo'); 32 | } 33 | 34 | public function testExtensions() { 35 | $this->assertNotNull(qp()); 36 | } 37 | 38 | public function testHasExtension() { 39 | $this->assertTrue(ExtensionRegistry::hasExtension('\QueryPath\Tests\StubExtensionOne')); 40 | } 41 | 42 | public function testStubToe() { 43 | $this->assertEquals(1, qp(self::DATA_FILE, 'unary')->stubToe()->top(':root > toe')->size()); 44 | } 45 | 46 | public function testStuble() { 47 | $this->assertEquals('arg1arg2', qp(self::DATA_FILE)->stuble('arg1', 'arg2')); 48 | } 49 | 50 | /** 51 | * @expectedException \QueryPath\Exception 52 | */ 53 | public function testNoRegistry() { 54 | ExtensionRegistry::$useRegistry = FALSE; 55 | try { 56 | qp(self::DATA_FILE)->stuble('arg1', 'arg2'); 57 | } 58 | catch (\QueryPath\Exception $e) { 59 | ExtensionRegistry::$useRegistry = TRUE; 60 | throw $e; 61 | } 62 | 63 | } 64 | 65 | public function testExtend() { 66 | $this->assertFalse(ExtensionRegistry::hasExtension('\QueryPath\Tests\StubExtensionThree')); 67 | ExtensionRegistry::extend('\QueryPath\Tests\StubExtensionThree'); 68 | $this->assertTrue(ExtensionRegistry::hasExtension('\QueryPath\Tests\StubExtensionThree')); 69 | } 70 | 71 | public function tearDown() { 72 | ExtensionRegistry::$useRegistry = TRUE; 73 | } 74 | 75 | /** 76 | * 
@expectedException \QueryPath\Exception 77 | */ 78 | public function testAutoloadExtensions() { 79 | // FIXME: This isn't really much of a test. 80 | ExtensionRegistry::autoloadExtensions(FALSE); 81 | try { 82 | qp()->stubToe(); 83 | } 84 | catch (\Exception $e) { // Global \Exception: a bare "Exception" would resolve to \QueryPath\Tests\Exception and never match, leaving autoloading disabled. 85 | ExtensionRegistry::autoloadExtensions(TRUE); 86 | throw $e; 87 | } 88 | } 89 | 90 | /** 91 | * @expectedException \QueryPath\Exception 92 | */ 93 | public function testCallFailure() { 94 | qp()->foo(); 95 | } 96 | 97 | // This does not (and will not) throw an exception. 98 | // /** 99 | // * @expectedException QueryPathException 100 | // */ 101 | // public function testExtendNoSuchClass() { 102 | // ExtensionRegistry::extend('StubExtensionFour'); 103 | // } 104 | 105 | } 106 | // Create a stub extension: 107 | /** 108 | * Create a stub extension 109 | * 110 | * @ingroup querypath_tests 111 | */ 112 | class StubExtensionOne implements Extension { 113 | private $qp = NULL; 114 | public function __construct(\QueryPath\Query $qp) { 115 | $this->qp = $qp; 116 | } 117 | 118 | public function stubToe() { 119 | $this->qp->top()->append('')->end(); 120 | return $this->qp; 121 | } 122 | } 123 | /** 124 | * Create a stub extension 125 | * 126 | * @ingroup querypath_tests 127 | */ 128 | class StubExtensionTwo implements Extension { 129 | private $qp = NULL; 130 | public function __construct(\QueryPath\Query $qp) { 131 | $this->qp = $qp; 132 | } 133 | public function stuble($arg1, $arg2) { 134 | return $arg1 . $arg2; 135 | } 136 | } 137 | /** 138 | * Create a stub extension 139 | * 140 | * @ingroup querypath_tests 141 | */ 142 | class StubExtensionThree implements Extension { 143 | private $qp = NULL; 144 | public function __construct(\QueryPath\Query $qp) { 145 | $this->qp = $qp; 146 | } 147 | public function stuble($arg1, $arg2) { 148 | return $arg1 . 
$arg2; 149 | } 150 | } 151 | 152 | //ExtensionRegistry::extend('StubExtensionOne'); 153 | //ExtensionRegistry::extend('StubExtensionTwo'); 154 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/Extensions/QPXMLTest.php: -------------------------------------------------------------------------------- 1 | 5 | * @license The GNU Lesser GPL (LGPL) or an MIT-like license. 6 | */ 7 | namespace QueryPath\Tests; 8 | 9 | //require_once 'PHPUnit/Autoload.php'; 10 | require_once __DIR__ . '/../TestCase.php'; 11 | require_once 'src/QueryPath/Extension/QPXML.php'; 12 | /** 13 | * @ingroup querypath_tests 14 | * @group extension 15 | */ 16 | class QPXMLTests extends TestCase { 17 | 18 | protected $file = './test/advanced.xml'; 19 | public static function setUpBeforeClass() { 20 | \QueryPath::enable('\QueryPath\Extension\QPXML'); 21 | } 22 | 23 | public function testCDATA() { 24 | $this->assertEquals('This is a CDATA section.', qp($this->file, 'first')->cdata()); 25 | 26 | $msg = 'Another CDATA Section'; 27 | $this->assertEquals($msg, qp($this->file, 'second')->cdata($msg)->top()->find('second')->cdata()); 28 | } 29 | 30 | public function testComment(){ 31 | $this->assertEquals('This is a comment.', trim(qp($this->file, 'root')->comment())); 32 | $msg = "Message"; 33 | $this->assertEquals($msg, qp($this->file, 'second')->comment($msg)->top()->find('second')->comment()); 34 | } 35 | 36 | public function testProcessingInstruction() { 37 | $this->assertEquals('This is a processing instruction.', trim(qp($this->file, 'third')->pi())); 38 | $msg = "Message"; 39 | $this->assertEquals($msg, qp($this->file, 'second')->pi('qp', $msg)->top()->find('second')->pi()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/Extensions/QPXSLTest.php: -------------------------------------------------------------------------------- 1 | 5 | * @license The GNU Lesser GPL (LGPL) or 
an MIT-like license. 6 | */ 7 | 8 | namespace QueryPath\Tests; 9 | 10 | //require_once 'PHPUnit/Autoload.php'; 11 | require_once 'src/QueryPath/Extension/QPXSL.php'; 12 | require_once __DIR__ . '/../TestCase.php'; 13 | /** 14 | * @ingroup querypath_tests 15 | * @group extension 16 | */ 17 | class QPXSLTests extends TestCase { 18 | 19 | protected $file = './test/advanced.xml'; 20 | 21 | public static function setUpBeforeClass() { 22 | \QueryPath::enable('\QueryPath\Extension\QPXSL'); 23 | } 24 | public function testXSLT() { 25 | // XML and XSLT taken from http://us.php.net/manual/en/xsl.examples-collection.php 26 | // and then modified to be *actually well-formed* XML. 27 | $orig = ' 28 | 29 | Fight for your mind 30 | Ben Harper 31 | 1995 32 | 33 | 34 | Electric Ladyland 35 | Jimi Hendrix 36 | 1997 37 | 38 | '; 39 | 40 | $template = ' 41 | 42 | 43 | 44 |
    45 | Hey! Welcome to \'s sweet CD collection! 46 | 47 |
    48 |
    49 | 50 |

    51 |

    by -

    52 |
    53 |
    54 |
    55 | '; 56 | 57 | $qp = qp($orig)->xslt($template); 58 | $this->assertEquals(2, $qp->top('h1')->size(), 'Make sure that data was formatted'); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/OptionsTest.php: -------------------------------------------------------------------------------- 1 | 5 | * @license The GNU Lesser GPL (LGPL) or an MIT-like license. 6 | */ 7 | namespace QueryPath\Tests; 8 | require_once __DIR__ . '/TestCase.php'; 9 | use \QueryPath\Options; 10 | 11 | /** 12 | * @ingroup querypath_tests 13 | */ 14 | class OptionsTest extends TestCase { 15 | 16 | public function testOptions() { 17 | $expect = array('test1' => 'val1', 'test2' => 'val2'); 18 | $options = array('test1' => 'val1', 'test2' => 'val2'); 19 | 20 | Options::set($options); 21 | 22 | $results = Options::get(); 23 | $this->assertEquals($expect, $results); 24 | 25 | $this->assertEquals('val1', $results['test1']); 26 | } 27 | 28 | public function testQPOverrideOrder() { 29 | $expect = array('test1' => 'val3', 'test2' => 'val2'); 30 | $options = array('test1' => 'val1', 'test2' => 'val2'); 31 | 32 | Options::set($options); 33 | $qpOpts = qp(NULL, NULL, array('test1'=>'val3', 'replace_entities' => TRUE))->getOptions(); 34 | 35 | $this->assertEquals($expect['test1'], $qpOpts['test1']); 36 | $this->assertEquals(TRUE, $qpOpts['replace_entities']); 37 | $this->assertNull($qpOpts['parser_flags']); 38 | $this->assertEquals($expect['test2'], $qpOpts['test2']); 39 | } 40 | 41 | public function testQPHas() { 42 | $options = array('test1' => 'val1', 'test2' => 'val2'); 43 | 44 | Options::set($options); 45 | $this->assertTrue(Options::has('test1')); 46 | $this->assertFalse(Options::has('test3')); 47 | } 48 | public function testQPMerge() { 49 | $options = array('test1' => 'val1', 'test2' => 'val2'); 50 | $options2 = array('test1' => 'val3', 'test4' => 'val4'); 51 | 52 | Options::set($options); 53 | Options::merge($options2); 54 
| 55 | $results = Options::get(); 56 | $this->assertTrue(Options::has('test4')); 57 | $this->assertEquals('val3', $results['test1']); 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/QueryPathTest.php: -------------------------------------------------------------------------------- 1 | assertInstanceOf('\QueryPath\DOMQuery', $qp); 15 | 16 | } 17 | 18 | public function testWithHTML() { 19 | $qp = \QueryPath::with(\QueryPath::HTML_STUB); 20 | 21 | $this->assertInstanceOf('\QueryPath\DOMQuery', $qp); 22 | } 23 | public function testWithHTML5() { 24 | $qp = \QueryPath::withHTML5(\QueryPath::HTML5_STUB); 25 | 26 | $this->assertInstanceOf('\QueryPath\DOMQuery', $qp); 27 | } 28 | 29 | public function testWithXML() { 30 | $qp = \QueryPath::with(\QueryPath::XHTML_STUB); 31 | 32 | $this->assertInstanceOf('\QueryPath\DOMQuery', $qp); 33 | } 34 | 35 | public function testEnable() { 36 | \QueryPath::enable('\QueryPath\Tests\DummyExtension'); 37 | 38 | $qp = \QueryPath::with(\QueryPath::XHTML_STUB); 39 | 40 | $this->assertTrue($qp->grrrrrrr()); 41 | 42 | } 43 | 44 | } 45 | 46 | class DummyExtension implements \QueryPath\Extension { 47 | 48 | public function __construct(\QueryPath\Query $qp) { 49 | $this->qp = $qp; 50 | } 51 | 52 | public function grrrrrrr() { 53 | return TRUE; 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /test/Tests/QueryPath/TestCase.php: -------------------------------------------------------------------------------- 1 | 7 | * @license The GNU Lesser GPL (LGPL) or an MIT-like license. 8 | */ 9 | 10 | namespace QueryPath\Tests; 11 | 12 | /** @addtogroup querypath_tests Tests 13 | * Unit tests and regression tests for QueryPath. 14 | */ 15 | 16 | /** */ 17 | //require_once 'PHPUnit/Autoload.php'; 18 | require_once __DIR__ . '/TestCase.php'; 19 | 20 | /** 21 | * Test the XMLish functions of QueryPath. 
22 | * 23 | * This uses a testing harness, XMLishMock, to test 24 | * the protected isXMLish() method of \QueryPath\DOMQuery. 25 | * 26 | * @ingroup querypath_tests 27 | */ 28 | class XMLishTest extends TestCase { 29 | public function testXMLishMock() { 30 | $tests = array( 31 | 'this/is/a/path' => FALSE, 32 | "this is just some plain\ntext with a line break." => FALSE, 33 | '2 > 1' => FALSE, 34 | '1 < 2' => FALSE, 35 | //'1 < 2 > 1' => FALSE, 36 | '' => TRUE, 37 | '' => TRUE, 38 | '' => TRUE, // It's not valid, but HTML parser will try it. 39 | ); 40 | foreach ($tests as $test => $correct) { 41 | $mock = new XMLishMock(); 42 | $this->assertEquals($correct, $mock->exposedIsXMLish($test), "Testing $test"); 43 | } 44 | } 45 | 46 | public function testXMLishWithBrokenHTML() { 47 | $html = '
    Abe H. Rosenbloom Field

    Located in a natural bowl north of 10th Avenue, Rosenbloom Field was made possible by a gift from Virginia Whitney Rosenbloom \'36 and Abe H. Rosenbloom \'34. The Pioneers observed the occasion of the field\'s dedication on Oct. 4, 1975, by defeating Carleton 36-26. Rosenbloom Field has a seating capacity of 1,500.

    A former member of the Grinnell Advisory Board and other college committees, Abe Rosenbloom played football at Grinnell from 1931 to 1933. He played guard and was one of the Missouri Valley Conference\'s smallest gridders (5\'6" and 170 pounds). He averaged more than 45 minutes a game playing time during a 24-game varsity career and was named to the Des Moines Register\'s all-Missouri Valley Conference squad in 1932 and 1933.

    On the south side of the field, a memorial recalls the 100th anniversary of the first intercollegiate football game played west of the Mississippi. The game took place on the Grinnell campus on Nov. 16, 1889. On the north side, a marker commemorates the first 50 years of football in the west, and recalls the same game, played in 1889, Grinnell College vs. the University of Iowa. Grinnell won, 24-0.

    '; 48 | $mock = new XMLishMock(); 49 | $this->assertEquals(TRUE, $mock->exposedIsXMLish($html), "Testing broken HTML"); 50 | } 51 | 52 | } 53 | 54 | /** 55 | * A testing class for XMLish tests. 56 | * 57 | * @ingroup querypath_tests 58 | */ 59 | class XMLishMock extends \QueryPath\DOMQuery { 60 | public function exposedIsXMLish($str) { 61 | return $this->isXMLish($str); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /test/advanced.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /test/coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | phpunit=/Applications/MAMP/bin/php5/bin/phpunit 4 | $phpunit --coverage-html coverage Tests 5 | rm "db/qpTest.db" 6 | rm "db/qpTest2.db" -------------------------------------------------------------------------------- /test/data.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | data 8 | 9 | 10 | 11 | 12 | 13 |

    This is the title

    14 | 15 | 16 | -------------------------------------------------------------------------------- /test/data.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
  • Hello
  • 7 |
  • 8 |
  • 9 |
  • Last
  • 10 |
    11 | 12 |
  • 13 | 14 | This is footer text. 15 | -------------------------------------------------------------------------------- /test/html.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | data 8 | 9 | 10 | 11 | 12 | 13 |

    This is the title

    14 | 15 | -------------------------------------------------------------------------------- /test/no-writing.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /test/runTests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | phpunit=/Applications/MAMP/bin/php5/bin/phpunit 4 | cd .. 5 | $phpunit test/Tests/ 6 | cd - 7 | rm "db/qpTest.db" 8 | rm "db/qpTest2.db" 9 | -------------------------------------------------------------------------------- /test/test.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | 8 | * @license The GNU Lesser GPL (LGPL) or an MIT-like license. 9 | */ 10 | require '../src/QueryPath/QueryPath.php'; 11 | //$str = 'abc > def.g |: hi(jk)[lmn]*op'; 12 | //$str = '&abc.def'; 13 | 14 | /** 15 | * Testing harness for the CssEventHandler. 
16 | * @ingroup querypath_tests 17 | */ 18 | class SimpleTestCssEventHandler implements CssEventHandler { 19 | var $stack = NULL; 20 | var $expect = array(); 21 | 22 | public function __construct() { 23 | $this->stack = array(); 24 | } 25 | 26 | public function getStack() { 27 | return $this->stack; 28 | } 29 | 30 | public function dumpStack() { 31 | print "\nExpected:\n"; 32 | $format = "Element %d: %s\n"; 33 | foreach ($this->expect as $item) { 34 | printf($format, $item->eventType(), implode(',', $item->params())); 35 | } 36 | 37 | print "Got:\n"; 38 | foreach($this->stack as $item){ 39 | printf($format, $item->eventType(), implode(',', $item->params())); 40 | } 41 | } 42 | 43 | public function expects($stack) { 44 | $this->expect = $stack; 45 | } 46 | 47 | public function success() { 48 | return ($this->expect == $this->stack); 49 | } 50 | 51 | public function elementID($id) { 52 | $this->stack[] = new TestEvent(TestEvent::elementID, $id); 53 | } 54 | public function element($name) { 55 | $this->stack[] = new TestEvent(TestEvent::element, $name); 56 | } 57 | public function elementNS($name, $namespace = NULL){ 58 | $this->stack[] = new TestEvent(TestEvent::elementNS, $name, $namespace); 59 | } 60 | public function anyElement(){ 61 | $this->stack[] = new TestEvent(TestEvent::anyElement); 62 | } 63 | public function anyElementInNS($ns){ 64 | $this->stack[] = new TestEvent(TestEvent::anyElementInNS, $ns); 65 | } 66 | public function elementClass($name){ 67 | $this->stack[] = new TestEvent(TestEvent::elementClass, $name); 68 | } 69 | public function attribute($name, $value = NULL, $operation = CssEventHandler::isExactly){ 70 | $this->stack[] = new TestEvent(TestEvent::attribute, $name, $value, $operation); 71 | } 72 | public function attributeNS($name, $ns, $value = NULL, $operation = CssEventHandler::isExactly){ 73 | $this->stack[] = new TestEvent(TestEvent::attributeNS, $name, $ns, $value, $operation); 74 | } 75 | public function pseudoClass($name, $value = 
NULL){ 76 | $this->stack[] = new TestEvent(TestEvent::pseudoClass, $name, $value); 77 | } 78 | public function pseudoElement($name){ 79 | $this->stack[] = new TestEvent(TestEvent::pseudoElement, $name); 80 | } 81 | public function directDescendant(){ 82 | $this->stack[] = new TestEvent(TestEvent::directDescendant); 83 | } 84 | public function anyDescendant() { 85 | $this->stack[] = new TestEvent(TestEvent::anyDescendant); 86 | } 87 | public function adjacent(){ 88 | $this->stack[] = new TestEvent(TestEvent::adjacent); 89 | } 90 | public function anotherSelector(){ 91 | $this->stack[] = new TestEvent(TestEvent::anotherSelector); 92 | } 93 | public function sibling(){ 94 | $this->stack[] = new TestEvent(TestEvent::sibling); 95 | } 96 | } 97 | 98 | /** 99 | * Simple utility object for use with the TestCssEventHandler. 100 | * @ingroup querypath_tests 101 | */ 102 | class TestEvent { 103 | const elementID = 0; 104 | const element = 1; 105 | const elementNS = 2; 106 | const anyElement = 3; 107 | const elementClass = 4; 108 | const attribute = 5; 109 | const attributeNS = 6; 110 | const pseudoClass = 7; 111 | const pseudoElement = 8; 112 | const directDescendant = 9; 113 | const adjacent = 10; 114 | const anotherSelector = 11; 115 | const sibling = 12; 116 | const anyElementInNS = 13; 117 | const anyDescendant = 14; 118 | 119 | var $type = NULL; 120 | var $params = NULL; 121 | 122 | public function __construct($event_type) { 123 | 124 | 125 | 126 | $this->type = $event_type; 127 | $args = func_get_args(); 128 | array_shift($args); 129 | $this->params = $args; 130 | 131 | print "Event " . 
$event_type; 132 | print_r($args); 133 | } 134 | 135 | public function eventType() { 136 | return $this->type; 137 | } 138 | 139 | public function params() { 140 | return $this->params; 141 | } 142 | } 143 | 144 | print ord('"'); 145 | #$str = 'tag.class #id :test (test) + anotherElement > yetAnother[test] more[test="ing"]'; 146 | $str = 'tag.class #id :test (test)'; 147 | print "Now testing: $str\n"; 148 | 149 | $c = new SimpleTestCssEventHandler(); 150 | 151 | $p = new CssParser($str, $c); 152 | $p->parse(); 153 | 154 | -------------------------------------------------------------------------------- /tutorials/Examples/Examples.pkg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | QueryPath Examples 5 | Using QueryPath. 6 | 7 | 8 | 9 | Matt Butcher 10 | 11 | {@link http://querypath.org Project Founder} 12 | 13 | 14 | 15 | {@toc} 16 | 17 | About QueryPath Examples 18 | 19 | The scripts provided in this package are intended to illustrate how {@link 20 | QueryPath} 21 | can work in various circumstances. Each example is self-contained, and makes 22 | use only of QueryPath and the core extensions. 23 | 24 | 25 | You will find much more information about these examples on the 26 | {@link http://fedorahosted.org/querypath wiki}. There, each example is 27 | explained in detail, and example output is also shown. 28 | 29 | 30 | 31 | Where to begin 32 | 33 | If you are new to QueryPath, you may wish to start with 34 | {@link simple_example.php}, and then move on to {@link xml.php} and 35 | {@link html.php}. 36 | 37 | 38 | As you click on each page, check out the "source code for this file" 39 | link. That will take you to a syntax-highlighted version of the source code. 40 | In most cases, the source code is under 50 lines and includes many comments. 
41 | 42 | 43 | -------------------------------------------------------------------------------- /tutorials/QueryPath/QueryPath.pkg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Using QueryPath @UNSTABLE@ 5 | How to make the most of the QueryPath library. 6 | 7 | 8 | 9 | Matt Butcher 10 | 11 | {@link http://querypath.org Project Founder} {@link http://queryPath.org} {@link http://technosophos.com} 12 | 13 | 14 | 15 | {@toc} 16 | 17 | About QueryPath 18 | 19 | QueryPath is a library designed to help you quickly and efficiently search, 20 | modify, and traverse XML and HTML documents. It implements many of the functions 21 | found in {@link http://jquery.com jQuery}. However, 22 | it is optimized for server-side work. 23 | 24 | 25 | Basic introductory documentation on QueryPath can be found at the 26 | {@link http://querypath.org official QueryPath website}. 27 | That site is kept updated with all of the latest information. 28 | This suite of documentation 29 | provides detailed information about the QueryPath API. 30 | 31 | 32 | Ready to get going? Start with the {@link qp()} function. 33 | Along with the basic documentation, there are several examples linked from 34 | there. 35 | 36 | 37 | 38 | What can you do with QueryPath? 39 | QueryPath can handle many different XML/HTML processing functions. Here 40 | are a few examples: 41 | 42 | Work with remote web services like Flickr, Google, Twitter, and YouTube 43 | Import legacy HTML 44 | Crawl and parse sites 45 | Retrieve RDFa and Microformats from HTML 46 | Parse XML configuration files 47 | Access the semantic web 48 | Maintain pure HTML templates and merge data with QueryPath 49 | 50 | 51 | QueryPath is a flexible library. We hope you find it useful. 52 | 53 | 54 | The Structure of QueryPath 55 | To successfully use QueryPath, you will need to be acquainted with only 56 | one function ({@link qp()}) and one class ({@link QueryPath}). 
57 | 58 | The qp() Function 59 | The {@link qp()} function is a factory for creating new QueryPath 60 | objects. It should be used for constructing new {@link QueryPath} 61 | objects. The principal job of the {@link qp()} function is to create a 62 | QueryPath instance and bind a document to that object. Any 63 | QueryPath object can be attached to only one document. 64 | QueryPath knows how to handle XML, HTML, DOMDocument, SimpleXMLElement 65 | objects, and a few other document data structures. It can also load files 66 | and remote URLs and parse the contents. 67 | When handling such documents, QueryPath may have to guess whether a 68 | document is XML or HTML. To read more about the rules, see the API 69 | documentation for {@link qp()}. 70 | 71 | 72 | The QueryPath Class 73 | The QueryPath class provides all of the tools for working with a 74 | document. In addition to implementing all of the manipulation and traversal 75 | methods of jQuery, it provides many other tools that may be useful when 76 | managing documents on the server side or in a desktop application. 77 | To learn about the rich set of methods available on a QueryPath 78 | object, take a look at the {@link QueryPath} API documentation. 79 | Note: To construct a QueryPath object, you should use the 80 | {@link qp()} factory function. Rarely is there a reason to call 81 | QueryPath's constructor directly. 82 | 83 | 84 | Extending QueryPath 85 | While QueryPath provides many useful features, it does not 86 | (obviously) provide all features. It does, however, support a very simple 87 | method of building extensions. Simply follow the extension pattern 88 | exhibited in the {@link QPList.php} extension, and you can quickly add 89 | your own methods to QueryPath in a maintainable and re-usable way. 90 | On very, very rare occasions, you may need to extend the QueryPath 91 | object itself. As of QueryPath 2.0, you can now do this. 
You are encouraged 92 | to do so, though, only in cases where extensions cannot provide the 93 | desired behavior. 94 | 95 | 96 | 97 | Tutorials 98 | The best place to start on QueryPath is with the 99 | {@link http://www.ibm.com/developerworks/opensource/library/os-php-querypath/index.html?S_TACT=105AGX01&S_CMP=HP IBM DeveloperWorks intro}. It covers the basics of QueryPath, 100 | and provides a simple 10-line twitter client. 101 | A {@link http://technosophos.com/content/querypath-and-html-basics blog on QueryPath and HTML} introduces the basics of working with 102 | HTML. Another three-part blog on QPDB ({@link http://technosophos.com/content/using-querypath-interact-sql-database-part-1 1}, 103 | {@link http://technosophos.com/content/using-querypath-interact-sql-database-part-2 2}, 104 | {@link http://technosophos.com/content/using-querypath-interact-sql-database-part-3 3}) 105 | introduces SQL programming with QueryPath. 106 | The original 107 | {@link https://fedorahosted.org/querypath/wiki/QueryPathTutorial Tutorial} 108 | also remains accurate. 109 | In that tutorial you will find a few simple examples, and lots of links to more 110 | sophisticated examples. 111 | 112 | 113 | 114 | Resources 115 | 116 | The guide here contains detailed documentation of each method in the 117 | QueryPath library. But there are other sources of documentation. 118 | 119 | 120 | The {@link http://querypath.org QueryPath.org} website is a hub of 121 | information about QueryPath. Check out screencasts, articles, and other 122 | sources of information there. 123 | 124 | 125 | Issue tracking and development of QueryPath are hosted on 126 | {@link http://github.com/technosophos/querypath GitHub}, 127 | a shared development platform. 128 | 129 | 130 | There are two mailing lists for QueryPath: One for 131 | {@link http://groups.google.com/group/support-querypath support} and one for 132 | {@link http://groups.google.com/group/devel-querypath developers}. 
133 | 134 | 135 | Matt Butcher regularly posts {@link http://technosophos.com/category/tags/querypath articles about QueryPath} on 136 | {@link http://technosophos.com TechnoSophos}. 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /tutorials/QueryPath/QueryPath.pkg.ini: -------------------------------------------------------------------------------- 1 | [Linked Tutorials] 2 | css -------------------------------------------------------------------------------- /tutorials/QueryPath/css.pkg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | QueryPath CSS Reference 5 | Making the most of QueryPath CSS support 6 | 7 | 8 | 9 | Matt Butcher 10 | 11 | {@link http://querypath.org Project Founder} 12 | 13 | 14 | 15 | {@toc} 16 | 17 | About the QueryPath CSS Reference 18 | 19 | This document is a reference for the CSS Level 3 Selectors support as implemented 20 | in QueryPath "quark" (2.0) and beyond. 21 | 22 | 23 | For detailed API documentation, see {@link http://api.querypath.org}. 24 | 25 | 26 | 27 | Selectors 28 | 29 | Selectors are composed of simple selectors combined with combinators. Simple 30 | selectors may be composed of any combination of the following: 31 | 32 | Element ID (#elementID) 33 | Element name (tagname) 34 | Class (.classname) 35 | Attribute ([name="val"]) 36 | Pseudo-class (:class(value)) 37 | Pseudo-element (::element) 38 | Any element (*) 39 | 40 | 41 | For a list of pseudo-classes, see {@tutorial QueryPath/csspseudo}. 42 | 43 | 44 | Combinators 45 | 46 | Combinators join two simple selectors. 
The following combinators are supported 47 | by QueryPath (this list includes ALL combinators defined in the CSS 3 Selectors 48 | specification): 49 | 50 | Any descendant (' ' - a blank space between two selectors) 51 | Direct descendant (>) 52 | Adjacent (+) 53 | Another (,) 54 | Sibling (~) 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /tutorials/QueryPath/css.pkg.ini: -------------------------------------------------------------------------------- 1 | [Linked Tutorials] 2 | csspseudo 3 | cssexamples -------------------------------------------------------------------------------- /tutorials/QueryPath/cssexamples.pkg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | CSS Examples 5 | Examples of CSS selectors 6 | 7 | 8 | 9 | Matt Butcher 10 | 11 | {@link http://querypath.org Project Founder} 12 | 13 | 14 | 15 | {@toc} 16 | 17 | About the Examples 18 | 19 | This document provides some examples of CSS 3 Selectors. While this is not a 20 | comprehensive compendium of features, it provides a set of 21 | patterns that appear frequently in QueryPath code. 22 | 23 | 24 | For detailed API documentation, see {@link http://api.querypath.org}. 25 | 26 | 27 | -------------------------------------------------------------------------------- /tutorials/QueryPath/csspseudo.pkg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Pseudo-classes and Pseudo-elements 5 | Reference for pseudo-classes and pseudo-elements supported in QueryPath 6 | 7 | 8 | 9 | Matt Butcher 10 | 11 | {@link http://querypath.org Project Founder} 12 | 13 | 14 | 15 | {@toc} 16 | 17 | About the QueryPath CSS Reference 18 | 19 | The CSS 3 Selector specification defines pseudo-classes and pseudo-elements. QueryPath implements as many of these as possible. 
20 | 21 | 22 | For details on CSS 3 selectors, the canonical resource is the specification, currently in final draft form at {@link http://www.w3.org/TR/2009/WD-css3-selectors-20090310/}. 23 | 24 | 25 | 26 | Supported Pseudo-classes and Pseudo-elements 27 | 28 | There are three categories of supported pseudo-classes: Those from the CSS 3 Selector specification, those that are QueryPath-specific, and those that were defined in jQuery, and are thus supported here for compatibility. 29 | 30 | 31 | Supported standard pseudo-classes 32 | 33 | 34 | :lang(en-us) -- language code, matches lang or xml:lang attributes. 35 | :link() -- HTML only 36 | :root() 37 | :nth-child(an + b) 38 | :nth-last-child(an + b) 39 | :nth-of-type(an + b) -- as of QueryPath 'quark' (2.0) 40 | :nth-last-of-type(an + b) -- as of QueryPath 'quark' 41 | :first-child 42 | :last-child 43 | :first-of-type 44 | :last-of-type 45 | :only-child 46 | :only-of-type 47 | :empty 48 | :not(filter) -- this can take a FULL CSS 3 selector, not just a simple selector. 
49 | 50 | 51 | 52 | 53 | Supported QueryPath-specific pseudo-classes 54 | 55 | 56 | :x-root 57 | :x-reset 58 | 59 | 60 | 61 | 62 | Supported non-standard jQuery pseudo-classes 63 | 64 | 65 | :even -- like nth-child(even) 66 | :odd -- like nth-child(odd) 67 | :lt(int) -- index of element is less than given value 68 | :gt(int) -- index of element is greater than given value 69 | :nth(int) -- index of element is equal to given value 70 | :eq(int) -- synonym for :nth() 71 | :first -- element is first child of some parent 72 | :last -- element is last child of some parent 73 | :parent -- element has at least one child 74 | :enabled -- element is enabled form element (HTML only) 75 | :disabled -- element is disabled form element (HTML only) 76 | :checked -- element is checked, likely a form field (HTML only) 77 | :text -- element is text field (HTML only) 78 | :radio -- element is radio button (HTML only) 79 | :checkbox -- element is checkbox (HTML only) 80 | :file -- element is file upload field (HTML only) 81 | :password -- element is password field (HTML only) 82 | :submit -- element is submit button (HTML only) 83 | :image 84 | :button -- element is button (HTML only) 85 | :reset -- element is reset button (HTML only) 86 | :header -- elements that are HTML headers (h1 - hN) 87 | :has(some filter) -- elements that have children that match the supplied filter 88 | :contains(some text) -- elements that have text of which some substring matches the supplied value (Prior to QueryPath 2.1, this behaved identically to :contains-exactly) 89 | :contains-exactly(some text) -- elements that have text which exactly matches the supplied value (QueryPath 2.1) 90 | 91 | 92 | 93 | 94 | Supported pseudo-elements 95 | 96 | pseudo-elements use the double-colon (::) notation. 97 | No other notation is supported in QueryPath. (Other CSS 3 98 | implementations may support single-colon syntax of CSS 2.1. 99 | QueryPath does not.) 
100 | 101 | 102 | 103 | ::first-letter 104 | ::first-line -- calculated by seeking the first explicit line ending (\n). 105 | 106 | 107 | 108 | 109 | 110 | Unsupported Pseudo-classes and Pseudo-elements 111 | 112 | Several pseudo-classes and pseudo-elements require a visual user agent (UA) 113 | before they can be used. QueryPath does not provide a visual representation 114 | of XML or HTML data, and thus cannot implement these pseudo-classes and 115 | pseudo-elements. 116 | 117 | 118 | 119 | :visited 120 | :hover 121 | :active 122 | :focus 123 | :target 124 | :indeterminate 125 | :animated (jQuery extension) 126 | :visible (jQuery extension) 127 | :hidden (jQuery extension) 128 | ::before 129 | ::after 130 | ::selected 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /tutorials/Tests/Tests.pkg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Testing QueryPath 5 | How to set up a testing environment for QueryPath. 6 | 7 | 8 | 9 | Matt Butcher 10 | 11 | {@link http://querypath.org Project Founder} 12 | 13 | 14 | 15 | {@toc} 16 | 17 | About QueryPath's Testing 18 | 19 | QueryPath is tested using unit tests written against the {@link PHPUnit} unit 20 | testing framework. Every public function of QueryPath should have at least one 21 | corresponding test. Most protected and private functions should also be 22 | exercised, though perhaps not directly. 23 | 24 | 25 | The user guide for QueryPath is located in the 26 | {@link http://fedorahosted.org/querypath official wiki}. 27 | It is kept up to date with all of the latest information. 28 | 29 | 30 | Ready to get going? Start with the {@link qp()} function. Along with the 31 | basic documentation, there are several examples linked from that function. 32 | 33 | 34 | 35 | --------------------------------------------------------------------------------