├── .gitignore ├── .mailmap ├── .travis.yml ├── Changes ├── LICENSE ├── MANIFEST ├── MANIFEST.SKIP ├── cpanfile ├── dist.ini ├── lib └── Web │ ├── Query.pm │ └── Query │ └── LibXML.pm ├── t ├── 00_compile.t ├── 01_src.t ├── 02_op.t ├── 03_traverse.t ├── 04_element.t ├── 05_html5.t ├── 06_new_from_url_error_handling.t ├── 07_remove.t ├── 08_indent.t ├── 09_as_html.t ├── 10_subclass.t ├── 11_get_eq.t ├── add.t ├── after.t ├── append.t ├── attr.t ├── bad-url-with-options.t ├── before.t ├── bug-text-contents.t ├── class.t ├── clone.t ├── contents.t ├── data │ ├── foo.html │ └── html5_snippet.html ├── destroy.t ├── detach.t ├── filter.t ├── find.t ├── has_class.t ├── insert_after.t ├── insert_before.t ├── lib │ ├── My │ │ ├── TreeBuilder.pm │ │ └── Web │ │ │ └── Query.pm │ └── WQTest.pm ├── match_and_not.t ├── new.t ├── next.t ├── next_until.t ├── no_space_compacting.t ├── node-types.t ├── prepend.t ├── prev.t ├── processing-instructions.t ├── remove.t ├── remove_class.t ├── replace_with.t ├── special-attributes.t ├── split.t ├── store_comments.t ├── tagname.t └── xpath.t ├── weaver.ini └── xt └── live └── 01_simple.t /.gitignore: -------------------------------------------------------------------------------- 1 | notes 2 | Web-Query-* 3 | .prove 4 | .build 5 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | Carlos Fernando Avila Gratz 2 | Tokuhiro Matsuno 3 | Yanick Champoux 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | before_install: 3 | - export HARNESS_OPTIONS=j10:c HARNESS_TIMER=1 4 | - git config --global user.name "Dist Zilla Plugin TravisCI" 5 | - git config --global user.email $HOSTNAME":not-for-mail@travis-ci.com" 6 | install: 7 | - cpanm --with-recommends --installdeps -n . 8 | language: perl 9 | matrix: 10 | include: 11 | - perl: '5.22' 12 | - perl: '5.24' 13 | - perl: '5.26' 14 | - perl: '5.28' 15 | - perl: '5.30' 16 | script: 17 | - prove -l t 18 | -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- 1 | Revision history for Perl extension Web::Query 2 | 3 | {{$NEXT}} 4 | [API CHANGES] 5 | 6 | [BUG FIXES] 7 | 8 | [DOCUMENTATION] 9 | 10 | [ENHANCEMENTS] 11 | 12 | [NEW FEATURES] 13 | 14 | [STATISTICS] 15 | 16 | 1.01 2024-01-12 17 | [BUG FIXES] 18 | - Fix tests to work with new version of HTML::TreeBuilder::LibXML. (GH#57) 19 | 20 | [DOCUMENTATION] 21 | - Fix documentation typos. (GH#56, esabol) 22 | 23 | [ENHANCEMENTS] 24 | - Move tests to Test2::V0. 25 | 26 | [STATISTICS] 27 | - code churn: 48 files changed, 229 insertions(+), 210 deletions(-) 28 | 29 | 1.00 2023-09-06 30 | [API CHANGES] 31 | - Web::Query will now throw when failing to retrieve an url, instead of 32 | silently returning C. (GH#55) 33 | 34 | [STATISTICS] 35 | - code churn: 8 files changed, 56 insertions(+), 32 deletions(-) 36 | 37 | 0.39 2018-08-21 38 | [BUG FIXES] 39 | - localize $@ in destructor to prevent clobbering. (GH#51, Maurice Aubrey) 40 | 41 | [STATISTICS] 42 | - code churn: 6 files changed, 81 insertions(+), 4 deletions(-) 43 | 44 | 0.38 2016-07-03 45 | [BUG FIXES] 46 | - HTML::Selector::XPath 0.19 has a bug regarding '//b' expressions. 47 | 48 | [STATISTICS] 49 | - code churn: 2 files changed, 9 insertions(+), 2 deletions(-) 50 | 51 | 0.37 2016-07-02 52 | [BUG FIXES] 53 | - Require List::Util 1.44+ (for 'uniq') 54 | 55 | [STATISTICS] 56 | - code churn: 2 files changed, 19 insertions(+), 7 deletions(-) 57 | 58 | 0.36 2016-06-30 59 | [BUG FIXES] 60 | - `->text()` doesn't break on text nodes. (GH#47, reported by Gabor Szabo) 61 | 62 | [DOCUMENTATION] 63 | - Add mention of a way to get PIs of XML documents (GH#49). 64 | 65 | [ENHANCEMENTS] 66 | - `wq()` can now create an empty document. 67 | - Add 'join' argument to `as_html`. 68 | - Add 'match' function. 69 | - Add 'split' function. (GH#45) 70 | 71 | [STATISTICS] 72 | - code churn: 11 files changed, 322 insertions(+), 46 deletions(-) 73 | 74 | 0.35 2016-05-31 75 | [DOCUMENTATION] 76 | - Add troubleshooting entry for 'script' elements. [GH#8] 77 | 78 | [ENHANCEMENTS] 79 | - 'attr' method now accept many attributes and code refs in setter mode. 80 | 81 | [STATISTICS] 82 | - code churn: 6 files changed, 104 insertions(+), 33 deletions(-) 83 | 84 | 0.34 2015-09-23 85 | [BUG FIXES] 86 | - 'filter' was exploding on text nodes. [GH#44] 87 | 88 | [STATISTICS] 89 | - code churn: 4 files changed, 24 insertions(+), 4 deletions(-) 90 | 91 | 0.33 2015-09-23 92 | [BUG FIXES] 93 | - Make sure we use XML::LibXML > 2.0107 for `unique_keys`. [GH#43] 94 | - 'filter' with coderef was not generating a sub-WQ object. 95 | 96 | [ENHANCEMENTS] 97 | - Be more resilient to #text nodes. (GH#42) 98 | 99 | [STATISTICS] 100 | - code churn: 6 files changed, 101 insertions(+), 34 deletions(-) 101 | 102 | 0.32 2015-08-29 103 | [ENHANCEMENTS] 104 | - add id() as a shortcut method for `->attr('id')`. [GH#38] 105 | - add 'name()' as a shortcut method for `->attr('name')`. [GH#39] 106 | - add 'data()' as a shortcut method for `->attr('data-*foo*')`. [GH#40] 107 | - add `toggle_class()` method. [GH#41] 108 | 109 | [STATISTICS] 110 | - code churn: 5 files changed, 394 insertions(+), 172 deletions(-) 111 | 112 | 0.31 2015-08-25 113 | - each() would skip nodes if its subref was calling remove(). [yanick] 114 | - remove duplicate code for duplicate(). [yanick] 115 | 116 | [STATISTICS] 117 | - code churn: 5 files changed, 46 insertions(+), 25 deletions(-) 118 | 119 | 0.30 2015-08-23 120 | - next_until.t was failing if XML::LibXML isn't installed. [yanick] 121 | 122 | 0.29 2015-08-21 123 | - add() now returns a new element (instead of modifying $self). [yanick] 124 | - added 'not()'. [yanick] 125 | - added 'and_back'. [yanick] 126 | - added 'next_until()'. [yanick] 127 | 128 | 0.28 2015-06-30 129 | - new_from_html with options was breaking 'end()'. (yanick) 130 | 131 | 0.27 2014-12-24T00:52:33Z 132 | - new() with a bad url wasn't returning 'undef' when options were given. 133 | (yanick) 134 | - Add 'no_space_compacting' option. #33 (yanick) 135 | - Add 'tagname' to query/modify tag names. #34 (yanick) 136 | - XPath expressions can now be used as well. #35 (yanick) 137 | 138 | 0.26 2014-03-31T08:23:34Z 139 | - impl prev() and next() method #31 (xaicron) 140 | 141 | 0.25 2014-02-13T01:26:42Z 142 | - re-packaging(no feature changes) 143 | 144 | 0.24 2014-02-12T05:34:09Z 145 | - replace_with: Can't call method "clone" on an undefined value #24 146 | (Reported by @daxim++, Fixed by @yanick++) 147 | 148 | 0.23 2013-05-30T16:09:03Z 149 | - improved find() documentation 150 | - fixed cpanfile min perl version 151 | - modified tests to use the expression form of eval to try to load 152 | Web::Query::LibXML - the block form of eval is not working as expected 153 | on some perl versions on i386-freebsd (cafe01) 154 | 155 | 0.22 2013-05-15T23:36:38Z 156 | - added new module: Web::Query::LibXML 157 | - modified test files to also test Web::Query::LibXML (if it loads). 158 | 159 | 0.21 2013-05-15T14:36:11Z 160 | - new jQuery-compatible method: add() 161 | - fixed filter() that relied on wrong find() behavior 162 | - fixed two t/03_traverse.t tests that was expecting wrong behavior from 163 | filter() 164 | 165 | 0.20 2013-05-13T22:51:02Z 166 | - improved documentation 167 | - fixed find() to match only descendant elements This is the correct 168 | jQuery compatible implementation, which I have changed in 0.14 to also 169 | match root nodes, my bad. 170 | - fixed tests that relied on that wrong find() behavior. (cafe01) 171 | 172 | 0.19 2013-05-12T18:19:57Z 173 | - implemented contents() jQuery-compatible method 174 | - new() now accepts another Web::Query object (cafe01) 175 | 176 | 0.18 2013-05-09T19:40:40Z 177 | - fixed html() method, now using $self->_build_tree 178 | - calling parent() instead of undocumented getParentNode() 179 | - calling disembowel() instead of guts() Need for Web::Query::LibXML, so 180 | nodes get detached from old document and returned each as root of a new 181 | document. (Carlos Fernando Avila Gratz) 182 | 183 | 0.17 2013-05-08T01:18:36Z 184 | - new_from_file() now calling guts() instead of elementify() So the file 185 | can contain a document fragment (multiple root nodes) instead of a full 186 | document (single root). Also, now all new_from_* methods behave the 187 | same. (Carlos Fernando Avila Gratz) 188 | 189 | 0.16 2013-04-22T14:26:44Z 190 | - modified new_from_element() to ignore non-blessed items (Carlos Fernando 191 | Avila Gratz) 192 | - created _build_tree() method (Carlos Fernando Avila Gratz) 193 | 194 | 0.15 2013-04-09T00:29:48Z 195 | - added clone() method (Carlos Fernando Avila Gratz) 196 | - now storing comments from parsed html (Carlos Fernando Avila Gratz) 197 | - fixed remove() to get rid of removed element refs removes from $self and 198 | from all $self->{before}. Also modified how each() instantiates the 199 | objects, so $_->end works in the callback, which is needed for 200 | $_->remove() to work in the callback. (Carlos Fernando Avila Gratz) 201 | 202 | 0.14 2013-04-07T02:22:25Z 203 | - new jQuery compatible methods, and related tests * append * prepend * 204 | before * after * insert_before * insert_after * detach * add_class * 205 | remove_class * has_class (Carlos Fernando Avila Gratz) 206 | 207 | 0.13 2013-04-05T06:37:27Z 208 | - fixed find() bug was calling selector_to_xpath() in the loop, breaking 209 | the selector after the second call. (Carlos Fernando Avila Gratz) 210 | - Search from '//' when the node was created from HTML. (tokuhirom) 211 | 212 | 0.12 2013-04-03T20:24:49Z 213 | - Make subclass friendly (Carlos Fernando Avila Gratz) 214 | 215 | 0.11 216 | - Implement a remove method that effects the html results. (gugod++) 217 | 218 | 0.10 219 | [INCOMPATIBLE CHANGES] 220 | - new_from_url() is no longer throws exception on bad response from HTTP 221 | server. https://rt.cpan.org/Ticket/Display.html?id=76187 (oleg++) 222 | 223 | 0.09 224 | - Switch to Module::Build 225 | - first() and last() should construct new object, but not modify self 226 | (Oleg++) 227 | 228 | 0.08 229 | - added ->map and ->filter methods (Hiroki Honda) 230 | - fixed as (empty)->first->size and (empty)->last->size return 0 (Hiroki 231 | Honda) 232 | 233 | 0.07 234 | - HTML5 support 235 | 236 | 0.06 237 | - added first, last methods(akiym) 238 | 239 | 0.05 240 | - added docs for 'how do i customize useragent'. 241 | 242 | 0.04 243 | - added ->size and ->parent method. 244 | 245 | 0.03 246 | - fix fucking win32 new line issue. (it may works, i hope.) 247 | 248 | 0.02 249 | - added docs for find method(reported by kan++). 250 | 251 | 0.01 2011-02-19T10:38:22Z 252 | - original version 253 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is copyright (c) 2012 by Tokuhiro Matsuno Etokuhirom AAJKLFJEF@ GMAIL COME. 2 | 3 | This is free software; you can redistribute it and/or modify it under 4 | the same terms as the Perl 5 programming language system itself. 5 | 6 | Terms of the Perl programming language system itself 7 | 8 | a) the GNU General Public License as published by the Free 9 | Software Foundation; either version 1, or (at your option) any 10 | later version, or 11 | b) the "Artistic License" 12 | 13 | --- The GNU General Public License, Version 1, February 1989 --- 14 | 15 | This software is Copyright (c) 2012 by Tokuhiro Matsuno Etokuhirom AAJKLFJEF@ GMAIL COME. 16 | 17 | This is free software, licensed under: 18 | 19 | The GNU General Public License, Version 1, February 1989 20 | 21 | GNU GENERAL PUBLIC LICENSE 22 | Version 1, February 1989 23 | 24 | Copyright (C) 1989 Free Software Foundation, Inc. 25 | 51 Franklin St, Suite 500, Boston, MA 02110-1335 USA 26 | 27 | Everyone is permitted to copy and distribute verbatim copies 28 | of this license document, but changing it is not allowed. 29 | 30 | Preamble 31 | 32 | The license agreements of most software companies try to keep users 33 | at the mercy of those companies. By contrast, our General Public 34 | License is intended to guarantee your freedom to share and change free 35 | software--to make sure the software is free for all its users. The 36 | General Public License applies to the Free Software Foundation's 37 | software and to any other program whose authors commit to using it. 38 | You can use it for your programs, too. 39 | 40 | When we speak of free software, we are referring to freedom, not 41 | price. Specifically, the General Public License is designed to make 42 | sure that you have the freedom to give away or sell copies of free 43 | software, that you receive source code or can get it if you want it, 44 | that you can change the software or use pieces of it in new free 45 | programs; and that you know you can do these things. 46 | 47 | To protect your rights, we need to make restrictions that forbid 48 | anyone to deny you these rights or to ask you to surrender the rights. 49 | These restrictions translate to certain responsibilities for you if you 50 | distribute copies of the software, or if you modify it. 51 | 52 | For example, if you distribute copies of a such a program, whether 53 | gratis or for a fee, you must give the recipients all the rights that 54 | you have. You must make sure that they, too, receive or can get the 55 | source code. And you must tell them their rights. 56 | 57 | We protect your rights with two steps: (1) copyright the software, and 58 | (2) offer you this license which gives you legal permission to copy, 59 | distribute and/or modify the software. 60 | 61 | Also, for each author's protection and ours, we want to make certain 62 | that everyone understands that there is no warranty for this free 63 | software. If the software is modified by someone else and passed on, we 64 | want its recipients to know that what they have is not the original, so 65 | that any problems introduced by others will not reflect on the original 66 | authors' reputations. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | GNU GENERAL PUBLIC LICENSE 72 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 73 | 74 | 0. This License Agreement applies to any program or other work which 75 | contains a notice placed by the copyright holder saying it may be 76 | distributed under the terms of this General Public License. The 77 | "Program", below, refers to any such program or work, and a "work based 78 | on the Program" means either the Program or any work containing the 79 | Program or a portion of it, either verbatim or with modifications. Each 80 | licensee is addressed as "you". 81 | 82 | 1. You may copy and distribute verbatim copies of the Program's source 83 | code as you receive it, in any medium, provided that you conspicuously and 84 | appropriately publish on each copy an appropriate copyright notice and 85 | disclaimer of warranty; keep intact all the notices that refer to this 86 | General Public License and to the absence of any warranty; and give any 87 | other recipients of the Program a copy of this General Public License 88 | along with the Program. You may charge a fee for the physical act of 89 | transferring a copy. 90 | 91 | 2. You may modify your copy or copies of the Program or any portion of 92 | it, and copy and distribute such modifications under the terms of Paragraph 93 | 1 above, provided that you also do the following: 94 | 95 | a) cause the modified files to carry prominent notices stating that 96 | you changed the files and the date of any change; and 97 | 98 | b) cause the whole of any work that you distribute or publish, that 99 | in whole or in part contains the Program or any part thereof, either 100 | with or without modifications, to be licensed at no charge to all 101 | third parties under the terms of this General Public License (except 102 | that you may choose to grant warranty protection to some or all 103 | third parties, at your option). 104 | 105 | c) If the modified program normally reads commands interactively when 106 | run, you must cause it, when started running for such interactive use 107 | in the simplest and most usual way, to print or display an 108 | announcement including an appropriate copyright notice and a notice 109 | that there is no warranty (or else, saying that you provide a 110 | warranty) and that users may redistribute the program under these 111 | conditions, and telling the user how to view a copy of this General 112 | Public License. 113 | 114 | d) You may charge a fee for the physical act of transferring a 115 | copy, and you may at your option offer warranty protection in 116 | exchange for a fee. 117 | 118 | Mere aggregation of another independent work with the Program (or its 119 | derivative) on a volume of a storage or distribution medium does not bring 120 | the other work under the scope of these terms. 121 | 122 | 3. You may copy and distribute the Program (or a portion or derivative of 123 | it, under Paragraph 2) in object code or executable form under the terms of 124 | Paragraphs 1 and 2 above provided that you also do one of the following: 125 | 126 | a) accompany it with the complete corresponding machine-readable 127 | source code, which must be distributed under the terms of 128 | Paragraphs 1 and 2 above; or, 129 | 130 | b) accompany it with a written offer, valid for at least three 131 | years, to give any third party free (except for a nominal charge 132 | for the cost of distribution) a complete machine-readable copy of the 133 | corresponding source code, to be distributed under the terms of 134 | Paragraphs 1 and 2 above; or, 135 | 136 | c) accompany it with the information you received as to where the 137 | corresponding source code may be obtained. (This alternative is 138 | allowed only for noncommercial distribution and only if you 139 | received the program in object code or executable form alone.) 140 | 141 | Source code for a work means the preferred form of the work for making 142 | modifications to it. For an executable file, complete source code means 143 | all the source code for all modules it contains; but, as a special 144 | exception, it need not include source code for modules which are standard 145 | libraries that accompany the operating system on which the executable 146 | file runs, or for standard header files or definitions files that 147 | accompany that operating system. 148 | 149 | 4. You may not copy, modify, sublicense, distribute or transfer the 150 | Program except as expressly provided under this General Public License. 151 | Any attempt otherwise to copy, modify, sublicense, distribute or transfer 152 | the Program is void, and will automatically terminate your rights to use 153 | the Program under this License. However, parties who have received 154 | copies, or rights to use copies, from you under this General Public 155 | License will not have their licenses terminated so long as such parties 156 | remain in full compliance. 157 | 158 | 5. By copying, distributing or modifying the Program (or any work based 159 | on the Program) you indicate your acceptance of this license to do so, 160 | and all its terms and conditions. 161 | 162 | 6. Each time you redistribute the Program (or any work based on the 163 | Program), the recipient automatically receives a license from the original 164 | licensor to copy, distribute or modify the Program subject to these 165 | terms and conditions. You may not impose any further restrictions on the 166 | recipients' exercise of the rights granted herein. 167 | 168 | 7. The Free Software Foundation may publish revised and/or new versions 169 | of the General Public License from time to time. Such new versions will 170 | be similar in spirit to the present version, but may differ in detail to 171 | address new problems or concerns. 172 | 173 | Each version is given a distinguishing version number. If the Program 174 | specifies a version number of the license which applies to it and "any 175 | later version", you have the option of following the terms and conditions 176 | either of that version or of any later version published by the Free 177 | Software Foundation. If the Program does not specify a version number of 178 | the license, you may choose any version ever published by the Free Software 179 | Foundation. 180 | 181 | 8. If you wish to incorporate parts of the Program into other free 182 | programs whose distribution conditions are different, write to the author 183 | to ask for permission. For software which is copyrighted by the Free 184 | Software Foundation, write to the Free Software Foundation; we sometimes 185 | make exceptions for this. Our decision will be guided by the two goals 186 | of preserving the free status of all derivatives of our free software and 187 | of promoting the sharing and reuse of software generally. 188 | 189 | NO WARRANTY 190 | 191 | 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 192 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 193 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 194 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 195 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 196 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 197 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 198 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 199 | REPAIR OR CORRECTION. 200 | 201 | 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 202 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 203 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 204 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 205 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 206 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 207 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 208 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 209 | POSSIBILITY OF SUCH DAMAGES. 210 | 211 | END OF TERMS AND CONDITIONS 212 | 213 | Appendix: How to Apply These Terms to Your New Programs 214 | 215 | If you develop a new program, and you want it to be of the greatest 216 | possible use to humanity, the best way to achieve this is to make it 217 | free software which everyone can redistribute and change under these 218 | terms. 219 | 220 | To do so, attach the following notices to the program. It is safest to 221 | attach them to the start of each source file to most effectively convey 222 | the exclusion of warranty; and each file should have at least the 223 | "copyright" line and a pointer to where the full notice is found. 224 | 225 | 226 | Copyright (C) 19yy 227 | 228 | This program is free software; you can redistribute it and/or modify 229 | it under the terms of the GNU General Public License as published by 230 | the Free Software Foundation; either version 1, or (at your option) 231 | any later version. 232 | 233 | This program is distributed in the hope that it will be useful, 234 | but WITHOUT ANY WARRANTY; without even the implied warranty of 235 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 236 | GNU General Public License for more details. 237 | 238 | You should have received a copy of the GNU General Public License 239 | along with this program; if not, write to the Free Software 240 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA 241 | 242 | 243 | Also add information on how to contact you by electronic and paper mail. 244 | 245 | If the program is interactive, make it output a short notice like this 246 | when it starts in an interactive mode: 247 | 248 | Gnomovision version 69, Copyright (C) 19xx name of author 249 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 250 | This is free software, and you are welcome to redistribute it 251 | under certain conditions; type `show c' for details. 252 | 253 | The hypothetical commands `show w' and `show c' should show the 254 | appropriate parts of the General Public License. Of course, the 255 | commands you use may be called something other than `show w' and `show 256 | c'; they could even be mouse-clicks or menu items--whatever suits your 257 | program. 258 | 259 | You should also get your employer (if you work as a programmer) or your 260 | school, if any, to sign a "copyright disclaimer" for the program, if 261 | necessary. Here a sample; alter the names: 262 | 263 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 264 | program `Gnomovision' (a program to direct compilers to make passes 265 | at assemblers) written by James Hacker. 266 | 267 | , 1 April 1989 268 | Ty Coon, President of Vice 269 | 270 | That's all there is to it! 271 | 272 | 273 | --- The Artistic License 1.0 --- 274 | 275 | This software is Copyright (c) 2012 by Tokuhiro Matsuno Etokuhirom AAJKLFJEF@ GMAIL COME. 276 | 277 | This is free software, licensed under: 278 | 279 | The Artistic License 1.0 280 | 281 | The Artistic License 282 | 283 | Preamble 284 | 285 | The intent of this document is to state the conditions under which a Package 286 | may be copied, such that the Copyright Holder maintains some semblance of 287 | artistic control over the development of the package, while giving the users of 288 | the package the right to use and distribute the Package in a more-or-less 289 | customary fashion, plus the right to make reasonable modifications. 290 | 291 | Definitions: 292 | 293 | - "Package" refers to the collection of files distributed by the Copyright 294 | Holder, and derivatives of that collection of files created through 295 | textual modification. 296 | - "Standard Version" refers to such a Package if it has not been modified, 297 | or has been modified in accordance with the wishes of the Copyright 298 | Holder. 299 | - "Copyright Holder" is whoever is named in the copyright or copyrights for 300 | the package. 301 | - "You" is you, if you're thinking about copying or distributing this Package. 302 | - "Reasonable copying fee" is whatever you can justify on the basis of media 303 | cost, duplication charges, time of people involved, and so on. (You will 304 | not be required to justify it to the Copyright Holder, but only to the 305 | computing community at large as a market that must bear the fee.) 306 | - "Freely Available" means that no fee is charged for the item itself, though 307 | there may be fees involved in handling the item. It also means that 308 | recipients of the item may redistribute it under the same conditions they 309 | received it. 310 | 311 | 1. You may make and give away verbatim copies of the source form of the 312 | Standard Version of this Package without restriction, provided that you 313 | duplicate all of the original copyright notices and associated disclaimers. 314 | 315 | 2. You may apply bug fixes, portability fixes and other modifications derived 316 | from the Public Domain or from the Copyright Holder. A Package modified in such 317 | a way shall still be considered the Standard Version. 318 | 319 | 3. You may otherwise modify your copy of this Package in any way, provided that 320 | you insert a prominent notice in each changed file stating how and when you 321 | changed that file, and provided that you do at least ONE of the following: 322 | 323 | a) place your modifications in the Public Domain or otherwise make them 324 | Freely Available, such as by posting said modifications to Usenet or an 325 | equivalent medium, or placing the modifications on a major archive site 326 | such as ftp.uu.net, or by allowing the Copyright Holder to include your 327 | modifications in the Standard Version of the Package. 328 | 329 | b) use the modified Package only within your corporation or organization. 330 | 331 | c) rename any non-standard executables so the names do not conflict with 332 | standard executables, which must also be provided, and provide a separate 333 | manual page for each non-standard executable that clearly documents how it 334 | differs from the Standard Version. 335 | 336 | d) make other distribution arrangements with the Copyright Holder. 337 | 338 | 4. You may distribute the programs of this Package in object code or executable 339 | form, provided that you do at least ONE of the following: 340 | 341 | a) distribute a Standard Version of the executables and library files, 342 | together with instructions (in the manual page or equivalent) on where to 343 | get the Standard Version. 344 | 345 | b) accompany the distribution with the machine-readable source of the Package 346 | with your modifications. 347 | 348 | c) accompany any non-standard executables with their corresponding Standard 349 | Version executables, giving the non-standard executables non-standard 350 | names, and clearly documenting the differences in manual pages (or 351 | equivalent), together with instructions on where to get the Standard 352 | Version. 353 | 354 | d) make other distribution arrangements with the Copyright Holder. 355 | 356 | 5. You may charge a reasonable copying fee for any distribution of this 357 | Package. You may charge any fee you choose for support of this Package. You 358 | may not charge a fee for this Package itself. However, you may distribute this 359 | Package in aggregate with other (possibly commercial) programs as part of a 360 | larger (possibly commercial) software distribution provided that you do not 361 | advertise this Package as a product of your own. 362 | 363 | 6. The scripts and library files supplied as input to or produced as output 364 | from the programs of this Package do not automatically fall under the copyright 365 | of this Package, but belong to whomever generated them, and may be sold 366 | commercially, and may be aggregated with this Package. 367 | 368 | 7. C or perl subroutines supplied by you and linked into this Package shall not 369 | be considered part of this Package. 370 | 371 | 8. The name of the Copyright Holder may not be used to endorse or promote 372 | products derived from this software without specific prior written permission. 373 | 374 | 9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED 375 | WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 376 | MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 377 | 378 | The End 379 | 380 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | CODE_OF_CONDUCT.md 2 | CONTRIBUTORS 3 | Changes 4 | INSTALL 5 | LICENSE 6 | MANIFEST 7 | META.json 8 | META.yml 9 | Makefile.PL 10 | README.mkdn 11 | SIGNATURE 12 | cpanfile 13 | doap.xml 14 | lib/Web/Query.pm 15 | lib/Web/Query/LibXML.pm 16 | t/00-compile.t 17 | t/00-report-prereqs.dd 18 | t/00-report-prereqs.t 19 | t/00_compile.t 20 | t/01_src.t 21 | t/02_op.t 22 | t/03_traverse.t 23 | t/04_element.t 24 | t/05_html5.t 25 | t/06_new_from_url_error_handling.t 26 | t/07_remove.t 27 | t/08_indent.t 28 | t/09_as_html.t 29 | t/10_subclass.t 30 | t/11_get_eq.t 31 | t/add.t 32 | t/after.t 33 | t/append.t 34 | t/attr.t 35 | t/bad-url-with-options.t 36 | t/before.t 37 | t/bug-text-contents.t 38 | t/class.t 39 | t/clone.t 40 | t/contents.t 41 | t/data/foo.html 42 | t/data/html5_snippet.html 43 | t/destroy.t 44 | t/detach.t 45 | t/filter.t 46 | t/find.t 47 | t/has_class.t 48 | t/insert_after.t 49 | t/insert_before.t 50 | t/lib/My/TreeBuilder.pm 51 | t/lib/My/Web/Query.pm 52 | t/lib/WQTest.pm 53 | t/match_and_not.t 54 | t/new.t 55 | t/next.t 56 | t/next_until.t 57 | t/no_space_compacting.t 58 | t/node-types.t 59 | t/prepend.t 60 | t/prev.t 61 | t/processing-instructions.t 62 | t/remove.t 63 | t/remove_class.t 64 | t/replace_with.t 65 | t/special-attributes.t 66 | t/split.t 67 | t/store_comments.t 68 | t/tagname.t 69 | t/xpath.t 70 | xt/live/01_simple.t 71 | xt/release/unused-vars.t 72 | -------------------------------------------------------------------------------- /MANIFEST.SKIP: -------------------------------------------------------------------------------- 1 | dist.ini 2 | .mailmap 3 | MANIFEST.SKIP 4 | weaver.ini 5 | -------------------------------------------------------------------------------- /cpanfile: -------------------------------------------------------------------------------- 1 | # This file is generated by Dist::Zilla::Plugin::CPANFile v6.030 2 | # Do not edit this file directly. To change prereqs, edit the `dist.ini` file. 3 | 4 | requires "Exporter" => "0"; 5 | requires "HTML::Entities" => "0"; 6 | requires "HTML::Selector::XPath" => "0.20"; 7 | requires "HTML::TreeBuilder::LibXML" => "0"; 8 | requires "HTML::TreeBuilder::XPath" => "0"; 9 | requires "LWP::UserAgent" => "0"; 10 | requires "List::Util" => "1.44"; 11 | requires "Scalar::Util" => "0"; 12 | requires "parent" => "0"; 13 | requires "perl" => "5.008005"; 14 | requires "strict" => "0"; 15 | requires "warnings" => "0"; 16 | 17 | on 'test' => sub { 18 | requires "Cwd" => "0"; 19 | requires "ExtUtils::MakeMaker" => "0"; 20 | requires "File::Spec" => "0"; 21 | requires "FindBin" => "0"; 22 | requires "IO::Handle" => "0"; 23 | requires "IPC::Open3" => "0"; 24 | requires "Test2::Tools::Exception" => "0"; 25 | requires "Test2::V0" => "0"; 26 | requires "Test::Exception" => "0"; 27 | requires "Test::More" => "0"; 28 | requires "lib" => "0"; 29 | requires "utf8" => "0"; 30 | }; 31 | 32 | on 'test' => sub { 33 | recommends "CPAN::Meta" => "2.120900"; 34 | }; 35 | 36 | on 'configure' => sub { 37 | requires "ExtUtils::MakeMaker" => "0"; 38 | }; 39 | 40 | on 'develop' => sub { 41 | requires "Test::More" => "0.96"; 42 | requires "Test::Vars" => "0"; 43 | requires "utf8" => "0"; 44 | }; 45 | -------------------------------------------------------------------------------- /dist.ini: -------------------------------------------------------------------------------- 1 | name = Web-Query 2 | author = Tokuhiro Matsuno 3 | license = Perl_5 4 | copyright_holder = Tokuhiro Matsuno 5 | copyright_year = 2012 6 | 7 | [@Filter] 8 | -bundle=@YANICK 9 | -remove=Covenant 10 | -remove=License 11 | authority = cpan:TOKUHIROM 12 | import_from_build=cpanfile 13 | NextVersion::Semantic.format=%d.%02d 14 | upstream=github 15 | AutoPrereqs.skip = XML::LibXML 16 | -------------------------------------------------------------------------------- /lib/Web/Query.pm: -------------------------------------------------------------------------------- 1 | package Web::Query; 2 | # ABSTRACT: Yet another scraping library like jQuery 3 | 4 | use strict; 5 | use warnings; 6 | use 5.008001; 7 | use parent qw/Exporter/; 8 | use HTML::TreeBuilder::XPath; 9 | use LWP::UserAgent; 10 | use HTML::Selector::XPath 0.20 qw/selector_to_xpath/; 11 | use Scalar::Util qw/blessed refaddr/; 12 | use HTML::Entities qw/encode_entities/; 13 | 14 | use List::Util 1.44 qw/ reduce uniq /; 15 | use Scalar::Util qw/ refaddr /; 16 | 17 | our @EXPORT = qw/wq/; 18 | 19 | our $RESPONSE; 20 | 21 | sub wq { Web::Query->new(@_) } 22 | 23 | our $UserAgent; 24 | 25 | sub __ua { $UserAgent ||= LWP::UserAgent->new } 26 | 27 | sub _build_tree { 28 | my( $self, $options ) = @_; 29 | 30 | my $no_space_compacting = ref $self ? $self->{no_space_compacting} 31 | : ref $options eq 'HASH' ? $options->{no_space_compacting} : 0; 32 | 33 | my $tree = HTML::TreeBuilder::XPath->new( 34 | no_space_compacting => $no_space_compacting 35 | ); 36 | $tree->ignore_unknown(0); 37 | $tree->store_comments(1); 38 | $tree; 39 | } 40 | 41 | sub new { 42 | my ($class, $stuff, $options) = @_; 43 | 44 | my $self = $class->_resolve_new($stuff,$options) 45 | or return undef; 46 | 47 | $self->{indent} = $options->{indent} if $options->{indent}; 48 | 49 | $self->{no_space_compacting} = $options->{no_space_compacting}; 50 | 51 | return $self; 52 | } 53 | 54 | sub _resolve_new { 55 | my( $class, $stuff, $options) = @_; 56 | 57 | return $class->new_from_element([],undef,$options) unless defined $stuff; 58 | 59 | if (blessed $stuff) { 60 | return $class->new_from_element([$stuff],undef,$options) 61 | if $stuff->isa('HTML::Element'); 62 | 63 | return $class->new_from_url($stuff->as_string,$options) 64 | if $stuff->isa('URI'); 65 | 66 | return $class->new_from_element($stuff->{trees}, undef, $options) 67 | if $stuff->isa($class); 68 | 69 | die "Unknown source type: $stuff"; 70 | } 71 | 72 | return $class->new_from_element($stuff,undef,$options) if ref $stuff eq 'ARRAY'; 73 | 74 | return $class->new_from_url($stuff,$options) if $stuff =~ m{^(?:https?|file)://}; 75 | 76 | return $class->new_from_html($stuff,$options) if $stuff =~ /<.*?>/; 77 | 78 | return $class->new_from_file($stuff,$options) if $stuff !~ /\n/ && -f $stuff; 79 | 80 | die "Unknown source type: $stuff"; 81 | } 82 | 83 | sub new_from_url { 84 | my ($class, $url,$options) = @_; 85 | 86 | $RESPONSE = __ua()->get($url); 87 | 88 | no warnings 'uninitialized'; 89 | 90 | unless( $RESPONSE->is_success ) { 91 | die "failed to retrieve '$url', " . $RESPONSE->code. " " 92 | . $RESPONSE->message."\n"; 93 | }; 94 | 95 | return $class->new_from_html($RESPONSE->decoded_content,$options); 96 | } 97 | 98 | sub new_from_file { 99 | my ($class, $fname, $options) = @_; 100 | my $tree = $class->_build_tree($options); 101 | $tree->parse_file($fname); 102 | my $self = $class->new_from_element([$tree->disembowel],undef,$options); 103 | $self->{need_delete}++; 104 | return $self; 105 | } 106 | 107 | sub new_from_html { 108 | my ($class, $html,$options) = @_; 109 | my $tree = $class->_build_tree($options); 110 | $tree->parse_content($html); 111 | my $self = $class->new_from_element([ 112 | map { 113 | ref $_ ? $_ : bless { _content => $_ }, 'HTML::TreeBuilder::XPath::TextNode' 114 | } $tree->disembowel 115 | ],undef,$options); 116 | $self->{need_delete}++; 117 | return $self; 118 | } 119 | 120 | sub new_from_element { 121 | my $self_or_class = shift; 122 | 123 | my $trees = ref $_[0] eq 'ARRAY' ? $_[0] : [$_[0]]; 124 | 125 | return bless { trees => [ @$trees ], before => $_[1] }, 126 | ref $self_or_class || $self_or_class; 127 | } 128 | 129 | sub end { 130 | my $self = shift; 131 | return $self->{before}; 132 | } 133 | 134 | sub size { 135 | my $self = shift; 136 | return scalar(@{$self->{trees}}); 137 | } 138 | 139 | sub parent { 140 | my $self = shift; 141 | 142 | my @new = map { $_->parent } @{$self->{trees}}; 143 | 144 | return (ref $self || $self)->new_from_element(\@new, $self); 145 | } 146 | 147 | sub first { 148 | my $self = shift; 149 | return $self->eq(0); 150 | } 151 | 152 | sub last { 153 | my $self = shift; 154 | return $self->eq(-1); 155 | } 156 | 157 | sub get { 158 | my ($self, $index) = @_; 159 | return $self->{trees}[$index]; 160 | } 161 | 162 | sub eq { 163 | my ($self, $index) = @_; 164 | return (ref $self || $self)->new_from_element([$self->{trees}[$index] || ()], $self); 165 | } 166 | 167 | sub find { 168 | my ($self, $selector) = @_; 169 | 170 | my $xpath = ref $selector ? $$selector : selector_to_xpath($selector, root => './'); 171 | my @new = map { eval{ $_->findnodes($xpath) } } @{$self->{trees}}; 172 | 173 | return (ref $self || $self)->new_from_element(\@new, $self); 174 | } 175 | 176 | sub contents { 177 | my ($self, $selector) = @_; 178 | 179 | my @new = map { $_->content_list } @{$self->{trees}}; 180 | 181 | if ($selector) { 182 | my $xpath = ref $selector ? $$selector : selector_to_xpath($selector); 183 | @new = grep { $_->matches($xpath) } @new; 184 | } 185 | 186 | return (ref $self || $self)->new_from_element(\@new, $self); 187 | } 188 | 189 | sub as_html { 190 | my $self = shift; 191 | my %args = @_; 192 | 193 | my @html = map { 194 | ref $_ ? ( $_->isa('HTML::TreeBuilder::XPath::TextNode') || $_->isa('HTML::TreeBuilder::XPath::CommentNode' ) ) 195 | ? $_->getValue 196 | : $_->as_HTML( q{&<>'"}, $self->{indent}, {} ) 197 | : $_ 198 | } @{$self->{trees}}; 199 | 200 | return join $args{join}, @html if defined $args{join}; 201 | 202 | return wantarray ? @html : $html[0]; 203 | } 204 | 205 | sub html { 206 | my $self = shift; 207 | 208 | if (@_) { 209 | map { 210 | $_->delete_content; 211 | my $tree = $self->_build_tree; 212 | 213 | $tree->parse_content($_[0]); 214 | $_->push_content($tree->disembowel); 215 | } @{$self->{trees}}; 216 | return $self; 217 | } 218 | 219 | my @html; 220 | for my $t ( @{$self->{trees}} ) { 221 | push @html, join '', map { 222 | ref $_ ? $_->as_HTML( q{&<>'"}, $self->{indent}, {}) 223 | : encode_entities($_) 224 | } eval { $t->content_list }; 225 | } 226 | 227 | return wantarray ? @html : $html[0]; 228 | } 229 | 230 | sub text { 231 | my $self = shift; 232 | 233 | if (@_) { 234 | map { $_->delete_content; $_->push_content($_[0]) } @{$self->{trees}}; 235 | return $self; 236 | } 237 | 238 | my @html = map { 239 | ref $_ ? $_->as_text : $_ 240 | } @{$self->{trees}}; 241 | return wantarray ? @html : $html[0]; 242 | } 243 | 244 | sub attr { 245 | my $self = shift; 246 | 247 | if ( @_ == 1 ) { # getter 248 | return wantarray 249 | ? map { $_->attr(@_) } @{$self->{trees}} 250 | : eval { $self->{trees}[0]->attr(@_) } 251 | ; 252 | } 253 | 254 | while( my( $attr, $value ) = splice @_, 0, 2 ) { 255 | my $code = ref $value eq 'CODE' ? $value : undef; 256 | 257 | for my $t ( @{$self->{trees}} ) { 258 | if ( $code ) { 259 | no warnings 'uninitialized'; 260 | my $orig = $_ = $t->attr($attr); 261 | $code->(); 262 | next if $orig eq $_; 263 | $value = $_; 264 | } 265 | $t->attr($attr => $value); 266 | } 267 | } 268 | 269 | return $self; 270 | } 271 | 272 | sub id { 273 | my $self = shift; 274 | 275 | if ( @_ ) { # setter 276 | my $new_id = shift; 277 | 278 | return $self if $self->size == 0; 279 | 280 | return $self->each(sub{ 281 | $_->attr( id => $new_id->(@_) ) 282 | }) if ref $new_id eq 'CODE'; 283 | 284 | if ( $self->size == 1 ) { 285 | $self->attr( id => $new_id ); 286 | } 287 | else { 288 | return $self->first->attr( id => $new_id ); 289 | } 290 | } 291 | else { # getter 292 | 293 | # the eval is there in case there is no tree 294 | return wantarray 295 | ? map { $_->attr('id') } @{$self->{trees}} 296 | : eval { $self->{trees}[0]->attr('id') } 297 | ; 298 | } 299 | } 300 | 301 | sub name { 302 | my $self = shift; 303 | $self->attr( 'name', @_ ); 304 | } 305 | 306 | sub data { 307 | my $self = shift; 308 | my $name = shift; 309 | $self->attr( join( '-', 'data', $name ), @_ ); 310 | } 311 | 312 | sub tagname { 313 | my $self = shift; 314 | my @retval = map { $_ eq '~comment' ? '#comment' : $_ } 315 | map { ref $_ eq 'HTML::TreeBuilder::XPath::TextNode' ? '#text' 316 | : ref $_ eq 'HTML::TreeBuilder::XPath::CommentNode' ? '#comment' 317 | : ref $_ ? $_->tag(@_) 318 | : '#text' 319 | ; 320 | } @{$self->{trees}}; 321 | return wantarray ? @retval : $retval[0]; 322 | } 323 | 324 | sub each { 325 | my ($self, $code) = @_; 326 | my $i = 0; 327 | 328 | # make a copy such that if we modify the list via 'delete', 329 | # it won't change from under our feet (see t/each-and-delete.t 330 | # for a case where it can) 331 | my @trees = @{ $self->{trees} }; 332 | for my $tree ( @trees ) { 333 | local $_ = (ref $self || $self)->new_from_element([$tree], $self); 334 | $code->($i++, $_); 335 | } 336 | return $self; 337 | } 338 | 339 | sub map { 340 | my ($self, $code) = @_; 341 | my $i = 0; 342 | return +[map { 343 | my $tree = $_; 344 | local $_ = (ref $self || $self)->new($tree); 345 | $code->($i++, $_); 346 | } @{$self->{trees}}]; 347 | } 348 | 349 | sub filter { 350 | my $self = shift; 351 | 352 | my @new; 353 | 354 | if (ref($_[0]) eq 'CODE') { 355 | my $code = $_[0]; 356 | my $i = 0; 357 | @new = grep { 358 | my $tree = $_; 359 | local $_ = (ref $self || $self)->new_from_element($tree); 360 | $code->($i++, $_); 361 | } @{$self->{trees}}; 362 | } 363 | else { 364 | my $xpath = ref $_[0] ? ${$_[0]} : selector_to_xpath($_[0]); 365 | @new = grep { eval { $_->matches($xpath) } } @{$self->{trees}}; 366 | } 367 | 368 | return (ref $self || $self)->new_from_element(\@new, $self); 369 | } 370 | 371 | sub _is_same_node { 372 | refaddr($_[1]) == refaddr($_[2]); 373 | } 374 | 375 | sub remove { 376 | my $self = shift; 377 | my $before = $self->end; 378 | 379 | while (defined $before) { 380 | @{$before->{trees}} = grep { 381 | my $el = $_; 382 | not grep { $self->_is_same_node( $el, $_ ) } @{$self->{trees}}; 383 | } @{$before->{trees}}; 384 | 385 | $before = $before->end; 386 | } 387 | 388 | $_->delete for @{$self->{trees}}; 389 | @{$self->{trees}} = (); 390 | 391 | $self; 392 | } 393 | 394 | sub replace_with { 395 | my ( $self, $replacement ) = @_; 396 | 397 | my $i = 0; 398 | for my $node ( @{ $self->{trees} } ) { 399 | my $rep = $replacement; 400 | 401 | if ( ref $rep eq 'CODE' ) { 402 | local $_ = (ref $self || $self)->new($node); 403 | $rep = $rep->( $i++ => $_ ); 404 | } 405 | 406 | $rep = (ref $self || $self)->new_from_html( $rep ) 407 | unless ref $rep; 408 | 409 | 410 | 411 | my $r = $rep->{trees}->[0]; 412 | { no warnings; 413 | $r = $r->clone if ref $r; 414 | } 415 | $r->parent( $node->parent ) if ref $r and $node->parent; 416 | 417 | $node->replace_with( $r ); 418 | } 419 | 420 | $replacement->remove if ref $replacement eq (ref $self || $self); 421 | 422 | return $self; 423 | } 424 | 425 | sub append { 426 | my ($self, $stuff) = @_; 427 | $stuff = (ref $self || $self)->new($stuff); 428 | 429 | foreach my $t (@{$self->{trees}}) { 430 | $t->push_content($_) for ref($t)->clone_list(@{$stuff->{trees}}); 431 | } 432 | 433 | $self; 434 | } 435 | 436 | sub prepend { 437 | my ($self, $stuff) = @_; 438 | $stuff = (ref $self || $self)->new($stuff); 439 | 440 | foreach my $t (@{$self->{trees}}) { 441 | $t->unshift_content($_) for ref($t)->clone_list(@{$stuff->{trees}}); 442 | } 443 | 444 | $self; 445 | } 446 | 447 | 448 | sub before { 449 | my ($self, $stuff) = @_; 450 | $stuff = (ref $self || $self)->new($stuff); 451 | 452 | foreach my $t (@{$self->{trees}}) { 453 | $t->preinsert(ref($t)->clone_list(@{$stuff->{trees}})); 454 | } 455 | 456 | $self; 457 | } 458 | 459 | 460 | sub after { 461 | my ($self, $stuff) = @_; 462 | $stuff = (ref $self || $self)->new($stuff); 463 | 464 | foreach my $t (@{$self->{trees}}) { 465 | $t->postinsert(ref($t)->clone_list(@{$stuff->{trees}})); 466 | } 467 | 468 | $self; 469 | } 470 | 471 | 472 | sub insert_before { 473 | my ($self, $target) = @_; 474 | 475 | foreach my $t (@{$target->{trees}}) { 476 | $t->preinsert(ref($t)->clone_list(@{$self->{trees}})); 477 | } 478 | 479 | $self; 480 | } 481 | 482 | sub insert_after { 483 | my ($self, $target) = @_; 484 | 485 | foreach my $t (@{$target->{trees}}) { 486 | $t->postinsert(ref($t)->clone_list(@{$self->{trees}})); 487 | } 488 | 489 | $self; 490 | } 491 | 492 | sub detach { 493 | my ($self) = @_; 494 | $_->detach for @{$self->{trees}}; 495 | $self; 496 | } 497 | 498 | sub add_class { 499 | my ($self, $class) = @_; 500 | 501 | for (my $i = 0; $i < @{$self->{trees}}; $i++) { 502 | my $t = $self->{trees}->[$i]; 503 | my $current_class = $t->attr('class') || ''; 504 | 505 | my $classes = ref $class eq 'CODE' ? $class->($i, $current_class, $t) : $class; 506 | my @classes = split /\s+/, $classes; 507 | 508 | foreach (@classes) { 509 | $current_class .= " $_" unless $current_class =~ /(?:^|\s)$_(?:\s|$)/; 510 | } 511 | 512 | $current_class =~ s/(?:^\s*|\s*$)//g; 513 | $current_class =~ s/\s\s+/ /g; 514 | 515 | $t->attr('class', $current_class); 516 | } 517 | 518 | $self; 519 | } 520 | 521 | 522 | sub remove_class { 523 | my ($self, $class) = @_; 524 | 525 | for (my $i = 0; $i < @{$self->{trees}}; $i++) { 526 | my $t = $self->{trees}->[$i]; 527 | my $current_class = $t->attr('class'); 528 | next unless defined $current_class; 529 | 530 | my $classes = ref $class eq 'CODE' ? $class->($i, $current_class, $t) : $class; 531 | my @remove_classes = split /\s+/, $classes; 532 | my @final = grep { 533 | my $existing_class = $_; 534 | not grep { $existing_class eq $_} @remove_classes; 535 | } split /\s+/, $current_class; 536 | 537 | $t->attr('class', join ' ', @final); 538 | } 539 | 540 | $self; 541 | 542 | } 543 | 544 | sub toggle_class { 545 | my $self = shift; 546 | 547 | my @classes = uniq @_; 548 | 549 | $self->each(sub{ 550 | for my $class ( @classes ) { 551 | my $method = $_->has_class($class) ? 'remove_class' : 'add_class'; 552 | $_->$method($class); 553 | } 554 | }); 555 | } 556 | 557 | sub has_class { 558 | my ($self, $class) = @_; 559 | 560 | foreach my $t (@{$self->{trees}}) { 561 | no warnings 'uninitialized'; 562 | return 1 if $t->attr('class') =~ /(?:^|\s)$class(?:\s|$)/; 563 | } 564 | 565 | return undef; 566 | } 567 | 568 | sub clone { 569 | my ($self) = @_; 570 | my @clones = map { $_->clone } @{$self->{trees}}; 571 | return (ref $self || $self)->new_from_element(\@clones); 572 | } 573 | 574 | sub add { 575 | my ($self, @stuff) = @_; 576 | my @nodes; 577 | 578 | # add(selector, context) 579 | if (@stuff == 2 && !ref $stuff[0] && $stuff[1]->isa('HTML::Element')) { 580 | my $xpath = ref $stuff[0] ? ${$stuff[0]} : selector_to_xpath($stuff[0]); 581 | push @nodes, $stuff[1]->findnodes( $xpath, root => './'); 582 | } 583 | else { 584 | # handle any combination of html string, element object and web::query object 585 | push @nodes, map { 586 | $self->{need_delete} = 1 if $_->{need_delete}; 587 | delete $_->{need_delete}; 588 | @{$_->{trees}}; 589 | } map { (ref $self || $self)->new($_) } @stuff; 590 | } 591 | 592 | my %ids = map { $self->_node_id($_) => 1 } @{ $self->{trees} }; 593 | 594 | $self->new_from_element( [ 595 | @{$self->{trees}}, grep { ! $ids{ $self->_node_id($_) } } @nodes 596 | ], $self ); 597 | } 598 | 599 | sub _node_id { 600 | my( undef, $node ) = @_; 601 | refaddr $node; 602 | } 603 | 604 | sub prev { 605 | my $self = shift; 606 | my @new; 607 | for my $tree (@{$self->{trees}}) { 608 | push @new, $tree->getPreviousSibling; 609 | } 610 | return (ref $self || $self)->new_from_element(\@new, $self); 611 | } 612 | 613 | sub next { 614 | my $self = shift; 615 | 616 | my @new = grep { $_ } map { $_->getNextSibling } @{ $self->{trees} }; 617 | 618 | return (ref $self || $self)->new_from_element(\@new, $self); 619 | } 620 | 621 | sub match { 622 | my( $self, $selector ) = @_; 623 | 624 | my $xpath = ref $selector ? $$selector : selector_to_xpath($selector); 625 | 626 | my $results = $self->map(sub{ 627 | my(undef,$e) = @_; 628 | return 0 unless ref $e; # it's a string 629 | return !!$e->get(0)->matches($xpath); 630 | }); 631 | 632 | return wantarray ? @$results : $results->[0]; 633 | } 634 | 635 | sub not { 636 | my( $self, $selector ) = @_; 637 | 638 | my $class = ref $self; 639 | 640 | my $xpath = ref $selector ? $$selector : selector_to_xpath($selector); 641 | $self->filter(sub { ! grep { $_->matches($xpath) } grep { ref $_ } $class->new($_)->{trees}[0] } ); 642 | } 643 | 644 | sub and_back { 645 | my $self = shift; 646 | 647 | $self->add( $self->end ); 648 | } 649 | 650 | sub next_until { 651 | my( $self, $selector ) = @_; 652 | 653 | my $class = ref $self; 654 | my $collection = $class->new_from_element([],$self); 655 | 656 | my $next = $self->next->not($selector); 657 | while( $next->size ) { 658 | $collection = $collection->add($next); 659 | $next = $next->next->not( $selector ); 660 | } 661 | 662 | # hide the loop from the outside world 663 | $collection->{before} = $self; 664 | 665 | return $collection; 666 | } 667 | 668 | sub split { 669 | my( $self, $selector, %args ) = @_; 670 | 671 | my @current; 672 | my @list; 673 | 674 | $self->contents->each(sub{ 675 | my(undef,$e)=@_; 676 | 677 | if( $e->match($selector) ) { 678 | push @list, [ @current ]; 679 | @current = ( $e ); 680 | } 681 | else { 682 | if ( $current[1] ) { 683 | $current[1] = $current[1]->add($e); 684 | } 685 | else { 686 | $current[1] = $e; 687 | } 688 | } 689 | }); 690 | push @list, [ @current ]; 691 | 692 | if( $args{skip_leading} ) { 693 | @list = grep { $_->[0] } @list; 694 | } 695 | 696 | unless ( $args{pairs} ) { 697 | @list = map { reduce { $a->add($b) } grep { $_ } @$_ } @list; 698 | } 699 | 700 | return @list; 701 | } 702 | 703 | sub last_response { 704 | return $RESPONSE; 705 | } 706 | 707 | sub DESTROY { 708 | return unless $_[0]->{need_delete}; 709 | 710 | # avoid memory leaks 711 | local $@; 712 | eval { $_->delete } for @{$_[0]->{trees}}; 713 | } 714 | 715 | 1; 716 | __END__ 717 | 718 | =encoding utf8 719 | 720 | =for stopwords prev 721 | 722 | 723 | =head1 SYNOPSIS 724 | 725 | use Web::Query; 726 | 727 | wq('http://www.w3.org/TR/html401/') 728 | ->find('div.head dt') 729 | ->each(sub { 730 | my $i = shift; 731 | printf("%d %s\n", $i+1, $_->text); 732 | }); 733 | 734 | =head1 DESCRIPTION 735 | 736 | Web::Query is a yet another scraping framework, have a jQuery like interface. 737 | 738 | Yes, I know Ingy's L. But it's just alpha quality. It doesn't work. 739 | Web::Query built at top of the CPAN modules, L, L, and L. 740 | 741 | So, this module uses L and only supports the CSS 3 742 | selector supported by that module. 743 | Web::Query doesn't support jQuery's extended queries(yet?). If a selector is 744 | passed as a scalar ref, it'll be taken as a straight XPath expression. 745 | 746 | $wq( '

hello

there

' )->find( 'p' ); # css selector 747 | $wq( '

hello

there

' )->find( \'/div/p' ); # xpath selector 748 | 749 | 750 | B. 751 | 752 | =head1 FUNCTIONS 753 | 754 | =over 4 755 | 756 | =item C<< wq($stuff) >> 757 | 758 | This is a shortcut for C<< Web::Query->new($stuff) >>. This function is exported by default. 759 | 760 | =back 761 | 762 | =head1 METHODS 763 | 764 | =head2 CONSTRUCTORS 765 | 766 | =over 4 767 | 768 | =item my $q = Web::Query->new($stuff, \%options ) 769 | 770 | Create new instance of Web::Query. You can make the instance from URL(http, https, file scheme), HTML in string, URL in string, L object, C, and either one 771 | L object or an array ref of them. 772 | 773 | # all valid creators 774 | $q = Web::Query->new( 'http://techblog.babyl.ca' ); 775 | $q = Web::Query->new( '

foo

' ); 776 | $q = Web::Query->new( undef ); 777 | 778 | This method throw the exception on unknown $stuff. 779 | 780 | This method returns undefined value on non-successful response with URL. 781 | 782 | Currently, the only two valid options are I, which will be used as 783 | the indentation string if the object is printed, and I, 784 | which will prevent the compaction of whitespace characters in text blocks. 785 | 786 | =item my $q = Web::Query->new_from_element($element: HTML::Element) 787 | 788 | Create new instance of Web::Query from instance of L. 789 | 790 | =item C<< my $q = Web::Query->new_from_html($html: Str) >> 791 | 792 | Create new instance of Web::Query from HTML. 793 | 794 | =item my $q = Web::Query->new_from_url($url: Str) 795 | 796 | Create new instance of Web::Query from URL. 797 | 798 | If the response is not success(It means /^20[0-9]$/), this method returns undefined value. 799 | 800 | You can get a last result of response, use the C<< $Web::Query::RESPONSE >>. 801 | 802 | Here is a best practical code: 803 | 804 | my $url = 'http://example.com/'; 805 | my $q = Web::Query->new_from_url($url) 806 | or die "Cannot get a resource from $url: " . Web::Query->last_response()->status_line; 807 | 808 | =item my $q = Web::Query->new_from_file($file_name: Str) 809 | 810 | Create new instance of Web::Query from file name. 811 | 812 | =back 813 | 814 | =head2 TRAVERSING 815 | 816 | =head3 add 817 | 818 | Returns a new object augmented with the new element(s). 819 | 820 | =over 4 821 | 822 | =item add($html) 823 | 824 | An HTML fragment to add to the set of matched elements. 825 | 826 | =item add(@elements) 827 | 828 | One or more @elements to add to the set of matched elements. 829 | 830 | @elements that already are part of the set are not added a second time. 831 | 832 | my $group = $wq->find('#foo'); # collection has 1 element 833 | $group = $group->add( '#bar', $wq ); # 2 elements 834 | $group->add( '#foo', $wq ); # still 2 elements 835 | 836 | =item add($wq) 837 | 838 | An existing Web::Query object to add to the set of matched elements. 839 | 840 | =item add($selector, $context) 841 | 842 | $selector is a string representing a selector expression to find additional elements to add to the set of matched elements. 843 | 844 | $context is the point in the document at which the selector should begin matching 845 | 846 | =back 847 | 848 | =head3 contents 849 | 850 | Get the immediate children of each element in the set of matched elements, including text and comment nodes. 851 | 852 | =head3 each 853 | 854 | Visit each nodes. C<< $i >> is a counter value, 0 origin. C<< $elem >> is iteration item. 855 | C<< $_ >> is localized by C<< $elem >>. 856 | 857 | $q->each(sub { my ($i, $elem) = @_; ... }) 858 | 859 | =head3 end 860 | 861 | Back to the before context like jQuery. 862 | 863 | =head3 filter 864 | 865 | Reduce the elements to those that pass the function's test. 866 | 867 | $q->filter(sub { my ($i, $elem) = @_; ... }) 868 | 869 | =head3 find 870 | 871 | Get the descendants of each element in the current set of matched elements, filtered by a selector. 872 | 873 | my $q2 = $q->find($selector); # $selector is a CSS3 selector. 874 | 875 | B If you want to match the element itself, use L. 876 | 877 | B 878 | From v0.14 to v0.19 (inclusive) find() also matched the element itself, which is not jQuery compatible. 879 | You can achieve that result using C, C and C: 880 | 881 | my $wq = wq('

bar

'); # needed because we don't have a global document like jQuery does 882 | print $wq->filter('.foo')->add($wq->find('.foo'))->as_html; #

bar

bar

883 | 884 | =head3 first 885 | 886 | Return the first matching element. 887 | 888 | This method constructs a new Web::Query object from the first matching element. 889 | 890 | =head3 last 891 | 892 | Return the last matching element. 893 | 894 | This method constructs a new Web::Query object from the last matching element. 895 | 896 | =head3 match($selector) 897 | 898 | Returns a boolean indicating if the elements match the C<$selector>. 899 | 900 | In scalar context returns only the boolean for the first element. 901 | 902 | For the reverse of C, see C. 903 | 904 | =head3 not($selector) 905 | 906 | Returns all the elements not matching the C<$selector>. 907 | 908 | # $do_for_love will be every thing, except #that 909 | my $do_for_love = $wq->find('thing')->not('#that'); 910 | 911 | =head3 and_back 912 | 913 | Add the previous set of elements to the current one. 914 | 915 | # get the h1 plus everything until the next h1 916 | $wq->find('h1')->next_until('h1')->and_back; 917 | 918 | =head3 map 919 | 920 | Creates a new array with the results of calling a provided function on every element. 921 | 922 | $q->map(sub { my ($i, $elem) = @_; ... }) 923 | 924 | =head3 parent 925 | 926 | Get the parent of each element in the current set of matched elements. 927 | 928 | =head3 prev 929 | 930 | Get the previous node of each element in the current set of matched elements. 931 | 932 | my $prev = $q->prev; 933 | 934 | =head3 next 935 | 936 | Get the next node of each element in the current set of matched elements. 937 | 938 | my $next = $q->next; 939 | 940 | =head3 next_until( $selector ) 941 | 942 | Get all subsequent siblings, up to (but not including) the next node matched C<$selector>. 943 | 944 | =head2 MANIPULATION 945 | 946 | =head3 add_class 947 | 948 | Adds the specified class(es) to each of the set of matched elements. 949 | 950 | # add class 'foo' to

elements 951 | wq('

foo

bar

')->find('p')->add_class('foo'); 952 | 953 | =head3 toggle_class( @classes ) 954 | 955 | Toggles the given class or classes on each of the element. I.e., if the element had the class, it'll be removed, 956 | and if it hadn't, it'll be added. 957 | 958 | Classes are toggled once, no matter how many times they appear in the argument list. 959 | 960 | $q->toggle_class( 'foo', 'foo', 'bar' ); 961 | 962 | # equivalent to 963 | 964 | $q->toggle_class('foo')->toggle_class('bar'); 965 | 966 | # and not 967 | 968 | $q->toggle_class('foo')->toggle_class('foo')->toggle_class('bar'); 969 | 970 | 971 | =head3 after 972 | 973 | Insert content, specified by the parameter, after each element in the set of matched elements. 974 | 975 | wq('

foo

')->find('p') 976 | ->after('bar') 977 | ->end 978 | ->as_html; #

foo

bar
979 | 980 | The content can be anything accepted by L. 981 | 982 | =head3 append 983 | 984 | Insert content, specified by the parameter, to the end of each element in the set of matched elements. 985 | 986 | wq('
')->append('

foo

')->as_html; #

foo

987 | 988 | The content can be anything accepted by L. 989 | 990 | =head3 as_html 991 | 992 | Returns the string representations of either the first or all elements, 993 | depending if called in list or scalar context. 994 | 995 | If given an argument C, the string representations of the elements 996 | will be concatenated with the given string. 997 | 998 | wq( '

foo

bar

' ) 999 | ->find('p') 1000 | ->as_html( join => '!' ); 1001 | #

foo

!

bar

1002 | 1003 | =head3 C< attr > 1004 | 1005 | Get/set attribute values. 1006 | 1007 | In getter mode, it'll return either the values of the attribute 1008 | for all elements of the set, or only the first one depending of the calling context. 1009 | 1010 | my @values = $q->attr('style'); # style of all elements 1011 | my $first_value = $q->attr('style'); # style of first element 1012 | 1013 | In setter mode, it'll set attributes value for all elements, and return back 1014 | the original object for easy chaining. 1015 | 1016 | $q->attr( 'alt' => 'a picture' )->find( ... ); 1017 | 1018 | # can pass more than 1 element too 1019 | $q->attr( alt => 'a picture', src => 'file:///...' ); 1020 | 1021 | The value passed for an attribute can be a code ref. In that case, 1022 | the code will be called with C<$_> set to the current attribute value. 1023 | If the code modifies C<$_>, the attribute will be updated with the new value. 1024 | 1025 | $q->attr( alt => sub { $_ ||= 'A picture' } ); 1026 | 1027 | =head3 C< id > 1028 | 1029 | Get/set the elements's id attribute. 1030 | 1031 | In getter mode, it behaves just like C. 1032 | 1033 | In setter mode, it behaves like C, but with the following exceptions. 1034 | 1035 | If the attribute value is a scalar, it'll be only assigned to 1036 | the first element of the set (as ids are supposed to be unique), and the returned object will only contain 1037 | that first element. 1038 | 1039 | my $first_element = $q->id('the_one'); 1040 | 1041 | It's possible to set the ids of all the elements by passing a sub to C. The sub is given the same arguments as for 1042 | C, and its return value is taken to be the new id of the elements. 1043 | 1044 | $q->id( sub { my $i = shift; 'foo_' . $i } ); 1045 | 1046 | =head3 C< name > 1047 | 1048 | Get/set the elements's 'name' attribute. 1049 | 1050 | my $name = $q->name; # equivalent to $q->attr( 'name' ); 1051 | 1052 | $q->name( 'foo' ); # equivalent to $q->attr( name => 'foo' ); 1053 | 1054 | =head3 C< data > 1055 | 1056 | Get/set the elements's 'data-*name*' attributes. 1057 | 1058 | my $data = $q->data('foo'); # equivalent to $q->attr( 'data-foo' ); 1059 | 1060 | $q->data( 'foo' => 'bar' ); # equivalent to $q->attr( 'data-foo' => 'bar' ); 1061 | 1062 | 1063 | =head3 tagname 1064 | 1065 | Get/Set the tag name of elements. 1066 | 1067 | my $name = $q->tagname; 1068 | 1069 | $q->tagname($new_name); 1070 | 1071 | =head3 before 1072 | 1073 | Insert content, specified by the parameter, before each element in the set of matched elements. 1074 | 1075 | wq('

foo

')->find('p') 1076 | ->before('bar') 1077 | ->end 1078 | ->as_html; #
bar

foo

1079 | 1080 | The content can be anything accepted by L. 1081 | 1082 | =head3 clone 1083 | 1084 | Create a deep copy of the set of matched elements. 1085 | 1086 | =head3 detach 1087 | 1088 | Remove the set of matched elements from the DOM. 1089 | 1090 | =head3 has_class 1091 | 1092 | Determine whether any of the matched elements are assigned the given class. 1093 | 1094 | =head3 C< html > 1095 | 1096 | Get/Set the innerHTML. 1097 | 1098 | my @html = $q->html(); 1099 | 1100 | my $html = $q->html(); # 1st matching element only 1101 | 1102 | $q->html('

foo

'); 1103 | 1104 | =head3 insert_before 1105 | 1106 | Insert every element in the set of matched elements before the target. 1107 | 1108 | =head3 insert_after 1109 | 1110 | Insert every element in the set of matched elements after the target. 1111 | 1112 | =head3 C< prepend > 1113 | 1114 | Insert content, specified by the parameter, to the beginning of each element in the set of matched elements. 1115 | 1116 | =head3 remove 1117 | 1118 | Delete the elements associated with the object from the DOM. 1119 | 1120 | # remove all tags from the document 1121 | $q->find('blink')->remove; 1122 | 1123 | =head3 remove_class 1124 | 1125 | Remove a single class, multiple classes, or all classes from each element in the set of matched elements. 1126 | 1127 | =head3 replace_with 1128 | 1129 | Replace the elements of the object with the provided replacement. 1130 | The replacement can be a string, a C object or an 1131 | anonymous function. The anonymous function is passed the index of the current 1132 | node and the node itself (with is also localized as C<$_>). 1133 | 1134 | my $q = wq( '

Abracadabra

' ); 1135 | 1136 | $q->find('b')->replace_with('Ocus); 1137 | #

Ocuscadabra

1138 | 1139 | $q->find('u')->replace_with($q->find('b')); 1140 | #

cadaAbra

1141 | 1142 | $q->find('i')->replace_with(sub{ 1143 | my $name = $_->text; 1144 | return "<$name>"; 1145 | }); 1146 | #

Abrabra

1147 | 1148 | =head3 size 1149 | 1150 | Return the number of elements in the Web::Query object. 1151 | 1152 | wq('

foo

bar

')->find('p')->size; # 2 1153 | 1154 | =head3 text 1155 | 1156 | Get/Set the text. 1157 | 1158 | my @text = $q->text(); 1159 | 1160 | my $text = $q->text(); # 1st matching element only 1161 | 1162 | $q->text('text'); 1163 | 1164 | If called in a scalar context, only return the string representation 1165 | of the first element 1166 | 1167 | =head2 OTHERS 1168 | 1169 | =over 4 1170 | 1171 | =item Web::Query->last_response() 1172 | 1173 | Returns last HTTP response status that generated by C. 1174 | 1175 | =back 1176 | 1177 | =head1 HOW DO I CUSTOMIZE USER AGENT? 1178 | 1179 | You can specify your own instance of L. 1180 | 1181 | $Web::Query::UserAgent = LWP::UserAgent->new( agent => 'Mozilla/5.0' ); 1182 | 1183 | =head1 FAQ AND TROUBLESHOOTING 1184 | 1185 | =head2 How to find XML processing instructions in a document? 1186 | 1187 | It's possible with L and by using an xpath expression 1188 | with C: 1189 | 1190 | # find 1191 | $q->find(\"//processing-instruction('xml-stylesheet')"); 1192 | 1193 | However, note that the support for processing instructions 1194 | in L is sketchy, so there 1195 | are methods like C that won't work. 1196 | 1197 | 1198 | =head2 Can't get the content of script elements 1199 | 1200 | The "; 1206 | 1207 | say Web::Query::wq( $node )->text; 1208 | # nothing is printed! 1209 | 1210 | say Web::Query::wq( $node )->html; 1211 | # var x = '<p>foo</p>'; 1212 | 1213 | say Web::Query::LibXML::wq( $node )->text; 1214 | # var x = '

foo

'; 1215 | 1216 | say Web::Query::LibXML::wq( $node )->html; 1217 | # var x = '<p>foo</p>'; 1218 | 1219 | 1220 | =head1 INCOMPATIBLE CHANGES 1221 | 1222 | =over 4 1223 | 1224 | =item 0.10 1225 | 1226 | new_from_url() is no longer throws exception on bad response from HTTP server. 1227 | 1228 | =back 1229 | 1230 | =head1 AUTHOR 1231 | 1232 | Tokuhiro Matsuno Etokuhirom AAJKLFJEF@ GMAIL COME 1233 | 1234 | =head1 SEE ALSO 1235 | 1236 | =over 1237 | 1238 | =item L 1239 | 1240 | =item L 1241 | 1242 | =back 1243 | 1244 | =head1 LICENSE 1245 | 1246 | Copyright (C) Tokuhiro Matsuno 1247 | 1248 | This library is free software; you can redistribute it and/or modify 1249 | it under the same terms as Perl itself. 1250 | 1251 | =cut 1252 | -------------------------------------------------------------------------------- /lib/Web/Query/LibXML.pm: -------------------------------------------------------------------------------- 1 | package Web::Query::LibXML; 2 | # ABSTRACT: fast, drop-in replacement for Web::Query 3 | 4 | 5 | use 5.008005; 6 | use strict; 7 | use warnings; 8 | use parent qw/Web::Query Exporter/; 9 | use HTML::TreeBuilder::LibXML; 10 | 11 | # version required for unique_key 12 | use XML::LibXML 2.0107; 13 | 14 | our @EXPORT = qw/wq/; 15 | 16 | sub wq { Web::Query::LibXML->new(@_) } 17 | 18 | sub _build_tree { 19 | my $tree = HTML::TreeBuilder::LibXML->new(); 20 | $tree->ignore_unknown(0); 21 | $tree->store_comments(1); 22 | $tree; 23 | } 24 | 25 | sub _is_same_node { 26 | $_[1]->{node}->isSameNode($_[2]->{node}); 27 | } 28 | 29 | sub prev { 30 | my $self = shift; 31 | my @new; 32 | for my $tree (@{$self->{trees}}) { 33 | push @new, $tree->left; 34 | } 35 | return (ref $self || $self)->new_from_element(\@new, $self); 36 | } 37 | 38 | sub next { 39 | my $self = shift; 40 | my @new; 41 | for my $tree (@{$self->{trees}}) { 42 | push @new, grep { $_ } $tree->right; 43 | } 44 | return (ref $self || $self)->new_from_element(\@new, $self); 45 | } 46 | 47 | sub tagname { 48 | my $self = shift; 49 | my $method = @_ ? 'setNodeName' : 'nodeName'; 50 | 51 | my @retval = map { $_->{node}->$method(@_) } @{$self->{trees}}; 52 | return wantarray ? @retval : $retval[0]; 53 | } 54 | 55 | sub _node_id { $_[1]{node}->unique_key } 56 | 1; 57 | __END__ 58 | 59 | =encoding utf-8 60 | 61 | =head1 SYNOPSIS 62 | 63 | use Web::Query::LibXML; 64 | 65 | # imports wq() 66 | # all methods inherited from Web::Query 67 | # see Web::Query for documentation 68 | 69 | 70 | =head1 DESCRIPTION 71 | 72 | Web::Query::LibXML is Web::Query subclass that overrides the _build_tree() method to use HTML::TreeBuilder::LibXML instead of HTML::TreeBuilder::XPath. 73 | Its a lot faster than its superclass. Use this module unless you can't install (or depend on) L on your system. 74 | 75 | =head1 FUNCTIONS 76 | 77 | =over 4 78 | 79 | =item C<< wq($stuff) >> 80 | 81 | This is a shortcut for C<< Web::Query::LibXML->new($stuff) >>. This function is exported by default. 82 | 83 | =back 84 | 85 | =head1 METHODS 86 | 87 | All public methods are inherited from L. 88 | 89 | =head1 LICENSE 90 | 91 | Copyright (C) Carlos Fernando Avila Gratz. 92 | 93 | This library is free software; you can redistribute it and/or modify 94 | it under the same terms as Perl itself. 95 | 96 | =head1 AUTHOR 97 | 98 | Carlos Fernando Avila Gratz Ecafe@q1software.comE 99 | 100 | =head1 SEE ALSO 101 | 102 | L, L, L 103 | 104 | =cut 105 | 106 | -------------------------------------------------------------------------------- /t/00_compile.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use Test2::V0; 3 | 4 | use Web::Query; 5 | 6 | pass "it compiles"; 7 | 8 | done_testing; 9 | -------------------------------------------------------------------------------- /t/01_src.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use Cwd (); 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | subtest 'from file' => sub { 21 | plan tests => 5; 22 | run_tests(wq('t/data/foo.html')); 23 | }; 24 | 25 | is wq('t/data/html5_snippet.html')->size, 3, 'snippet from file'; 26 | 27 | subtest 'from url' => sub { 28 | plan tests => 5; 29 | run_tests(wq('file://' . Cwd::abs_path('t/data/foo.html'))); 30 | }; 31 | 32 | subtest 'from treebuilder' => sub { 33 | plan tests => 5; 34 | my $tree = HTML::TreeBuilder::XPath->new_from_file('t/data/foo.html'); 35 | run_tests(wq($tree)); 36 | }; 37 | 38 | subtest 'from Array[treebuilder]' => sub { 39 | plan tests => 5; 40 | my $tree = HTML::TreeBuilder::XPath->new_from_file('t/data/foo.html'); 41 | run_tests(wq([$tree])); 42 | }; 43 | 44 | subtest 'from html' => sub { 45 | plan tests => 5; 46 | open my $fh, '<', 't/data/foo.html'; 47 | my $html = do { local $/; <$fh> }; 48 | run_tests(wq($html)); 49 | }; 50 | 51 | subtest 'from Web::Query object' => sub { 52 | plan tests => 5; 53 | my $tree = HTML::TreeBuilder::XPath->new_from_file('t/data/foo.html'); 54 | run_tests(wq(wq($tree))); 55 | }; 56 | 57 | if (eval "require URI; 1;") { 58 | subtest 'from URI' => sub { 59 | plan tests => 5; 60 | run_tests(wq(URI->new('file://' . Cwd::abs_path('t/data/foo.html')))); 61 | }; 62 | } 63 | 64 | } 65 | 66 | 67 | sub run_tests { 68 | $_[0]->find('.foo')->find('a')->each(sub { 69 | is $_->text, 'foo!'; 70 | is $_->attr('href'), '/foo'; 71 | }) 72 | ->end()->end() 73 | ->find('.bar')->find('a')->each(sub { 74 | is $_->text, 'bar!'; 75 | is $_->attr('href'), '/bar'; 76 | $_->attr('href' => '/bar2'); 77 | note $_->html; 78 | }); 79 | like $_[0]->html, qr{href="/bar2"}; 80 | } 81 | -------------------------------------------------------------------------------- /t/02_op.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use Web::Query; 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | subtest 'get/set text' => sub { 20 | my $q = wq('t/data/foo.html'); 21 | $q->find('.foo a')->text('> ok'); 22 | is trim($q->find('.foo a')->text()), '> ok'; 23 | is trim($q->find('.foo a')->html()), '> ok'; 24 | }; 25 | 26 | subtest 'get/set html' => sub { 27 | my $q = wq('t/data/foo.html'); 28 | $q->find('.foo')->html('ok'); 29 | is trim($q->find('.foo')->html()), 'ok'; 30 | }; 31 | 32 | } 33 | 34 | sub trim { 35 | local $_ = shift; 36 | $_ =~ s/[\r\n]+$//; 37 | $_ 38 | } 39 | -------------------------------------------------------------------------------- /t/03_traverse.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use Scalar::Util qw/refaddr/; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $html = '
'; 21 | 22 | subtest 'parent' => sub { 23 | is wq($html)->find('#baz')->parent()->attr('id'), 'bar'; 24 | is wq($html)->find('#bar')->parent()->attr('id'), 'foo'; 25 | }; 26 | 27 | subtest 'first/last return new instance' => sub { 28 | subtest 'first' => sub { 29 | my $q = wq($html)->find('div'); 30 | my $first = $q->first; 31 | isnt(refaddr($first), refaddr($q)); 32 | }; 33 | subtest 'last' => sub { 34 | my $q = wq($html)->find('div'); 35 | my $last = $q->last; 36 | isnt(refaddr($last), refaddr($q)); 37 | }; 38 | }; 39 | subtest 'size' => sub { 40 | is wq($html)->find('div')->size, 3; 41 | is wq($html)->find('body')->size, 1; 42 | is wq($html)->find('li')->size, 0; 43 | is wq($html)->find('.null')->first->size, 0; 44 | is wq($html)->find('.null')->last->size, 0; 45 | }; 46 | subtest 'map' => sub { 47 | is wq($html)->find('div')->map(sub {$_[0]}), [0, 1, 2]; 48 | is wq($html)->find('div')->map(sub {$_->attr('id')}), [qw/foo bar baz/]; 49 | }; 50 | subtest 'filter' => sub { 51 | is wq($html)->filter('div')->size, 0; 52 | is wq($html)->filter('body')->size, 0; 53 | is wq($html)->filter('li')->size, 0; 54 | is wq($html)->find('div')->filter(sub {$_->attr('id') =~ /ba/})->size, 2; 55 | is wq($html)->find('div')->filter(sub {my $i = shift; $i % 2 == 0})->size, 2; 56 | }; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /t/04_element.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use Web::Query; 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | my $html = '
  • A
  • B
  • C
  • D
  • E
  • F
'; 20 | 21 | subtest 'first' => sub { 22 | is wq($html)->find('#foo li')->first()->text(), 'A'; 23 | }; 24 | subtest 'last' => sub { 25 | is wq($html)->find('#foo li')->last()->text(), 'F'; 26 | }; 27 | 28 | } 29 | -------------------------------------------------------------------------------- /t/05_html5.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use Web::Query; 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | is(wq('
foo
')->find('header')->first->text, 'foo'); 20 | } 21 | 22 | 23 | -------------------------------------------------------------------------------- /t/06_new_from_url_error_handling.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use LWP::UserAgent; 6 | use Web::Query; 7 | 8 | my $ua = LWP::UserAgent->new( agent => 'Mozilla/5.0' ); 9 | $Web::Query::UserAgent = $ua; 10 | $ua->add_handler(request_send => sub { 11 | my ($request, $ua, $h) = @_; 12 | if ($request->uri->host eq 'bad.com') { 13 | return HTTP::Response->new(500); 14 | } else { 15 | return HTTP::Response->new(200); 16 | } 17 | }); 18 | 19 | subtest 'bad url' => sub { 20 | 21 | my $q = eval { wq('http://bad.com/') }; 22 | 23 | is($q, undef); 24 | 25 | ok $@; 26 | 27 | isa_ok($Web::Query::RESPONSE, 'HTTP::Response'); 28 | is($Web::Query::RESPONSE->code, 500); 29 | 30 | isa_ok(Web::Query->last_response, 'HTTP::Response'); 31 | is(Web::Query::last_response->code, 500); 32 | }; 33 | 34 | subtest 'good status code' => sub { 35 | my $q = wq('http://good.com/'); 36 | ok($q); 37 | 38 | isa_ok($Web::Query::RESPONSE, 'HTTP::Response'); 39 | is($Web::Query::RESPONSE->code, 200); 40 | 41 | isa_ok(Web::Query->last_response, 'HTTP::Response'); 42 | is(Web::Query::last_response->code, 200); 43 | }; 44 | 45 | done_testing; 46 | 47 | -------------------------------------------------------------------------------- /t/07_remove.t: -------------------------------------------------------------------------------- 1 | # -*- perl -*- 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | 9 | test('Web::Query'); 10 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 11 | 12 | done_testing; 13 | 14 | 15 | sub test { 16 | my $class = shift; 17 | diag "testing $class"; 18 | no warnings 'redefine'; 19 | *wq = \&{$class . "::wq" }; 20 | 21 | subtest "remove and size" => sub { 22 | my $q = wq('t/data/foo.html'); 23 | $q->find('.foo')->remove(); 24 | is $q->find('.foo')->size() => 0, "all .foo are removed and cannot be found."; 25 | }; 26 | 27 | subtest "remove and html" => sub { 28 | my $q = wq('t/data/foo.html'); 29 | $q->find('.foo, .bar')->remove(); 30 | my $result = $q->html; 31 | $result =~ s/\s//g; 32 | 33 | like $result, qr{^test1\s*$}, ".foo and .bar are removed and not showing in html"; 34 | }; 35 | 36 | subtest "\$q->remove->end->html" => sub { 37 | my $q = wq('t/data/foo.html'); 38 | my $result = $q->find('.foo, .bar')->remove->end->html; 39 | $result =~ s/\s//g; 40 | like( 41 | $result, 42 | qr{^test1$}, 43 | "The chaining works." 44 | ); 45 | }; 46 | 47 | subtest "remove root elements" => sub { 48 | my $q = wq('t/data/foo.html'); 49 | $q->remove; 50 | is $q->size, 0, "size 0 after remove"; 51 | is join('', $q->as_html), '', "html '' after remove"; # not '<>' 52 | }; 53 | 54 | subtest "remove elements via each()" => sub { 55 | my $q = wq('t/data/foo.html'); 56 | $q->each(sub{ $_->remove }); 57 | is $q->size, 0, "size 0 after remove"; 58 | is join('', $q->as_html), '', "html '' after remove"; # not '<>' 59 | }; 60 | 61 | } 62 | -------------------------------------------------------------------------------- /t/08_indent.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | use Web::Query; 7 | 8 | plan tests => 2; 9 | 10 | my $inner = "

Hi there

"; 11 | my $html = "$inner"; 12 | 13 | is( Web::Query->new($html)->html => $inner, "no indent" ); 14 | 15 | like( Web::Query->new($html, { indent => "\t" } )->html => qr/\t/, "indented" ); 16 | -------------------------------------------------------------------------------- /t/09_as_html.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test2::V0; 4 | use Web::Query; 5 | 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | my $inner = "

Hi there

How is life?

"; 20 | my $html = "$inner"; 21 | 22 | my $q = Web::Query->new($html); 23 | 24 | is $q->html => $inner, "html() returns inner html"; 25 | is $q->as_html => $html, "as_html() returns element itself"; 26 | 27 | my $scalar = $q->find('p')->as_html; 28 | my @array = $q->find('p')->as_html; 29 | 30 | is $scalar => '

Hi there

', 'called in scalar context'; 31 | is \@array => [ '

Hi there

', q{

How is life?

} ], 32 | 'called in list context'; 33 | 34 | subtest 'join' => sub { 35 | is $q->find('p')->as_html(join => '!') 36 | => '

Hi there

!

How is life?

'; 37 | }; 38 | 39 | } 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /t/10_subclass.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; plan tests => 3; 5 | use FindBin; 6 | use lib 'lib'; 7 | use lib "$FindBin::Bin/lib"; 8 | 9 | use My::Web::Query; 10 | 11 | # web::query is a child class friendly 12 | my $query = wq('
foo
'); 13 | 14 | isa_ok $query, 'My::Web::Query'; 15 | 16 | $query->each(sub{ 17 | isa_ok $_[1], 'My::Web::Query'; 18 | }); 19 | 20 | isa_ok $query->_build_tree, 'My::TreeBuilder'; 21 | -------------------------------------------------------------------------------- /t/11_get_eq.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use Web::Query; 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | my $html = '
  • A
  • B
  • C
  • D
  • E
  • F
'; 20 | 21 | subtest 'get first' => sub { 22 | my $q = wq($html)->find('#foo li'); 23 | my $elm = $q->get(0); 24 | isa_ok $elm, 'HTML::Element'; 25 | is wq($elm)->text(), 'A'; 26 | }; 27 | subtest 'get second' => sub { 28 | my $q = wq($html)->find('#foo li'); 29 | my $elm = $q->get(1); 30 | isa_ok $elm, 'HTML::Element'; 31 | is wq($elm)->text(), 'B'; 32 | }; 33 | subtest 'get last' => sub { 34 | my $q = wq($html)->find('#foo li'); 35 | my $elm = $q->get(-1); 36 | isa_ok $elm, 'HTML::Element'; 37 | is wq($elm)->text(), 'F'; 38 | }; 39 | subtest 'get before last' => sub { 40 | my $q = wq($html)->find('#foo li'); 41 | my $elm = $q->get(-2); 42 | isa_ok $elm, 'HTML::Element'; 43 | is wq($elm)->text(), 'E'; 44 | }; 45 | 46 | subtest 'eq first' => sub { 47 | is wq($html)->find('#foo li')->eq(0)->text(), 'A'; 48 | }; 49 | subtest 'eq second' => sub { 50 | is wq($html)->find('#foo li')->eq(1)->text(), 'B'; 51 | }; 52 | subtest 'eq last' => sub { 53 | is wq($html)->find('#foo li')->eq(-1)->text(), 'F'; 54 | }; 55 | subtest 'eq before last' => sub { 56 | is wq($html)->find('#foo li')->eq(-2)->text(), 'E'; 57 | }; 58 | 59 | } 60 | -------------------------------------------------------------------------------- /t/add.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Test2::V0; 5 | use Web::Query; 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | my $html = < 21 |
Foo
22 |
Bar
23 | 24 | HTML 25 | 26 | 27 | # add($object) 28 | is join('|', wq($html)->find('.foo')->add(wq($html)->find('.bar'))->as_html) 29 | => '
Foo
|
Bar
', 'add($object)'; 30 | 31 | 32 | # add($html) 33 | is join('|', wq($html)->find('.foo')->add('
Bar
')->as_html) 34 | => '
Foo
|
Bar
', 'add($html)'; 35 | 36 | # add(@elements) 37 | is join('|', wq($html)->find('.foo')->add(@{ wq($html)->find('div')->{trees}})->as_html) 38 | => '
Foo
|
Foo
|
Bar
', 'add(@elements)'; 39 | 40 | # add($selector, $xpath_context) 41 | is join('|', wq($html)->find('.foo')->add('.bar', wq($html)->{trees}->[0] )->as_html) 42 | => '
Foo
|
Bar
', 'add($selector, $xpath_context)'; 43 | 44 | subtest "add() create new object" => sub { 45 | my $wq = wq($html); 46 | my $x = $wq->find('.foo'); 47 | my $y = $x->add( $wq->find('.bar') ); 48 | 49 | is $x->size => 1, "original object"; 50 | is $y->size => 2, "new object"; 51 | }; 52 | 53 | subtest "add() doesn't add the same node twice" => sub { 54 | my $wq = wq($html); 55 | my $x = $wq->find('.foo')->add( $wq->find('.foo') ); 56 | is $x->size => 1, "only one node"; 57 | }; 58 | } 59 | -------------------------------------------------------------------------------- /t/after.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use lib 'lib'; 5 | 6 | use Test2::V0; 7 | use Web::Query; 8 | 9 | test('Web::Query'); 10 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 11 | 12 | done_testing; 13 | 14 | 15 | sub test { 16 | my $class = shift; 17 | diag "testing $class"; 18 | no warnings 'redefine'; 19 | *wq = \&{$class . "::wq" }; 20 | 21 | my $html = '
Hello
Goodbye
'; 22 | 23 | is wq($html)->find('.inner')->after('

Test

')->end->as_html, 24 | '
Hello

Test

Goodbye

Test

', 'after'; 25 | } 26 | -------------------------------------------------------------------------------- /t/append.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $html = '
Hello
Goodbye
'; 21 | 22 | is wq($html)->find('.inner')->append('

Test

')->end->as_html, 23 | '
Hello

Test

Goodbye

Test

', 'append'; 24 | } -------------------------------------------------------------------------------- /t/attr.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test2::V0; 5 | use Web::Query; 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | subtest 'set many attrs at the same time' => sub { 20 | my $doc = wq( '
hi
' ); 21 | 22 | $doc->attr( 23 | foo => 1, 24 | bar => 'baz', 25 | ); 26 | 27 | is $doc->attr('foo') => 1, 'foo is set'; 28 | is $doc->attr('bar') => 'baz', 'bar is set'; 29 | }; 30 | 31 | subtest 'code ref as setter' => sub { 32 | my $doc = wq( '
kitten
' ); 33 | 34 | $doc->find('img')->attr(alt => sub{ $_ ||= 'A picture' }); 35 | 36 | is [ $doc->find('img')->attr('alt') ], 37 | [ 'A picture', 'kitten' ]; 38 | } 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /t/bad-url-with-options.t: -------------------------------------------------------------------------------- 1 | use Test2::V0; 2 | use Test2::Tools::Exception qw/dies/; 3 | 4 | use strict; 5 | use warnings; 6 | use utf8; 7 | use LWP::UserAgent; 8 | use Web::Query; 9 | 10 | my $ua = $Web::Query::UserAgent = LWP::UserAgent->new( agent => 'Mozilla/5.0' ); 11 | 12 | $ua->add_handler(request_send => sub { 13 | my ($request) = @_; 14 | my $code = $request->uri->host eq 'bad.com' ? 500 : 200; 15 | return HTTP::Response->new($code); 16 | }); 17 | 18 | plan tests => 2; 19 | 20 | ok dies { 21 | Web::Query->new('http://bad.com/'); 22 | }, "without options"; 23 | 24 | ok dies { 25 | Web::Query->new('http://bad.com/',{indent=>3}); 26 | }, "with options"; 27 | 28 | 29 | -------------------------------------------------------------------------------- /t/before.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $html = '
Hello
Goodbye
'; 21 | 22 | is wq($html)->find('.inner')->before('

Test

')->end->as_html, 23 | '

Test

Hello

Test

Goodbye
', 'before'; 24 | } -------------------------------------------------------------------------------- /t/bug-text-contents.t: -------------------------------------------------------------------------------- 1 | # see https://github.com/tokuhirom/Web-Query/issues/47 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test2::V0; 7 | 8 | use lib 't/lib'; 9 | use WQTest; 10 | 11 | my $html = <<'HTML'; 12 | 13 |

Hello

14 |

World

15 | 16 | HTML 17 | 18 | WQTest::test { 19 | my $q = $_[0]->new($html); 20 | 21 | isa_ok $q, 'Web::Query'; 22 | 23 | my @text; 24 | my @contents; 25 | 26 | $q->find('p')->each(sub { 27 | my ($i, $elem) = @_; 28 | push @text, $elem->text; 29 | push @contents, $elem->contents; 30 | }); 31 | 32 | is \@text, [qw/ Hello World /], 'elements'; 33 | 34 | is @contents, 2, 'two contents'; 35 | 36 | isa_ok $_, 'Web::Query' for @contents;; 37 | 38 | is $contents[0]->text => 'Hello'; 39 | is $contents[1]->text => 'World'; 40 | }; 41 | -------------------------------------------------------------------------------- /t/class.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test2::V0; 7 | 8 | use lib 't/lib'; 9 | 10 | use WQTest; 11 | 12 | WQTest::test { 13 | my $class = shift; 14 | 15 | subtest 'toggle_class' => sub { test_toggle_class($class) }; 16 | 17 | subtest 'add_class' => sub { test_add_class($class) }; 18 | 19 | }; 20 | 21 | sub test_toggle_class { 22 | my $class = shift; 23 | 24 | my $q = $class->new(q{ 25 | 30 | })->find('a'); 31 | 32 | $q->toggle_class( 'foo' ); 33 | 34 | is $q->map( sub { $_->has_class('foo') } ), [ undef, 1, undef ]; 35 | 36 | $q->toggle_class( 'foo', 'bar' ); 37 | is $q->map( sub { $_->has_class('foo') } ), [ 1, undef, 1 ]; 38 | is $q->map( sub { $_->has_class('bar') } ), [ undef, 1, 1 ]; 39 | 40 | subtest "double toggling" => sub { 41 | $q->toggle_class( 'foo', 'foo' ); 42 | is $q->map( sub { $_->has_class('foo') } ), [ undef, 1, undef ]; 43 | }; 44 | } 45 | 46 | sub test_add_class { 47 | my $class = shift; 48 | 49 | my $html = '
Hello
Goodbye
'; 50 | 51 | my $wq = $class->new($html); 52 | 53 | $wq->find('.inner')->add_class('foo bar inner'); 54 | is $wq->as_html, '
Hello
Goodbye
', 'add_class("foo bar inner")'; 55 | 56 | # add_class(CODE) 57 | $wq = $class->new($html); 58 | 59 | $wq->find('.inner')->add_class(sub{ 60 | my ($i, $current, $el) = @_; 61 | return "foo-$i bar"; 62 | }); 63 | 64 | is $wq->as_html, '
Hello
Goodbye
', 'add_class(CODE)'; 65 | 66 | } 67 | -------------------------------------------------------------------------------- /t/clone.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test2::V0; 4 | use Web::Query; 5 | 6 | test('Web::Query'); 7 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 8 | 9 | done_testing; 10 | 11 | 12 | sub test { 13 | my $class = shift; 14 | diag "testing $class"; 15 | no warnings 'redefine'; 16 | *wq = \&{$class . "::wq" }; 17 | 18 | my $html = '

Hithereworld

'; 19 | is wq($html)->clone->as_html, $html, 'clone'; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /t/contents.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | use Web::Query; 6 | 7 | test('Web::Query'); 8 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 9 | 10 | done_testing; 11 | 12 | 13 | sub test { 14 | my $class = shift; 15 | diag "testing $class"; 16 | no warnings 'redefine'; 17 | *wq = \&{$class . "::wq" }; 18 | 19 | my $html = "

foo

bar

baz
"; 20 | 21 | is join('|', wq($html)->contents->as_html), '

foo

|

bar

|baz', 'contents()'; 22 | is join('|', wq($html)->contents('p')->as_html), '

foo

|

bar

', 'contents("p")'; 23 | 24 | is wq('

foo

')->contents->as_html => 'foo'; 25 | } 26 | -------------------------------------------------------------------------------- /t/data/foo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | test1 5 | 6 | 7 |
10 |
11 | bar! 12 |
13 | 14 | 15 | -------------------------------------------------------------------------------- /t/data/html5_snippet.html: -------------------------------------------------------------------------------- 1 | foo
bar
baz
2 | -------------------------------------------------------------------------------- /t/destroy.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | use lib 'lib'; 6 | use Test2::V0; 7 | use Web::Query (); 8 | 9 | my $wq = Web::Query->new(''); 10 | local $@ = 'foo'; 11 | $wq->DESTROY; 12 | is $@, 'foo', 'eval error string should not be clobbered'; 13 | 14 | done_testing; 15 | -------------------------------------------------------------------------------- /t/detach.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $wq = wq('

Hello

Goodbye

'); 21 | 22 | my $detached = $wq->find('.inner')->detach; 23 | is join('', $detached->as_html), '

Hello

Goodbye

', 'detach - retval'; 24 | is $wq->as_html, '
', 'detach - original object modified'; 25 | is $detached->find('p')->size, 2, 'find() works on detached elements'; 26 | 27 | } 28 | -------------------------------------------------------------------------------- /t/filter.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | use Test::Exception; 6 | 7 | use lib 't/lib'; 8 | 9 | use WQTest; 10 | 11 | WQTest::test { 12 | my $class = shift; 13 | 14 | my $html = < 16 |
Hello
17 | 18 | 19 |
20 |
Hello
21 |
22 | HTML 23 | 24 | my $q = $class->new($html); 25 | 26 | subtest "selector" => sub { 27 | is $q->filter('span')->size, 0; 28 | is $q->filter('div.container')->size, 1; 29 | is $q->filter('div')->size, 2; 30 | }; 31 | 32 | subtest coderef => sub { 33 | is $q->size, 2; 34 | 35 | is $q->filter(sub { $_->has_class( 'container' ) } )->size, 1; 36 | 37 | # 'filter' on a coderef was modifying the parent element 38 | is $q->size, 2, 'still two elements'; 39 | }; 40 | 41 | subtest on_text => sub { on_text($class) }; 42 | }; 43 | 44 | sub on_text { 45 | my $class = shift; 46 | 47 | my $wq = $class->new('

bar

Standalone Text'); 48 | 49 | lives_ok { $wq->filter('.foo') }, "doesn't explode on text nodes"; 50 | } 51 | -------------------------------------------------------------------------------- /t/find.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test2::V0; 4 | use Web::Query; 5 | 6 | test('Web::Query'); 7 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 8 | 9 | done_testing; 10 | 11 | 12 | sub test { 13 | my $class = shift; 14 | diag "testing $class"; 15 | no warnings 'redefine'; 16 | *wq = \&{$class . "::wq" }; 17 | 18 | my $wq = wq(< 21 |
Hello
22 | 23 | 24 |
25 |
Hello
26 |
27 | 28 | HTML 29 | 30 | is $wq->find('.inner')->size, 2, 'find() on multiple tree object'; 31 | 32 | is wq('1')->find('html')->size, 0, 'find() does not include root elements'; 33 | is(wq('
foo
bar
')->find('div')->size, 0); 34 | 35 | } 36 | -------------------------------------------------------------------------------- /t/has_class.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $wq = wq('
Hello
Goodbye
'); 21 | 22 | is $wq->find('.inner')->has_class('inner'), 1, 'has_class - positive'; 23 | is $wq->find('.inner')->has_class('nahh'), undef, 'has_class - negative'; 24 | } 25 | -------------------------------------------------------------------------------- /t/insert_after.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $wq = wq('
Hello
Goodbye
'); 21 | 22 | wq('

Test

')->insert_after($wq->find('.inner')); 23 | is $wq->as_html, '
Hello

Test

Goodbye

Test

', 'insert_after'; 24 | } -------------------------------------------------------------------------------- /t/insert_before.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $wq = wq('
Hello
Goodbye
'); 21 | 22 | wq('

Test

')->insert_before($wq->find('.inner')); 23 | is $wq->as_html, '

Test

Hello

Test

Goodbye
', 'insert_before'; 24 | } -------------------------------------------------------------------------------- /t/lib/My/TreeBuilder.pm: -------------------------------------------------------------------------------- 1 | package My::TreeBuilder; 2 | use parent qw/HTML::TreeBuilder::XPath/; 3 | 1; -------------------------------------------------------------------------------- /t/lib/My/Web/Query.pm: -------------------------------------------------------------------------------- 1 | package My::Web::Query; 2 | 3 | use strict; 4 | use warnings; 5 | use parent qw/Web::Query Exporter/; 6 | use My::TreeBuilder; 7 | 8 | our @EXPORT = qw/wq/; 9 | 10 | sub wq { My::Web::Query->new(@_) } 11 | 12 | sub _build_tree { 13 | my ($self, $content) = @_; 14 | my $tree = My::TreeBuilder->new(); 15 | $tree->ignore_unknown(0); 16 | $tree->store_comments(1); 17 | $tree; 18 | } -------------------------------------------------------------------------------- /t/lib/WQTest.pm: -------------------------------------------------------------------------------- 1 | package WQTest; 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | 8 | sub test(&) { 9 | my $code = shift; 10 | 11 | plan tests => 3; 12 | 13 | use_ok 'Web::Query'; 14 | 15 | for my $class ( qw/ Web::Query Web::Query::LibXML / ) { 16 | subtest $class => sub { 17 | if( $class =~ /LibXML/ ) { 18 | plan skip_all => "can't load $class" unless eval "use $class; 1"; 19 | } 20 | 21 | $code->($class, \&{$class . "::wq" }); 22 | }; 23 | } 24 | 25 | } 26 | 27 | 1; 28 | -------------------------------------------------------------------------------- /t/match_and_not.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | use lib 't/lib'; 7 | 8 | use WQTest; 9 | 10 | WQTest::test { 11 | my $class = shift; 12 | 13 | my $wq = $class->new(< 15 |

one

16 |

two

17 |

three

18 | 19 | 20 | HTML 21 | 22 | is $wq->find('p')->not( '#second' )->size => 2, 'not'; 23 | is $wq->find('p')->filter( '#second' )->size => 1, 'filter'; 24 | 25 | subtest 'match' => sub { 26 | is [ $wq->find('p')->match( '.foo' ) ], [ 1, '', 1 ], "list context"; 27 | is scalar $wq->find('p')->match( '.foo' ) => 1, "scalar context"; 28 | }; 29 | 30 | } 31 | -------------------------------------------------------------------------------- /t/new.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | use lib 't/lib'; 7 | 8 | use WQTest; 9 | 10 | WQTest::test { 11 | my $class = shift; 12 | 13 | subtest 'create an empty $q' => sub { 14 | my $new = $class->new; 15 | 16 | $new = $new->add( '

something

' ); 17 | 18 | is $new->as_html => '

something

'; 19 | }; 20 | } 21 | -------------------------------------------------------------------------------- /t/next.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test2::V0; 4 | use Web::Query; 5 | 6 | test('Web::Query'); 7 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 8 | 9 | done_testing; 10 | 11 | sub test { 12 | my $class = shift; 13 | diag "testing $class"; 14 | no warnings 'redefine'; 15 | *wq = \&{$class . "::wq"}; 16 | 17 | my $wq = wq(< 20 |
Hello
21 |
World
22 | 23 | 24 |
25 |
Hello
26 |
World
27 |
28 | HTML 29 | 30 | my $elem = $wq->find('.d1')->next; 31 | is $elem->size, 2; 32 | is $elem->attr('class'), 'd2', 'next'; 33 | 34 | subtest 'next->as_html' => sub { 35 | plan tests => 6; 36 | 37 | $wq = wq( q{ 38 |
39 | one 40 | two 41 | three
42 | } ); 43 | 44 | my @expected = ( 45 | [ b => qr/one/ ], 46 | [ '#text' => qr/two/ ], 47 | [ 'i' => qr/three/ ], 48 | ); 49 | 50 | my $next = $wq->find('b'); 51 | while( $next->size ) { 52 | my $exp = shift @expected; 53 | is $next->tagname => $exp->[0], 'tagname'; 54 | like $next->as_html => $exp->[1], 'as_html'; 55 | $next = $next->next; 56 | }; 57 | }; 58 | } 59 | -------------------------------------------------------------------------------- /t/next_until.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | use Test2::V0; 7 | use Web::Query; 8 | 9 | test('Web::Query'); 10 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 11 | 12 | done_testing; 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq"}; 19 | 20 | my $wq = wq(q{ 21 |
22 |

one

23 |

two

24 |

three

25 |

four

26 |

five

27 |

six

28 |
29 | }); 30 | 31 | for my $id ( qw/ first second / ) { 32 | my $next = $wq->find('#'.$id)->next_until('h1'); 33 | is $next->size => 2; 34 | } 35 | 36 | is $wq->find('#first')->next_until('h1')->and_back->size => 3, "and_back"; 37 | 38 | is $wq->find('h1')->next_until('h1')->size => 4; 39 | is $wq->find('h1')->next_until('foo')->size => 5; 40 | } 41 | -------------------------------------------------------------------------------- /t/no_space_compacting.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; plan tests => 3; 5 | 6 | use Web::Query; 7 | 8 | is( Web::Query->new_from_html(<<'END')->as_html, '

hello there

', 'spaces trimmed' ); 9 |

hello there

10 | END 11 | 12 | is( Web::Query->new_from_html(<<'END', {no_space_compacting => 1})->as_html, '

hello there

', 'spaces left' ); 13 |

hello there

14 | END 15 | 16 | subtest 'LibXML' => sub { 17 | eval "require Web::Query::LibXML; 1" 18 | or plan skip_all => "couldn't load Web::Query::LibXML"; 19 | 20 | # LibXML doesn't trim by default 21 | 22 | is( Web::Query::LibXML->new_from_html(<<'END')->as_html, '

hello there

' ); 23 |

hello there

24 | END 25 | 26 | is( Web::Query::LibXML->new_from_html(<<'END', {no_space_compacting => 1})->as_html, '

hello there

' ); 27 |

hello there

28 | END 29 | }; 30 | 31 | 32 | -------------------------------------------------------------------------------- /t/node-types.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | use lib 't/lib'; 7 | 8 | use WQTest; 9 | 10 | WQTest::test { 11 | my $class = shift; 12 | 13 | my $q = $class->new_from_html(<<'END'); 14 | 15 | 16 | 17 | one 18 |

two

19 | 20 |
21 | 22 | 23 | END 24 | 25 | my $contents = $q->find('x')->contents; 26 | 27 | is $contents->find('p')->html => 'two', 'skip over text and comments'; 28 | 29 | like $contents->filter(sub{ $_->tagname eq '#text' })->as_html 30 | => qr'one', '#text'; 31 | 32 | like $contents->filter(sub{ $_->tagname eq '#comment' })->as_html 33 | => qr'three', '#comment'; 34 | } 35 | 36 | 37 | -------------------------------------------------------------------------------- /t/prepend.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | 21 | my $html = '
Hello
Goodbye
'; 22 | 23 | is wq($html)->find('.inner')->prepend('

Test

')->end->as_html, 24 | '

Test

Hello

Test

Goodbye
', 'prepend'; 25 | 26 | } -------------------------------------------------------------------------------- /t/prev.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test2::V0; 4 | use Web::Query; 5 | 6 | test('Web::Query'); 7 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 8 | 9 | done_testing; 10 | 11 | sub test { 12 | my $class = shift; 13 | diag "testing $class"; 14 | no warnings 'redefine'; 15 | *wq = \&{$class . "::wq"}; 16 | 17 | my $wq = wq(< 20 |
Hello
21 |
World
22 | 23 | 24 |
25 |
Hello
26 |
World
27 |
28 | HTML 29 | 30 | my $elem = $wq->find('.d2')->prev; 31 | is $elem->size, 2; 32 | is $elem->attr('class'), 'd1', 'previous'; 33 | } 34 | -------------------------------------------------------------------------------- /t/processing-instructions.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use lib 't/lib'; 5 | 6 | use Test2::V0; 7 | 8 | use WQTest; 9 | 10 | my $doc = <<'END'; 11 |
12 | 13 |

stuff

14 |

alpha

15 |

aaa

16 |
17 | END 18 | 19 | WQTest::test { 20 | my $class = shift; 21 | 22 | plan skip_all => "not working for $class" 23 | if $class eq 'Web::Query'; 24 | 25 | like $class->new($doc)->find(\"//processing-instruction('xml-stylesheet')")->as_html 26 | => qr/style.css/; 27 | 28 | } 29 | -------------------------------------------------------------------------------- /t/remove.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | use lib 't/lib'; 7 | 8 | use WQTest; 9 | 10 | WQTest::test { 11 | my $class = shift; 12 | 13 | my $wq = $class->new_from_html( '
', { indent => "\t" } ); 14 | $wq->find('foo')->remove; 15 | 16 | is $wq->as_html => '
'; 17 | 18 | for my $method ( qw/ each map / ) { 19 | subtest $method => sub { 20 | plan tests => 5; 21 | 22 | my $wq = new_wq($class); 23 | 24 | $wq->find('p')->$method(sub{ 25 | pass "deleting " . $_->text; 26 | $_->remove; 27 | }); 28 | 29 | is $wq->find('p')->size => 0, "all deleted"; 30 | }; 31 | } 32 | }; 33 | 34 | sub new_wq { 35 | shift->new(<<'END'); 36 |
37 |

one

38 |

two

39 |

three

40 |

four

41 |
42 | END 43 | } 44 | -------------------------------------------------------------------------------- /t/remove_class.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use lib 'lib'; 5 | use Test2::V0; 6 | use Web::Query; 7 | 8 | test('Web::Query'); 9 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 10 | 11 | done_testing; 12 | 13 | 14 | sub test { 15 | my $class = shift; 16 | diag "testing $class"; 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $wq = wq('
Hello
Goodbye
'); 21 | my $rv = $wq->find('.inner')->remove_class('foo bar'); 22 | 23 | isa_ok $rv, ['Web::Query'], 'remove_class returned'; 24 | is $wq->as_html, '
Hello
Goodbye
', 'remove_class("foo bar")'; 25 | 26 | $wq = wq('
Hello
Goodbye
'); 27 | $wq->find('.inner')->remove_class(sub{ 'foo bar' }); 28 | 29 | is $wq->as_html, '
Hello
Goodbye
', 'remove_class(CODE)'; 30 | 31 | } 32 | -------------------------------------------------------------------------------- /t/replace_with.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test2::V0; 4 | 5 | my @modules = qw/ Web::Query Web::Query::LibXML /; 6 | 7 | plan tests => scalar @modules; 8 | 9 | subtest $_ => sub { test($_) } for @modules; 10 | 11 | sub test { 12 | my $class = shift; 13 | 14 | eval "require $class; 1" 15 | or plan skip_all => "couldn't load $class"; 16 | 17 | no warnings 'redefine'; 18 | *wq = \&{$class . "::wq" }; 19 | 20 | my $html = '

Hithereworld

'; 21 | 22 | is wq($html)->find('b')->replace_with('Hello')->end->as_html 23 | => '

Hellothereworld

'; 24 | 25 | my $q = wq( $html ); 26 | 27 | is $q->find('u')->replace_with($q->find('b'))->end->as_html 28 | => '

thereHi

'; 29 | 30 | is wq($html)->find('*')->replace_with(sub { 31 | my $i = $_->text; 32 | return "<$i>"; 33 | } )->end->as_html => '

'; 34 | 35 | is wq($html)->find('*')->replace_with( '' )->end->as_html 36 | => '

'; 37 | 38 | is wq('

foo

')->find('span') 39 | ->replace_with(sub { $_->contents }) 40 | ->end->as_html => '

foo

'; 41 | } 42 | -------------------------------------------------------------------------------- /t/special-attributes.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | use lib 't/lib'; 7 | 8 | use WQTest; 9 | 10 | WQTest::test { 11 | my $class = shift; 12 | 13 | subtest 'data()' => sub { test_data($class) }; 14 | 15 | subtest 'name()' => sub { test_name($class) }; 16 | 17 | subtest 'id()' => sub { test_id($class) }; 18 | 19 | }; 20 | 21 | sub test_data { 22 | my $class = shift; 23 | 24 | my $wq = $class->new_from_html(q{ 25 |
26 | 27 |
28 | }); 29 | 30 | subtest setter => sub { 31 | $wq->find('a')->data( foo => 'bar' ); 32 | pass; 33 | }; 34 | 35 | subtest 'getter' => sub { 36 | is $wq->find('a')->data('foo') => 'bar'; 37 | }; 38 | 39 | } 40 | 41 | sub test_name { 42 | my $class = shift; 43 | 44 | my $wq = $class->new_from_html(q{ 45 |
46 | 47 | 48 | 49 |
50 | }); 51 | 52 | subtest 'getter' => sub { 53 | is [ $wq->find('a,b,c')->name ], [ 'foo', undef, 'bar' ], "getter, list context"; 54 | is scalar $wq->find('a,b,c')->name, 'foo', "getter, scalar context"; 55 | }; 56 | 57 | subtest setter => sub { 58 | $wq->find('a,b,c')->name( 'quux' ); 59 | is $wq->find($_)->name => 'quux' for 'a'..'c'; 60 | } 61 | } 62 | 63 | 64 | sub test_id { 65 | my $class = shift; 66 | 67 | my $wq = $class->new_from_html(q{ 68 |
69 | 70 | 71 | 1 72 | 2 73 | 3 74 |
75 | }); 76 | 77 | is [ $wq->find('a')->id ] => [ undef ], "no id, list context"; 78 | is scalar $wq->find('a')->id => undef, "no id, scalar context"; 79 | 80 | is $wq->find('#foo')->id => 'foo', 'single element'; 81 | is scalar($wq->find('#foo')->id) => 'foo', 'single element, scalar context'; 82 | 83 | is [ $wq->find('c')->id ], [ 'bar', undef, 'baz' ], 'many elements, list context'; 84 | is scalar $wq->find('c')->id, 'bar', 'many elements, scalar context'; 85 | 86 | $wq->find('b')->id('fool'); 87 | is $wq->find('#fool')->tagname => 'b', 'change id, scalar'; 88 | 89 | isa_ok $wq->find('c')->id('buz'), 'Web::Query'; 90 | 91 | is $wq->find('c')->id('buz')->size => 1, 'only the first element'; 92 | is $wq->find('#buz')->text => 1, "change first element"; 93 | 94 | my $i = 0; 95 | $wq->find('c')->id(sub{ 'new_'.$i++ }); 96 | 97 | is $wq->find('#new_'.$_)->size => 1 for 0..2; 98 | } 99 | -------------------------------------------------------------------------------- /t/split.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use lib 't/lib'; 5 | 6 | use Test2::V0; 7 | use Web::Query; 8 | 9 | use WQTest; 10 | 11 | my $doc = <<'END'; 12 |
13 |

stuff

14 |

alpha

15 |

aaa

16 |

beta>

17 |

gamma

18 |

bbb

19 |

ccc

20 |
21 | END 22 | 23 | WQTest::test { 24 | my $class = shift; 25 | 26 | subtest 'straight split' => sub { 27 | my @splitted = $class->new($doc)->split( 'h1' ); 28 | 29 | is scalar @splitted => 4; 30 | like $splitted[0]->as_html(join => ''), 31 | qr/stuff/; 32 | like $splitted[1]->as_html(join => ''), 33 | qr/alpha.*aaa/s; 34 | like $splitted[2]->as_html(join => ''), 35 | qr/beta/; 36 | like $splitted[3]->as_html(join => ''), 37 | qr/gamma.*ccc/s; 38 | }; 39 | 40 | subtest 'split in pairs' => sub { 41 | my @splitted = $class->new($doc)->split( 'h1', pairs => 1 ); 42 | 43 | is scalar @splitted => 4; 44 | like $splitted[0][1]->as_html(join => ''), 45 | qr/stuff/; 46 | like $splitted[1][0]->as_html(join => ''), 47 | qr/alpha/; 48 | like $splitted[1][1]->as_html(join => ''), 49 | qr/aaa/; 50 | }; 51 | 52 | subtest 'skip leading' => sub { 53 | my @splitted = $class->new($doc)->split( 'h1', pairs => 1, skip_leading => 1 ); 54 | 55 | is scalar @splitted => 3; 56 | like $splitted[0][0]->as_html( join => '' ), 57 | qr/alpha/; 58 | like $splitted[0][1]->as_html( join => '' ), 59 | qr/aaa/; 60 | }; 61 | 62 | } 63 | -------------------------------------------------------------------------------- /t/store_comments.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use Test2::V0; 4 | use Web::Query; 5 | 6 | test('Web::Query'); 7 | test('Web::Query::LibXML') if eval "require Web::Query::LibXML; 1"; 8 | 9 | done_testing; 10 | 11 | 12 | sub test { 13 | my $class = shift; 14 | diag "testing $class"; 15 | no warnings 'redefine'; 16 | *wq = \&{$class . "::wq" }; 17 | 18 | 19 | my $source = '
'; 20 | 21 | is join('', wq($source)->as_html), $source, 'constructor stores comments'; 22 | 23 | is wq($source)->find('header')->html('

')->as_html, '

', 'html() stores comments'; 24 | 25 | } -------------------------------------------------------------------------------- /t/tagname.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | my @modules = qw/ Web::Query Web::Query::LibXML /; 7 | 8 | plan tests => scalar @modules; 9 | 10 | for my $module ( @modules ) { 11 | subtest $module => sub { 12 | eval "require $module; 1" 13 | or plan skip_all => "couldn't load $module"; 14 | 15 | my $wq = $module->new_from_html(<<'END'); 16 |

hello

there

17 | END 18 | 19 | $wq->find('p')->each(sub{ $_->tagname('q') }); 20 | 21 | is $wq->as_html, '
hellothere
', 'p -> q'; 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /t/xpath.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test2::V0; 5 | 6 | my @modules = qw/ Web::Query Web::Query::LibXML /; 7 | 8 | plan tests => scalar @modules; 9 | 10 | for my $module ( @modules ) { 11 | subtest $module => sub { 12 | eval "require $module; 1" 13 | or plan skip_all => "couldn't load $module"; 14 | 15 | my $wq = $module->new_from_html(<<'END'); 16 |

hello

there

17 | END 18 | 19 | is $wq->find('b')->html => 'hello', 'css'; 20 | is $wq->find('//b')->text => 'hello', 'xpath'; 21 | }; 22 | } 23 | -------------------------------------------------------------------------------- /weaver.ini: -------------------------------------------------------------------------------- 1 | [@CorePrep] 2 | 3 | [-SingleEncoding] 4 | 5 | [Name] 6 | [Version] 7 | 8 | [Region / prelude] 9 | 10 | [Generic / SYNOPSIS] 11 | [Generic / DESCRIPTION] 12 | [Generic / OVERVIEW] 13 | 14 | [Collect / ATTRIBUTES] 15 | command = attr 16 | 17 | [Collect / METHODS] 18 | command = method 19 | 20 | [Collect / FUNCTIONS] 21 | command = func 22 | 23 | [Leftovers] 24 | 25 | [Region / postlude] 26 | 27 | [Bugs] 28 | 29 | [-Transformer / Lists] 30 | transformer = List 31 | format_name = list 32 | -------------------------------------------------------------------------------- /xt/live/01_simple.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | use utf8; 4 | use Test::More; 5 | use Web::Query; 6 | 7 | binmode Test::More->builder->$_, ":utf8" for qw/output failure_output todo_output/; 8 | 9 | my @res; 10 | wq('https://techblog.babyl.ca/') 11 | ->find('div') 12 | ->each(sub { 13 | my $i = shift; 14 | push @res, $_->text; 15 | note(sprintf "%d) %s\n", $i+1, $_->text) 16 | }); 17 | 18 | ok @res; 19 | 20 | done_testing; 21 | --------------------------------------------------------------------------------