├── README.pod ├── .editorconfig ├── lib └── HTML5 │ ├── DOM │ ├── AsyncResult.pm │ ├── CSS.pm │ ├── Text.pm │ ├── Comment.pm │ ├── DocType.pm │ ├── Document.pm │ ├── Fragment.pm │ ├── CSS │ │ ├── Selector │ │ │ └── Entry.pm │ │ └── Selector.pm │ ├── Element.pm │ ├── Tree.pm │ ├── Node.pm │ ├── Encoding.pm │ ├── TokenList.pm │ └── Collection.pm │ └── DOM.pm ├── .gitmodules ├── modest_config.h ├── modest_myencoding.c ├── MANIFEST.SKIP ├── typemap ├── modest_myurl.c ├── .travis.yml ├── modest_myfont.c ├── leaks.pl ├── modest_mycore.c ├── modest_myport.c ├── t ├── 2-encodings.t ├── 1-api-use-utf8.t └── 0-api.t ├── modest_myhtml.c ├── LICENSE ├── examples ├── simple.pl └── html5lib_tests.pl ├── modest_modest.c ├── scripts ├── tags.txt └── gen.pl ├── Makefile.PL ├── CHANGES ├── modest_mycss.c ├── port └── openbsd │ └── mcsync.c ├── gen ├── tags_ua_style.c └── modest_errors.c ├── utils.h └── utils.c /README.pod: -------------------------------------------------------------------------------- 1 | lib/HTML5/DOM.pod -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | indent_size = 4 6 | charset = utf-8 7 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/AsyncResult.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::AsyncResult; 2 | use strict; 3 | use warnings; 4 | 5 | 1; 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/modest"] 2 | path = third_party/modest 3 | url = https://github.com/lexborisov/Modest 4 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/CSS.pm: 
-------------------------------------------------------------------------------- 1 | package HTML5::DOM::CSS; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::CSS::Selector; 6 | 7 | 1; 8 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/Text.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::Text; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::Node; 6 | 7 | our @ISA = ("HTML5::DOM::Node"); 8 | 9 | 1; 10 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/Comment.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::Comment; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::Node; 6 | 7 | our @ISA = ("HTML5::DOM::Node"); 8 | 9 | 1; 10 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/DocType.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::DocType; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::Node; 6 | 7 | our @ISA = ("HTML5::DOM::Node"); 8 | 9 | 1; 10 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/Document.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::Document; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::Element; # load the parent class named in @ISA (Element itself loads Node) 6 | 7 | our @ISA = ("HTML5::DOM::Element"); 8 | 9 | 1; 10 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/Fragment.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::Fragment; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::Element; # load the parent class named in @ISA (Element itself loads Node) 6 | 7 | our @ISA = ("HTML5::DOM::Element"); 8 | 9 | 1; 10 | --------------------------------------------------------------------------------
/modest_config.h: -------------------------------------------------------------------------------- 1 | #define _DEFAULT_SOURCE 1 2 | #define _BSD_SOURCE 1 3 | #define _POSIX_C_SOURCE 199309L 4 | 5 | #if (defined(_WIN32) || defined(_WIN64)) 6 | #define MyCORE_OS_WINDOWS_NT 7 | #endif 8 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/CSS/Selector/Entry.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::CSS::Selector::Entry; 2 | use strict; 3 | use warnings; 4 | 5 | use overload 6 | '""' => sub { shift->text }, 7 | '%{}' => sub { shift->specificity }, 8 | 'bool' => sub { 1 }, 9 | fallback => 1; 10 | 11 | 1; 12 | __END__ 13 | -------------------------------------------------------------------------------- /modest_myencoding.c: -------------------------------------------------------------------------------- 1 | #include "modest_config.h" 2 | 3 | // myencoding 4 | #include "third_party/modest/source/myencoding/encoding.c" 5 | #include "third_party/modest/source/myencoding/detect.c" 6 | #include "third_party/modest/source/myencoding/mystring.c" 7 | 8 | // myunicode 9 | #include "third_party/modest/source/myunicode/myosi.c" 10 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/Element.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::Element; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::Node; 6 | use HTML5::DOM::TokenList; 7 | 8 | our @ISA = ("HTML5::DOM::Node"); 9 | 10 | sub classList { 11 | return HTML5::DOM::TokenList->new($_[0], "class"); 12 | } 13 | 14 | sub className { 15 | my $class = $_[0]->attr("class"); 16 | return defined $class ? 
$class : ""; 17 | } 18 | 19 | 1; 20 | -------------------------------------------------------------------------------- /MANIFEST.SKIP: -------------------------------------------------------------------------------- 1 | #!install_default 2 | 3 | third_party/.*?\.pm$ 4 | third_party/.*?\.pl$ 5 | \.git 6 | third_party/modest/utils/ 7 | third_party/modest/test/ 8 | third_party/modest/examples/ 9 | third_party/modest/devel/ 10 | third_party/modest/third_party/font/ 11 | Makefile$ 12 | Makefile\.mk$ 13 | Makefile\.bin\.cfg$ 14 | Makefile\.cfg$ 15 | MANIFEST\.bak$ 16 | MYMETA.*?$ 17 | # Extension patterns are regexes: escape the dot and anchor at end of name 18 | \.gz$ 19 | \.tar$ 20 | \.md$ 21 | blib/ 22 | pm_to_lib 23 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/Tree.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::Tree; 2 | use strict; 3 | use warnings; 4 | 5 | use overload 6 | '""' => sub { $_[0]->document->html }, 7 | '@{}' => sub { [$_[0]->document] }, 8 | 'bool' => sub { 1 }, 9 | '==' => sub { defined $_[1] && $_[0]->isSameTree($_[1]) }, 10 | '!=' => sub { !defined $_[1] || !$_[0]->isSameTree($_[1]) }, 11 | fallback => 1; 12 | 13 | sub text { shift->document->text(@_) } 14 | sub html { shift->document->html(@_) } 15 | 16 | 1; 17 | -------------------------------------------------------------------------------- /typemap: -------------------------------------------------------------------------------- 1 | TYPEMAP 2 | 3 | HTML5::DOM T_PTROBJ 4 | HTML5::DOM::Collection T_PTROBJ 5 | HTML5::DOM::Node T_PTROBJ 6 | HTML5::DOM::DocType T_PTROBJ 7 | HTML5::DOM::Element T_PTROBJ 8 | HTML5::DOM::Fragment T_PTROBJ 9 | HTML5::DOM::Comment T_PTROBJ 10 | HTML5::DOM::Text T_PTROBJ 11 | HTML5::DOM::Node T_PTROBJ 12 | HTML5::DOM::Node T_PTROBJ 13 | HTML5::DOM::Tree T_PTROBJ 14 | HTML5::DOM::AsyncResult T_PTROBJ 15 | 16 | HTML5::DOM::CSS T_PTROBJ 17 | HTML5::DOM::CSS::Selector T_PTROBJ 18 | HTML5::DOM::CSS::Selector::Entry T_PTROBJ 19 |
-------------------------------------------------------------------------------- /modest_myurl.c: -------------------------------------------------------------------------------- 1 | #include "modest_config.h" 2 | 3 | // myurl 4 | #include "third_party/modest/source/myurl/myosi.c" 5 | #include "third_party/modest/source/myurl/punycode.c" 6 | #include "third_party/modest/source/myurl/path.c" 7 | #include "third_party/modest/source/myurl/scheme.c" 8 | #include "third_party/modest/source/myurl/url.c" 9 | #include "third_party/modest/source/myurl/utils.c" 10 | #include "third_party/modest/source/myurl/serialization.c" 11 | #include "third_party/modest/source/myurl/parser_end.c" 12 | #include "third_party/modest/source/myurl/host.c" 13 | #include "third_party/modest/source/myurl/parser.c" 14 | -------------------------------------------------------------------------------- /lib/HTML5/DOM/CSS/Selector.pm: -------------------------------------------------------------------------------- 1 | package HTML5::DOM::CSS::Selector; 2 | use strict; 3 | use warnings; 4 | 5 | use HTML5::DOM::CSS::Selector::Entry; 6 | 7 | use overload 8 | '""' => sub { shift->text }, 9 | '@{}' => sub { shift->array }, 10 | 'bool' => sub { 1 }, 11 | fallback => 1; 12 | 13 | sub new { 14 | my ($class, $text) = @_; 15 | return HTML5::DOM::CSS->new->parseSelector($text); 16 | } 17 | 18 | # TODO: implement in XS? 
19 | sub array { 20 | my $self = shift; 21 | my @tmp; 22 | my $l = $self->length; 23 | for (my $i = 0; $i < $l; ++$i) { 24 | push @tmp, $self->entry($i); 25 | } 26 | return \@tmp; 27 | } 28 | 29 | 1; 30 | __END__ 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | include: 3 | - stage: Windows 4 | os: windows 5 | language: shell 6 | before_install: 7 | - cinst -y strawberryperl 8 | - export "PATH=/c/Strawberry/perl/site/bin:/c/Strawberry/perl/bin:/c/Strawberry/c/bin:$PATH" 9 | install: 10 | - cpanm --notest --installdeps . 11 | script: 12 | - perl Makefile.PL && gmake.exe test TEST_VERBOSE=1 13 | 14 | language: perl 15 | perl: 16 | - "5.36" 17 | - "5.30" 18 | - "5.22" 19 | - "5.20" 20 | - "5.18" 21 | - "5.16" 22 | - "5.14" 23 | - "5.8" 24 | os: 25 | - linux 26 | 27 | script: perl Makefile.PL && make test TEST_VERBOSE=1 28 | -------------------------------------------------------------------------------- /modest_myfont.c: -------------------------------------------------------------------------------- 1 | #include "modest_config.h" 2 | 3 | // myfont 4 | #include "third_party/modest/source/myfont/glyf.c" 5 | #include "third_party/modest/source/myfont/myosi.c" 6 | #include "third_party/modest/source/myfont/myfont.c" 7 | #include "third_party/modest/source/myfont/os_2.c" 8 | #include "third_party/modest/source/myfont/head.c" 9 | #include "third_party/modest/source/myfont/hhea.c" 10 | #include "third_party/modest/source/myfont/vmtx.c" 11 | #include "third_party/modest/source/myfont/vhea.c" 12 | #include "third_party/modest/source/myfont/name.c" 13 | #include "third_party/modest/source/myfont/maxp.c" 14 | #include "third_party/modest/source/myfont/pclt.c" 15 | #include "third_party/modest/source/myfont/cmap.c" 16 | #include "third_party/modest/source/myfont/hmtx.c" 17 | #include "third_party/modest/source/myfont/loca.c" 18 | 
-------------------------------------------------------------------------------- /leaks.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | use warnings; 3 | use strict; 4 | use File::Slurp qw|read_file write_file|; 5 | use File::Basename qw|dirname|; 6 | use POSIX; 7 | 8 | for my $file (glob("t/*.t")) { 9 | my $text = read_file($file); 10 | 11 | if ($text =~ /<test-body>(.*?)<\/test-body>/sim) { # leak-check only the code between the test-body markers 12 | print $file."\n"; 13 | my $code = 14 | ' 15 | use warnings; 16 | use strict; 17 | use Test::LeakTrace; 18 | use HTML5::DOM; 19 | 20 | sub ok { 21 | print "ok - ".$_[1]."\n" if ($_[0]); 22 | print "not ok - ".$_[1]."\n" if (!$_[0]); 23 | }; 24 | sub done_testing { }; 25 | sub require_ok { }; 26 | sub can_ok { }; 27 | sub isa_ok { }; 28 | 29 | no_leaks_ok { 30 | (sub { 31 | '.$1.'; 32 | 1; 33 | })->(); 34 | }; 35 | '; 36 | eval($code); 37 | die "$@" if ($@); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /modest_mycore.c: -------------------------------------------------------------------------------- 1 | #include "modest_config.h" 2 | 3 | // mycore 4 | #include "third_party/modest/source/mycore/mythread.c" 5 | #include "third_party/modest/source/mycore/thread_queue.c" 6 | #include "third_party/modest/source/mycore/utils/mchar_async.c" 7 | #include "third_party/modest/source/mycore/utils/mhash.c" 8 | #include "third_party/modest/source/mycore/utils/mctree.c" 9 | #include "third_party/modest/source/mycore/utils/mcobject_async.c" 10 | #include "third_party/modest/source/mycore/utils/mcobject.c" 11 | #include "third_party/modest/source/mycore/utils/mcsync.c" 12 | #include "third_party/modest/source/mycore/utils/mcsimple.c" 13 | #include "third_party/modest/source/mycore/utils/avl_tree.c" 14 | #include "third_party/modest/source/mycore/myosi.c" 15 | #include "third_party/modest/source/mycore/utils.c" 16 | #include "third_party/modest/source/mycore/mystring.c" 17 | #include
"third_party/modest/source/mycore/incoming.c" 18 | -------------------------------------------------------------------------------- /modest_myport.c: -------------------------------------------------------------------------------- 1 | #include "modest_config.h" 2 | 3 | // myport 4 | #ifdef MyCORE_OS_WINDOWS_NT 5 | #include "third_party/modest/source/myport/windows_nt/mycore/io.c" 6 | #include "third_party/modest/source/myport/windows_nt/mycore/utils/mcsync.c" 7 | #include "third_party/modest/source/myport/windows_nt/mycore/memory.c" 8 | #include "third_party/modest/source/myport/windows_nt/mycore/thread.c" 9 | #include "third_party/modest/source/myport/windows_nt/mycore/perf.c" 10 | #else 11 | #include "third_party/modest/source/myport/posix/mycore/io.c" 12 | 13 | #if MyCORE_USE_SEMAPHORE_INSTEAD_OF_MUTEX 14 | #include "port/openbsd/mcsync.c" 15 | #else 16 | #include "third_party/modest/source/myport/posix/mycore/utils/mcsync.c" 17 | #endif 18 | 19 | #include "third_party/modest/source/myport/posix/mycore/memory.c" 20 | #include "third_party/modest/source/myport/posix/mycore/thread.c" 21 | #include "third_party/modest/source/myport/posix/mycore/perf.c" 22 | #endif 23 | -------------------------------------------------------------------------------- /t/2-encodings.t: -------------------------------------------------------------------------------- 1 | use warnings; 2 | use strict; 3 | use Test::More; 4 | 5 | # 6 | 7 | use Encode; 8 | 9 | require_ok('HTML5::DOM'); 10 | 11 | my $encodings = ["WINDOWS-1251", "KOI8-U", "KOI8-R", "UTF-16LE", "UTF-8"]; 12 | my $test_str = "тест test :)"; 13 | 14 | for my $enc (@$encodings) { 15 | my $parser; 16 | 17 | my $from_str = $test_str; 18 | Encode::from_to($from_str, "UTF-8", $enc); 19 | 20 | my $to_str = $from_str; 21 | Encode::from_to($to_str, $enc, "UTF-8"); 22 | 23 | $parser = HTML5::DOM->new({encoding => $enc}); 24 | ok($parser->parse($from_str)->body->text eq $to_str, $enc.' 
- set encoding in new()'); 25 | 26 | $parser = HTML5::DOM->new; 27 | ok($parser->parse($from_str, {encoding => $enc})->body->text eq $to_str, $enc.' - set encoding in parse()'); 28 | 29 | $parser = HTML5::DOM->new; 30 | 31 | my ($enc_id) = HTML5::DOM::Encoding::detectAuto($to_str); 32 | if (!$enc_id) { 33 | ok($parser->parse($from_str, {default_encoding => $enc})->body->text eq $to_str, $enc.' - set default_encoding in parse()'); 34 | } 35 | } 36 | 37 | done_testing; 38 | 39 | # 40 | -------------------------------------------------------------------------------- /modest_myhtml.c: -------------------------------------------------------------------------------- 1 | #include "modest_config.h" 2 | 3 | // myhtml 4 | #include "third_party/modest/source/myhtml/tree.c" 5 | #include "third_party/modest/source/myhtml/data_process.c" 6 | #include "third_party/modest/source/myhtml/mynamespace.c" 7 | #include "third_party/modest/source/myhtml/tokenizer.c" 8 | #include "third_party/modest/source/myhtml/tokenizer_doctype.c" 9 | #include "third_party/modest/source/myhtml/tokenizer_end.c" 10 | #include "third_party/modest/source/myhtml/tokenizer_script.c" 11 | #include "third_party/modest/source/myhtml/tag_init.c" 12 | #include "third_party/modest/source/myhtml/rules.c" 13 | #include "third_party/modest/source/myhtml/callback.c" 14 | #include "third_party/modest/source/myhtml/serialization.c" 15 | #include "third_party/modest/source/myhtml/myhtml.c" 16 | #include "third_party/modest/source/myhtml/mystring.c" 17 | #include "third_party/modest/source/myhtml/tag.c" 18 | #include "third_party/modest/source/myhtml/token.c" 19 | #include "third_party/modest/source/myhtml/stream.c" 20 | #include "third_party/modest/source/myhtml/charef.c" 21 | #include "third_party/modest/source/myhtml/parser.c" 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT 
License (MIT) 2 | 3 | Copyright (c) 2018 Kirill Zhumarin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/simple.pl: -------------------------------------------------------------------------------- 1 | use warnings; 2 | use strict; 3 | use HTML5::DOM; 4 | 5 | # create parser object 6 | my $parser = HTML5::DOM->new; 7 | 8 | # parse some html 9 | my $tree = $parser->parse(' 10 | 11 | 19 | '); 20 | 21 | # find one element by CSS selector 22 | my $ul = $tree->at('ul.list'); 23 | 24 | # prints tag 25 | print $ul->tag."\n"; # ul 26 | 27 | # check if