├── .gitignore
├── Changes
├── MANIFEST
├── Makefile.PL
├── README.markdown
├── Tidy.xs
├── bin
    └── webtidy
├── lib
    └── HTML
    │   ├── Tidy.pm
    │   └── Tidy
    │       └── Message.pm
├── perlcriticrc
├── ppport.h
├── t
    ├── 00-load.t
    ├── cfg-for-parse.cfg
    ├── cfg-for-parse.t
    ├── clean-crash.t
    ├── clean.t
    ├── extra-quote.t
    ├── ignore-text.t
    ├── ignore.t
    ├── illegal-options.t
    ├── levels.t
    ├── message.t
    ├── opt-00.t
    ├── parse-crash.t
    ├── parse-errors.t
    ├── parse.t
    ├── perfect.t
    ├── roundtrip.t
    ├── segfault-form.t
    ├── simple.t
    ├── too-many-titles.t
    ├── unicode-nbsp.t
    ├── unicode.html
    ├── unicode.t
    ├── venus.cfg
    ├── venus.html
    ├── venus.t
    ├── version.t
    ├── wordwrap.cfg
    └── wordwrap.t
├── tags
└── xt
    ├── pod-coverage.t
    └── pod.t


/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | *.o
 3 | Makefile
 4 | Makefile.old
 5 | pm_to_blib
 6 | blib/
 7 | Tidy.bs
 8 | Tidy.c
 9 | MYMETA.json
10 | MYMETA.yml
11 | 


--------------------------------------------------------------------------------
/Changes:
--------------------------------------------------------------------------------
 1 | Revision history for Perl extension HTML::Tidy.
 2 | 
 3 | 
 4 | 1.60    Wed Sep 13 10:34:35 CDT 2017
 5 | ====================================
 6 | No functionality changes.  Fixes failing tests caused by differing
 7 | versions of the tidy library.  Thanks, Rufus Cable.
 8 | 
 9 | 
10 | 1.58    Sat May 27 00:03:51 CDT 2017
11 | ====================================
12 | No functionality changes.  This just fixes some tests.  This will probably
13 | be the last release that uses the old tidy/tidyp.  There is a new tidy
14 | that supports HTML 5 and I'm going to start working on making HTML::Tidy
15 | use that.
16 | 
17 | [FIXES]
18 | Failing tests on Windows.  Thanks, Klaus Baldermann. (GH #11, GH #23)
19 | 
20 | [INTERNALS]
21 | Added more tests.  Thanks, Hunter McMillen.
22 | 
23 | 
24 | 1.56    Sun Sep 22 16:39:40 CDT 2013
25 | ====================================
26 | [ENHANCEMENTS]
27 | webtidy's version statement now shows the version number of the underlying
28 | tidyp library.
29 | 
30 | [FIXES]
31 | Fixed an undef warning in parse().  Thanks, Vladimir Timofeev.
32 | 
33 | utf8 was never encoded correctly.  Thanks, Vladimir Timofeev and Alistair
34 | Francis.
35 | 
36 | The ->parse() method would sometimes return false even though everything
37 | worked fine.  Thanks, @sebaer.
38 | 
39 | 
40 | 
41 | 1.54    Fri Sep 17 00:44:36 CDT 2010
42 | ====================================
43 | Please note that the bug tracker for HTML::Tidy is now at
44 | http://github.com/petdance/html-tidy.
45 | 
46 | [FIXES]
47 | Fixed incorrect calls to croak().  Thanks, Steve Grazzini.
48 | 
49 | [DOCUMENTATION]
50 | Updating all docs referring to libtidyp and Alien::Libtidyp.
51 | 
52 | 
53 | 1.52    Wed May 12 2010
54 | =======================
55 | First release since the major overhaul that relies on libtidyp.
56 | 
57 | Now relies on Perl 5.8.  I'm not at all interested in supporting
58 | ancient Perl version.
59 | 
60 | HTML::Tidy now relies on the libtidyp that Andy Lester maintains
61 | on github.
62 | 
63 | http://github.com/petdance/libtidyp
64 | 
65 | [ENHANCEMENTS]
66 | Now includes support for TIDY_INFO messages.
67 | 
68 | Improve support for Windows platforms.
69 | 
70 | [FIXES]
71 | Fixed a segfault if there are no errors.
72 | 
73 | Allow for either "tidyp" or "HTML Tidy" as a program name in tests.
74 | 


--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
 1 | bin/webtidy
 2 | Changes
 3 | lib/HTML/Tidy.pm
 4 | lib/HTML/Tidy/Message.pm
 5 | Makefile.PL
 6 | MANIFEST
 7 | perlcriticrc
 8 | ppport.h
 9 | README.markdown
10 | t/00-load.t
11 | t/cfg-for-parse.cfg
12 | t/cfg-for-parse.t
13 | t/clean.t
14 | t/clean-crash.t
15 | t/extra-quote.t
16 | t/ignore-text.t
17 | t/ignore.t
18 | t/illegal-options.t
19 | t/levels.t
20 | t/message.t
21 | t/opt-00.t
22 | t/parse.t
23 | t/parse-errors.t
24 | t/perfect.t
25 | t/pod-coverage.t
26 | t/pod.t
27 | t/roundtrip.t
28 | t/segfault-form.t
29 | t/simple.t
30 | t/too-many-titles.t
31 | t/unicode.html
32 | t/unicode.t
33 | t/unicode-nbsp.t
34 | t/venus.cfg
35 | t/venus.html
36 | t/venus.t
37 | t/version.t
38 | t/wordwrap.cfg
39 | t/wordwrap.t
40 | Tidy.xs
41 | 


--------------------------------------------------------------------------------
/Makefile.PL:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | 
  3 | package main;
  4 | 
  5 | use 5.008000;
  6 | use strict;
  7 | use warnings;
  8 | use ExtUtils::MakeMaker;
  9 | use ExtUtils::Liblist;
 10 | use Config;
 11 | 
 12 | my $libs = '-ltidyp';
 13 | my $inc = "-I. -I/usr/include/tidyp -I/usr/local/include/tidyp -I$Config{usrinc}/tidyp";
 14 | 
 15 | eval { require Alien::Tidyp; };
 16 | 
 17 | if ( !$@ ) {
 18 |     print "Using tidyp via Alien::Tidyp\n";
 19 |     $libs = Alien::Tidyp->config('LIBS');
 20 |     $inc = Alien::Tidyp->config('INC');
 21 | }
 22 | else {
 23 |     print "Alien::Tidyp not found. Looking for for tidyp on your system.\n";
 24 |     my @vars = ExtUtils::Liblist->ext( '-L/usr/lib -L/usr/local/lib -ltidyp', 0, 1 );
 25 |     $libs = $vars[2];
 26 | 
 27 |     if ( !$libs ) {
 28 |         $libs = '-ltidyp';
 29 |         print <<'EOF';
 30 | 
 31 | It seems that you don't have tidyp installed.  HTML::Tidy does no
 32 | real work on its own.  It's just a wrapper around tidyp.
 33 | 
 34 | Please read the README.markdown file for details on how to install tidyp.
 35 | 
 36 | If you do have tidyp installed, but Makefile.PL can't detect it,
 37 | go ahead and try building.  If HTML::Tidy builds and tests correctly,
 38 | please file a ticket at Github at
 39 | http://github.com/petdance/html-tidy/issues, so we can fix the
 40 | library detection code.
 41 | 
 42 | EOF
 43 |     }
 44 | }
 45 | 
 46 | eval { require LWP::Simple; };
 47 | 
 48 | if ( $@ ) {
 49 |     print <<'EOF';
 50 | 
 51 | NOTE: It seems that you don't have LWP::Simple installed.
 52 |       The webtidy program will not be able to retrieve web pages.
 53 | 
 54 | EOF
 55 | }
 56 | 
 57 | my $parms = {
 58 |     NAME                => 'HTML::Tidy',
 59 |     AUTHOR              => 'Andy Lester <andy@petdance.com>',
 60 |     VERSION_FROM        => 'lib/HTML/Tidy.pm',
 61 |     ABSTRACT_FROM       => 'lib/HTML/Tidy.pm',
 62 |     PREREQ_PM           => {
 63 |         'Encode'          => 0,   # for tests
 64 |         'Exporter'        => 0,
 65 |         'Getopt::Long'    => 0,   # in webtidy
 66 |         'Test::More'      => '0.98', # For subtest()
 67 |         'Test::Builder'   => 0,
 68 |         'Test::Exception' => 0,
 69 |         'Carp'            => 0,
 70 |         'overload'        => 0,
 71 |         'constant'        => 0,
 72 |     },
 73 | 
 74 |     LIBS                => [$libs],
 75 |     NEEDS_LINKING       => 1,
 76 |     INC                 => $inc,
 77 | 
 78 |     EXE_FILES           => [qw(bin/webtidy)],
 79 |     dist                => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
 80 |     clean               => { FILES => 'HTML-Tidy-*' },
 81 | };
 82 | 
 83 | if ( $ExtUtils::MakeMaker::VERSION ge '6.45_01' ) {
 84 |     $parms->{META_MERGE} = {
 85 |         resources => {
 86 |             license     => 'http://www.opensource.org/licenses/artistic-license-2.0.php',
 87 |             homepage    => 'http://github.com/petdance/html-tidy',
 88 |             bugtracker  => 'http://github.com/petdance/html-tidy/issues',
 89 |             repository  => 'http://github.com/petdance/html-tidy',
 90 |         }
 91 |     };
 92 |     $parms->{LICENSE} = 'artistic_2';
 93 | }
 94 | if ( $ExtUtils::MakeMaker::VERSION ge '6.47_02' ) {
 95 |     $parms->{MIN_PERL_VERSION} = 5.008;
 96 | }
 97 | 
 98 | WriteMakefile( %{$parms} );
 99 | 
100 | sub MY::postamble {
101 | return <<'MAKE_FRAG';
102 | .PHONY: tags critic
103 | 
104 | tags:
105 | 	ctags -f tags --recurse --totals \
106 | 		--exclude=blib --exclude=t/lib \
107 | 		--exclude=.svn --exclude='*~' \
108 | 		--languages=C,Perl --langmap=Perl:+.t \
109 | 		.
110 | 
111 | critic:
112 | 	perlcritic -1 \
113 | 		-profile perlcriticrc \
114 | 		.
115 | 
116 | MAKE_FRAG
117 | }
118 | 


--------------------------------------------------------------------------------
/README.markdown:
--------------------------------------------------------------------------------
 1 | HTML::Tidy
 2 | ==========
 3 | HTML::Tidy is an HTML checker in a handy dandy object.  It's meant as
 4 | a companion to [HTML::Lint][1], which is written in Perl but is not
 5 | nearly as capable as HTML::Tidy.
 6 | 
 7 | 
 8 | PREREQUISITES
 9 | =============
10 | HTML::Tidy does very little work.  The real work of HTML::Tidy is
11 | done by the tidyp library, which is written in C.  To use HTML::Tidy,
12 | you must install tidyp.
13 | 
14 | There are two, perhaps three, ways to install tidyp:
15 | 
16 | * Get a tarball from the [tidyp source distributions][2] from
17 | Github and and build it like any other C library.  Note that you
18 | must get a source tarball, *not* just clone the source tree via
19 | github.
20 | 
21 | * Install the [Alien::Tidyp][3] Perl module, which automates the
22 | tidyp installation process.
23 | 
24 | * Your operating system may also have a package for tidyp that you
25 | can install.  As of this writing, these operating systems are known
26 | to provide tidyp library:
27 |     * Fedora contains [tidyp-devel package][4]
28 |     * FreeBSD contains [tidyp port][5]
29 | 
30 | You need only do one of these steps.
31 | 
32 | 
33 | INSTALLATION
34 | ============
35 | Once you have libtidyp installed via one of the previous methods,
36 | install HTML::Tidy like any standard Perl module.
37 | 
38 |     perl Makefile.PL
39 |     make
40 |     make test
41 |     make install
42 | 
43 | 
44 | COPYRIGHT AND LICENSE
45 | =====================
46 | Copyright (C) 2004-2017 by Andy Lester
47 | 
48 | This library is free software.  It may be redistributed and modified
49 | under the Artistic License v2.0.
50 | 
51 |   [1]: http://search.cpan.org/dist/HTML-Lint/       "HTML::Lint"
52 |   [2]: http://github.com/petdance/tidyp/downloads   "tidyp source distributions"
53 |   [3]: http://search.cpan.org/dist/Alien-Tidyp/     "Alien::Tidyp"
54 |   [4]: https://apps.fedoraproject.org/packages/tidyp "tidyp-devel package"
55 |   [5]: http://fbsdmon.org/ports/textproc/tidyp       "tidyp port"
56 | 


--------------------------------------------------------------------------------
/Tidy.xs:
--------------------------------------------------------------------------------
  1 | #include "EXTERN.h"
  2 | #include "perl.h"
  3 | #include "XSUB.h"
  4 | 
  5 | #include <tidyp.h>
  6 | #include <buffio.h>
  7 | #include <stdio.h>
  8 | #include <errno.h>
  9 | 
 10 | 
 11 | static void
 12 | _load_config_hash(TidyDoc tdoc, HV *tidy_options)
 13 | {
 14 |     HE *entry;
 15 | 
 16 |     (void) hv_iterinit(tidy_options);
 17 | 
 18 |     while ( (entry = hv_iternext(tidy_options)) != NULL ) {
 19 |         I32 key_len;
 20 | 
 21 |         const char * const key = hv_iterkey(entry,&key_len);
 22 |         const TidyOption opt = tidyGetOptionByName(tdoc,key);
 23 | 
 24 |         if (!opt) {
 25 |             warn( "HTML::Tidy: Unrecognized option: \"%s\"\n",key );
 26 |         }
 27 |         else {
 28 |             const TidyOptionId id   = tidyOptGetId(opt);
 29 |             SV * const sv_data      = hv_iterval(tidy_options,entry);
 30 |             STRLEN data_len;
 31 |             const char * const data = SvPV(sv_data,data_len);
 32 | 
 33 |             if ( ! tidyOptSetValue(tdoc,id,data) ) {
 34 |                 warn( "HTML::Tidy: Can't set option: \"%s\" to \"%s\"\n", key, data );
 35 |             }
 36 |         }
 37 |     }
 38 | }
 39 | MODULE = HTML::Tidy         PACKAGE = HTML::Tidy
 40 | 
 41 | PROTOTYPES: ENABLE
 42 | 
 43 | void
 44 | _tidy_messages(input, configfile, tidy_options)
 45 |     INPUT:
 46 |         const char *input
 47 |         const char *configfile
 48 |         HV *tidy_options
 49 |     PREINIT:
 50 |         TidyBuffer errbuf = {0};
 51 |         TidyDoc tdoc = tidyCreate(); /* Initialize "document" */
 52 |         const char* newline;
 53 |         int rc = 0;
 54 |     PPCODE:
 55 |         tidyBufInit(&errbuf);
 56 |         rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 );
 57 | 
 58 |         if ( (rc >= 0 ) && configfile && *configfile ) {
 59 |             rc = tidyLoadConfig( tdoc, configfile );
 60 |         }
 61 | 
 62 |         if ( rc >= 0 ) {
 63 |             _load_config_hash(tdoc,tidy_options);
 64 |         }
 65 | 
 66 |         if ( rc >= 0 ) {
 67 |             /* Capture diagnostics */
 68 |             rc = tidySetErrorBuffer( tdoc, &errbuf );
 69 |         }
 70 | 
 71 |         if ( rc >= 0 ) {
 72 |             /* Parse the input */
 73 |             rc = tidyParseString( tdoc, input );
 74 |         }
 75 | 
 76 |         if ( rc >= 0 && errbuf.bp) {
 77 |             XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) );
 78 | 
 79 |             /* TODO: Make this a function */
 80 |             switch ( tidyOptGetInt(tdoc,TidyNewline) ) {
 81 |                 case TidyLF:
 82 |                     newline = "\n";
 83 |                     break;
 84 |                 case TidyCR:
 85 |                     newline = "\r";
 86 |                     break;
 87 |                 default:
 88 |                     newline = "\r\n";
 89 |                     break;
 90 |             }
 91 |             XPUSHs( sv_2mortal(newSVpv(newline, 0)) );
 92 |         }
 93 |         else {
 94 |             rc = -1;
 95 |         }
 96 | 
 97 |         if ( errbuf.bp )
 98 |             tidyBufFree( &errbuf );
 99 |         tidyRelease( tdoc );
100 | 
101 |         if ( rc < 0 ) {
102 |             XSRETURN_UNDEF;
103 |         }
104 | 
105 | 
106 | void
107 | _tidy_clean(input, configfile, tidy_options)
108 |     INPUT:
109 |         const char *input
110 |         const char *configfile
111 |         HV *tidy_options
112 |     PREINIT:
113 |         TidyBuffer errbuf = {0};
114 |         TidyBuffer output = {0};
115 |         TidyDoc tdoc = tidyCreate(); /* Initialize "document" */
116 |         const char* newline;
117 |         int rc = 0;
118 |     PPCODE:
119 |         tidyBufInit(&output);
120 |         tidyBufInit(&errbuf);
121 |         /* Set our default first. */
122 |         /* Don't word-wrap */
123 |         rc = ( tidyOptSetInt( tdoc, TidyWrapLen, 0 ) ? rc : -1 );
124 | 
125 |         if ( (rc >= 0 ) && configfile && *configfile ) {
126 |             rc = tidyLoadConfig( tdoc, configfile );
127 |         }
128 | 
129 |         /* XXX I think this cascade is a bug waiting to happen */
130 | 
131 |         if ( rc >= 0 ) {
132 |             rc = ( tidyOptSetValue( tdoc, TidyCharEncoding, "utf8" ) ? rc : -1 );
133 |         }
134 | 
135 |         if ( rc >= 0 ) {
136 |             _load_config_hash( tdoc, tidy_options );
137 |         }
138 | 
139 |         if ( rc >= 0 ) {
140 |             rc = tidySetErrorBuffer( tdoc, &errbuf );  /* Capture diagnostics */
141 |         }
142 | 
143 |         if ( rc >= 0 ) {
144 |             rc = tidyParseString( tdoc, input );   /* Parse the input */
145 |         }
146 | 
147 |         if ( rc >= 0 ) {
148 |             rc = tidyCleanAndRepair(tdoc);
149 |         }
150 | 
151 |         if ( rc > 1 ) {
152 |             rc = ( tidyOptSetBool( tdoc, TidyForceOutput, yes ) ? rc : -1 );
153 |         }
154 | 
155 |         if ( rc >= 0) {
156 |             rc = tidySaveBuffer( tdoc, &output );
157 |         }
158 | 
159 |         if ( rc >= 0) {
160 |             rc = tidyRunDiagnostics( tdoc );
161 |         }
162 | 
163 |         if ( rc >= 0 && output.bp && errbuf.bp ) {
164 |             XPUSHs( sv_2mortal(newSVpvn((char *)output.bp, output.size)) );
165 |             XPUSHs( sv_2mortal(newSVpvn((char *)errbuf.bp, errbuf.size)) );
166 | 
167 |             /* TODO: Hoist this into a function */
168 |             switch ( tidyOptGetInt(tdoc,TidyNewline) ) {
169 |                 case TidyLF:
170 |                     newline = "\n";
171 |                     break;
172 |                 case TidyCR:
173 |                     newline = "\r";
174 |                     break;
175 |                 default: 
176 |                     newline = "\r\n";
177 |                     break;
178 |             }
179 |             XPUSHs( sv_2mortal(newSVpv(newline, 0)) );
180 |         }
181 |         else {
182 |             rc = -1;
183 |         }
184 | 
185 |         tidyBufFree( &output );
186 |         tidyBufFree( &errbuf );
187 |         tidyRelease( tdoc );
188 | 
189 |         if ( rc < 0 ) {
190 |             XSRETURN_UNDEF;
191 |         }
192 | 
193 | 
194 | SV*
195 | _tidyp_version()
196 |     PREINIT:
197 |         const char* version;
198 |     CODE:
199 |         version = tidyVersion();
200 |         RETVAL = newSVpv(version,0); /* will be automatically "mortalized" */
201 |     OUTPUT:
202 |         RETVAL
203 | 


--------------------------------------------------------------------------------
/bin/webtidy:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Getopt::Long;
 7 | use HTML::Tidy;
 8 | 
 9 | my $help;
10 | my $context;
11 | 
12 | my $tidy = HTML::Tidy->new;
13 | 
14 | GetOptions(
15 |     'help|version'  => \$help,
16 |     'context:i'     => \$context,
17 |     'noerrors'      => sub { $tidy->ignore( type => [ TIDY_ERROR ] ) },
18 |     'nowarnings'    => sub { $tidy->ignore( type => [ TIDY_WARNING ] ) },
19 | ) or $help = 1;
20 | 
21 | if ( !@ARGV || $help ) {
22 |     print "webtidy v$HTML::Tidy::VERSION using tidyp v" . HTML::Tidy::tidyp_version() . "\n";
23 |     print <DATA>;
24 |     exit 1;
25 | }
26 | 
27 | 
28 | for my $url ( @ARGV ) {
29 |     my @lines;
30 |     if ( $url =~ /^https?:/ ) {
31 |         if ( !eval { require LWP::Simple; 1; } ) {
32 |             warn q{Can't retrieve URLs without LWP::Simple installed};
33 |             next;
34 |         }
35 | 
36 |         my $content = LWP::Simple::get( $url );
37 |         if ( $content ) {
38 |             @lines = split( /\n/, $content );
39 |             $_ = "$_\n" for @lines;
40 |         } else {
41 |             warn "Unable to fetch $url\n";
42 |             next;
43 |         }
44 |     } else {
45 |         open( my $fh, '<', $url ) or die "Can't open $url: $!";
46 |         @lines = <$fh>;
47 |         close $fh;
48 |     }
49 | 
50 |     $tidy->parse( $url, @lines );
51 |     for my $message ( $tidy->messages ) {
52 |         print $message->as_string(), "\n";
53 |         if ( defined $context ) {
54 |             $context += 0;
55 |             my $lineno = $message->line - 1;
56 | 
57 |             my $start = $lineno-$context;
58 |             $start = 0 if $start < 0;
59 | 
60 |             my $end = $lineno+$context;
61 |             $end = $#lines if $end > $#lines;
62 | 
63 |             for my $i ( $start..$end ) {
64 |                 printf( '%5d: %s', $i+1, $lines[$i] );
65 |             }
66 |             print "\n";
67 |         }
68 |     }
69 |     $tidy->clear_messages();
70 | } # for files
71 | 
72 | __END__
73 | Usage: webtidy [filename or url]... (filename - reads STDIN)
74 |     --context[=n]   Show the offending line (and n surrounding lines)
75 |     --noerrors      Ignore errors
76 |     --nowarnings    Ignore warnings
77 | 
78 |     --help          This message
79 | 
80 | webtidy is free software.  You may modify or distribute it under the
81 | terms of the Artistic License v2.0.
82 | 


--------------------------------------------------------------------------------
/lib/HTML/Tidy.pm:
--------------------------------------------------------------------------------
  1 | package HTML::Tidy;
  2 | 
  3 | use 5.008;
  4 | use strict;
  5 | use warnings;
  6 | use Carp ();
  7 | 
  8 | use HTML::Tidy::Message;
  9 | 
 10 | =head1 NAME
 11 | 
 12 | HTML::Tidy - (X)HTML validation in a Perl object
 13 | 
 14 | =head1 VERSION
 15 | 
 16 | Version 1.60
 17 | 
 18 | =cut
 19 | 
 20 | our $VERSION = '1.60';
 21 | 
 22 | =head1 SYNOPSIS
 23 | 
 24 |     use HTML::Tidy;
 25 | 
 26 |     my $tidy = HTML::Tidy->new( {config_file => 'path/to/config'} );
 27 |     $tidy->ignore( type => TIDY_WARNING, type => TIDY_INFO );
 28 |     $tidy->parse( "foo.html", $contents_of_foo );
 29 | 
 30 |     for my $message ( $tidy->messages ) {
 31 |         print $message->as_string;
 32 |     }
 33 | 
 34 | =head1 DESCRIPTION
 35 | 
 36 | C<HTML::Tidy> is an HTML checker in a handy dandy object.  It's meant as
 37 | a replacement for L<HTML::Lint|HTML::Lint>.  If you're currently an L<HTML::Lint|HTML::Lint>
 38 | user looking to migrate, see the section L</Converting from HTML::Lint>.
 39 | 
 40 | =head1 EXPORTS
 41 | 
 42 | Message types C<TIDY_ERROR>, C<TIDY_WARNING> and C<TIDY_INFO>.
 43 | 
 44 | Everything else is an object method.
 45 | 
 46 | =cut
 47 | 
 48 | use base 'Exporter';
 49 | 
 50 | use constant TIDY_ERROR   => 3;
 51 | use constant TIDY_WARNING => 2;
 52 | use constant TIDY_INFO    => 1;
 53 | 
 54 | our @EXPORT = qw( TIDY_ERROR TIDY_WARNING TIDY_INFO );
 55 | 
 56 | =head1 METHODS
 57 | 
 58 | =head2 new()
 59 | 
 60 | Create an HTML::Tidy object.
 61 | 
 62 |     my $tidy = HTML::Tidy->new();
 63 | 
 64 | Optionally you can give a hashref of configuration parms.
 65 | 
 66 |     my $tidy = HTML::Tidy->new( {config_file => 'path/to/tidy.cfg'} );
 67 | 
 68 | This configuration file will be read and used when you clean or parse an HTML file.
 69 | 
 70 | You can also pass options directly to tidyp.
 71 | 
 72 |     my $tidy = HTML::Tidy->new( {
 73 |                                     output_xhtml => 1,
 74 |                                     tidy_mark => 0,
 75 |                                 } );
 76 | 
 77 | See C<tidyp -help-config> for the list of options supported by tidyp.
 78 | 
 79 | The following options are not supported by C<HTML::Tidy>:
 80 | 
 81 | =over 4
 82 | 
 83 | =item * quiet
 84 | 
 85 | =back
 86 | 
 87 | =cut
 88 | 
 89 | sub new {
 90 |     my $class = shift;
 91 |     my $args = shift || {};
 92 |     my @unsupported_options = qw(
 93 |         force-output
 94 |         gnu-emacs-file
 95 |         gnu-emacs
 96 |         keep-time
 97 |         quiet
 98 |         slide-style
 99 |         write-back
100 |     ); # REVIEW perhaps a list of supported options would be better
101 | 
102 |     my $self = bless {
103 |         messages     => [],
104 |         ignore_type  => [],
105 |         ignore_text  => [],
106 |         config_file  => '',
107 |         tidy_options => {},
108 |     }, $class;
109 | 
110 |     for my $key (keys %{$args} ) {
111 |         if ($key eq 'config_file') {
112 |             $self->{config_file} = $args->{$key};
113 |             next;
114 |         }
115 | 
116 |         my $newkey = $key;
117 |         $newkey =~ tr/_/-/;
118 | 
119 |         if ( grep {$newkey eq $_} @unsupported_options ) {
120 |             Carp::croak( "Unsupported option: $newkey" );
121 |         }
122 | 
123 |         $self->{tidy_options}->{$newkey} = $args->{$key};
124 |     }
125 | 
126 |     return $self;
127 | }
128 | 
129 | =head2 messages()
130 | 
131 | Returns the messages accumulated.
132 | 
133 | =cut
134 | 
135 | sub messages {
136 |     my $self = shift;
137 | 
138 |     return @{$self->{messages}};
139 | }
140 | 
141 | =head2 clear_messages()
142 | 
143 | Clears the list of messages, in case you want to print and clear, print
144 | and clear.  If you don't clear the messages, then each time you call
145 | L<parse()|parse( $filename, $str [, $str...] )> you'll be accumulating more in the list.
146 | 
147 | =cut
148 | 
149 | sub clear_messages {
150 |     my $self = shift;
151 | 
152 |     $self->{messages} = [];
153 | 
154 |     return;
155 | }
156 | 
157 | =head2 ignore( parm => value [, parm => value ] )
158 | 
159 | Specify types of messages to ignore.  Note that the ignore flags must be
160 | set B<before> calling C<parse()>.  You can call C<ignore()> as many times
161 | as necessary to set up all your restrictions; the options will stack up.
162 | 
163 | =over 4
164 | 
165 | =item * type => TIDY_INFO|TIDY_WARNING|TIDY_ERROR
166 | 
167 | Specifies the type of messages you want to ignore, either info or warnings
168 | or errors.  If you wanted, you could call ignore on all three and get
169 | no messages at all.
170 | 
171 |     $tidy->ignore( type => TIDY_WARNING );
172 | 
173 | =item * text => qr/regex/
174 | 
175 | =item * text => [ qr/regex1/, qr/regex2/, ... ]
176 | 
177 | Checks the text of the message against the specified regex or regexes,
178 | and ignores the message if there's a match.  The value for the I<text>
179 | parm may be either a regex, or a reference to a list of regexes.
180 | 
181 |     $tidy->ignore( text => qr/DOCTYPE/ );
182 |     $tidy->ignore( text => [ qr/unsupported/, qr/proprietary/i ] );
183 | 
184 | =back
185 | 
186 | =cut
187 | 
188 | sub ignore {
189 |     my $self = shift;
190 |     my @parms = @_;
191 | 
192 |     while ( @parms ) {
193 |         my $parm = shift @parms;
194 |         my $value = shift @parms;
195 |         my @values = ref($value) eq 'ARRAY' ? @{$value} : ($value);
196 | 
197 |         Carp::croak( qq{Invalid ignore type of "$parm"} )
198 |             unless ($parm eq 'text') or ($parm eq 'type');
199 | 
200 |         push( @{$self->{"ignore_$parm"}}, @values );
201 |     } # while
202 | 
203 |     return;
204 | } # ignore
205 | 
206 | =head2 parse( $filename, $str [, $str...] )
207 | 
208 | Parses a string, or list of strings, that make up a single HTML file.
209 | 
210 | The I<$filename> parm is only used as an identifier for your use.
211 | The file is not actually read and opened.
212 | 
213 | Returns true if all went OK, or false if there was some problem calling
214 | tidy, or parsing tidy's output.
215 | 
216 | =cut
217 | 
218 | sub parse {
219 |     my $self = shift;
220 |     my $filename = shift;
221 |     if (@_ == 0) {
222 |         Carp::croak('Usage: parse($filename,$str [, $str...])') ## no critic
223 |     }
224 |     my $html = join( '', @_ );
225 | 
226 |     utf8::encode($html) if utf8::is_utf8($html);
227 |     my ($errorblock,$newline) = _tidy_messages( $html, $self->{config_file}, $self->{tidy_options} );
228 |     return 1 unless defined $errorblock;
229 | 
230 |     utf8::decode($errorblock);
231 | 
232 |     return !$self->_parse_errors($filename, $errorblock, $newline);
233 | }
234 | 
235 | sub _parse_errors {
236 |     my $self = shift;
237 |     my $filename = shift;
238 |     my $errs = shift;
239 |     my $newline = shift;
240 | 
241 |     my $parse_errors;
242 | 
243 |     my @lines = split( /$newline/, $errs );
244 | 
245 |     for my $line ( @lines ) {
246 |         chomp $line;
247 | 
248 |         my $message;
249 |         if ( $line =~ /^line (\d+) column (\d+) - (Warning|Error|Info): (.+)$/ ) {
250 |             my ($line, $col, $type, $text) = ($1, $2, $3, $4);
251 |             $type =
252 |                 ($type eq 'Warning') ? TIDY_WARNING :
253 |                 ($type eq 'Info')    ? TIDY_INFO :
254 |                                        TIDY_ERROR;
255 |             $message = HTML::Tidy::Message->new( $filename, $type, $line, $col, $text );
256 | 
257 |         }
258 |         elsif ( $line =~ m/^Info: (.+)$/  ) {
259 |             # Info line we don't want
260 | 
261 |             my $text = $1;
262 |             $message = HTML::Tidy::Message->new( $filename, TIDY_INFO, undef, undef, $text );
263 |         }
264 |         elsif ( $line =~ /^\d+ warnings?, \d+ errors? were found!/ ) {
265 |             # Summary line we don't want
266 | 
267 |         }
268 |         elsif ( $line eq 'No warnings or errors were found.' ) {
269 |             # Summary line we don't want
270 | 
271 |         }
272 |         elsif ( $line eq 'This document has errors that must be fixed before' ) {
273 |             # Summary line we don't want
274 | 
275 |         }
276 |         elsif ( $line eq 'using HTML Tidy to generate a tidied up version.' ) {
277 |             # Summary line we don't want
278 | 
279 |         }
280 |         elsif ( $line =~ m/^\s*$/  ) {
281 |             # Blank line we don't want
282 | 
283 |         }
284 |         else {
285 |             Carp::carp "HTML::Tidy: Unknown error type: $line";
286 |             ++$parse_errors;
287 |         }
288 |         push( @{$self->{messages}}, $message )
289 |             if $message && $self->_is_keeper( $message );
290 |     } # for
291 |     return $parse_errors;
292 | }
293 | 
294 | =head2 clean( $str [, $str...] )
295 | 
296 | Cleans a string, or list of strings, that make up a single HTML file.
297 | 
298 | Returns the cleaned string as a single string.
299 | 
300 | =cut
301 | 
302 | sub clean {
303 |     my $self = shift;
304 |     if (@_ == 0) {
305 |         Carp::croak('Usage: clean($str [, $str...])') ## no critic
306 |     }
307 |     my $text = join( '', @_ );
308 | 
309 |     utf8::encode($text) if utf8::is_utf8($text);
310 |     if ( defined $text ) {
311 |         $text .= "\n";
312 |     }
313 | 
314 |     my ($cleaned, $errbuf, $newline) = _tidy_clean( $text,
315 |                                           $self->{config_file},
316 |                                           $self->{tidy_options});
317 |     utf8::decode($cleaned);
318 |     utf8::decode($errbuf);
319 | 
320 |     $self->_parse_errors('', $errbuf, $newline);
321 |     return $cleaned;
322 | }
323 | 
324 | # Tells whether a given message object is one that we should keep.
325 | 
326 | sub _is_keeper {
327 |     my $self = shift;
328 | 
329 |     my $message = shift;
330 | 
331 |     my @ignore_types = @{$self->{ignore_type}};
332 |     if ( @ignore_types ) {
333 |         return if grep { $message->type == $_ } @ignore_types;
334 |     }
335 | 
336 |     my @ignore_texts = @{$self->{ignore_text}};
337 |     if ( @ignore_texts ) {
338 |         return if grep { $message->text =~ $_ } @ignore_texts;
339 |     }
340 | 
341 |     return 1;
342 | }
343 | 
344 | =head2 tidyp_version()
345 | 
346 | =head2 libtidyp_version()
347 | 
348 | Returns the version of the underling tidyp library.
349 | 
350 | =cut
351 | 
352 | # backcompat
353 | sub libtidyp_version { return shift->tidyp_version }
354 | 
355 | sub tidyp_version {
356 |     my $version_str = _tidyp_version();
357 | 
358 |     return $version_str;
359 | }
360 | 
361 | require XSLoader;
362 | XSLoader::load('HTML::Tidy', $VERSION);
363 | 
364 | 1;
365 | 
366 | __END__
367 | 
368 | =head1 INSTALLING TIDYP
369 | 
370 | C<HTML::Tidy> requires that C<tidyp> be installed on your system.
371 | You can obtain tidyp through your distribution's package manager
372 | (make sure you install the development package with headers), or from
373 | the tidyp Git repository at L<http://github.com/petdance/tidyp>.
374 | 
375 | =head1 CONVERTING FROM C<HTML::Lint>
376 | 
377 | C<HTML::Tidy> is different from C<HTML::Lint> in a number of crucial ways.
378 | 
379 | =over 4
380 | 
381 | =item * It's not pure Perl
382 | 
383 | C<HTML::Tidy> is mostly a happy wrapper around tidyp.
384 | 
385 | =item * The real work is done by someone else
386 | 
387 | Changes to tidyp may come down the pipe that I don't have control over.
388 | That's the price we pay for having it do a darn good job.
389 | 
390 | =item * It's no longer bundled with its C<Test::> counterpart
391 | 
392 | L<HTML::Lint|HTML::Lint> came bundled with C<Test::HTML::Lint>, but
393 | L<Test::HTML::Tidy|Test::HTML::Tidy> is a separate distribution.  This saves the people
394 | who don't want the C<Test::> framework from pulling it in, and all its
395 | prerequisite modules.
396 | 
397 | =back
398 | 
399 | =head1 BUGS & FEEDBACK
400 | 
401 | Please report any bugs or feature requests at the issue tracker on github
402 | L<http://github.com/petdance/html-tidy/issues>.  I will be notified,
403 | and then you'll automatically be notified of progress on your bug as I
404 | make changes.
405 | 
406 | Please do NOT use L<http://rt.cpan.org>.
407 | 
408 | =head1 SUPPORT
409 | 
410 | You can find documentation for this module with the perldoc command.
411 | 
412 |     perldoc HTML::Tidy
413 | 
414 | You can also look for information at:
415 | 
416 | =over 4
417 | 
418 | =item * HTML::Tidy's issue queue at github
419 | 
420 | L<http://github.com/petdance/html-tidy/issues>
421 | 
422 | =item * AnnoCPAN: Annotated CPAN documentation
423 | 
424 | L<http://annocpan.org/dist/HTML-Tidy>
425 | 
426 | =item * CPAN Ratings
427 | 
428 | L<http://cpanratings.perl.org/d/HTML-Tidy>
429 | 
430 | =item * search.cpan.org
431 | 
432 | L<http://search.cpan.org/dist/HTML-Tidy>
433 | 
434 | =item * Git source code repository
435 | 
436 | L<http://github.com/petdance/html-tidy>
437 | 
438 | =back
439 | 
440 | =head1 ACKNOWLEDGEMENTS
441 | 
442 | Thanks to
443 | Rufus Cable,
444 | Jonathan Rockway,
445 | and Robert Bachmann for contributions.
446 | 
447 | =head1 AUTHOR
448 | 
449 | Andy Lester, C<< <andy at petdance.com> >>
450 | 
451 | =head1 COPYRIGHT & LICENSE
452 | 
453 | Copyright (C) 2005-2017 by Andy Lester
454 | 
455 | This library is free software.  You mean modify or distribute it under
456 | the Artistic License v2.0.
457 | 
458 | =cut
459 | 


--------------------------------------------------------------------------------
/lib/HTML/Tidy/Message.pm:
--------------------------------------------------------------------------------
  1 | package HTML::Tidy::Message;
  2 | 
  3 | use warnings;
  4 | use strict;
  5 | use overload
  6 |     q{""} => \&as_string,
  7 |     fallback => 'sounds like a good idea';
  8 | 
  9 | =head1 NAME
 10 | 
 11 | HTML::Tidy::Message - Message object for the Tidy functionality
 12 | 
 13 | =head1 SYNOPSIS
 14 | 
 15 | See L<HTML::Tidy|HTML::Tidy> for all the gory details.
 16 | 
 17 | =head1 EXPORTS
 18 | 
 19 | None.  It's all object-based.
 20 | 
 21 | =head1 METHODS
 22 | 
 23 | Almost everything is an accessor.
 24 | 
 25 | =head2 new( $file, $line, $column, $text )
 26 | 
 27 | Create an object.  It's not very exciting.
 28 | 
 29 | =cut
 30 | 
 31 | sub new {
 32 |     my $class  = shift;
 33 | 
 34 |     my $file   = shift;
 35 |     my $type   = shift;
 36 |     my $line   = shift || 0;
 37 |     my $column = shift || 0;
 38 |     my $text   = shift;
 39 | 
 40 |     # Add an element that says what tag caused the error (B, TR, etc)
 41 |     # so that we can match 'em up down the road.
 42 |     my $self  = {
 43 |         _file   => $file,
 44 |         _type   => $type,
 45 |         _line   => $line,
 46 |         _column => $column,
 47 |         _text   => $text,
 48 |     };
 49 | 
 50 |     bless $self, $class;
 51 | 
 52 |     return $self;
 53 | }
 54 | 
 55 | =head2 where()
 56 | 
 57 | Returns a formatted string that describes where in the file the
 58 | error has occurred.
 59 | 
 60 | For example,
 61 | 
 62 |     (14:23)
 63 | 
 64 | for line 14, column 23.
 65 | 
 66 | The terrible thing about this function is that it's both a plain
 67 | ol' formatting function as in
 68 | 
 69 |     my $str = where( 14, 23 );
 70 | 
 71 | AND it's an object method, as in:
 72 | 
 73 |     my $str = $error->where();
 74 | 
 75 | I don't know what I was thinking when I set it up this way, but
 76 | it's bad practice.
 77 | 
 78 | =cut
 79 | 
 80 | sub where {
 81 |     my $self = shift;
 82 | 
 83 |     return '-' unless $self->line && $self->column;
 84 | 
 85 |     return sprintf( '(%d:%d)', $self->line, $self->column );
 86 | }
 87 | 
 88 | =head2 as_string()
 89 | 
 90 | Returns a nicely-formatted string for printing out to stdout or some similar user thing.
 91 | 
 92 | =cut
 93 | 
 94 | sub as_string {
 95 |     my $self = shift;
 96 | 
 97 |     my %strings = (
 98 |         1 => 'Info',
 99 |         2 => 'Warning',
100 |         3 => 'Error',
101 |     );
102 | 
103 |     return sprintf( '%s %s %s: %s',
104 |         $self->file, $self->where, $strings{$self->type}, $self->text );
105 | }
106 | 
107 | =head2 file()
108 | 
109 | Returns the filename of the error, as set by the caller.
110 | 
111 | =head2 type()
112 | 
113 | Returns the type of the error.  This will either be C<TIDY_ERROR>,
114 | or C<TIDY_WARNING>.
115 | 
116 | =head2 line()
117 | 
118 | Returns the line number of the error, or 0 if there isn't an applicable
119 | line number.
120 | 
121 | =head2 column()
122 | 
123 | Returns the column number, or 0 if there isn't an applicable column
124 | number.
125 | 
126 | =head2 text()
127 | 
128 | Returns the text of the message.  This does not include a type string,
129 | like "Info: ".
130 | 
131 | =cut
132 | 
133 | sub file    { my $self = shift; return $self->{_file} }
134 | sub type    { my $self = shift; return $self->{_type} }
135 | sub line    { my $self = shift; return $self->{_line} }
136 | sub column  { my $self = shift; return $self->{_column} }
137 | sub text    { my $self = shift; return $self->{_text} }
138 | 
139 | 
140 | =head1 COPYRIGHT & LICENSE
141 | 
142 | Copyright 2005-2017 Andy Lester.
143 | 
144 | This program is free software; you can redistribute it and/or modify
145 | it under the terms of the Artistic License v2.0.
146 | 
147 | =head1 AUTHOR
148 | 
149 | Andy Lester, C<< <andy@petdance.com> >>
150 | 
151 | =cut
152 | 
153 | 1; # happy
154 | 


--------------------------------------------------------------------------------
/perlcriticrc:
--------------------------------------------------------------------------------
 1 | [-CodeLayout::ProhibitParensWithBuiltins]
 2 | [CodeLayout::ProhibitHardTabs]
 3 | allow_leading_tabs = 0
 4 | [-CodeLayout::RequireTidyCode]
 5 | # Never works for me.
 6 | 
 7 | [Compatibility::PodMinimumVersion]
 8 | above_version = 5.008008
 9 | 
10 | [-ControlStructures::ProhibitPostfixControls]
11 | 
12 | [-Documentation::PodSpelling]
13 | [-Documentation::RequirePodAtEnd]
14 | [-Documentation::RequirePodSections]
15 | 
16 | [-InputOutput::RequireCheckedSyscalls]
17 | functions = open opendir read readline readdir close closedir
18 | 
19 | [-Miscellanea::RequireRcsKeywords]
20 | 
21 | [-Modules::RequireVersionVar]
22 | 
23 | [-RegularExpressions::ProhibitEscapedMetacharacters]
24 | [-RegularExpressions::RequireDotMatchAnything]
25 | [-RegularExpressions::RequireExtendedFormatting]
26 | [-RegularExpressions::RequireLineBoundaryMatching]
27 | 
28 | [Subroutines::RequireArgUnpacking]
29 | 
30 | [-ValuesAndExpressions::ProhibitConstantPragma]
31 | [-ValuesAndExpressions::ProhibitNoisyQuotes]
32 | [-ValuesAndExpressions::ProhibitEmptyQuotes]
33 | [-ValuesAndExpressions::ProhibitMagicNumbers]
34 | 
35 | [-Variables::ProhibitPunctuationVars]
36 | 


--------------------------------------------------------------------------------
/ppport.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /* ppport.h -- Perl/Pollution/Portability Version 2.007 
  3 |  *
  4 |  * Automatically Created by Devel::PPPort on Mon Feb 16 21:21:31 2004 
  5 |  *
  6 |  * Do NOT edit this file directly! -- Edit PPPort.pm instead.
  7 |  *
  8 |  * Version 2.x, Copyright (C) 2001, Paul Marquess.
  9 |  * Version 1.x, Copyright (C) 1999, Kenneth Albanowski.
 10 |  * This code may be used and distributed under the same license as any
 11 |  * version of Perl.
 12 |  * 
 13 |  * This version of ppport.h is designed to support operation with Perl
 14 |  * installations back to 5.004, and has been tested up to 5.8.1.
 15 |  *
 16 |  * If this version of ppport.h is failing during the compilation of this
 17 |  * module, please check if a newer version of Devel::PPPort is available
 18 |  * on CPAN before sending a bug report.
 19 |  *
 20 |  * If you are using the latest version of Devel::PPPort and it is failing
 21 |  * during compilation of this module, please send a report to perlbug@perl.com
 22 |  *
 23 |  * Include all following information:
 24 |  *
 25 |  *  1. The complete output from running "perl -V"
 26 |  *
 27 |  *  2. This file.
 28 |  *
 29 |  *  3. The name & version of the module you were trying to build.
 30 |  *
 31 |  *  4. A full log of the build that failed.
 32 |  *
 33 |  *  5. Any other information that you think could be relevant.
 34 |  *
 35 |  *
 36 |  * For the latest version of this code, please retreive the Devel::PPPort
 37 |  * module from CPAN.
 38 |  * 
 39 |  */
 40 | 
 41 | /*
 42 |  * In order for a Perl extension module to be as portable as possible
 43 |  * across differing versions of Perl itself, certain steps need to be taken.
 44 |  * Including this header is the first major one, then using dTHR is all the
 45 |  * appropriate places and using a PL_ prefix to refer to global Perl
 46 |  * variables is the second.
 47 |  *
 48 |  */
 49 | 
 50 | 
 51 | /* If you use one of a few functions that were not present in earlier
 52 |  * versions of Perl, please add a define before the inclusion of ppport.h
 53 |  * for a static include, or use the GLOBAL request in a single module to
 54 |  * produce a global definition that can be referenced from the other
 55 |  * modules.
 56 |  * 
 57 |  * Function:            Static define:           Extern define:
 58 |  * newCONSTSUB()        NEED_newCONSTSUB         NEED_newCONSTSUB_GLOBAL
 59 |  *
 60 |  */
 61 |  
 62 | 
 63 | /* To verify whether ppport.h is needed for your module, and whether any
 64 |  * special defines should be used, ppport.h can be run through Perl to check
 65 |  * your source code. Simply say:
 66 |  * 
 67 |  * 	perl -x ppport.h *.c *.h *.xs foo/bar*.c [etc]
 68 |  * 
 69 |  * The result will be a list of patches suggesting changes that should at
 70 |  * least be acceptable, if not necessarily the most efficient solution, or a
 71 |  * fix for all possible problems. It won't catch where dTHR is needed, and
 72 |  * doesn't attempt to account for global macro or function definitions,
 73 |  * nested includes, typemaps, etc.
 74 |  * 
 75 |  * In order to test for the need of dTHR, please try your module under a
 76 |  * recent version of Perl that has threading compiled-in.
 77 |  *
 78 |  */ 
 79 | 
 80 | 
 81 | /*
 82 | #!/usr/bin/perl
 83 | @ARGV = ("*.xs") if !@ARGV;
 84 | %badmacros = %funcs = %macros = (); $replace = 0;
 85 | foreach (<DATA>) {
 86 | 	$funcs{$1} = 1 if /Provide:\s+(\S+)/;
 87 | 	$macros{$1} = 1 if /^#\s*define\s+([a-zA-Z0-9_]+)/;
 88 | 	$replace = $1 if /Replace:\s+(\d+)/;
 89 | 	$badmacros{$2}=$1 if $replace and /^#\s*define\s+([a-zA-Z0-9_]+).*?\s+([a-zA-Z0-9_]+)/;
 90 | 	$badmacros{$1}=$2 if /Replace (\S+) with (\S+)/;
 91 | }
 92 | foreach $filename (map(glob($_),@ARGV)) {
 93 | 	unless (open(IN, "<$filename")) {
 94 | 		warn "Unable to read from $file: $!\n";
 95 | 		next;
 96 | 	}
 97 | 	print "Scanning $filename...\n";
 98 | 	$c = ""; while (<IN>) { $c .= $_; } close(IN);
 99 | 	$need_include = 0; %add_func = (); $changes = 0;
100 | 	$has_include = ($c =~ /#.*include.*ppport/m);
101 | 
102 | 	foreach $func (keys %funcs) {
103 | 		if ($c =~ /#.*define.*\bNEED_$func(_GLOBAL)?\b/m) {
104 | 			if ($c !~ /\b$func\b/m) {
105 | 				print "If $func isn't needed, you don't need to request it.\n" if
106 | 				$changes += ($c =~ s/^.*#.*define.*\bNEED_$func\b.*\n//m);
107 | 			} else {
108 | 				print "Uses $func\n";
109 | 				$need_include = 1;
110 | 			}
111 | 		} else {
112 | 			if ($c =~ /\b$func\b/m) {
113 | 				$add_func{$func} =1 ;
114 | 				print "Uses $func\n";
115 | 				$need_include = 1;
116 | 			}
117 | 		}
118 | 	}
119 | 
120 | 	if (not $need_include) {
121 | 		foreach $macro (keys %macros) {
122 | 			if ($c =~ /\b$macro\b/m) {
123 | 				print "Uses $macro\n";
124 | 				$need_include = 1;
125 | 			}
126 | 		}
127 | 	}
128 | 
129 | 	foreach $badmacro (keys %badmacros) {
130 | 		if ($c =~ /\b$badmacro\b/m) {
131 | 			$changes += ($c =~ s/\b$badmacro\b/$badmacros{$badmacro}/gm);
132 | 			print "Uses $badmacros{$badmacro} (instead of $badmacro)\n";
133 | 			$need_include = 1;
134 | 		}
135 | 	}
136 | 	
137 | 	if (scalar(keys %add_func) or $need_include != $has_include) {
138 | 		if (!$has_include) {
139 | 			$inc = join('',map("#define NEED_$_\n", sort keys %add_func)).
140 | 			       "#include \"ppport.h\"\n";
141 | 			$c = "$inc$c" unless $c =~ s/#.*include.*XSUB.*\n/$&$inc/m;
142 | 		} elsif (keys %add_func) {
143 | 			$inc = join('',map("#define NEED_$_\n", sort keys %add_func));
144 | 			$c = "$inc$c" unless $c =~ s/^.*#.*include.*ppport.*$/$inc$&/m;
145 | 		}
146 | 		if (!$need_include) {
147 | 			print "Doesn't seem to need ppport.h.\n";
148 | 			$c =~ s/^.*#.*include.*ppport.*\n//m;
149 | 		}
150 | 		$changes++;
151 | 	}
152 | 	
153 | 	if ($changes) {
154 | 		open(OUT,">/tmp/ppport.h.$$");
155 | 		print OUT $c;
156 | 		close(OUT);
157 | 		open(DIFF, "diff -u $filename /tmp/ppport.h.$$|");
158 | 		while (<DIFF>) { s!/tmp/ppport\.h\.$$!$filename.patched!; print STDOUT; }
159 | 		close(DIFF);
160 | 		unlink("/tmp/ppport.h.$$");
161 | 	} else {
162 | 		print "Looks OK\n";
163 | 	}
164 | }
165 | __DATA__
166 | */
167 | 
168 | #ifndef _P_P_PORTABILITY_H_
169 | #define _P_P_PORTABILITY_H_
170 | 
171 | #ifndef PERL_REVISION
172 | #   ifndef __PATCHLEVEL_H_INCLUDED__
173 | #       include <patchlevel.h>
174 | #   endif
175 | #   if !(defined(PERL_VERSION) || (SUBVERSION > 0 && defined(PATCHLEVEL)))
176 | #       include <could_not_find_Perl_patchlevel.h>
177 | #   endif
178 | #   ifndef PERL_REVISION
179 | #	define PERL_REVISION	(5)
180 |         /* Replace: 1 */
181 | #       define PERL_VERSION	PATCHLEVEL
182 | #       define PERL_SUBVERSION	SUBVERSION
183 |         /* Replace PERL_PATCHLEVEL with PERL_VERSION */
184 |         /* Replace: 0 */
185 | #   endif
186 | #endif
187 | 
188 | #define PERL_BCDVERSION ((PERL_REVISION * 0x1000000L) + (PERL_VERSION * 0x1000L) + PERL_SUBVERSION)
189 | 
190 | /* It is very unlikely that anyone will try to use this with Perl 6 
191 |    (or greater), but who knows.
192 |  */
193 | #if PERL_REVISION != 5
194 | #	error ppport.h only works with Perl version 5
195 | #endif /* PERL_REVISION != 5 */
196 | 
197 | #ifndef ERRSV
198 | #	define ERRSV perl_get_sv("@",FALSE)
199 | #endif
200 | 
201 | #if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION <= 5))
202 | /* Replace: 1 */
203 | #	define PL_Sv		Sv
204 | #	define PL_compiling	compiling
205 | #	define PL_copline	copline
206 | #	define PL_curcop	curcop
207 | #	define PL_curstash	curstash
208 | #	define PL_defgv		defgv
209 | #	define PL_dirty		dirty
210 | #	define PL_dowarn	dowarn
211 | #	define PL_hints		hints
212 | #	define PL_na		na
213 | #	define PL_perldb	perldb
214 | #	define PL_rsfp_filters	rsfp_filters
215 | #	define PL_rsfpv		rsfp
216 | #	define PL_stdingv	stdingv
217 | #	define PL_sv_no		sv_no
218 | #	define PL_sv_undef	sv_undef
219 | #	define PL_sv_yes	sv_yes
220 | /* Replace: 0 */
221 | #endif
222 | 
223 | #ifdef HASATTRIBUTE
224 | #  if (defined(__GNUC__) && defined(__cplusplus)) || defined(__INTEL_COMPILER)
225 | #    define PERL_UNUSED_DECL
226 | #  else
227 | #    define PERL_UNUSED_DECL __attribute__((unused))
228 | #  endif
229 | #else
230 | #  define PERL_UNUSED_DECL
231 | #endif
232 | 
233 | #ifndef dNOOP
234 | #  define NOOP (void)0
235 | #  define dNOOP extern int Perl___notused PERL_UNUSED_DECL
236 | #endif
237 | 
238 | #ifndef dTHR
239 | #  define dTHR          dNOOP
240 | #endif
241 | 
242 | #ifndef dTHX
243 | #  define dTHX          dNOOP
244 | #  define dTHXa(x)      dNOOP
245 | #  define dTHXoa(x)     dNOOP
246 | #endif
247 | 
248 | #ifndef pTHX
249 | #    define pTHX	void
250 | #    define pTHX_
251 | #    define aTHX
252 | #    define aTHX_
253 | #endif         
254 | 
255 | /* IV could also be a quad (say, a long long), but Perls
256 |  * capable of those should have IVSIZE already. */
257 | #if !defined(IVSIZE) && defined(LONGSIZE)
258 | #   define IVSIZE LONGSIZE
259 | #endif
260 | #ifndef IVSIZE
261 | #   define IVSIZE 4 /* A bold guess, but the best we can make. */
262 | #endif
263 | 
264 | #ifndef UVSIZE
265 | #   define UVSIZE IVSIZE
266 | #endif
267 | 
268 | #ifndef NVTYPE
269 | #   if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
270 | #       define NVTYPE long double
271 | #   else
272 | #       define NVTYPE double
273 | #   endif
274 | typedef NVTYPE NV;
275 | #endif
276 | 
277 | #ifndef INT2PTR
278 | 
279 | #if (IVSIZE == PTRSIZE) && (UVSIZE == PTRSIZE)
280 | #  define PTRV                  UV
281 | #  define INT2PTR(any,d)        (any)(d)
282 | #else
283 | #  if PTRSIZE == LONGSIZE
284 | #    define PTRV                unsigned long
285 | #  else
286 | #    define PTRV                unsigned
287 | #  endif
288 | #  define INT2PTR(any,d)        (any)(PTRV)(d)
289 | #endif
290 | #define NUM2PTR(any,d)  (any)(PTRV)(d)
291 | #define PTR2IV(p)       INT2PTR(IV,p)
292 | #define PTR2UV(p)       INT2PTR(UV,p)
293 | #define PTR2NV(p)       NUM2PTR(NV,p)
294 | #if PTRSIZE == LONGSIZE
295 | #  define PTR2ul(p)     (unsigned long)(p)
296 | #else
297 | #  define PTR2ul(p)     INT2PTR(unsigned long,p)        
298 | #endif
299 | 
300 | #endif /* !INT2PTR */
301 | 
302 | #ifndef boolSV
303 | #	define boolSV(b) ((b) ? &PL_sv_yes : &PL_sv_no)
304 | #endif
305 | 
306 | #ifndef gv_stashpvn
307 | #	define gv_stashpvn(str,len,flags) gv_stashpv(str,flags)
308 | #endif
309 | 
310 | #ifndef newSVpvn
311 | #	define newSVpvn(data,len) ((len) ? newSVpv ((data), (len)) : newSVpv ("", 0))
312 | #endif
313 | 
314 | #ifndef newRV_inc
315 | /* Replace: 1 */
316 | #	define newRV_inc(sv) newRV(sv)
317 | /* Replace: 0 */
318 | #endif
319 | 
320 | /* DEFSV appears first in 5.004_56 */
321 | #ifndef DEFSV
322 | #  define DEFSV	GvSV(PL_defgv)
323 | #endif
324 | 
325 | #ifndef SAVE_DEFSV
326 | #    define SAVE_DEFSV SAVESPTR(GvSV(PL_defgv))
327 | #endif
328 | 
329 | #ifndef newRV_noinc
330 | #  ifdef __GNUC__
331 | #    define newRV_noinc(sv)               \
332 |       ({                                  \
333 |           SV *nsv = (SV*)newRV(sv);       \
334 |           SvREFCNT_dec(sv);               \
335 |           nsv;                            \
336 |       })
337 | #  else
338 | #    if defined(USE_THREADS)
339 | static SV * newRV_noinc (SV * sv)
340 | {
341 |           SV *nsv = (SV*)newRV(sv);       
342 |           SvREFCNT_dec(sv);               
343 |           return nsv;                     
344 | }
345 | #    else
346 | #      define newRV_noinc(sv)    \
347 |         (PL_Sv=(SV*)newRV(sv), SvREFCNT_dec(sv), (SV*)PL_Sv)
348 | #    endif
349 | #  endif
350 | #endif
351 | 
352 | /* Provide: newCONSTSUB */
353 | 
354 | /* newCONSTSUB from IO.xs is in the core starting with 5.004_63 */
355 | #if (PERL_VERSION < 4) || ((PERL_VERSION == 4) && (PERL_SUBVERSION < 63))
356 | 
357 | #if defined(NEED_newCONSTSUB)
358 | static
359 | #else
360 | extern void newCONSTSUB(HV * stash, char * name, SV *sv);
361 | #endif
362 | 
363 | #if defined(NEED_newCONSTSUB) || defined(NEED_newCONSTSUB_GLOBAL)
364 | void
365 | newCONSTSUB(stash,name,sv)
366 | HV *stash;
367 | char *name;
368 | SV *sv;
369 | {
370 | 	U32 oldhints = PL_hints;
371 | 	HV *old_cop_stash = PL_curcop->cop_stash;
372 | 	HV *old_curstash = PL_curstash;
373 | 	line_t oldline = PL_curcop->cop_line;
374 | 	PL_curcop->cop_line = PL_copline;
375 | 
376 | 	PL_hints &= ~HINT_BLOCK_SCOPE;
377 | 	if (stash)
378 | 		PL_curstash = PL_curcop->cop_stash = stash;
379 | 
380 | 	newSUB(
381 | 
382 | #if (PERL_VERSION < 3) || ((PERL_VERSION == 3) && (PERL_SUBVERSION < 22))
383 |      /* before 5.003_22 */
384 | 		start_subparse(),
385 | #else
386 | #  if (PERL_VERSION == 3) && (PERL_SUBVERSION == 22)
387 |      /* 5.003_22 */
388 |      		start_subparse(0),
389 | #  else
390 |      /* 5.003_23  onwards */
391 |      		start_subparse(FALSE, 0),
392 | #  endif
393 | #endif
394 | 
395 | 		newSVOP(OP_CONST, 0, newSVpv(name,0)),
396 | 		newSVOP(OP_CONST, 0, &PL_sv_no),   /* SvPV(&PL_sv_no) == "" -- GMB */
397 | 		newSTATEOP(0, Nullch, newSVOP(OP_CONST, 0, sv))
398 | 	);
399 | 
400 | 	PL_hints = oldhints;
401 | 	PL_curcop->cop_stash = old_cop_stash;
402 | 	PL_curstash = old_curstash;
403 | 	PL_curcop->cop_line = oldline;
404 | }
405 | #endif
406 | 
407 | #endif /* newCONSTSUB */
408 | 
409 | #ifndef START_MY_CXT
410 | 
411 | /*
412 |  * Boilerplate macros for initializing and accessing interpreter-local
413 |  * data from C.  All statics in extensions should be reworked to use
414 |  * this, if you want to make the extension thread-safe.  See ext/re/re.xs
415 |  * for an example of the use of these macros.
416 |  *
417 |  * Code that uses these macros is responsible for the following:
418 |  * 1. #define MY_CXT_KEY to a unique string, e.g. "DynaLoader_guts"
419 |  * 2. Declare a typedef named my_cxt_t that is a structure that contains
420 |  *    all the data that needs to be interpreter-local.
421 |  * 3. Use the START_MY_CXT macro after the declaration of my_cxt_t.
422 |  * 4. Use the MY_CXT_INIT macro such that it is called exactly once
423 |  *    (typically put in the BOOT: section).
424 |  * 5. Use the members of the my_cxt_t structure everywhere as
425 |  *    MY_CXT.member.
426 |  * 6. Use the dMY_CXT macro (a declaration) in all the functions that
427 |  *    access MY_CXT.
428 |  */
429 | 
430 | #if defined(MULTIPLICITY) || defined(PERL_OBJECT) || \
431 |     defined(PERL_CAPI)    || defined(PERL_IMPLICIT_CONTEXT)
432 | 
433 | /* This must appear in all extensions that define a my_cxt_t structure,
434 |  * right after the definition (i.e. at file scope).  The non-threads
435 |  * case below uses it to declare the data as static. */
436 | #define START_MY_CXT
437 | 
438 | #if (PERL_VERSION < 4 || (PERL_VERSION == 4 && PERL_SUBVERSION < 68 ))
439 | /* Fetches the SV that keeps the per-interpreter data. */
440 | #define dMY_CXT_SV \
441 | 	SV *my_cxt_sv = perl_get_sv(MY_CXT_KEY, FALSE)
442 | #else /* >= perl5.004_68 */
443 | #define dMY_CXT_SV \
444 | 	SV *my_cxt_sv = *hv_fetch(PL_modglobal, MY_CXT_KEY,		\
445 | 				  sizeof(MY_CXT_KEY)-1, TRUE)
446 | #endif /* < perl5.004_68 */
447 | 
448 | /* This declaration should be used within all functions that use the
449 |  * interpreter-local data. */
450 | #define dMY_CXT	\
451 | 	dMY_CXT_SV;							\
452 | 	my_cxt_t *my_cxtp = INT2PTR(my_cxt_t*,SvUV(my_cxt_sv))
453 | 
454 | /* Creates and zeroes the per-interpreter data.
455 |  * (We allocate my_cxtp in a Perl SV so that it will be released when
456 |  * the interpreter goes away.) */
457 | #define MY_CXT_INIT \
458 | 	dMY_CXT_SV;							\
459 | 	/* newSV() allocates one more than needed */			\
460 | 	my_cxt_t *my_cxtp = (my_cxt_t*)SvPVX(newSV(sizeof(my_cxt_t)-1));\
461 | 	Zero(my_cxtp, 1, my_cxt_t);					\
462 | 	sv_setuv(my_cxt_sv, PTR2UV(my_cxtp))
463 | 
464 | /* This macro must be used to access members of the my_cxt_t structure.
465 |  * e.g. MYCXT.some_data */
466 | #define MY_CXT		(*my_cxtp)
467 | 
468 | /* Judicious use of these macros can reduce the number of times dMY_CXT
469 |  * is used.  Use is similar to pTHX, aTHX etc. */
470 | #define pMY_CXT		my_cxt_t *my_cxtp
471 | #define pMY_CXT_	pMY_CXT,
472 | #define _pMY_CXT	,pMY_CXT
473 | #define aMY_CXT		my_cxtp
474 | #define aMY_CXT_	aMY_CXT,
475 | #define _aMY_CXT	,aMY_CXT
476 | 
477 | #else /* single interpreter */
478 | 
479 | #define START_MY_CXT	static my_cxt_t my_cxt;
480 | #define dMY_CXT_SV	dNOOP
481 | #define dMY_CXT		dNOOP
482 | #define MY_CXT_INIT	NOOP
483 | #define MY_CXT		my_cxt
484 | 
485 | #define pMY_CXT		void
486 | #define pMY_CXT_
487 | #define _pMY_CXT
488 | #define aMY_CXT
489 | #define aMY_CXT_
490 | #define _aMY_CXT
491 | 
492 | #endif 
493 | 
494 | #endif /* START_MY_CXT */
495 | 
496 | #ifndef IVdf
497 | #  if IVSIZE == LONGSIZE
498 | #       define	IVdf		"ld"
499 | #       define	UVuf		"lu"
500 | #       define	UVof		"lo"
501 | #       define	UVxf		"lx"
502 | #       define	UVXf		"lX"
503 | #   else
504 | #       if IVSIZE == INTSIZE
505 | #           define	IVdf	"d"
506 | #           define	UVuf	"u"
507 | #           define	UVof	"o"
508 | #           define	UVxf	"x"
509 | #           define	UVXf	"X"
510 | #       endif
511 | #   endif
512 | #endif
513 | 
514 | #ifndef NVef
515 | #   if defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) && \
516 | 	defined(PERL_PRIfldbl) /* Not very likely, but let's try anyway. */ 
517 | #       define NVef		PERL_PRIeldbl
518 | #       define NVff		PERL_PRIfldbl
519 | #       define NVgf		PERL_PRIgldbl
520 | #   else
521 | #       define NVef		"e"
522 | #       define NVff		"f"
523 | #       define NVgf		"g"
524 | #   endif
525 | #endif
526 | 
527 | #ifndef AvFILLp			/* Older perls (<=5.003) lack AvFILLp */
528 | #   define AvFILLp AvFILL
529 | #endif
530 | 
531 | #ifdef SvPVbyte
532 | #   if PERL_REVISION == 5 && PERL_VERSION < 7
533 |        /* SvPVbyte does not work in perl-5.6.1, borrowed version for 5.7.3 */
534 | #       undef SvPVbyte
535 | #       define SvPVbyte(sv, lp) \
536 |           ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == (SVf_POK) \
537 |            ? ((lp = SvCUR(sv)), SvPVX(sv)) : my_sv_2pvbyte(aTHX_ sv, &lp))
538 |        static char *
539 |        my_sv_2pvbyte(pTHX_ register SV *sv, STRLEN *lp)
540 |        {   
541 |            sv_utf8_downgrade(sv,0);
542 |            return SvPV(sv,*lp);
543 |        }
544 | #   endif
545 | #else
546 | #   define SvPVbyte SvPV
547 | #endif
548 | 
549 | #ifndef SvPV_nolen
550 | #   define SvPV_nolen(sv) \
551 |         ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \
552 |          ? SvPVX(sv) : sv_2pv_nolen(sv))
553 |     static char *
554 |     sv_2pv_nolen(pTHX_ register SV *sv)
555 |     {   
556 |         STRLEN n_a;
557 |         return sv_2pv(sv, &n_a);
558 |     }
559 | #endif
560 | 
561 | #ifndef get_cv
562 | #   define get_cv(name,create) perl_get_cv(name,create)
563 | #endif
564 | 
565 | #ifndef get_sv
566 | #   define get_sv(name,create) perl_get_sv(name,create)
567 | #endif
568 | 
569 | #ifndef get_av
570 | #   define get_av(name,create) perl_get_av(name,create)
571 | #endif
572 | 
573 | #ifndef get_hv
574 | #   define get_hv(name,create) perl_get_hv(name,create)
575 | #endif
576 | 
577 | #ifndef call_argv
578 | #   define call_argv perl_call_argv
579 | #endif
580 | 
581 | #ifndef call_method
582 | #   define call_method perl_call_method
583 | #endif
584 | 
585 | #ifndef call_pv
586 | #   define call_pv perl_call_pv
587 | #endif
588 | 
589 | #ifndef call_sv
590 | #   define call_sv perl_call_sv
591 | #endif
592 | 
593 | #ifndef PERL_SCAN_GREATER_THAN_UV_MAX
594 | #   define PERL_SCAN_GREATER_THAN_UV_MAX 0x02
595 | #endif
596 | 
597 | #ifndef PERL_SCAN_SILENT_ILLDIGIT
598 | #   define PERL_SCAN_SILENT_ILLDIGIT 0x04
599 | #endif
600 | 
601 | #ifndef PERL_SCAN_ALLOW_UNDERSCORES
602 | #   define PERL_SCAN_ALLOW_UNDERSCORES 0x01
603 | #endif
604 | 
605 | #ifndef PERL_SCAN_DISALLOW_PREFIX
606 | #   define PERL_SCAN_DISALLOW_PREFIX 0x02
607 | #endif
608 | 
609 | #if (PERL_VERSION >= 6)
610 | #define I32_CAST
611 | #else
612 | #define I32_CAST (I32*)
613 | #endif
614 | 
615 | #ifndef grok_hex
616 | static UV _grok_hex (char *string, STRLEN *len, I32 *flags, NV *result) {
617 |     NV r = scan_hex(string, *len, I32_CAST len);
618 |     if (r > UV_MAX) {
619 |         *flags |= PERL_SCAN_GREATER_THAN_UV_MAX;
620 |         if (result) *result = r;
621 |         return UV_MAX;
622 |     }
623 |     return (UV)r;
624 | }
625 |         
626 | #   define grok_hex(string, len, flags, result)     \
627 |         _grok_hex((string), (len), (flags), (result))
628 | #endif 
629 | 
630 | #ifndef grok_oct
631 | static UV _grok_oct (char *string, STRLEN *len, I32 *flags, NV *result) {
632 |     NV r = scan_oct(string, *len, I32_CAST len);
633 |     if (r > UV_MAX) {
634 |         *flags |= PERL_SCAN_GREATER_THAN_UV_MAX;
635 |         if (result) *result = r;
636 |         return UV_MAX;
637 |     }
638 |     return (UV)r;
639 | }
640 | 
641 | #   define grok_oct(string, len, flags, result)     \
642 |         _grok_oct((string), (len), (flags), (result))
643 | #endif
644 | 
645 | #ifndef grok_bin
646 | static UV _grok_bin (char *string, STRLEN *len, I32 *flags, NV *result) {
647 |     NV r = scan_bin(string, *len, I32_CAST len);
648 |     if (r > UV_MAX) {
649 |         *flags |= PERL_SCAN_GREATER_THAN_UV_MAX;
650 |         if (result) *result = r;
651 |         return UV_MAX;
652 |     }
653 |     return (UV)r;
654 | }
655 | 
656 | #   define grok_bin(string, len, flags, result)     \
657 |         _grok_bin((string), (len), (flags), (result))
658 | #endif
659 | 
660 | #ifndef IN_LOCALE
661 | #   define IN_LOCALE \
662 | 	(PL_curcop == &PL_compiling ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)
663 | #endif
664 | 
665 | #ifndef IN_LOCALE_RUNTIME
666 | #   define IN_LOCALE_RUNTIME   (PL_curcop->op_private & HINT_LOCALE)
667 | #endif
668 | 
669 | #ifndef IN_LOCALE_COMPILETIME
670 | #   define IN_LOCALE_COMPILETIME   (PL_hints & HINT_LOCALE)
671 | #endif
672 | 
673 | 
674 | #ifndef IS_NUMBER_IN_UV
675 | #   define IS_NUMBER_IN_UV		            0x01   
676 | #   define IS_NUMBER_GREATER_THAN_UV_MAX    0x02
677 | #   define IS_NUMBER_NOT_INT	            0x04
678 | #   define IS_NUMBER_NEG		            0x08
679 | #   define IS_NUMBER_INFINITY	            0x10 
680 | #   define IS_NUMBER_NAN                    0x20  
681 | #endif
682 |    
683 | #ifndef grok_numeric_radix
684 | #   define GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)
685 | 
686 | #define grok_numeric_radix Perl_grok_numeric_radix
687 |     
688 | bool
689 | Perl_grok_numeric_radix(pTHX_ const char **sp, const char *send)
690 | {
691 | #ifdef USE_LOCALE_NUMERIC
692 | #if (PERL_VERSION >= 6)
693 |     if (PL_numeric_radix_sv && IN_LOCALE) { 
694 |         STRLEN len;
695 |         char* radix = SvPV(PL_numeric_radix_sv, len);
696 |         if (*sp + len <= send && memEQ(*sp, radix, len)) {
697 |             *sp += len;
698 |             return TRUE; 
699 |         }
700 |     }
701 | #else
702 |     /* pre5.6.0 perls don't have PL_numeric_radix_sv so the radix
703 |      * must manually be requested from locale.h */
704 | #include <locale.h>
705 |     struct lconv *lc = localeconv();
706 |     char *radix = lc->decimal_point;
707 |     if (radix && IN_LOCALE) { 
708 |         STRLEN len = strlen(radix);
709 |         if (*sp + len <= send && memEQ(*sp, radix, len)) {
710 |             *sp += len;
711 |             return TRUE; 
712 |         }
713 |     }
714 | #endif /* PERL_VERSION */
715 | #endif /* USE_LOCALE_NUMERIC */
716 |     /* always try "." if numeric radix didn't match because
717 |      * we may have data from different locales mixed */
718 |     if (*sp < send && **sp == '.') {
719 |         ++*sp;
720 |         return TRUE;
721 |     }
722 |     return FALSE;
723 | }
724 | #endif /* grok_numeric_radix */
725 | 
726 | #ifndef grok_number
727 | 
728 | #define grok_number Perl_grok_number
729 | 
730 | int
731 | Perl_grok_number(pTHX_ const char *pv, STRLEN len, UV *valuep)
732 | {
733 |   const char *s = pv;
734 |   const char *send = pv + len;
735 |   const UV max_div_10 = UV_MAX / 10;
736 |   const char max_mod_10 = UV_MAX % 10;
737 |   int numtype = 0;
738 |   int sawinf = 0;
739 |   int sawnan = 0;
740 | 
741 |   while (s < send && isSPACE(*s))
742 |     s++;
743 |   if (s == send) {
744 |     return 0;
745 |   } else if (*s == '-') {
746 |     s++;
747 |     numtype = IS_NUMBER_NEG;
748 |   }
749 |   else if (*s == '+')
750 |   s++;
751 | 
752 |   if (s == send)
753 |     return 0;
754 | 
755 |   /* next must be digit or the radix separator or beginning of infinity */
756 |   if (isDIGIT(*s)) {
757 |     /* UVs are at least 32 bits, so the first 9 decimal digits cannot
758 |        overflow.  */
759 |     UV value = *s - '0';
760 |     /* This construction seems to be more optimiser friendly.
761 |        (without it gcc does the isDIGIT test and the *s - '0' separately)
762 |        With it gcc on arm is managing 6 instructions (6 cycles) per digit.
763 |        In theory the optimiser could deduce how far to unroll the loop
764 |        before checking for overflow.  */
765 |     if (++s < send) {
766 |       int digit = *s - '0';
767 |       if (digit >= 0 && digit <= 9) {
768 |         value = value * 10 + digit;
769 |         if (++s < send) {
770 |           digit = *s - '0';
771 |           if (digit >= 0 && digit <= 9) {
772 |             value = value * 10 + digit;
773 |             if (++s < send) {
774 |               digit = *s - '0';
775 |               if (digit >= 0 && digit <= 9) {
776 |                 value = value * 10 + digit;
777 | 		        if (++s < send) {
778 |                   digit = *s - '0';
779 |                   if (digit >= 0 && digit <= 9) {
780 |                     value = value * 10 + digit;
781 |                     if (++s < send) {
782 |                       digit = *s - '0';
783 |                       if (digit >= 0 && digit <= 9) {
784 |                         value = value * 10 + digit;
785 |                         if (++s < send) {
786 |                           digit = *s - '0';
787 |                           if (digit >= 0 && digit <= 9) {
788 |                             value = value * 10 + digit;
789 |                             if (++s < send) {
790 |                               digit = *s - '0';
791 |                               if (digit >= 0 && digit <= 9) {
792 |                                 value = value * 10 + digit;
793 |                                 if (++s < send) {
794 |                                   digit = *s - '0';
795 |                                   if (digit >= 0 && digit <= 9) {
796 |                                     value = value * 10 + digit;
797 |                                     if (++s < send) {
798 |                                       /* Now got 9 digits, so need to check
799 |                                          each time for overflow.  */
800 |                                       digit = *s - '0';
801 |                                       while (digit >= 0 && digit <= 9
802 |                                              && (value < max_div_10
803 |                                                  || (value == max_div_10
804 |                                                      && digit <= max_mod_10))) {
805 |                                         value = value * 10 + digit;
806 |                                         if (++s < send)
807 |                                           digit = *s - '0';
808 |                                         else
809 |                                           break;
810 |                                       }
811 |                                       if (digit >= 0 && digit <= 9
812 |                                           && (s < send)) {
813 |                                         /* value overflowed.
814 |                                            skip the remaining digits, don't
815 |                                            worry about setting *valuep.  */
816 |                                         do {
817 |                                           s++;
818 |                                         } while (s < send && isDIGIT(*s));
819 |                                         numtype |=
820 |                                           IS_NUMBER_GREATER_THAN_UV_MAX;
821 |                                         goto skip_value;
822 |                                       }
823 |                                     }
824 |                                   }
825 | 				                }
826 |                               }
827 |                             }
828 |                           }
829 |                         }
830 |                       }
831 |                     }
832 |                   }
833 |                 }
834 |               }
835 |             }
836 |           }
837 | 	    }
838 |       }
839 |     }
840 |     numtype |= IS_NUMBER_IN_UV;
841 |     if (valuep)
842 |       *valuep = value;
843 | 
844 |   skip_value:
845 |     if (GROK_NUMERIC_RADIX(&s, send)) {
846 |       numtype |= IS_NUMBER_NOT_INT;
847 |       while (s < send && isDIGIT(*s))  /* optional digits after the radix */
848 |         s++;
849 |     }
850 |   }
851 |   else if (GROK_NUMERIC_RADIX(&s, send)) {
852 |     numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */
853 |     /* no digits before the radix means we need digits after it */
854 |     if (s < send && isDIGIT(*s)) {
855 |       do {
856 |         s++;
857 |       } while (s < send && isDIGIT(*s));
858 |       if (valuep) {
859 |         /* integer approximation is valid - it's 0.  */
860 |         *valuep = 0;
861 |       }
862 |     }
863 |     else
864 |       return 0;
865 |   } else if (*s == 'I' || *s == 'i') {
866 |     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
867 |     s++; if (s == send || (*s != 'F' && *s != 'f')) return 0;
868 |     s++; if (s < send && (*s == 'I' || *s == 'i')) {
869 |       s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
870 |       s++; if (s == send || (*s != 'I' && *s != 'i')) return 0;
871 |       s++; if (s == send || (*s != 'T' && *s != 't')) return 0;
872 |       s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0;
873 |       s++;
874 |     }
875 |     sawinf = 1;
876 |   } else if (*s == 'N' || *s == 'n') {
877 |     /* XXX TODO: There are signaling NaNs and quiet NaNs. */
878 |     s++; if (s == send || (*s != 'A' && *s != 'a')) return 0;
879 |     s++; if (s == send || (*s != 'N' && *s != 'n')) return 0;
880 |     s++;
881 |     sawnan = 1;
882 |   } else
883 |     return 0;
884 | 
885 |   if (sawinf) {
886 |     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
887 |     numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT;
888 |   } else if (sawnan) {
889 |     numtype &= IS_NUMBER_NEG; /* Keep track of sign  */
890 |     numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT;
891 |   } else if (s < send) {
892 |     /* we can have an optional exponent part */
893 |     if (*s == 'e' || *s == 'E') {
894 |       /* The only flag we keep is sign.  Blow away any "it's UV"  */
895 |       numtype &= IS_NUMBER_NEG;
896 |       numtype |= IS_NUMBER_NOT_INT;
897 |       s++;
898 |       if (s < send && (*s == '-' || *s == '+'))
899 |         s++;
900 |       if (s < send && isDIGIT(*s)) {
901 |         do {
902 |           s++;
903 |         } while (s < send && isDIGIT(*s));
904 |       }
905 |       else
906 |       return 0;
907 |     }
908 |   }
909 |   while (s < send && isSPACE(*s))
910 |     s++;
911 |   if (s >= send)
912 |     return numtype;
913 |   if (len == 10 && memEQ(pv, "0 but true", 10)) {
914 |     if (valuep)
915 |       *valuep = 0;
916 |     return IS_NUMBER_IN_UV;
917 |   }
918 |   return 0;
919 | }
920 | #endif /* grok_number */
921 | #endif /* _P_P_PORTABILITY_H_ */
922 | 
923 | /* End of File ppport.h */
924 | 


--------------------------------------------------------------------------------
/t/00-load.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 1;
 7 | 
 8 | use HTML::Tidy;
 9 | use HTML::Tidy::Message;
10 | 
11 | diag( "Testing HTML::Tidy $HTML::Tidy::VERSION, tidyp " . HTML::Tidy->tidyp_version() . ", Perl $], $^X" );
12 | pass( 'Modules loaded' );
13 | 


--------------------------------------------------------------------------------
/t/cfg-for-parse.cfg:
--------------------------------------------------------------------------------
1 | show-warnings: 0
2 | 


--------------------------------------------------------------------------------
/t/cfg-for-parse.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 3;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $html = do { local $/; <DATA> };
11 | 
12 | my @expected_messages = split /\n/, q{
13 | DATA (3:1) Error: <neck> is not recognized!
14 | DATA (8:1) Error: <x> is not recognized!
15 | DATA (9:1) Error: <y> is not recognized!
16 | };
17 | 
18 | chomp @expected_messages;
19 | shift @expected_messages; # First one's blank
20 | 
21 | my $tidy = HTML::Tidy->new( { config_file => 't/cfg-for-parse.cfg' } );
22 | isa_ok( $tidy, 'HTML::Tidy' );
23 | 
24 | my $rc = $tidy->parse( 'DATA', $html );
25 | ok( $rc, 'Parsed OK' );
26 | 
27 | my @returned = map { $_->as_string } $tidy->messages;
28 | s/[\r\n]+\z// for @returned;
29 | is_deeply( \@returned, \@expected_messages, 'Matching errors' );
30 | 
31 | 
32 | __DATA__
33 | <HTML>
34 | <HEAD>
35 | <NECK>...</NECK>
36 | <TITLE>Foo
37 | </HEAD>
38 | <BODY>
39 | </B>
40 | <X>
41 | <Y>
42 | </I>
43 | </BODY>
44 | 


--------------------------------------------------------------------------------
/t/clean-crash.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | # From a bug found by Aaron Patterson
 7 | #Full context and any attached attachments can be found at:
 8 | #<URL: https://rt.cpan.org/Ticket/Display.html?id=7254 >
 9 | #Here's a snippet of code to repro the bug, it produces an 'Illegal instruction' error
10 | 
11 | use Test::More tests => 2;
12 | 
13 | use HTML::Tidy;
14 | 
15 | my $html = do { local $/; <DATA> };
16 | 
17 | my $tidy = HTML::Tidy->new;
18 | isa_ok( $tidy, 'HTML::Tidy' );
19 | $tidy->ignore( type => TIDY_INFO );
20 | $tidy->clean( $html );
21 | 
22 | my @mess = map { $_ ? $_->as_string() : undef } $tidy->messages();
23 | pass( 'Ended OK' );
24 | 
25 | __DATA__
26 | <form action="http://www.alternation.net/cobra/index.pl">
27 | <td><input name="random" type="image" value="random creature" src="http://www.creaturesinmyhead.com/images/random.gif"></td>
28 | </form>
29 | 


--------------------------------------------------------------------------------
/t/clean.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -T
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::Exception;
 7 | use Test::More tests => 3;
 8 | 
 9 | use HTML::Tidy;
10 | 
11 | my $tidy = HTML::Tidy->new;
12 | isa_ok( $tidy, 'HTML::Tidy' );
13 | 
14 | my $expected_pattern = 'Usage: clean($str [, $str...])';
15 | throws_ok {
16 |     $tidy->clean();
17 | } qr/\Q$expected_pattern\E/,
18 | 'clean() croaks when not given a string or list of strings';
19 | 
20 | like(
21 |     $tidy->clean(''),
22 |     _expected_empty_html(),
23 |     '$tidy->clean("") returns empty HTML document',
24 | );
25 | 
26 | sub _expected_empty_html {
27 |     return qr{<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
28 | <html>
29 | <head>
30 | <meta name="generator" content="[^"]+">
31 | <title></title>
32 | </head>
33 | <body>
34 | </body>
35 | </html>
36 | };
37 | }
38 | 


--------------------------------------------------------------------------------
/t/extra-quote.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | # Response to an HTML::Lint request that it handle mishandled quotes.
 7 | # See https://rt.cpan.org/Ticket/Display.html?id=1459
 8 | 
 9 | use Test::More tests => 4;
10 | 
11 | use HTML::Tidy;
12 | 
13 | my $html = do { local $/ = undef; <DATA> };
14 | 
15 | my $tidy = HTML::Tidy->new;
16 | isa_ok( $tidy, 'HTML::Tidy' );
17 | 
18 | $tidy->ignore( text => qr/DOCTYPE/ );
19 | my $rc = $tidy->parse( '-', $html );
20 | ok( $rc, 'Parsed OK' );
21 | 
22 | my @expected = split /\n/, q{
23 | - (4:1) Warning: <img> unexpected or duplicate quote mark
24 | - (4:1) Warning: <img> escaping malformed URI reference
25 | - (4:1) Warning: <img> lacks "alt" attribute
26 | };
27 | chomp @expected;
28 | shift @expected; # First one's blank
29 | 
30 | my @messages = $tidy->messages;
31 | is( scalar @messages, 3, 'Should have exactly three messages' );
32 | 
33 | my @strings = map { $_->as_string } @messages;
34 | s/[\r\n]+\z// for @strings;
35 | is_deeply( \@strings, \@expected, 'Matching warnings' );
36 | 
37 | __DATA__
38 | <html>
39 | <title>Bogo</title>
40 | <body>
41 | <img src="foo alt="">
42 | </body>
43 | </html>
44 | 


--------------------------------------------------------------------------------
/t/ignore-text.t:
--------------------------------------------------------------------------------
  1 | #!perl -T
  2 | 
  3 | use warnings;
  4 | use strict;
  5 | 
  6 | use Test::More tests => 3;
  7 | 
  8 | use HTML::Tidy;
  9 | 
 10 | my $html = do { local $/; <DATA> };
 11 | 
 12 | my @expected_messages = split /\n/, q{
 13 | DATA (24:XX) Warning: unescaped & which should be written as &amp;
 14 | DATA (24:XX) Warning: unescaped & which should be written as &amp;
 15 | };
 16 | 
 17 | chomp @expected_messages;
 18 | shift @expected_messages; # First one's blank
 19 | 
 20 | IGNORE_BOGOTAG: {
 21 |     my $tidy = HTML::Tidy->new;
 22 |     isa_ok( $tidy, 'HTML::Tidy' );
 23 | 
 24 |     $tidy->ignore( text => qr/bogotag/ );
 25 |     $tidy->ignore( text => [ qr/UNESCAPED/, qr/doctype/i ] );
 26 |     # The qr/UNESCAPED/ should not ignore anything because there's no /i
 27 |     my $rc = $tidy->parse( 'DATA', $html );
 28 |     ok( $rc, 'Parsed OK' );
 29 | 
 30 |     my @returned = map { $_->as_string } $tidy->messages;
 31 |     munge_returned( \@returned, 'DATA' );
 32 |     s/[\r\n]+\z// for @returned;
 33 |     is_deeply( \@returned, \@expected_messages, 'Matching warnings' );
 34 | }
 35 | 
 36 | sub munge_returned {
 37 |     # non-1 line numbers are not reliable across libtidies
 38 |     my $returned = shift;
 39 |     my $start_line = shift || '-';
 40 | 
 41 |     for my $line ( @{$returned} ) {
 42 |         next if $line =~ m/$start_line \(\d+:1\)/;
 43 |         $line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/;
 44 |     }
 45 | }
 46 | __DATA__
 47 | <HTML>
 48 | <HEAD>
 49 | 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
 50 | 	<META NAME="Author" Content="Andy Lester">
 51 |     	<TITLE>petdance.com: Andy Lester's Programming &amp; Writing</TITLE>
 52 | 
 53 | <STYLE type="text/css">
 54 | <!--
 55 | DIV.TOC H2 {
 56 |     font-family : tahoma, arial, helvetica, sans-serif;
 57 |     font-weight : bold;
 58 |     font-size : 15pt;
 59 |     }
 60 | 
 61 | DIV.TOC P {
 62 |     font-size : 12pt;
 63 |     font-weight : normal;
 64 |     }
 65 | -->
 66 | </STYLE>
 67 | </HEAD>
 68 | <BODY BGCOLOR="white">
 69 | <BOGOTAG>
 70 |     <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT>
 71 | 	<DIV CLASS="TOC">
 72 | 	<h2>Perl, Programming &amp; Writing</h2>
 73 | 	My <A HREF="http://www.oreillynet.com/pub/au/1249">Technology &amp; publishing blog</A> at oreillynet.com<BR>
 74 | 	My <A HREF="http://use.perl.org/~petdance/journal/">Perl-specific and personal blog</A> at use.perl.org<BR>
 75 | 	<A HREF="resume/">Andy Lester's resume</A><BR>
 76 | 	Andy's <A HREF="perl/">Perl Pages</A>
 77 | 
 78 | 	<H2>The Lester Family</h2>
 79 | 	<A HREF="/andy/">Andy</A>: The Dad<BR>
 80 | 	<A HREF="http://www.parsley.org/">Amy</A>: The Mom<BR>
 81 | 	<A HREF="http://www.parsley.org/quinn/">Quinn</A>: The Girl<BR>
 82 | 	<A HREF="/baxter/">Baxter</A>: The Dog<BR>
 83 | 	<A HREF="http://www.familytreemaker.com/users/l/e/s/Andrew-R-Lester/">Our family trees</A>
 84 | 
 85 | 	<H2>About Andy &amp; Amy</H2>
 86 |     	<A HREF="/us/mush/">The Page Of Mush</A><BR>
 87 | 	<A HREF="/us/people/">People we're looking for</A><BR>
 88 |     	<A HREF="/us/herald/">Article about us and how we met</A> from the Northwest Herald<BR>
 89 | 
 90 | 	<H2>Useful Stuff</H2>
 91 | 	<A HREF="/start/">Andy's Magic Start Page</A>: Bunches of your favorite search engines, all in one place<BR>
 92 | 	<A HREF="/adds/">Add-a-page Page</A>: Bunches of website submission forms, all in one place<BR>
 93 | 	<A HREF="cf/">Cold Fusion stuff</A>: CFX_HTTP and other custom tags
 94 | 
 95 | 	<H2>Musical Information, etc</H2>
 96 |     	<A HREF="/nr/">Naked Raygun</A>: Bring your mom and your mom's friends, too<BR>
 97 | 	<A HREF="/actionpark/">Action Park</A>: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini<BR>
 98 | 
 99 | 	<H2>Other things that might be fun</h2>
100 |     	<A HREF="http://www.parsley.org/"><CITE>When In Doubt, Use Parsley</CITE></A>: Amy's journal<BR>
101 | 	<A HREF="/wonder/"><CITE>Wonder</CITE></A>: Andy's old journal<BR>
102 |     	<A HREF="/us/trip/">So American It Hurts</A>: Andy &amp; Amy's trip to Graceland<BR>
103 |     	<A HREF="/andy/appliances/">Gallery Of Household Appliances</A><BR>
104 | 	<A HREF="/wants/">Media Wants</A>: Stuff I yearn to consume<BR>
105 | 	<A HREF="/andy/bingo/">Buzzword Bingo</A>
106 | 
107 | 	</DIV>
108 | 
109 | </BODY>
110 | </HTML>
111 | 


--------------------------------------------------------------------------------
/t/ignore.t:
--------------------------------------------------------------------------------
  1 | #!perl -T
  2 | 
  3 | use strict;
  4 | use warnings;
  5 | 
  6 | use Test::More tests => 9;
  7 | 
  8 | use HTML::Tidy;
  9 | 
 10 | my $html = do { local $/ = undef; <DATA> };
 11 | 
 12 | my @expected_warnings = split /\n/, q{
 13 | - (1:1) Warning: missing <!DOCTYPE> declaration
 14 | - (23:1) Warning: discarding unexpected <bogotag>
 15 | - (24:XX) Warning: unescaped & which should be written as &amp;
 16 | - (24:XX) Warning: unescaped & which should be written as &amp;
 17 | };
 18 | chomp @expected_warnings;
 19 | shift @expected_warnings; # First one's blank
 20 | 
 21 | my @expected_errors = split /\n/, q{
 22 | - (23:1) Error: <bogotag> is not recognized!
 23 | };
 24 | chomp @expected_errors;
 25 | shift @expected_errors; # First one's blank
 26 | 
 27 | WARNINGS_ONLY: {
 28 |     my $tidy = HTML::Tidy->new;
 29 |     isa_ok( $tidy, 'HTML::Tidy' );
 30 | 
 31 |     $tidy->ignore( type => TIDY_ERROR );
 32 |     my $rc = $tidy->parse( '-', $html );
 33 |     ok( $rc, 'Parsed OK' );
 34 | 
 35 |     my @returned = map { $_->as_string } $tidy->messages;
 36 |     s/[\r\n]+\z// for @returned;
 37 |     munge_returned( \@returned );
 38 |     is_deeply( \@returned, \@expected_warnings, 'Matching warnings' );
 39 | }
 40 | 
 41 | ERRORS_ONLY: {
 42 |     my $tidy = HTML::Tidy->new;
 43 |     isa_ok( $tidy, 'HTML::Tidy' );
 44 | 
 45 |     $tidy->ignore( type => TIDY_WARNING );
 46 |     my $rc = $tidy->parse( '-', $html );
 47 |     ok( $rc, 'Parsed OK' );
 48 | 
 49 |     my @returned = map { $_->as_string } $tidy->messages;
 50 |     s/[\r\n]+\z// for @returned;
 51 |     is_deeply( \@returned, \@expected_errors, 'Matching errors' );
 52 | }
 53 | 
 54 | DIES_ON_ERROR: {
 55 |     my $tidy = HTML::Tidy->new;
 56 |     isa_ok( $tidy, 'HTML::Tidy' );
 57 | 
 58 |     my $rc = eval { $tidy->ignore( blongo => TIDY_WARNING ) };
 59 |     ok( !$rc, 'eval should fail' );
 60 |     like( $@, qr/^Invalid ignore type.+blongo/, 'Throws an error' );
 61 | }
 62 | 
 63 | sub munge_returned {
 64 |     # non-1 line numbers are not reliable across libtidies
 65 |     my $returned = shift;
 66 |     my $start_line = shift || '-';
 67 | 
 68 |     for my $line ( @{$returned} ) {
 69 |         next if $line =~ /$start_line \(\d+:1\)/;
 70 |         $line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/;
 71 |     }
 72 | }
 73 | __DATA__
 74 | <HTML>
 75 | <HEAD>
 76 | 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
 77 | 	<META NAME="Author" Content="Andy Lester">
 78 |     	<TITLE>petdance.com: Andy Lester's Programming &amp; Writing</TITLE>
 79 | 
 80 | <STYLE type="text/css">
 81 | <!--
 82 | DIV.TOC H2 {
 83 |     font-family : tahoma, arial, helvetica, sans-serif;
 84 |     font-weight : bold;
 85 |     font-size : 15pt;
 86 |     }
 87 | 
 88 | DIV.TOC P {
 89 |     font-size : 12pt;
 90 |     font-weight : normal;
 91 |     }
 92 | -->
 93 | </STYLE>
 94 | </HEAD>
 95 | <BODY BGCOLOR="white">
 96 | <BOGOTAG>
 97 |     <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT>
 98 | 	<DIV CLASS="TOC">
 99 | 	<h2>Perl, Programming &amp; Writing</h2>
100 | 	My <A HREF="http://www.oreillynet.com/pub/au/1249">Technology &amp; publishing blog</A> at oreillynet.com<BR>
101 | 	My <A HREF="http://use.perl.org/~petdance/journal/">Perl-specific and personal blog</A> at use.perl.org<BR>
102 | 	<A HREF="resume/">Andy Lester's resume</A><BR>
103 | 	Andy's <A HREF="perl/">Perl Pages</A>
104 | 
105 | 	<H2>The Lester Family</h2>
106 | 	<A HREF="/andy/">Andy</A>: The Dad<BR>
107 | 	<A HREF="http://www.parsley.org/">Amy</A>: The Mom<BR>
108 | 	<A HREF="http://www.parsley.org/quinn/">Quinn</A>: The Girl<BR>
109 | 	<A HREF="/baxter/">Baxter</A>: The Dog<BR>
110 | 	<A HREF="http://www.familytreemaker.com/users/l/e/s/Andrew-R-Lester/">Our family trees</A>
111 | 
112 | 	<H2>About Andy &amp; Amy</H2>
113 |     	<A HREF="/us/mush/">The Page Of Mush</A><BR>
114 | 	<A HREF="/us/people/">People we're looking for</A><BR>
115 |     	<A HREF="/us/herald/">Article about us and how we met</A> from the Northwest Herald<BR>
116 | 
117 | 	<H2>Useful Stuff</H2>
118 | 	<A HREF="/start/">Andy's Magic Start Page</A>: Bunches of your favorite search engines, all in one place<BR>
119 | 	<A HREF="/adds/">Add-a-page Page</A>: Bunches of website submission forms, all in one place<BR>
120 | 	<A HREF="cf/">Cold Fusion stuff</A>: CFX_HTTP and other custom tags
121 | 
122 | 	<H2>Musical Information, etc</H2>
123 |     	<A HREF="/nr/">Naked Raygun</A>: Bring your mom and your mom's friends, too<BR>
124 | 	<A HREF="/actionpark/">Action Park</A>: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini<BR>
125 | 
126 | 	<H2>Other things that might be fun</h2>
127 |     	<A HREF="http://www.parsley.org/"><CITE>When In Doubt, Use Parsley</CITE></A>: Amy's journal<BR>
128 | 	<A HREF="/wonder/"><CITE>Wonder</CITE></A>: Andy's old journal<BR>
129 |     	<A HREF="/us/trip/">So American It Hurts</A>: Andy &amp; Amy's trip to Graceland<BR>
130 |     	<A HREF="/andy/appliances/">Gallery Of Household Appliances</A><BR>
131 | 	<A HREF="/wants/">Media Wants</A>: Stuff I yearn to consume<BR>
132 | 	<A HREF="/andy/bingo/">Buzzword Bingo</A>
133 | 
134 | 	</DIV>
135 | 
136 | </BODY>
137 | </HTML>
138 | 


--------------------------------------------------------------------------------
/t/illegal-options.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::Exception;
 7 | use Test::More;
 8 | 
 9 | use HTML::Tidy;
10 | 
11 | my @unsupported_options = qw(
12 |     force-output
13 |     gnu-emacs-file
14 |     gnu-emacs
15 |     keep-time
16 |     quiet
17 |     slide-style
18 |     write-back
19 | );
20 | 
21 | foreach my $option ( @unsupported_options ) {
22 |     throws_ok {
23 |         HTML::Tidy->new(
24 |             {  config_file => 't/cfg-for-parse.cfg',
25 |                $option     => 1,
26 |             }
27 |         );
28 |     } qr/\QUnsupported option: $option\E/,
29 |     "option $option is not supported";
30 | }
31 | 
32 | done_testing();
33 | 


--------------------------------------------------------------------------------
/t/levels.t:
--------------------------------------------------------------------------------
  1 | #!perl -T
  2 | 
  3 | use warnings;
  4 | use strict;
  5 | 
  6 | use Test::More tests => 3;
  7 | 
  8 | use HTML::Tidy;
  9 | 
 10 | my $tidy = HTML::Tidy->new;
 11 | isa_ok( $tidy, 'HTML::Tidy' );
 12 | my $rc = $tidy->parse( '-', <DATA> );
 13 | ok( $rc, 'Parsed OK' );
 14 | 
 15 | my @expected = split /\n/, q{
 16 | - (1:1) Warning: missing <!DOCTYPE> declaration
 17 | - (23:1) Error: <bogotag> is not recognized!
 18 | - (23:1) Warning: discarding unexpected <bogotag>
 19 | - (24:XX) Warning: unescaped & which should be written as &amp;
 20 | - (24:XX) Warning: unescaped & which should be written as &amp;
 21 | };
 22 | chomp @expected;
 23 | shift @expected; # First one's blank
 24 | 
 25 | my @messages = map { $_->as_string } $tidy->messages;
 26 | s/[\r\n]+\z// for @messages;
 27 | munge_returned( \@messages );
 28 | is_deeply( \@messages, \@expected, 'Matching messages' );
 29 | 
 30 | sub munge_returned {
 31 |     # non-1 line numbers are not reliable across libtidies
 32 |     my $returned = shift;
 33 |     my $start_line = shift || '-';
 34 | 
 35 |     for my $line ( @{$returned} ) {
 36 |         next if $line =~ /$start_line \(\d+:1\)/;
 37 |         $line =~ s/$start_line \((\d+):(\d+)\)/$start_line ($1:XX)/;
 38 |     }
 39 | 
 40 |     return;
 41 | }
 42 | 
 43 | __DATA__
 44 | <HTML>
 45 | <HEAD>
 46 | 	<META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
 47 | 	<META NAME="Author" Content="Andy Lester">
 48 |     	<TITLE>petdance.com: Andy Lester's Programming &amp; Writing</TITLE>
 49 | 
 50 | <STYLE type="text/css">
 51 | <!--
 52 | DIV.TOC H2 {
 53 |     font-family : tahoma, arial, helvetica, sans-serif;
 54 |     font-weight : bold;
 55 |     font-size : 15pt;
 56 |     }
 57 | 
 58 | DIV.TOC P {
 59 |     font-size : 12pt;
 60 |     font-weight : normal;
 61 |     }
 62 | -->
 63 | </STYLE>
 64 | </HEAD>
 65 | <BODY BGCOLOR="white">
 66 | <BOGOTAG>
 67 |     <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy & Amy's Pet Supplies & Dance Instruction" ALIGN=RIGHT>
 68 | 	<DIV CLASS="TOC">
 69 | 	<h2>Perl, Programming &amp; Writing</h2>
 70 | 	My <A HREF="http://www.oreillynet.com/pub/au/1249">Technology &amp; publishing blog</A> at oreillynet.com<BR>
 71 | 	My <A HREF="http://use.perl.org/~petdance/journal/">Perl-specific and personal blog</A> at use.perl.org<BR>
 72 | 	<A HREF="resume/">Andy Lester's resume</A><BR>
 73 | 	Andy's <A HREF="perl/">Perl Pages</A>
 74 | 
 75 | 	<H2>The Lester Family</h2>
 76 | 	<A HREF="/andy/">Andy</A>: The Dad<BR>
 77 | 	<A HREF="http://www.parsley.org/">Amy</A>: The Mom<BR>
 78 | 	<A HREF="http://www.parsley.org/quinn/">Quinn</A>: The Girl<BR>
 79 | 	<A HREF="/baxter/">Baxter</A>: The Dog<BR>
 80 | 	<A HREF="http://www.familytreemaker.com/users/l/e/s/Andrew-R-Lester/">Our family trees</A>
 81 | 
 82 | 	<H2>About Andy &amp; Amy</H2>
 83 |     	<A HREF="/us/mush/">The Page Of Mush</A><BR>
 84 | 	<A HREF="/us/people/">People we're looking for</A><BR>
 85 |     	<A HREF="/us/herald/">Article about us and how we met</A> from the Northwest Herald<BR>
 86 | 
 87 | 	<H2>Useful Stuff</H2>
 88 | 	<A HREF="/start/">Andy's Magic Start Page</A>: Bunches of your favorite search engines, all in one place<BR>
 89 | 	<A HREF="/adds/">Add-a-page Page</A>: Bunches of website submission forms, all in one place<BR>
 90 | 	<A HREF="cf/">Cold Fusion stuff</A>: CFX_HTTP and other custom tags
 91 | 
 92 | 	<H2>Musical Information, etc</H2>
 93 |     	<A HREF="/nr/">Naked Raygun</A>: Bring your mom and your mom's friends, too<BR>
 94 | 	<A HREF="/actionpark/">Action Park</A>: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini<BR>
 95 | 
 96 | 	<H2>Other things that might be fun</h2>
 97 |     	<A HREF="http://www.parsley.org/"><CITE>When In Doubt, Use Parsley</CITE></A>: Amy's journal<BR>
 98 | 	<A HREF="/wonder/"><CITE>Wonder</CITE></A>: Andy's old journal<BR>
 99 |     	<A HREF="/us/trip/">So American It Hurts</A>: Andy &amp; Amy's trip to Graceland<BR>
100 |     	<A HREF="/andy/appliances/">Gallery Of Household Appliances</A><BR>
101 | 	<A HREF="/wants/">Media Wants</A>: Stuff I yearn to consume<BR>
102 | 	<A HREF="/andy/bingo/">Buzzword Bingo</A>
103 | 
104 | 	</DIV>
105 | 
106 | </BODY>
107 | </HTML>
108 | 


--------------------------------------------------------------------------------
/t/message.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 4;
 7 | 
 8 | use HTML::Tidy;
 9 | use HTML::Tidy::Message;
10 | 
11 | WITH_LINE_NUMBERS: {
12 |     my $error = HTML::Tidy::Message->new( 'foo.pl', TIDY_ERROR, 2112, 5150, 'Blah blah' );
13 |     isa_ok( $error, 'HTML::Tidy::Message' );
14 | 
15 |     my %expected = (
16 |         file        => 'foo.pl',
17 |         type        => TIDY_ERROR,
18 |         line        => 2112,
19 |         column      => 5150,
20 |         text        => 'Blah blah',
21 |         as_string   => 'foo.pl (2112:5150) Error: Blah blah',
22 |     );
23 |     _match_up( $error, \%expected, 'With line numbers' );
24 | }
25 | 
26 | WITHOUT_LINE_NUMBERS: {
27 |     my $error = HTML::Tidy::Message->new( 'bar.pl', TIDY_WARNING, undef, undef, 'Blah blah' );
28 |     isa_ok( $error, 'HTML::Tidy::Message' );
29 | 
30 |     my %expected = (
31 |         file        => 'bar.pl',
32 |         type        => TIDY_WARNING,
33 |         line        => 0,
34 |         column      => 0,
35 |         text        => 'Blah blah',
36 |         as_string   => 'bar.pl - Warning: Blah blah',
37 |     );
38 |     _match_up( $error, \%expected, 'Without line numbers' );
39 | }
40 | 
41 | sub _match_up {
42 |     local $Test::Builder::Level = $Test::Builder::Level + 1;
43 | 
44 |     my $error    = shift;
45 |     my $expected = shift;
46 |     my $msg      = shift or die;
47 | 
48 |     return subtest "_matchup( $msg )" => sub {
49 |         plan tests => scalar keys %{$expected};
50 | 
51 |         for my $what ( sort keys %{$expected} ) {
52 |             is( $error->$what, $expected->{$what}, "$what matches" );
53 |         }
54 |     };
55 | }
56 | 


--------------------------------------------------------------------------------
/t/opt-00.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 1;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $tidy = HTML::Tidy->new({
11 |     tidy_mark           => 0,
12 |     add_xml_decl        => 1,
13 |     output_xhtml        => 1,
14 |     doctype             => 'strict',
15 |     clean               => 1,
16 |     css_prefix          => 'myprefix',
17 |     drop_empty_paras    => 0,
18 |     enclose_block_text  => 1,
19 |     escape_cdata        => 1,
20 |     hide_comments       => 1,
21 |     replace_color       => 1,
22 |     repeated_attributes => 'keep-first',
23 |     break_before_br     => 1,
24 |     vertical_space      => 1,
25 |     newline             => 'cr',
26 | });
27 | 
28 | my $input=<<'EOD';
29 | <h1>example</h1>
30 | Here's some <B>ed and <BR/>eakfest MarkUp: <!-- ... -->
31 | <font color="#0000FF" color="RED">...</font>
32 | <p></P>
33 | EOD
34 | 
35 | my $expected =<<'EOD';
36 | <?xml version="1.0"?>
37 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
38 |     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
39 | <html xmlns="http://www.w3.org/1999/xhtml">
40 | <head>
41 | <title>
42 | </title>
43 | <style type="text/css">
44 | /*<![CDATA[*/
45 |  span.myprefix-1 {color: blue}
46 | /*]]>*/
47 | </style>
48 | </head>
49 | <body>
50 | <h1>example</h1>
51 | 
52 | <p>Here's some <b>ed and
53 | <br />
54 | eakfest MarkUp: <span class="myprefix-1">...</span></b></p>
55 | 
56 | <p>
57 | </p>
58 | </body>
59 | </html>
60 | EOD
61 | 
62 | my @expected = split(/\n/, $expected);
63 | 
64 | my $result = $tidy->clean( $input );
65 | my @result = split(/\r/, $result);
66 | 
67 | is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected');
68 | 
69 | 


--------------------------------------------------------------------------------
/t/parse-crash.t:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl -T
  2 | 
  3 | use warnings;
  4 | use strict;
  5 | 
  6 | # From a crashy-crash that Bob Diss and Mike O'Regan were tracking down.
  7 | # I've been unable to get it to fail again, but we might as well make a
  8 | # test out of it.
  9 | 
 10 | use Test::More tests => 2;
 11 | 
 12 | use HTML::Tidy;
 13 | 
 14 | my $html = do { local $/; <DATA> };
 15 | 
 16 | my $tidy = HTML::Tidy->new;
 17 | isa_ok( $tidy, 'HTML::Tidy' );
 18 | $tidy->ignore( type => TIDY_INFO );
 19 | $tidy->clean( $html );
 20 | 
 21 | my @mess = map { $_ ? $_->as_string() : undef } $tidy->messages();
 22 | pass( 'Ended OK' );
 23 | 
 24 | __DATA__
 25 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 26 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 27 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
 28 | <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" />
 29 | <title>TITLEWAVE - Username &amp; Password Assistance</title>
 30 | <link href="/css/main-0.css" type="text/css" rel="stylesheet" media="all" />
 31 | <script type="text/javascript" src="/javascript/tw-0.js"></script>
 32 | <script type="text/javascript" src="/javascript/jquery-0.js"></script>
 33 | <script type="text/javascript" src="/javascript/jquery.hoverIntent-0.js"></script>
 34 | <script type="text/javascript" src="/javascript/hovering-0.js"></script>
 35 | <script type="text/javascript" src="/javascript/jquery.bgiframe-0.js"></script>
 36 | </head>
 37 | <body >
 38 | <div id="header">
 39 | <table width="100%" border="0" cellspacing="0" cellpadding="0">
 40 | <tr>
 41 | <td>
 42 | <a href="/index.html">
 43 | <img alt="country-graphic" src="/graphics/hc/titlewave/titlewave.gif"/>
 44 | </a>
 45 | </td>
 46 | <td align="right" valign="top">
 47 | <div id="nav">
 48 | <a name="_home" href="/index.html">
 49 | <img src="/graphics/hc/links/home.gif" alt="Home" />
 50 | </a>
 51 | </div>
 52 | </td>
 53 | </tr>
 54 | </table><div class="divider-dark"><!-- --></div><div class="divider-light"><!-- --></div></div>
 55 | <div id="container">
 56 | <table width="100%" border="0" cellspacing="0" cellpadding="5">
 57 | <tr>
 58 | <td valign="top">
 59 | <h1><span class="smallcaps">Titlewave</span> Username &amp; Password Assistance</h1><div class="divider"><!-- --></div><div class="spacer10"><!-- --></div> <form method="post" name="pwd_recover_form" action="/register/recover"><h2>Forgot your username or password?</h2>
 60 | <p>
 61 | If you have forgotten your <span class="smallcaps">Titlewave</span> username or password, simply enter
 62 | the e-mail address you registered with and we will e-mail it to you.
 63 | </p><div class="spacer20"><!-- --></div><h2>Please enter your e-mail.</h2>
 64 | <table class="form" width="100%" border="0" cellspacing="0" cellpadding="5">
 65 | <tr>
 66 | <td align="right" nowrap="nowrap" width="150">
 67 | <p class="label">E-Mail Address:&nbsp;</p>
 68 | </td>
 69 | <td>
 70 | <input type="text" name="_email" size="35" maxlength="100" value="" />
 71 | <input id="_email_go" class="button" type="image" name="_email_go" src="/graphics/hc/buttons/go.gif" />
 72 | </td>
 73 | </tr>
 74 | </table> </form>
 75 | </td>
 76 | <td valign="top" width="160"><div class="sidebar">
 77 | <h2>May We Help You?</h2>
 78 | <div class="divider"><!-- --></div>
 79 | <div class="content">
 80 | <p>We're available to answer any questions you may have about <span class="smallcaps">Titlewave </span> and your orders.</p>
 81 | <div class="spacer5"><!-- --></div>
 82 | <h4><span class="smallcaps">Titlewave</span> Questions</h4>
 83 | <p>Contact our Library Service Consultants Monday through Friday, 7:00 a.m. to 5:00 p.m. Central Time.<br/>
 84 | Phone: 888.511.5114 ext. 1164<br/>
 85 | E-mail: <a href="mailto:lsc@flr.follett.com">lsc@flr.follett.com</a></p>
 86 | <div class="spacer5"><!-- --></div>
 87 | <h4>Technical Support</h4>
 88 | <p>Contact us Monday through Friday, 7:45 a.m. to 4:15 p.m. Central Time.<br/>
 89 | Phone: 888.511.5114 ext. 1513<br/>
 90 | E-mail: <a href="mailto:titlewave@flr.follett.com">titlewave@flr.follett.com</a></p>
 91 | <div class="spacer5"><!-- --></div>
 92 | <h4>Order Questions</h4>
 93 | <p>Contact our Customer Care Consultants Monday through Friday, 6:00 a.m. to 7:00 p.m. Central Time.<br/>
 94 | Phone: 888.511.5114 ext. 1102<br/>
 95 | E-mail: <a href="mailto:customerservice@flr.follett.com">customerservice@flr.follett.com</a></p>
 96 | <div class="divider"><!-- --></div>
 97 | </div>
 98 | </div> </td>
 99 | </tr>
100 | </table>
101 | </div>
102 | <div id="footer"><div class="divider"><!-- --></div>
103 | <div id="contact_info">
104 | <p>TITLEWAVE Questions: Contact our Library Service Consultants at
105 | <a href="mailto:lsc@flr.follett.com">lsc@flr.follett.com</a> or 888.511.5114 ext. 1164 or use our <a href="/help" onclick="window.open('/help','ExportHelp',config='width=600,height=450,menubar=1,scrollbars=1'); return false;">Online Help</a><br />
106 | Order Questions: Contact our Customer Care Consultants at <a href="mailto:customerservice@flr.follett.com">customerservice@flr.follett.com</a> or
107 | 888.511.5114 ext. 1102 </p>
108 | <p>Copyright &copy; 2010 Follett Library Resources, Inc. -
109 | 1340 Ridgeview Drive - McHenry, Illinois 60050<br />
110 | Phone: 888.511.5114 or 815.759.1700 -
111 | Fax: 800.852.5458 or 815.759.9831
112 | </p>
113 | </div>
114 | </div></body></html>
115 | 


--------------------------------------------------------------------------------
/t/parse-errors.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -T
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::Exception;
 7 | use Test::More tests => 4;
 8 | 
 9 | use HTML::Tidy;
10 | 
11 | my $tidy = HTML::Tidy->new;
12 | 
13 | my $errbuf = do {
14 |     local $/;
15 |     readline(*DATA);
16 | };
17 | 
18 | CATCH_A_WARNING: {
19 |     my $stashed_warning;
20 |     my $ncalls = 0;
21 |     local $SIG{__WARN__} = sub { $stashed_warning = shift; ++$ncalls; };
22 | 
23 |     my $ret = $tidy->_parse_errors('fake_filename.html', $errbuf, "\n");
24 |     is( $ret, 1, 'encountered 1 parsing error' );
25 |     is( scalar @{$tidy->{messages}},  7, 'got 7 messages when parsing errors' );
26 | 
27 |     # Check our warnings.
28 |     is( $ncalls, 1, 'Warning should have been called exactly once' );
29 |     like( $stashed_warning, qr/HTML::Tidy: Unknown error type: FAKE_ERROR_TYPE at/, 'Expected warning' );
30 | }
31 | 
32 | __DATA__
33 | line 1 column 1 - Warning: missing <!DOCTYPE> declaration
34 | line 1 column 1 - Warning: plain text isn\'t allowed in <head> elements
35 | line 1 column 1 - Info: <head> previously mentioned
36 | line 1 column 1 - Warning: inserting implicit <body>
37 | line 1 column 13 - Warning: missing </b>
38 | line 1 column 1 - Warning: inserting missing \'title\' element
39 | Info: Document content looks like HTML 3.2
40 | 
41 | FAKE_ERROR_TYPE
42 | 5 warnings, 0 errors were found!
43 | 
44 | 


--------------------------------------------------------------------------------
/t/parse.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -T
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::Exception;
 7 | use Test::More tests => 2;
 8 | 
 9 | use HTML::Tidy;
10 | 
11 | my $tidy = HTML::Tidy->new;
12 | isa_ok( $tidy, 'HTML::Tidy' );
13 | 
14 | my $expected_pattern = 'Usage: parse($filename,$str [, $str...])';
15 | throws_ok {
16 |     $tidy->parse('fake-filename.txt');
17 | } qr/\Q$expected_pattern\E/,
18 | 'parse() dies when not given a string or array of strings to parse';
19 | 


--------------------------------------------------------------------------------
/t/perfect.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 3;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $html = join '', <DATA>;
11 | 
12 | # No errors at all.
13 | 
14 | my $tidy = HTML::Tidy->new;
15 | isa_ok( $tidy, 'HTML::Tidy' );
16 | 
17 | $tidy->ignore( type => TIDY_INFO );
18 | my $rc = $tidy->parse( '-', $html );
19 | ok( $rc, 'Parsed OK' );
20 | 
21 | my @returned = map { $_->as_string } $tidy->messages;
22 | is_deeply( \@returned, [], 'Should have no messages' );
23 | # User reported a segfault when there are no messages.  By gum, he was
24 | # right.
25 | 
26 | __DATA__
27 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
28 | "http://www.w3.org/TR/html4/loose.dtd">
29 | <HTML>
30 | <HEAD>
31 |         <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
32 |         <META NAME="Author" Content="Andy Lester">
33 |         <TITLE>petdance.com: Andy Lester's Programming &amp; Writing</TITLE>
34 | 
35 | <STYLE type="text/css">
36 | <!--
37 | DIV.TOC H2 {
38 |     font-family : tahoma, arial, helvetica, sans-serif;
39 |     font-weight : bold;
40 |     font-size : 15pt;
41 |     }
42 | 
43 | DIV.TOC P {
44 |     font-size : 12pt;
45 |     font-weight : normal;
46 |     }
47 | -->
48 | </STYLE>
49 | </HEAD>
50 | <BODY BGCOLOR="white">
51 | 
52 |     <IMG SRC="/pix/petdance-logo-400x312.gif" HEIGHT=312 WIDTH=400 ALT="Andy &amp; Amy's Pet Supplies &amp; Dance Instruction" ALIGN=RIGHT>
53 |         <DIV CLASS="TOC">
54 |         <h2>Perl, Programming &amp; Writing</h2>
55 |         My <A HREF="http://www.oreillynet.com/pub/au/1249">Technology &amp; publishing blog</A> at oreillynet.com<BR>
56 |         My <A HREF="http://use.perl.org/~petdance/journal/">Perl-specific and personal blog</A> at use.perl.org<BR>
57 |         <A HREF="resume/">Andy Lester's resume</A><BR>
58 |         Andy's <A HREF="perl/">Perl Pages</A>
59 | 
60 |         <H2>The Lester Family</h2>
61 |         <A HREF="/andy/">Andy</A>: The Dad<BR>
62 |         <A HREF="http://www.parsley.org/">Amy</A>: The Mom<BR>
63 |         <A HREF="http://www.parsley.org/quinn/">Quinn</A>: The Girl<BR>
64 |         <A HREF="/baxter/">Baxter</A>: The Dog<BR>
65 |         <A HREF="http://www.familytreemaker.com/users/l/e/s/Andrew-R-Lester/">Our family trees</A>
66 | 
67 |         <H2>About Andy &amp; Amy</H2>
68 |         <A HREF="/us/mush/">The Page Of Mush</A><BR>
69 |         <A HREF="/us/people/">People we're looking for</A><BR>
70 |         <A HREF="/us/herald/">Article about us and how we met</A> from the Northwest Herald<BR>
71 | 
72 |         <H2>Useful Stuff</H2>
73 |         <A HREF="/start/">Andy's Magic Start Page</A>: Bunches of your favorite search engines, all in one place<BR>
74 |         <A HREF="/adds/">Add-a-page Page</A>: Bunches of website submission forms, all in one place<BR>
75 |         <A HREF="cf/">Cold Fusion stuff</A>: CFX_HTTP and other custom tags
76 | 
77 |         <H2>Musical Information, etc</H2>
78 |         <A HREF="/nr/">Naked Raygun</A>: Bring your mom and your mom's friends, too<BR>
79 |         <A HREF="/actionpark/">Action Park</A>: A compendium of information about Big Black, Rapeman, Shellac and Steve Albini<BR>
80 | 
81 |         <H2>Other things that might be fun</h2>
82 |         <A HREF="http://www.parsley.org/"><CITE>When In Doubt, Use Parsley</CITE></A>: Amy's journal<BR>
83 |         <A HREF="/wonder/"><CITE>Wonder</CITE></A>: Andy's old journal<BR>
84 |         <A HREF="/us/trip/">So American It Hurts</A>: Andy &amp; Amy's trip to Graceland<BR>
85 |         <A HREF="/andy/appliances/">Gallery Of Household Appliances</A><BR>
86 |         <A HREF="/wants/">Media Wants</A>: Stuff I yearn to consume<BR>
87 |         <A HREF="/andy/bingo/">Buzzword Bingo</A>
88 | 
89 |         </DIV>
90 | 
91 | </BODY>
92 | </HTML>
93 | 


--------------------------------------------------------------------------------
/t/roundtrip.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 3;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $args = { newline => 'Lf' };
11 | my $tidy = HTML::Tidy->new($args);
12 | isa_ok( $tidy, 'HTML::Tidy' );
13 | $tidy->ignore( type => TIDY_INFO );
14 | 
15 | # clean once
16 | $tidy->ignore( text => qr/DOCTYPE/ );
17 | my $html = '<a href="http://www.example.com/"><em>This is a test.</a>';
18 | my $clean = $tidy->clean( $html );
19 | 
20 | # then verify that it meets tidy's high standards
21 | $tidy = HTML::Tidy->new($args); # reset messages;
22 | $tidy->ignore( type => TIDY_INFO );
23 | $clean = $tidy->clean($clean);
24 | my @messages = $tidy->messages( $clean );
25 | 
26 | is_deeply( \@messages, [], q{The cleaned stuff shouldn't have any errors} );
27 | 
28 | $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
29 | 
30 | my $expected = do { local $/ = undef; <DATA> };
31 | is( $clean, $expected, 'Cleaned up properly' );
32 | 
33 | __DATA__
34 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
35 | <html>
36 | <head>
37 | <meta name="generator" content="Tidy">
38 | <title></title>
39 | </head>
40 | <body>
41 | <a href="http://www.example.com/"><em>This is a test.</em></a>
42 | </body>
43 | </html>
44 | 


--------------------------------------------------------------------------------
/t/segfault-form.t:
--------------------------------------------------------------------------------
 1 | #!perl -Tw
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use Test::More tests => 3;
 6 | 
 7 | use HTML::Tidy;
 8 | my $html = do { local $/ = undef; <DATA>; };
 9 | 
10 | my $tidy = HTML::Tidy->new;
11 | isa_ok( $tidy, 'HTML::Tidy' );
12 | $tidy->clean( $html );
13 | isa_ok( $tidy, 'HTML::Tidy' );
14 | pass( 'Cleaned OK' );
15 | 
16 | __DATA__
17 | <form action="http://www.alternation.net/cobra/index.pl">
18 | <td><input name="random" type="image" value="random creature" src="http://www.creaturesinmyhead.com/images/random.gif"></td>
19 | </form>
20 | 


--------------------------------------------------------------------------------
/t/simple.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 4;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $html = join '', <DATA>;
11 | 
12 | my $tidy = HTML::Tidy->new;
13 | isa_ok( $tidy, 'HTML::Tidy' );
14 | 
15 | $tidy->ignore( type => TIDY_INFO );
16 | my $rc = $tidy->parse( '-', $html );
17 | ok( $rc, 'Parsed OK' );
18 | 
19 | my @messages = $tidy->messages;
20 | is( scalar @messages, 5, 'Right number of initial messages' );
21 | 
22 | $tidy->clear_messages;
23 | is_deeply( [$tidy->messages], [], 'Cleared the messages' );
24 | 
25 | __DATA__
26 | <html>
27 |     <body><head>blah blah</head>
28 |         <title>Barf</title>
29 |         <body>
30 |             <p>more blah
31 |             </P>
32 |         </body>
33 |     </html>
34 | 
35 | 


--------------------------------------------------------------------------------
/t/too-many-titles.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 3;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $html = join '', <DATA>;
11 | 
12 | my @expected = split /\n/, q{
13 | - (1:1) Warning: missing <!DOCTYPE> declaration
14 | - (4:9) Warning: too many title elements in <head>
15 | };
16 | chomp @expected;
17 | shift @expected; # First one's blank
18 | 
19 | my $tidy = HTML::Tidy->new;
20 | isa_ok( $tidy, 'HTML::Tidy' );
21 | $tidy->ignore( type => TIDY_INFO );
22 | my $rc = $tidy->parse( '-', $html );
23 | ok( $rc, 'Parsed OK' );
24 | 
25 | my @returned = map { $_->as_string } $tidy->messages;
26 | s/[\r\n]+\z// for @returned;
27 | is_deeply( \@returned, \@expected, 'Matching warnings' );
28 | 
29 | __DATA__
30 | <HTML>
31 |     <HEAD>
32 |         <TITLE>Test stuff</TITLE>
33 |         <TITLE>As if one title isn't enough</TITLE>
34 |     </HEAD>
35 |     <BODY BGCOLOR="white">
36 |         <P>This is my paragraph</P>
37 |     </BODY>
38 | </HTML>
39 | 


--------------------------------------------------------------------------------
/t/unicode-nbsp.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 3;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | use Encode;
11 | 
12 | my $bytes_string = "\x{c2}\x{a0}"; #UTF8 nbsp
13 | my $perl_chars   = Encode::decode('utf8',$bytes_string); # Perl chars of utf8 byte string
14 | 
15 | my $tidy = HTML::Tidy->new({ show_body_only => 1 });
16 | 
17 | my $newline = $tidy->clean( '' ); # HTML::Tidy adds a platform-dependent "newline".
18 | like( $newline, qr/^\r?\n?$/, 'Tidy Newline' ); # should be CR or LF or both
19 | 
20 | my $expected_after_tidy = "&nbsp;$newline"; # HTML::Tidy should convert the nbsp to an HTML entity (and add a newline).
21 | 
22 | is( $tidy->clean( $perl_chars ), $expected_after_tidy, 'Perl chars OK' );
23 | is( $tidy->clean( $bytes_string ), $expected_after_tidy, 'Byte string OK' );
24 | 


--------------------------------------------------------------------------------
/t/unicode.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
 2 | <html>
 3 | <head>
 4 | <title>日本語のホムページ</title>
 5 | </head>
 6 | <body>
 7 | <p>Unicodeが好きですか?</p>
 8 | </body>
 9 | </html>
10 | 


--------------------------------------------------------------------------------
/t/unicode.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | # Copyright (c) 2006 Jonathan Rockway <jrockway@cpan.org>
 3 | 
 4 | use warnings;
 5 | use strict;
 6 | 
 7 | use Test::More tests => 9;
 8 | 
 9 | use HTML::Tidy;
10 | use Encode ();
11 | use Carp;
12 | 
13 | my $args = { newline => 'Lf' };
14 | my $tidy = HTML::Tidy->new($args);
15 | $tidy->ignore( type => TIDY_INFO );
16 | 
17 | # Suck in the reference HTML document.
18 | open( my $html_in, '<:utf8', 't/unicode.html' ) or Carp::croak( "Can't read unicode.html: $!" );
19 | my $html = do { local $/; <$html_in> };
20 | close $html_in;
21 | 
22 | # Suck in the correct, cleaned doc (from DATA)
23 | binmode DATA, ':utf8';
24 | my $reference = do {local $/; <DATA>};
25 | 
26 | # Make sure both are unicode characters (not utf-x octets).
27 | ok(utf8::is_utf8($html), 'html is utf8');
28 | ok(utf8::is_utf8($reference), 'reference is utf8');
29 | 
30 | my $clean = $tidy->clean( $html );
31 | ok(utf8::is_utf8($clean), 'cleaned output is also unicode');
32 | 
33 | $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
34 | $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
35 | is($clean, $reference, q{Cleanup didn't break anything});
36 | 
37 | my @messages = $tidy->messages;
38 | is_deeply( \@messages, [], q{There still shouldn't be any errors} );
39 | 
40 | $tidy = HTML::Tidy->new($args);
41 | isa_ok( $tidy, 'HTML::Tidy' );
42 | my $rc = $tidy->parse( '', $html );
43 | ok( $rc, 'Parsed OK' );
44 | @messages = $tidy->messages;
45 | is_deeply( \@messages, [], q{There still shouldn't be any errors} );
46 | 
47 | subtest 'Try send bytes to clean method.' => sub {
48 |     my $html = Encode::encode('utf8',$html);
49 |     ok(!utf8::is_utf8($html), 'html is row bytes');
50 |     my $clean = $tidy->clean( $html );
51 |     ok(utf8::is_utf8($clean), 'but cleaned output is string');
52 |     $clean =~ s/"HTML Tidy.+w3\.org"/"Tidy"/;
53 |     $clean =~ s/"(HTML Tidy|tidyp).+w3\.org"/"Tidy"/;
54 |     is($clean, $reference, q{Cleanup didn't break anything});
55 | };
56 | 
57 | __DATA__
58 | <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
59 | <html>
60 | <head>
61 | <meta name="generator" content="Tidy">
62 | <title>日本語のホムページ</title>
63 | </head>
64 | <body>
65 | <p>Unicodeが好きですか?</p>
66 | </body>
67 | </html>
68 | 


--------------------------------------------------------------------------------
/t/venus.cfg:
--------------------------------------------------------------------------------
 1 | // HTML Tidy configuration file 
 2 | bare: yes
 3 | // 
 4 | // This actually caused a segmentation fault in a MSHTML created doc
 5 | //
 6 | //clean: yes
 7 | drop-proprietary-attributes: yes
 8 | drop-empty-paras: yes
 9 | break-before-br: yes
10 | word-2000: yes
11 | //tidy-mark: yes
12 | tidy-mark: no
13 | //add-xml-space: yes
14 | output-xml: yes
15 | enclose-text: yes
16 | enclose-block-text: yes
17 | char-encoding: utf8
18 | force-output: yes
19 | 
20 | indent: yes
21 | quiet: yes
22 | //add-xml-decl: yes
23 | //gnu-emacs: yes
24 | 
25 | // make sure we are using "\n", even on Win32
26 | newline: LF
27 | 


--------------------------------------------------------------------------------
/t/venus.html:
--------------------------------------------------------------------------------
1 | <HTML>  <HEAD>  <META NAME="GENERATOR" CONTENT="Adobe PageMill 3.0 Mac">  <TITLE>Venus Flytrap for 100 Question</TITLE></HEAD><BODY BGCOLOR="#ffffff" LINK="#5b3d23" ALINK="#8c6136" VLINK="#be844a"BACKGROUND="../../WetlandGraphics/PaperBG.gif"><H1><CENTER><IMG SRC="../../WetlandGraphics/KildeerLogo2.gif"WIDTH="345" HEIGHT="21" ALIGN="BOTTOM" BORDER="0" NATURALSIZEFLAG="0"></CENTER></H1><H1><CENTER>Wetland Plants Jeopardy</CENTER></H1><H1><CENTER><FONT COLOR="#ed181e">Venus Flytrap for 100</FONT></CENTER></H1><H1><CENTER><IMG SRC="ST100.gif" WIDTH="100" HEIGHT="101" ALIGN="BOTTOM"NATURALSIZEFLAG="3"></CENTER></H1><P>&nbsp;</P><H2><CENTER><FONT COLOR="#ed181e">Question:</FONT> What does theVenus Flytrap feed on?</CENTER></H2><H4><CENTER><A HREF="Venus100Ans.html">Click here for the answer.</A></CENTER></H4><H4><CENTER><IMG SRC="../../WetlandGraphics/GoldbarThread.gif"WIDTH="648" HEIGHT="4" ALIGN="BOTTOM" NATURALSIZEFLAG="3"></CENTER></H4><H4><CENTER>| <A HREF="../../General/Map.html">Map</A> | <A HREF="../../General/SiteSearch.html">SiteSearch</A> | <A HREF="../../General/Terms.html">Terms</A> | <AHREF="../../General/Credits.html">Credits</A> | <A HREF="../../General/Feedback.html">Feedback</A>|</CENTER></H4><ADDRESS><CENTER>  <h4>    <center>      <p><img src="../../WetlandGraphics/GoldbarThread.gif"width="648" height="4" align="BOTTOM" naturalsizeflag="3"></p>    </center>  </h4>  <div align="center">     <address></address>  </div>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address></address>  <address>   <center>    <address>Created for the Museums in the Classroom program sponsored by Illinois     State Board of Education, the Brookfield Zoo, the Illinois State Museum.,     and Kildeer Countryside CCSD 96.</address>    <address>&nbsp;</address>    <address>Authors: Twin Groves Museums in the Classroom Team,</address>    <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>  </center>  </address>  <address></address>  <address>   <center>    Created: 27 June 1998- Updated: 6 October 2003   </center>  </address></CENTER></ADDRESS><ADDRESS></ADDRESS></BODY></HTML>


--------------------------------------------------------------------------------
/t/venus.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 2;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $filename = 't/venus.html';
11 | open( my $fh, '<', $filename ) or die "Can't open $filename: $!\n";
12 | my $raw = do { local $/ = undef; <$fh> };
13 | close $fh;
14 | 
15 | my $cfg = 't/venus.cfg';
16 | my $tidy = HTML::Tidy->new( {config_file => $cfg} );
17 | isa_ok( $tidy, 'HTML::Tidy' );
18 | 
19 | my $cooked = $tidy->clean( $raw );
20 | my @cooked = split( /\n/, $cooked );
21 | chomp @cooked;
22 | 
23 | my @expected = <DATA>;
24 | chomp @expected;
25 | is_deeply( \@cooked, \@expected, 'Cooked stuff looks like what we expected' );
26 | 
27 | __DATA__
28 | <html>
29 |   <head>
30 |     <meta name="GENERATOR" content="Adobe PageMill 3.0 Mac" />
31 |     <title>Venus Flytrap for 100 Question</title>
32 |   </head>
33 |   <body bgcolor="#FFFFFF" link="#5B3D23" alink="#8C6136" vlink="#BE844A" background="../../WetlandGraphics/PaperBG.gif">
34 |     <center>
35 |       <h1>
36 |         <img src="../../WetlandGraphics/KildeerLogo2.gif" width="345" height="21" align="bottom" border="0" />
37 |       </h1>
38 |     </center>
39 |     <center>
40 |       <h1>Wetland Plants Jeopardy</h1>
41 |     </center>
42 |     <center>
43 |       <h1>
44 |         <font color="#ED181E">Venus Flytrap for 100</font>
45 |       </h1>
46 |     </center>
47 |     <center>
48 |       <h1>
49 |         <img src="ST100.gif" width="100" height="101" align="bottom" />
50 |       </h1>
51 |     </center>
52 |     <p> </p>
53 |     <center>
54 |       <h2>
55 |       <font color="#ED181E">Question:</font> What does the Venus Flytrap feed on?</h2>
56 |     </center>
57 |     <center>
58 |       <h4>
59 |         <a href="Venus100Ans.html">Click here for the answer.</a>
60 |       </h4>
61 |     </center>
62 |     <center>
63 |       <h4>
64 |         <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
65 |       </h4>
66 |     </center>
67 |     <center>
68 |       <h4>| 
69 |       <a href="../../General/Map.html">Map</a> | 
70 |       <a href="../../General/SiteSearch.html">Site Search</a> | 
71 |       <a href="../../General/Terms.html">Terms</a> | 
72 |       <a href="../../General/Credits.html">Credits</a> | 
73 |       <a href="../../General/Feedback.html">Feedback</a> |</h4>
74 |     </center>
75 |     <center>
76 |       <p>
77 |         <img src="../../WetlandGraphics/GoldbarThread.gif" width="648" height="4" align="bottom" />
78 |       </p>
79 |     </center>
80 |     <div align="center"></div>
81 |     <center>
82 |       <address>Created for the Museums in the Classroom program sponsored by Illinois State Board of Education, the Brookfield Zoo, the Illinois State Museum., and Kildeer Countryside CCSD 96.</address>
83 |       <address> </address>
84 |       <address>Authors: Twin Groves Museums in the Classroom Team,</address>
85 |       <address>School: Twin Groves Junior High School, Buffalo Grove, Illinois 60089</address>
86 |     </center>
87 |     <center>Created: 27 June 1998- Updated: 6 October 2003</center>
88 |   </body>
89 | </html>
90 | 


--------------------------------------------------------------------------------
/t/version.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 4;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | for my $version_string (HTML::Tidy->tidyp_version, HTML::Tidy->libtidyp_version) {
11 |     like( $version_string, qr/^\d\.\d{2,}$/, 'Valid version string' );
12 |     cmp_ok( $version_string, '>=', '0.90', 'Version is greater than 0.90, which is the one I maintain' );
13 | }
14 | 


--------------------------------------------------------------------------------
/t/wordwrap.cfg:
--------------------------------------------------------------------------------
1 | tidy-mark: 0
2 | newline: LF
3 | wrap: 12
4 | 


--------------------------------------------------------------------------------
/t/wordwrap.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use Test::More tests => 1;
 7 | 
 8 | use HTML::Tidy;
 9 | 
10 | my $input=q{Here's some <B>ed and <BR/>eakfest MarkUp};
11 | 
12 | my $expected=<<'EOD';
13 | <!DOCTYPE 
14 | html PUBLIC "-//W3C//DTD HTML 3.2//EN">
15 | <html>
16 | <head>
17 | <title>
18 | </title>
19 | </head>
20 | <body>
21 | Here's some
22 | <b>ed
23 | and<br>
24 | eakfest
25 | MarkUp</b>
26 | </body>
27 | </html>
28 | EOD
29 | my @expected = split(/\n/, $expected);
30 | 
31 | my $cfg = 't/wordwrap.cfg';
32 | my $tidy = HTML::Tidy->new( {config_file => $cfg} );
33 | 
34 | my $result = $tidy->clean( $input );
35 | my @result = split(/\n/, $result);
36 | is_deeply( \@result, \@expected, 'Cleaned stuff looks like what we expected');
37 | 
38 | 


--------------------------------------------------------------------------------
/tags:
--------------------------------------------------------------------------------
 1 | !_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
 2 | !_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
 3 | !_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert@users.sourceforge.net/
 4 | !_TAG_PROGRAM_NAME	Exuberant Ctags	//
 5 | !_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
 6 | !_TAG_PROGRAM_VERSION	5.8	//
 7 | DIES_ON_ERROR	t/ignore.t	/^DIES_ON_ERROR: {$/;"	l
 8 | ERRORS_ONLY	t/ignore.t	/^ERRORS_ONLY: {$/;"	l
 9 | HTML::Tidy	lib/HTML/Tidy.pm	/^package HTML::Tidy;$/;"	p
10 | HTML::Tidy::Message	lib/HTML/Tidy/Message.pm	/^package HTML::Tidy::Message;$/;"	p
11 | IGNORE_BOGOTAG	t/ignore-text.t	/^IGNORE_BOGOTAG: {$/;"	l
12 | PERL_ARGS_ASSERT_CROAK_XS_USAGE	Tidy.c	144;"	d	file:
13 | PERL_ARGS_ASSERT_CROAK_XS_USAGE	Tidy.c	171;"	d	file:
14 | PERL_DECIMAL_VERSION	Tidy.c	63;"	d	file:
15 | PERL_UNUSED_VAR	Tidy.c	50;"	d	file:
16 | PERL_VERSION_DECIMAL	Tidy.c	60;"	d	file:
17 | PERL_VERSION_GE	Tidy.c	67;"	d	file:
18 | PERL_VERSION_LE	Tidy.c	71;"	d	file:
19 | S_croak_xs_usage	Tidy.c	/^S_croak_xs_usage(pTHX_ const CV *const cv, const char *const params)$/;"	f
20 | TIDY_ERROR	lib/HTML/Tidy.pm	/^use constant TIDY_ERROR   => 3;$/;"	c
21 | TIDY_INFO	lib/HTML/Tidy.pm	/^use constant TIDY_INFO    => 1;$/;"	c
22 | TIDY_WARNING	lib/HTML/Tidy.pm	/^use constant TIDY_WARNING => 2;$/;"	c
23 | WARNINGS_ONLY	t/ignore.t	/^WARNINGS_ONLY: {$/;"	l
24 | WITHOUT_LINE_NUMBERS	t/message.t	/^WITHOUT_LINE_NUMBERS: {$/;"	l
25 | WITH_LINE_NUMBERS	t/message.t	/^WITH_LINE_NUMBERS: {$/;"	l
26 | XS_EUPXS	Tidy.c	/^XS_EUPXS(XS_HTML__Tidy__tidy_clean)$/;"	f
27 | XS_EUPXS	Tidy.c	/^XS_EUPXS(XS_HTML__Tidy__tidy_messages)$/;"	f
28 | XS_EUPXS	Tidy.c	/^XS_EUPXS(XS_HTML__Tidy__tidyp_version)$/;"	f
29 | XS_EUPXS	Tidy.c	135;"	d	file:
30 | XS_EUPXS	Tidy.c	137;"	d	file:
31 | XS_EUPXS	Tidy.c	140;"	d	file:
32 | XS_EXTERNAL	Tidy.c	/^XS_EXTERNAL(boot_HTML__Tidy)$/;"	f
33 | XS_EXTERNAL	Tidy.c	100;"	d	file:
34 | XS_EXTERNAL	Tidy.c	104;"	d	file:
35 | XS_EXTERNAL	Tidy.c	107;"	d	file:
36 | XS_EXTERNAL	Tidy.c	123;"	d	file:
37 | XS_EXTERNAL	Tidy.c	88;"	d	file:
38 | XS_EXTERNAL	Tidy.c	91;"	d	file:
39 | XS_EXTERNAL	Tidy.c	95;"	d	file:
40 | XS_HTML__Tidy__tidy_clean	Tidy.c	/^XS_EUPXS(XS_HTML__Tidy__tidy_clean); \/* prototype to pass -Wmissing-prototypes *\/$/;"	v
41 | XS_HTML__Tidy__tidy_messages	Tidy.c	/^XS_EUPXS(XS_HTML__Tidy__tidy_messages); \/* prototype to pass -Wmissing-prototypes *\/$/;"	v
42 | XS_HTML__Tidy__tidyp_version	Tidy.c	/^XS_EUPXS(XS_HTML__Tidy__tidyp_version); \/* prototype to pass -Wmissing-prototypes *\/$/;"	v
43 | XS_INTERNAL	Tidy.c	101;"	d	file:
44 | XS_INTERNAL	Tidy.c	105;"	d	file:
45 | XS_INTERNAL	Tidy.c	108;"	d	file:
46 | XS_INTERNAL	Tidy.c	126;"	d	file:
47 | XS_INTERNAL	Tidy.c	89;"	d	file:
48 | XS_INTERNAL	Tidy.c	92;"	d	file:
49 | XS_INTERNAL	Tidy.c	96;"	d	file:
50 | _is_keeper	lib/HTML/Tidy.pm	/^sub _is_keeper {$/;"	s
51 | _load_config_hash	Tidy.c	/^_load_config_hash(TidyDoc tdoc, HV *tidy_options)$/;"	f	file:
52 | _match_up	t/message.t	/^sub _match_up {$/;"	s
53 | _parse_errors	lib/HTML/Tidy.pm	/^sub _parse_errors {$/;"	s
54 | as_string	lib/HTML/Tidy/Message.pm	/^sub as_string {$/;"	s
55 | clean	lib/HTML/Tidy.pm	/^sub clean {$/;"	s
56 | clear_messages	lib/HTML/Tidy.pm	/^sub clear_messages {$/;"	s
57 | column	lib/HTML/Tidy/Message.pm	/^sub column  { my $self = shift; return $self->{_column} }$/;"	s
58 | croak_xs_usage	Tidy.c	174;"	d	file:
59 | croak_xs_usage	Tidy.c	176;"	d	file:
60 | dVAR	Tidy.c	54;"	d	file:
61 | file	lib/HTML/Tidy/Message.pm	/^sub file    { my $self = shift; return $self->{_file} }$/;"	s
62 | ignore	lib/HTML/Tidy.pm	/^sub ignore {$/;"	s
63 | libtidyp_version	lib/HTML/Tidy.pm	/^sub libtidyp_version { return shift->tidyp_version }$/;"	s
64 | line	lib/HTML/Tidy/Message.pm	/^sub line    { my $self = shift; return $self->{_line} }$/;"	s
65 | messages	lib/HTML/Tidy.pm	/^sub messages {$/;"	s
66 | munge_returned	t/ignore-text.t	/^sub munge_returned {$/;"	s
67 | munge_returned	t/ignore.t	/^sub munge_returned {$/;"	s
68 | munge_returned	t/levels.t	/^sub munge_returned {$/;"	s
69 | new	lib/HTML/Tidy.pm	/^sub new {$/;"	s
70 | new	lib/HTML/Tidy/Message.pm	/^sub new {$/;"	s
71 | newXSproto_portable	Tidy.c	185;"	d	file:
72 | newXSproto_portable	Tidy.c	187;"	d	file:
73 | parse	lib/HTML/Tidy.pm	/^sub parse {$/;"	s
74 | text	lib/HTML/Tidy/Message.pm	/^sub text    { my $self = shift; return $self->{_text} }$/;"	s
75 | tidyp_version	lib/HTML/Tidy.pm	/^sub tidyp_version {$/;"	s
76 | type	lib/HTML/Tidy/Message.pm	/^sub type    { my $self = shift; return $self->{_type} }$/;"	s
77 | where	lib/HTML/Tidy/Message.pm	/^sub where {$/;"	s
78 | 


--------------------------------------------------------------------------------
/xt/pod-coverage.t:
--------------------------------------------------------------------------------
 1 | #!perl -Tw
 2 | 
 3 | use 5.010001;
 4 | use strict;
 5 | use warnings;
 6 | 
 7 | use Test::More;
 8 | use Test::Pod::Coverage 1.04;
 9 | 
10 | all_pod_coverage_ok();
11 | 


--------------------------------------------------------------------------------
/xt/pod.t:
--------------------------------------------------------------------------------
 1 | #!perl -T
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | use 5.010001;
 7 | use Test::More;
 8 | 
 9 | use Test::Pod 1.14;
10 | all_pod_files_ok();
11 | 


--------------------------------------------------------------------------------